From 4a6e101410c8b77c42513bf3a9b5409ae9968779 Mon Sep 17 00:00:00 2001 From: Loretta Date: Tue, 26 May 2026 16:24:33 +0200 Subject: [PATCH] Unify AcBinary string marker; prefix-tier VarUInt encoding Refactored AcBinary to use a single String marker (167) for long-form strings, replacing StringLen8/16/32. Implemented prefix-tier VarUInt encoding for string lengths, introduced FixStrCount constant, and removed legacy LEB128 code paths. Updated all serialization/deserialization logic and documentation to match the new format. Includes related micro-optimizations and code cleanup. --- .../AcBinarySourceGenerator.GenReader.cs | 6 +- ...lizer.BinaryDeserializationContext.Read.cs | 137 +++++++----------- .../Binaries/AcBinaryDeserializer.cs | 33 +---- ...rySerializer.BinarySerializationContext.cs | 101 ++++++++----- .../Serializers/Binaries/BinaryTypeCode.cs | 42 +++--- .../Binaries/BufferWriterBinaryOutput.cs | 26 +++- AyCode.Core/docs/BINARY/BINARY_FORMAT.md | 116 ++++++++++----- AyCode.Core/docs/BINARY/BINARY_TODO.md | 67 +++++++++ 8 files changed, 322 insertions(+), 206 deletions(-) diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs index 59e2331..8e95d00 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs @@ -242,10 +242,8 @@ public partial class AcBinarySourceGenerator sb.AppendLine($"{i} case BinaryTypeCode.StringUtf16:"); sb.AppendLine($"{i} {a} = context.ReadStringUtf16Marker();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} case BinaryTypeCode.StringLen8:"); - sb.AppendLine($"{i} case BinaryTypeCode.StringLen16:"); - sb.AppendLine($"{i} case BinaryTypeCode.StringLen32:"); - sb.AppendLine($"{i} {a} = context.ReadUniversalLongStringByMarker({tc});"); + sb.AppendLine($"{i} case BinaryTypeCode.String:"); + 
sb.AppendLine($"{i} {a} = context.ReadUniversalLongString();"); sb.AppendLine($"{i} break;"); // Interning first-occurrence cases — see comment above. if (enableInternString) diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index 4bc05d6..4b32c7f 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -270,67 +270,59 @@ public static partial class AcBinaryDeserializer //if (FastWire) { return ReadRaw(); } var raw = ReadVarUInt(); - var value = (int)(raw >> 1) ^ -(int)(raw & 1); - return value; + return (int)(raw >> 1) ^ -(int)(raw & 1); } + /// + /// Prefix-tier VarUInt decode (UTF-8-style). Wire-size identical to legacy LEB128 across all + /// 5 tiers (7/14/21/28/32 bit); decode is loop-less — the first-byte prefix determines total + /// size in O(1), and each subsequent byte is read incrementally (no continuation-loop, no + /// per-byte shift cascade). + /// Tier table (first-byte pattern → total bytes → value range): + /// + /// 0xxxxxxx → 1 byte → 0..127 + /// 10xxxxxx → 2 byte → 128..16 383 + /// 110xxxxx → 3 byte → 16 384..2 097 151 + /// 1110xxxx → 4 byte → 2 097 152..268 435 455 + /// 1111xxxx → 5 byte → 268 435 456..4 294 967 295 (prefix nibble unused) + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public uint ReadVarUInt() { //if (FastWire) { return ReadRaw(); } - // Multi-segment safety: ensure at least 1 byte before direct buffer access. - // ArrayBinaryInput: TryAdvanceSegment => false (JIT eliminates this branch). 
- if (_position >= _bufferLength) - { - if (!Input.TryAdvanceSegment(ref _buffer, ref _position, ref _bufferLength, 1)) - throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position); - } + // ReadByte() routes through EnsureAvailable(1) — ArrayBinaryInput JIT-eliminates the + // bounds-check, multi-segment / AsyncPipeReaderInput advances cross-segment as needed. + // All tiers are cross-segment safe without explicit segment-handling here. - // Fast path: single byte (0-127) - ~70% of cases - var b0 = _buffer[_position]; - if ((b0 & 0x80) == 0) - { - _position++; - return b0; - } + // 1-byte tier (0..127) — ~70% of cases (the most common one for small ids, counts, indices). + var b0 = ReadByte(); + if (b0 < 0x80) return b0; - // Fast path: two bytes (128-16383) - ~25% of cases - if (_position + 1 < _bufferLength) - { - var b1 = _buffer[_position + 1]; - if ((b1 & 0x80) == 0) - { - _position += 2; - return (uint)(b0 & 0x7F) | ((uint)b1 << 7); - } - } + // 2-byte tier (128..16 383) — 10xxxxxx + 1B raw. + var b1 = ReadByte(); + if (b0 < 0xC0) return ((uint)(b0 & 0x3F) << 8) | b1; - // Slow path: 3+ bytes or cross-segment boundary — uses ReadByte() per byte - return ReadVarUIntSlow(); + // 3-byte tier (16 384..2 097 151) — 110xxxxx + 2B LE. + var b2 = ReadByte(); + if (b0 < 0xE0) return ((uint)(b0 & 0x1F) << 16) | ((uint)b2 << 8) | b1; + + // 4 / 5-byte tiers (rare — value ≥ 2 097 152) — handed off to non-inlined slow path + // with already-read bytes passed as args (no re-read). + return ReadVarUIntSlow(b0, b1, b2); } - private uint ReadVarUIntSlow() + [MethodImpl(MethodImplOptions.NoInlining)] + private uint ReadVarUIntSlow(byte b0, byte b1, byte b2) { - uint value = 0; - var shift = 0; - while (true) - { - var b = ReadByte(); - value |= (uint)(b & 0x7F) << shift; - if ((b & 0x80) == 0) - { - break; - } + // 4-byte tier (2 097 152..268 435 455) — 1110xxxx + 3B LE. 
+ var b3 = ReadByte(); + if (b0 < 0xF0) return ((uint)(b0 & 0x0F) << 24) | ((uint)b3 << 16) | ((uint)b2 << 8) | b1; - shift += 7; - if (shift > 35) - { - throw new AcBinaryDeserializationException("Invalid VarUInt encoding.", _position); - } - } - - return value; + // 5-byte tier (268 435 456..4 294 967 295) — 1111xxxx + 4B LE (prefix nibble unused). + var b4 = ReadByte(); + return ((uint)b4 << 24) | ((uint)b3 << 16) | ((uint)b2 << 8) | b1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -700,14 +692,7 @@ public static partial class AcBinaryDeserializer [MethodImpl(MethodImplOptions.AggressiveInlining)] internal string ReadUniversalLongString() { - ReadUniversalLongStringHeader(BinaryTypeCode.StringLen32, out var charLength, out var excess); - return ReadStringByUnsignedExcess(charLength, excess); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal string ReadUniversalLongStringByMarker(byte marker) - { - ReadUniversalLongStringHeader(marker, out var charLength, out var excess); + ReadUniversalLongStringHeader(out var charLength, out var excess); return ReadStringByUnsignedExcess(charLength, excess); } @@ -718,24 +703,15 @@ public static partial class AcBinaryDeserializer excess = ReadByte(); } + /// + /// Reads the long-form string header — single marker followed by + /// prefix-tier VarUInt charLength (offset by ) and a 1/2/4-byte + /// unsigned excess slot whose width is derived from charLength (see ). 
+ /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void ReadUniversalLongStringHeader(byte marker, out int charLength, out uint excess) + internal void ReadUniversalLongStringHeader(out int charLength, out uint excess) { - if (marker == BinaryTypeCode.StringLen8) - { - charLength = ReadByte(); - } - else if (marker == BinaryTypeCode.StringLen16) - { - charLength = ReadUInt16Unsafe(); - } - else - { - charLength = ReadInt32Unsafe(); - if (charLength < 0) - throw new AcBinaryDeserializationException($"Invalid string header: negative charLength ({charLength}).", _position); - } - + charLength = (int)ReadVarUInt() + BinaryTypeCode.FixStrCount; var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); if (slotSize == 1) excess = ReadByte(); @@ -825,10 +801,8 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.StringUtf16: value = ReadStringUtf16Marker(); return true; - case BinaryTypeCode.StringLen8: - case BinaryTypeCode.StringLen16: - case BinaryTypeCode.StringLen32: - ReadUniversalLongStringHeader(tc, out charLength, out excess); + case BinaryTypeCode.String: + ReadUniversalLongStringHeader(out charLength, out excess); break; case BinaryTypeCode.Null: return true; @@ -929,14 +903,13 @@ public static partial class AcBinaryDeserializer // ArrayBinaryInput → if (true) return; → method body entirely eliminated // SequenceBinaryInput → if (false) return; → guard eliminated, bounds-check kept // AsyncPipeReaderInput → if (false) return; → guard eliminated, bounds-check kept - if (TInput.IsTrustedSingleSegment) return; - if (_position > _bufferLength - length) - { - if (!Input.TryAdvanceSegment(ref _buffer, ref _position, ref _bufferLength, length)) - throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position); - AssertGuarantee(length); - } + if (TInput.IsTrustedSingleSegment || _position <= _bufferLength - length) return; + + if (!Input.TryAdvanceSegment(ref _buffer, ref _position, ref 
_bufferLength, length)) + throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position); + + AssertGuarantee(length); } [Conditional("DEBUG")] diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index bc3c6f4..4e87a00 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -105,9 +105,7 @@ public static partial class AcBinaryDeserializer // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); - readers[BinaryTypeCode.StringLen8] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen8); - readers[BinaryTypeCode.StringLen16] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen16); - readers[BinaryTypeCode.StringLen32] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen32); + readers[BinaryTypeCode.String] = static (ctx, _) => ctx.ReadUniversalLongString(); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); @@ -1050,10 +1048,8 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.StringUtf16: propInfo.SetValue(target, context.ReadStringUtf16Marker()); return true; - case BinaryTypeCode.StringLen8: - case BinaryTypeCode.StringLen16: - case BinaryTypeCode.StringLen32: - propInfo.SetValue(target, context.ReadUniversalLongStringByMarker(typeCode)); + case BinaryTypeCode.String: + propInfo.SetValue(target, 
context.ReadUniversalLongString()); return true; case BinaryTypeCode.StringEmpty: propInfo.SetValue(target, string.Empty); @@ -2045,27 +2041,10 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.Decimal: context.Skip(16); return; - case BinaryTypeCode.StringLen8: - case BinaryTypeCode.StringLen16: - case BinaryTypeCode.StringLen32: - // Skip layout: [charLength:1|2|4 by marker][unsigned excess slot][bytes] + case BinaryTypeCode.String: + // Skip layout: [VarUInt(charLength - FixStrCount)][unsigned excess slot:1|2|4][bytes] { - int charLength; - if (typeCode == BinaryTypeCode.StringLen8) - { - charLength = context.ReadByte(); - } - else if (typeCode == BinaryTypeCode.StringLen16) - { - charLength = context.ReadUInt16Unsafe(); - } - else - { - charLength = context.ReadInt32Unsafe(); - if (charLength < 0) - throw new AcBinaryDeserializationException($"Invalid string header while skipping: negative charLength ({charLength}).", context.Position); - } - + var charLength = (int)context.ReadVarUInt() + BinaryTypeCode.FixStrCount; var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); var excess = slotSize switch { diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 542487a..19e62ae 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -606,18 +606,47 @@ public static partial class AcBinarySerializer BufferAt(_position++) = (byte)value; return; } + WriteVarUIntMultiByteUnsafe(value); } + /// + /// Prefix-tier VarUInt encoding (UTF-8-style: first byte's high bits encode total size). + /// Compact path: picks the tier (2/3/4) in O(1), then a single + /// <> stores [prefix-byte | value-bytes LE] + /// in one machine instruction. 5-byte tier uses one byte + one uint32 store. 
+ /// Tier table: + /// 0xxxxxxx → 1 byte (handled inline by caller) + /// 10xxxxxx + 1B → 2 byte, 128..16 383 (14 bit) + /// 110xxxxx + 2B LE → 3 byte, 16 384..2 097 151 (21 bit) + /// 1110xxxx + 3B LE → 4 byte, 2 097 152..268 435 455 (28 bit) + /// 1111xxxx + 4B LE → 5 byte, 268 435 456..uint.MaxValue (32 bit; prefix nibble unused) + /// Caller MUST ensure ≥5 bytes of buffer space (interface contract) — the uint32 store on the + /// 2/3/4-byte tiers writes 4 bytes even though only `tier` bytes are advanced; the trailing + /// 1-2 bytes get overwritten by the next encoded element. Little-endian host assumed (all + /// shipping .NET 9 platforms). + /// [MethodImpl(MethodImplOptions.NoInlining)] private void WriteVarUIntMultiByteUnsafe(uint value) { - while (value >= 0x80) + if (value < 0x10000000) { - BufferAt(_position++) = (byte)(value | 0x80); - value >>= 7; + // 2/3/4-byte tier: tier ∈ {2,3,4}, shift ∈ {8,16,24}, prefix ∈ {0x80,0xC0,0xE0}. + // Packed uint32: byte0 = prefix | (value >> shift); bytes 1..3 = value LE (low 24 bits). + var tier = (BitOperations.Log2(value) / 7) + 1; + var shift = (tier - 1) << 3; + var prefix = (0xFF00u >> (tier - 1)) & 0xFFu; + var packed = (prefix | (value >> shift)) | (value << 8); + + Unsafe.WriteUnaligned(ref BufferAt(_position), packed); + _position += tier; + return; } - BufferAt(_position++) = (byte)value; + + // 5-byte tier: 0xF0 marker + full uint32 LE (prefix nibble unused). 
+ BufferAt(_position) = 0xF0; + Unsafe.WriteUnaligned(ref BufferAt(_position + 1), value); + _position += 5; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -868,7 +897,7 @@ public static partial class AcBinarySerializer /// Header is fully determined before encode: /// /// charLength <= 31: [FixStr(marker carries charLength)][unsigned excess:1] - /// charLength > 31: [StringLen8|StringLen16|StringLen32][charLength:1|2|4][unsigned excess:1|2|4] + /// charLength > 31: [String][VarUInt(charLength - FixStrCount)][unsigned excess:1|2|4] — single marker with prefix-tier VarUInt charLength /// /// Body is UTF-8-encoded exactly once to the final destination (encodeStart) — no post-encode /// body shift/copy. For the current path, excess = bytesWritten - charLength is expected to be @@ -892,42 +921,46 @@ public static partial class AcBinarySerializer // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit. var maxBytes = charLength * 3; - var isFixStr = charLength <= BinaryTypeCode.FixStrMaxLength; - var charLengthSize = isFixStr ? 0 : charLength <= byte.MaxValue ? 1 : charLength <= ushort.MaxValue ? 2 : 4; + + // Single branch on FixStr vs long-form — replaces the previous 4 ternary-on-isFixStr cascade. // IMPORTANT: the slot VALUE (excess) is not known before UTF-8 encode, but the slot SIZE is. // We reserve the slot by width (1/2/4) from charLength, so encodeStart is final and no body shift is needed. - var slotSize = isFixStr ? 1 : BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); - var headerSize = isFixStr ? 2 : 1 + charLengthSize + slotSize; - - EnsureCapacity(headerSize + maxBytes); - - var headerPos = _position; - var slotPos = isFixStr ? 
headerPos + 1 : headerPos + 1 + charLengthSize; - var encodeStart = headerPos + headerSize; - - if (isFixStr) + int slotSize, headerSize, headerPos, slotPos, encodeStart; + if (charLength <= BinaryTypeCode.FixStrMaxLength) { - // Universal short-form string marker with unsigned excess slot. + // FixStr: header = [marker:1][slot:1] + slotSize = 1; + headerSize = 2; + + EnsureCapacity(headerSize + maxBytes); + + headerPos = _position; + slotPos = headerPos + 1; + encodeStart = headerPos + 2; + BufferAt(headerPos) = BinaryTypeCode.EncodeFixStr(charLength); } else { - // Universal long-form markers with marker-coded charLength width. - if (charLengthSize == 1) - { - BufferAt(headerPos) = BinaryTypeCode.StringLen8; - BufferAt(headerPos + 1) = unchecked((byte)charLength); - } - else if (charLengthSize == 2) - { - BufferAt(headerPos) = BinaryTypeCode.StringLen16; - Unsafe.WriteUnaligned(ref BufferAt(headerPos + 1), unchecked((ushort)charLength)); - } - else - { - BufferAt(headerPos) = BinaryTypeCode.StringLen32; - Unsafe.WriteUnaligned(ref BufferAt(headerPos + 1), charLength); - } + // Long-form: header = [marker:1][VarUInt(charLength - FixStrCount)][slot:1|2|4] + // FixStr already covers 0..FixStrMaxLength, so wireLen = charLength - FixStrCount + // keeps the small-band 1-byte VarUInt populated. + slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); + var varUIntSize = VarUIntSize((uint)(charLength - BinaryTypeCode.FixStrCount)); + headerSize = 1 + varUIntSize + slotSize; + + EnsureCapacity(headerSize + maxBytes); + + headerPos = _position; + slotPos = headerPos + 1 + varUIntSize; + encodeStart = headerPos + headerSize; + + BufferAt(headerPos) = BinaryTypeCode.String; + + _position = headerPos + 1; + WriteVarUIntUnsafe((uint)(charLength - BinaryTypeCode.FixStrCount)); + // _position now == slotPos. Slot write below uses Unsafe.WriteUnaligned at slotPos; + // _position is finalized at the end via `_position = encodeStart + bytesWritten`. 
} var status = System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs index 43d5323..350e902 100644 --- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs +++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs @@ -70,8 +70,8 @@ internal static class BinaryTypeCode // Marker 91 is reserved for FastWire UTF-16 payloads: // [StringUtf16][charLen:int32 LE][UTF-16 raw bytes] // - // Universal compact-mode strings use FixStr (135..166) + StringLen8/16/32 (167..169): - // [FixStr] / [StringLenN][charLen:N][unsigned excess slot][UTF-8 bytes] + // Universal compact-mode strings use FixStr (135..166) + String (167): + // [FixStr] / [String][VarUInt(charLength - FixStrCount)][unsigned excess slot][UTF-8 bytes] // // Interning tiers keep dedicated markers. public const byte StringUtf16 = SlotCount + 27; // 91 — FastWire UTF-16 marker payload @@ -123,23 +123,31 @@ internal static class BinaryTypeCode public const byte FixStrBase = SlotCount + 71; // 135 public const byte FixStrMax = FixStrBase + 31; // 166 public const int FixStrMaxLength = 31; + /// + /// Number of FixStr marker slots (= + 1 = 32). Also used as the + /// wire-format offset for the long-form marker's VarUInt-encoded charLength: + /// wireLen = charLength - FixStrCount. Single source of truth — if the FixStr range + /// ever expands (e.g. 32 → 64 slots), this constant updates the offset everywhere consistently. 
+ /// + public const int FixStrCount = FixStrMaxLength + 1; // 32 // Backward-compatible aliases (old naming) public const byte FixStrAsciiBase = FixStrBase; public const byte FixStrAsciiMax = FixStrMax; public const int FixStrAsciiMaxLength = FixStrMaxLength; - // Long universal string markers (marker encodes charLength field width) - // Layout: - // StringLen8 (167): [marker][charLen:1][excess slot][bytes] - // StringLen16 (168): [marker][charLen:2][excess slot][bytes] - // StringLen32 (169): [marker][charLen:4][excess slot][bytes] - public const byte StringLen8 = SlotCount + 103; // 167 - public const byte StringLen16 = SlotCount + 104; // 168 - public const byte StringLen32 = SlotCount + 105; // 169 + // Single universal long-form string marker (formerly split into StringLen8/16/32 at 167/168/169 — + // unified via prefix-tier VarUInt charLength encoding; magnitude-based tiering is exactly what VarUInt + // does, so the 3-way marker discrimination became redundant). + // + // Wire layout: [String:1] [VarUInt(charLength - FixStrCount)] [excess slot:1|2|4] [UTF-8 bytes] + // + // charLength here is always > FixStrMaxLength (smaller values use FixStr). + public const byte String = SlotCount + 103; // 167 - // Backward-compatible aliases - public const byte String = StringLen32; - public const byte StringAscii = StringLen32; + // 168..169 — Reserved (formerly StringLen16/32, freed by VarUInt-based length unification). + // Available for future marker allocation per BINARY_TODO.md marker-tier reorganization plan. + public const byte ReservedStringSlotMin = SlotCount + 104; // 168 + public const byte ReservedStringSlotMax = SlotCount + 105; // 169 // Reserved slot block: 170..175 (6 slots) for future string-related markers // (e.g., StringCompressed, StringEncoded, StringMixedAscii, etc.). 
Keeping the 135..169 range @@ -193,7 +201,7 @@ internal static class BinaryTypeCode || typeCode == StringEmpty || typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium - || (typeCode is >= FixStrBase and <= StringLen32); // 135..169: FixStr + StringLen8/16/32 + || (typeCode is >= FixStrBase and <= String); // 135..167: FixStr + String /// /// Check if type code is the FastWire UTF-16 string marker. @@ -210,11 +218,11 @@ internal static class BinaryTypeCode => typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium; /// - /// Check if type code is any universal string marker — FixStr (short) or StringLen8/16/32 (long). - /// Single contiguous range (135..169) for branch-friendly dispatch on the reader hot path. + /// Check if type code is any universal string marker — FixStr (short, 135..166) or String (long, 167). + /// Single contiguous range (135..167) for branch-friendly dispatch on the reader hot path. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= StringLen32; + public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= String; [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsAsciiString(byte typeCode) => IsStringUniversalMarker(typeCode); diff --git a/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs b/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs index 14c9b34..830efd8 100644 --- a/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs +++ b/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs @@ -1,5 +1,6 @@ using System; using System.Buffers; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; @@ -221,15 +222,32 @@ public struct BufferWriterBinaryOutput : IBinaryOutputBase WriteVarUIntMultiByteUnsafe(value); } + /// + /// Prefix-tier VarUInt encoding 
(UTF-8-style). Compact: 2/3/4-byte tiers packed into a single + /// <> store via + /// -derived tier; 5-byte tier uses one byte + one uint32 store. + /// Caller must ensure ≥5 bytes of buffer space. See + /// for the + /// full tier table. + /// [MethodImpl(MethodImplOptions.NoInlining)] private void WriteVarUIntMultiByteUnsafe(uint value) { - while (value >= 0x80) + if (value < 0x10000000) { - _buffer[_position++] = (byte)(value | 0x80); - value >>= 7; + // 2/3/4-byte tier: tier ∈ {2,3,4}, shift ∈ {8,16,24}, prefix ∈ {0x80,0xC0,0xE0}. + var tier = (BitOperations.Log2(value) / 7) + 1; + var shift = (tier - 1) << 3; + var prefix = (0xFF00u >> (tier - 1)) & 0xFFu; + var packed = (prefix | (value >> shift)) | (value << 8); + Unsafe.WriteUnaligned(ref _buffer[_position], packed); + _position += tier; + return; } - _buffer[_position++] = (byte)value; + // 5-byte tier: 0xF0 marker + full uint32 LE (prefix nibble unused). + _buffer[_position] = 0xF0; + Unsafe.WriteUnaligned(ref _buffer[_position + 1], value); + _position += 5; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/AyCode.Core/docs/BINARY/BINARY_FORMAT.md b/AyCode.Core/docs/BINARY/BINARY_FORMAT.md index 41cc694..c54d2e8 100644 --- a/AyCode.Core/docs/BINARY/BINARY_FORMAT.md +++ b/AyCode.Core/docs/BINARY/BINARY_FORMAT.md @@ -30,20 +30,31 @@ The flags byte uses `0xB0` (176) as base with bit flags in the lower nibble. (Mo ## Variable-Length Encoding -### VarUInt (unsigned) +### VarUInt (unsigned 32-bit) -LEB128: 7 data bits per byte, MSB = continuation flag. +Prefix-tier encoding (UTF-8-style): the first byte's high bits determine total size, +then the remaining bytes are read as a fixed-size little-endian value. No continuation-loop. -``` -value < 128 → 1 byte [0xxxxxxx] -value < 16384 → 2 bytes [1xxxxxxx] [0xxxxxxx] -value < 2097152 → 3 bytes ... -(max 5 bytes for uint32) -``` +| First byte | Total | Bit-budget | Value range | +|---|---|---|---| +| `0xxxxxxx` | 1 byte | 7 | `0 .. 
127` | +| `10xxxxxx` + 1B | 2 byte | 14 | `128 .. 16 383` | +| `110xxxxx` + 2B LE | 3 byte | 21 | `16 384 .. 2 097 151` | +| `1110xxxx` + 3B LE | 4 byte | 28 | `2 097 152 .. 268 435 455` | +| `1111xxxx` + 4B LE | 5 byte | 32 | `268 435 456 .. 4 294 967 295` (prefix nibble unused) | -### VarInt (signed) +Wire-size is identical to the legacy LEB128 encoding across all 5 tier-boundaries (7/14/21/28/32 bit) — +auto-inc IDs and other large values pay the same byte count. Decode wins: 1 if-ladder (max 4 branches, +JIT jump-table-friendly) on the first-byte prefix, with one `ReadByte` per remaining byte and constant shifts, +instead of a continuation loop (N×variable shift + N×continuation-check). -ZigZag encoding maps signed to unsigned, then LEB128: +Encode (`WriteVarUIntMultiByteUnsafe`) uses `BitOperations.Log2` to pick the tier in O(1), then +packs the prefix byte + value bytes into a single `Unsafe.WriteUnaligned` store on the 2/3/4-byte +tiers (5-byte tier: one byte + one uint32 store). + +### VarInt (signed 32-bit) + +ZigZag encoding maps signed to unsigned, then VarUInt (prefix-tier above): ``` encode: (value << 1) ^ (value >> 31) @@ -52,9 +63,12 @@ decode: (raw >> 1) ^ -(raw & 1) Maps: `0 → 0`, `-1 → 1`, `1 → 2`, `-2 → 3`, etc. -### VarULong (unsigned 64-bit) +### VarULong / VarLong (unsigned / signed 64-bit) -Same LEB128 encoding, max 10 bytes for uint64. +Legacy LEB128 encoding (7 data bits per byte, MSB = continuation flag). Max 10 bytes for uint64. +The 64-bit variants kept LEB128 — the prefix-tier benefit is concentrated in the 32-bit `VarUInt` +hot-path (auto-inc IDs, cache-indices, length prefixes, charLength offsets); 64-bit values are +rarely VarUInt-encoded. 
## Type Markers @@ -116,17 +130,35 @@ Second occurrence of a referenced polymorphic object uses plain `ObjectRef(65)` | 89 | Decimal | `[89] [16 bytes]` | | 90 | Char | `[90] [VarUInt]` | -### Strings (91–94, 167) +### Strings (91, 92, 93, 104, 105, 167) | Code | Name | Wire format | |------|------|-------------| -| 91 | String | `[91] [VarUInt byteLength] [UTF-8 bytes]` — generic UTF-8 (any content) | -| 92 | StringInterned | `[92] [VarUInt cacheIndex]` — 2nd+ occurrence | -| 93 | StringEmpty | `[93]` — no payload | -| 94 | StringInternFirst | `[94] [VarUInt cacheIndex] [VarUInt byteLength] [UTF-8 bytes]` — 1st occurrence | -| 167 | StringAscii | `[167] [VarUInt byteLength] [ASCII bytes]` — pure ASCII (every byte < 0x80); reader byte→char widens, no UTF-8 decode | +| 91 | StringUtf16 | `[91] [charLen:int32 LE] [UTF-16 raw bytes]` — FastWire mode UTF-16 payload (no UTF-8 transcoding; speed > size) | +| 92 | StringInterned | `[92] [VarUInt cacheIndex]` — 2nd+ occurrence of an interned string | +| 93 | StringEmpty | `[93]` — empty string, no payload | +| 104 | StringInternFirstSmall | `[104] [VarUInt cacheIdx] [charLen:8][utf8Len:8] [UTF-8 bytes]` — 1st occurrence interning tier, packed dual-length (utf8Len ≤ 255) | +| 105 | StringInternFirstMedium | `[105] [VarUInt cacheIdx] [charLen:16 LE][utf8Len:16 LE] [UTF-8 bytes]` — 1st occurrence interning tier, packed dual-length (utf8Len ≤ 65535) | +| 167 | String | `[167] [VarUInt(charLength - FixStrCount)] [unsigned excess:1\|2\|4] [UTF-8 bytes]` — universal long-form (charLength > FixStrMaxLength) | -The writer detects ASCII via `bytesWritten == charLength` after a single-pass UTF-8 encode (every UTF-16 char < 0x80 produces exactly 1 UTF-8 byte; non-ASCII chars always produce 2-4 bytes), then emits `StringAscii` (167) or `String` (91) accordingly. The reader uses the marker as the ASCII-validity contract — `StringAscii` bypasses UTF-8 decode entirely. +The 167 `String` marker is the universal long-form string. 
It unifies the former `StringLen8/16/32` +(at 167/168/169) into a single marker with prefix-tier VarUInt charLength encoding (offset by +`FixStrCount = FixStrMaxLength + 1 = 32`, since FixStr already covers 0..31). The excess slot +width (1 / 2 / 4 bytes) is selected from charLength via `GetUniversalStringExcessSlotSize` and +encodes `bytesWritten - charLength` (= UTF-8 byte excess over UTF-16 char count) for the reader's +`string.Create(charLen) + Utf8.ToUtf16` fast path (avoids the pre-scan `Encoding.UTF8.GetCharCount` pass). + +The interning tiers (`StringInternFirstSmall` / `Medium` at 104/105) keep their packed dual-length +(charLen + utf8Len) format — the post-encode tier choice exploits the typical interning workload +(short property names, enum strings) for fast deserialize via single packed-read. + +ASCII strings flow through the same `String` / FixStr markers — the reader uses `excess == 0` as +the ASCII-validity discriminator (every UTF-16 char < 0x80 produces exactly 1 UTF-8 byte; non-ASCII +chars always produce 2-4 bytes). On excess=0 the reader takes the byte→char widen fast path +(`Encoding.Latin1.GetString` SIMD); on excess>0 the UTF-8 decode runs. + +Slots 168, 169 are **Reserved** — freed by the StringLen8/16/32 unification, available for future +marker allocation (see Reserved Ranges below). ### Date/Time (95–98) @@ -146,33 +178,41 @@ The writer detects ASCII via `bytesWritten == charLength` after a single-pass UT | 101 | NoMetadataHeader | Legacy: implies `RefHandling=true`, no metadata | | 102 | PropertySkip | `[102]` — marks skipped property (default/null value) | -### FixStr (103–134) — short UTF-8 strings +### FixStr (135–166) — short universal string marker -Short strings (any UTF-8 content) encoded in a single marker byte + raw UTF-8 bytes (no length prefix): +Short strings encoded as a single marker byte + a 1-byte unsigned excess slot + raw UTF-8 bytes (no length prefix — charLength +encoded in the marker offset). 
The H2Q6 reorganization merged the formerly-split FixStr (UTF-8 at +103-134) and FixStrAscii (135-166) into one universal FixStr block at 135-166; codepoints 103..134 +are now part of the Reserved Range (see below). ``` -[FixStrBase + byteLength] [UTF-8 bytes] +[FixStrBase + charLength] [unsigned excess:1] [UTF-8 bytes] ``` -- Length range: 0–31 **bytes** (`FixStrBase=103`, `FixStrMax=134`) -- Saves 1 byte vs `String` marker + VarUInt length +- Length range: 0..31 chars (`FixStrBase = 135`, `FixStrMax = 166`, `FixStrMaxLength = 31`) +- `FixStrCount = FixStrMaxLength + 1 = 32` — single source of truth for the FixStr slot count, + also the wire-format offset for the long-form `String` marker's VarUInt charLength (`wireLen = + charLength - FixStrCount`). If the FixStr range ever expands (e.g. 32 → 64 slots), this constant + updates the offset everywhere consistently. +- Saves header bytes vs `String` marker + VarUInt length (2-byte header — marker + 1-byte excess slot — vs 3+ byte header) - Content semantics: UTF-8 (may contain multi-byte sequences for non-ASCII chars) -- Reader dispatches via the (universal-)UTF-8 decode path +- ASCII discriminator: the reader uses the header's `excess == 0` check; ASCII-only strings + bypass the UTF-8 decode via `Encoding.Latin1.GetString` SIMD byte→char widen. 
-### FixStrAscii (135–166) — short ASCII strings +### Reserved ranges -Short ASCII-only strings encoded in a single marker byte + raw ASCII bytes: - -``` -[FixStrAsciiBase + byteLength] [ASCII bytes] -``` - -- Length range: 0–31 **bytes** = chars (1:1 for ASCII) (`FixStrAsciiBase=135`, `FixStrAsciiMax=166`) -- Same wire size as `FixStr` (1 marker byte + bytes), but the marker IS the ASCII-validity contract -- Reader byte→char widens directly (`Encoding.Latin1.GetString` SIMD-accelerated path) — no UTF-8 decode, no run-time `Ascii.IsValid` scan -- Writer chooses between `FixStrAscii` and `FixStr` post-encode via `bytesWritten == charLength` - -Codepoints **168–175** are reserved for future string-related markers (e.g., compressed / base64 / mixed-ASCII variants), keeping the 91–167 range a single contiguous string-marker block. +- **103..134** (32 slots, 29 of which — 106..134 — are earmarked for future use): freed by H2Q6 FixStr unification (formerly the non-ASCII FixStr range). + Active reservations per `BINARY_TODO.md` marker-tier reorganization plan: + - 104: `StringInternFirstSmall` (active) + - 105: `StringInternFirstMedium` (active) + - 106..134: reserved for `ACCORE-BIN-T-L9Y3` FixArray short-list count (16 values), `S5L8` + sentinel-length tiers (5 values), `S2X9` markerless schema lane (4 values), `F3W6` dedicated + FastWire string marker (1 value), general reserve (3 values) +- **168..169** (2 slots): freed by `String` marker unification (formerly StringLen16, StringLen32). + Available for the upcoming `FixStr` range expansion (32→64 chars) — would extend FixStr from + `135..166` to `135..198`, absorbing 168/169 and pushing the next free slot to 199. +- **170..175** (6 slots): pre-existing reserve for future string-related markers (e.g., compressed + / base64 / mixed-ASCII variants), keeping the 91..167 range a single contiguous string-marker block.
### TinyInt (192–255) diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index 591aebf..55109a4 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -8,6 +8,73 @@ This page covers planned work for the **binary serializer core** (format, SGen, > **Archived entries**: see `BINARY_TODO_2026_04.md` and `BINARY_TODO_2026_05.md` (year-month bucket archives per LLMP-DEC retention policy). > Archive files are not auto-loaded — read on demand if relevant context is suspected (regression hint, supersession reference, ID lookup for archived entry). +## ACCORE-BIN-T-V3P9: ~~Prefix-tier VarUInt + String marker unification + FixStrCount constant~~ +**Status:** Closed (2026-05-26) · **Priority:** ~~P1~~ · **Type:** ~~Performance + Wire format~~ + +~~Three coordinated changes (single wire-format-breaking iteration, no FormatVersion bump per user +decision — old payloads not deserializable with the new code):~~ + +### Resolution (2026-05-26) + +**1. Prefix-tier VarUInt encoding (replaces LEB128).** `WriteVarUIntMultiByteUnsafe` and `ReadVarUInt` +in `AcBinarySerializer.BinarySerializationContext` + `AcBinaryDeserializer.BinaryDeserializationContext` ++ `BufferWriterBinaryOutput` rewritten from LEB128 continuation-loop to UTF-8-style prefix-tier +encoding. First-byte prefix bits (`0xxxxxxx` / `10xxxxxx` / `110xxxxx` / `1110xxxx` / `1111xxxx`) +determine total size in O(1); remaining bytes are fixed-size little-endian. Wire-size identical to +LEB128 across all 5 tiers (7/14/21/28/32 bit) — auto-inc IDs pay the same byte count. Decode wins: +1 if-ladder + 1 fix-sized load on 3+ byte tiers instead of N×ReadByte + N×shift + N×continuation-check. +Encoder uses `BitOperations.Log2` + single `Unsafe.WriteUnaligned` store for the 2/3/4-byte tiers +(branch count reduced from 4 to 1). See `BINARY_FORMAT.md` Variable-Length Encoding section for the +tier table. + +**2. 
String marker unification (`StringLen8/16/32` → single `String` marker).** The 3-marker +magnitude-tier dispatch (167 `StringLen8` + 168 `StringLen16` + 169 `StringLen32`) was redundant +once VarUInt itself became prefix-tier. Collapsed into a single `String` marker at 167 with VarUInt +charLength + slot. Slots 168, 169 freed for future marker allocation. Wire layout: +`[String:1] [VarUInt(charLength - FixStrCount)] [excess slot:1|2|4] [UTF-8 bytes]`. The slot-size +(1/2/4) is still derived from `charLength` via `GetUniversalStringExcessSlotSize`. `BinaryTypeCode.cs`, +`WriteStringWithDispatch`, `ReadUniversalLongString*`, `TryReadStringProperty`, `AcBinaryDeserializer` +reader registration / PopulateProperty switch / Skip path, and `AcBinarySourceGenerator.GenReader` +emit all updated. `StringAscii` alias deleted. + +**3. `FixStrCount` constant.** New `public const int FixStrCount = FixStrMaxLength + 1` in +`BinaryTypeCode.cs`. Single source of truth for the FixStr slot count (= 32), also the wire-format +offset for the long-form `String` marker's VarUInt charLength (`wireLen = charLength - FixStrCount`). +If the FixStr range ever expands (e.g. 32 → 64 slots), this constant updates the offset everywhere +consistently — both the writer (`charLength - FixStrCount`) and reader (`wireLen + FixStrCount`). + +**4. `EnsureAvailable` micro-optimization.** Combined `if (!TInput.IsTrustedSingleSegment && ...)` +short-circuit (was two separate ifs). Single branch, better Tier-0 / cold-path / AOT codegen. +Hot-path JIT (Tier-1) was already CSE-equivalent. + +**5. `ReadVarUInt` incremental-byte slow path.** The 4/5-byte tier slow path now accepts `b0/b1/b2` +as parameters (no re-read of bytes the inline fast path already consumed). Inline fast path covers +1/2/3-byte tiers (cross-segment safe via `ReadByte()` → `EnsureAvailable(1)` JIT-eliminate on +ArrayBinaryInput). + +**6. `WriteStringWithDispatch` if-cascade reorganization.** Replaced 4 `isFixStr ? ... 
: ...` +ternaries with a single explicit `if (isFixStr) { ... } else { ... }` block. Each branch holds +its own constants (no cross-branch ternary CSE pressure on the JIT). Tier-0 / cold-path codegen +slightly cleaner; Tier-1 hot-path was already CSE-equivalent (bench-confirmed neutral, but smaller +IL — `?:` in C# is 4 separate `brfalse/brtrue` instructions, not 1). + +### Acceptance criteria met + +- ✅ Full solution build (`AyCode.Core.sln`) — 0 errors. +- ✅ Benchmark snapshot (Latin1Short, 2026-05-26 14:20): AcBinary vs MemoryPack Ser median **−3.1%** + (was −1.8% before), Ser geo **−3.2%** (was −2.5%). Per-cell improvements 0.3..2.2% on Ser, + 2.2..3.7% on Deser. Bench file: `Test_Benchmark_Results/Benchmark/Console.FullBenchmark_Release_2026-05-26_14-20-29.LLM`. +- ✅ Doc-sync: `BINARY_FORMAT.md` Variable-Length Encoding + Strings + FixStr sections updated to + reflect the new wire layout. Reserved-range table added (103..134, 168..169, 170..175 buckets). + +### Wire-format breaking note + +This change set is **wire-format breaking** — payloads serialized by the pre-V3P9 code (LEB128 + +StringLen8/16/32) are NOT deserializable by the new code. Per user decision the `FormatVersion` +header byte was NOT bumped (silent breaking; AcBinary is consumer-private, no cross-deployment +compatibility surface). If future versioned compat is desired, a `FormatVersion 1 → 2` bump would +be the conventional approach. + ## ACCORE-BIN-T-N4P8: ~~SGen reference-property null-check parity across all four emit branches~~ **Status:** Closed (2026-05-23) · **Priority:** ~~P1~~ · **Type:** ~~Bug fix~~