diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs
index 45e1eff..31137b5 100644
--- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs
+++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs
@@ -1820,37 +1820,63 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
}
///
- /// Emits inline string read from type code. Handles all string wire formats:
- /// FixStr (UTF-8 short, 103-134), FixStrAscii (ASCII short, 135-166), String (UTF-8 long, 91),
- /// StringAscii (ASCII long, 167), StringInterned, StringEmpty, StringInternFirst, Null.
- /// FixStr/FixStrAscii are checked first as hot paths for short strings — ASCII variant
- /// dispatches to ReadAsciiBytesAsString (byte→char widen, no UTF-8 decode).
+ /// Emits inline string read from type code. Handles all H2Q6 (v3 wire format) string markers:
+ /// FixStrAscii (ASCII short, 135-166), StringAscii (ASCII long, 167),
+ /// StringSmall/Medium/Big (non-ASCII tiers, 91/94/103),
+ /// StringInternFirstSmall/Medium (interning tiers, 104/105),
+ /// StringInterned (cache ref, 92), StringEmpty (93), Null.
+ ///
+ /// FixStrAscii is checked first as the hot path for short ASCII property names; non-ASCII
+ /// tier markers carry both charLen and utf8Len in fixed-width headers (1-pass decode).
///
private static void EmitReadString(StringBuilder sb, string a, string tc, string i)
{
- // FixStr is the hot path — most strings are short (1-31 bytes, encoded in the type code itself)
- sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStr({tc}))");
- sb.AppendLine($"{i}{{");
- sb.AppendLine($"{i} var flen = BinaryTypeCode.DecodeFixStrLength({tc});");
- sb.AppendLine($"{i} {a} = flen == 0 ? string.Empty : context.ReadStringUtf8(flen);");
- sb.AppendLine($"{i}}}");
- // FixStrAscii — ASCII short strings, byte→char widen path (skips UTF-8 decode).
- sb.AppendLine($"{i}else if (BinaryTypeCode.IsFixStrAscii({tc}))");
+ // FixStrAscii is the hot path — most short strings (property names) are ASCII.
+ sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStrAscii({tc}))");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} var falen = BinaryTypeCode.DecodeFixStrAsciiLength({tc});");
sb.AppendLine($"{i} {a} = falen == 0 ? string.Empty : context.ReadAsciiBytesAsString(falen);");
sb.AppendLine($"{i}}}");
- // Switch gives O(1) dispatch via JIT jump table for the long markers.
- // StringInterned is the hot path for repeated interned strings.
+ // Switch gives O(1) dispatch via JIT jump table for the remaining markers.
sb.AppendLine($"{i}else switch ({tc})");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} case BinaryTypeCode.StringInterned:");
sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());");
sb.AppendLine($"{i} break;");
- sb.AppendLine($"{i} case BinaryTypeCode.String:");
+ // H2Q6 StringSmall — non-ASCII utf8Len ≤ 255 — wire: [charLen:8][utf8Len:8][bytes], 1-pass decode.
+ // FastWire mode shares the marker value (=91); reader dispatches by mode.
+ sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:");
sb.AppendLine($"{i} {{");
- sb.AppendLine($"{i} var slen = (int)context.ReadVarUInt();");
- sb.AppendLine($"{i} {a} = slen == 0 ? string.Empty : context.ReadStringUtf8(slen);");
+ sb.AppendLine($"{i} if (context.FastWire)");
+ sb.AppendLine($"{i} {{");
+ sb.AppendLine($"{i} var fwlen = (int)context.ReadVarUInt();");
+ sb.AppendLine($"{i} {a} = fwlen == 0 ? string.Empty : context.ReadStringUtf8(fwlen);");
+ sb.AppendLine($"{i} }}");
+ sb.AppendLine($"{i} else");
+ sb.AppendLine($"{i} {{");
+ sb.AppendLine($"{i} var sshdr = context.ReadTwoBytesUnsafe();");
+ sb.AppendLine($"{i} var sscharLen = (byte)sshdr;");
+ sb.AppendLine($"{i} var ssbyteLen = (byte)(sshdr >> 8);");
+ sb.AppendLine($"{i} {a} = ssbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sscharLen, ssbyteLen);");
+ sb.AppendLine($"{i} }}");
+ sb.AppendLine($"{i} break;");
+ sb.AppendLine($"{i} }}");
+ // H2Q6 StringMedium — utf8Len ≤ 65535 — single uint read packs charLen:16 + utf8Len:16
+ sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:");
+ sb.AppendLine($"{i} {{");
+ sb.AppendLine($"{i} var smpacked = context.ReadUInt32Unsafe();");
+ sb.AppendLine($"{i} var smcharLen = (ushort)smpacked;");
+ sb.AppendLine($"{i} var smbyteLen = (ushort)(smpacked >> 16);");
+ sb.AppendLine($"{i} {a} = smbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(smcharLen, smbyteLen);");
+ sb.AppendLine($"{i} break;");
+ sb.AppendLine($"{i} }}");
+ // H2Q6 StringBig — utf8Len > 65535 — single ulong read packs charLen:32 + utf8Len:32
+ sb.AppendLine($"{i} case BinaryTypeCode.StringBig:");
+ sb.AppendLine($"{i} {{");
+ sb.AppendLine($"{i} var sbpacked = context.ReadUInt64Unsafe();");
+ sb.AppendLine($"{i} var sbcharLen = (int)(uint)sbpacked;");
+ sb.AppendLine($"{i} var sbbyteLen = (int)(uint)(sbpacked >> 32);");
+ sb.AppendLine($"{i} {a} = sbbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sbcharLen, sbbyteLen);");
sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:");
@@ -1859,14 +1885,30 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
sb.AppendLine($"{i} {a} = salen == 0 ? string.Empty : context.ReadAsciiBytesAsString(salen);");
sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
- sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirst:");
+ // H2Q6 interning — Small tier
+ sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:");
sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} context.DisableStringCaching();");
- sb.AppendLine($"{i} var sci = (int)context.ReadVarUInt();");
- sb.AppendLine($"{i} var slen2 = (int)context.ReadVarUInt();");
- sb.AppendLine($"{i} var sv = slen2 == 0 ? string.Empty : context.ReadStringUtf8(slen2);");
- sb.AppendLine($"{i} context.RegisterInternedValueAt(sci, sv);");
- sb.AppendLine($"{i} {a} = sv;");
+ sb.AppendLine($"{i} var iscIdx = (int)context.ReadVarUInt();");
+ sb.AppendLine($"{i} var ishdr = context.ReadTwoBytesUnsafe();");
+ sb.AppendLine($"{i} var ischarLen = (byte)ishdr;");
+ sb.AppendLine($"{i} var isbyteLen = (byte)(ishdr >> 8);");
+ sb.AppendLine($"{i} var isv = isbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(ischarLen, isbyteLen);");
+ sb.AppendLine($"{i} context.RegisterInternedValueAt(iscIdx, isv);");
+ sb.AppendLine($"{i} {a} = isv;");
+ sb.AppendLine($"{i} break;");
+ sb.AppendLine($"{i} }}");
+ // H2Q6 interning — Medium tier — single uint header read
+ sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstMedium:");
+ sb.AppendLine($"{i} {{");
+ sb.AppendLine($"{i} context.DisableStringCaching();");
+ sb.AppendLine($"{i} var imcIdx = (int)context.ReadVarUInt();");
+ sb.AppendLine($"{i} var impacked = context.ReadUInt32Unsafe();");
+ sb.AppendLine($"{i} var imcharLen = (ushort)impacked;");
+ sb.AppendLine($"{i} var imbyteLen = (ushort)(impacked >> 16);");
+ sb.AppendLine($"{i} var imv = imbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(imcharLen, imbyteLen);");
+ sb.AppendLine($"{i} context.RegisterInternedValueAt(imcIdx, imv);");
+ sb.AppendLine($"{i} {a} = imv;");
sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} case BinaryTypeCode.Null:");
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
index e71dc3b..fab35ec 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
@@ -105,6 +105,47 @@ public static partial class AcBinaryDeserializer
return value;
}
+ ///
+ /// H2Q6 helper — reads 2 bytes as a native-endian ushort (on little-endian hosts: low byte = first byte, high byte = second).
+ /// Used by StringSmall / StringInternFirstSmall readers to grab charLen:8 | utf8Len:8
+ /// with one EnsureAvailable(2) check and a single unaligned 2-byte load; advances the position by 2.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ushort ReadTwoBytesUnsafe()
+ {
+ EnsureAvailable(2);
+ var value = Unsafe.ReadUnaligned(ref _buffer[_position]);
+ _position += 2;
+ return value;
+ }
+
+ ///
+ /// Reads a 4-byte unsigned integer (little-endian on Intel/AMD, native-endian elsewhere — wire format
+ /// is little-endian by convention; on big-endian hosts this would need BinaryPrimitives.ReverseEndianness).
+ /// Used by the StringMedium / StringInternFirstMedium readers to grab packed charLen:16 | utf8Len:16 in a single load.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public uint ReadUInt32Unsafe()
+ {
+ EnsureAvailable(4);
+ var value = Unsafe.ReadUnaligned(ref _buffer[_position]);
+ _position += 4;
+ return value;
+ }
+
+ ///
+ /// Reads an 8-byte unsigned integer (little-endian on Intel/AMD, native-endian elsewhere) and advances the position by 8.
+ /// Used by H2Q6 StringBig reader to grab packed charLen:32 | utf8Len:32 in a single load (callers take charLen from the low 32 bits).
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ulong ReadUInt64Unsafe()
+ {
+ EnsureAvailable(8);
+ var value = Unsafe.ReadUnaligned(ref _buffer[_position]);
+ _position += 8;
+ return value;
+ }
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char ReadCharUnsafe()
{
@@ -492,6 +533,41 @@ public static partial class AcBinaryDeserializer
});
}
+ ///
+ /// H2Q6 1-pass UTF-8 string read — both charLength and byteLength
+ /// come from the wire (StringSmall/Medium/Big tier headers), eliminating the
+ /// CountUtf8Chars Pass 1.
+ ///
+ ///
+ /// Wire context: tier markers (StringSmall/Medium/Big, StringInternFirstSmall/Medium) carry the
+ /// char count alongside the byte count, so this method can call string.Create
+ /// directly with the known target capacity and decode in a single pass through the bytes.
+ ///
+ /// Compact mode only — FastWire mode never emits the H2Q6 tier body layout (its
+ /// ReadStringUtf8 path presumably handles UTF-16 raw memcpy — confirm against ReadStringUtf8).
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public string ReadStringUtf8WithCharLen(int charLength, int byteLength)
+ {
+ if (byteLength == 0) return string.Empty;
+
+ EnsureAvailable(byteLength);
+
+ // WASM string-cache fast path — if cached, byte-cmp validates and returns the canonical instance (charLength is not used on this path)
+ if (_useStringCaching && byteLength <= _maxCachedStringLength)
+ {
+ return ReadStringUtf8Cached(byteLength);
+ }
+
+ var pos = _position;
+ _position += byteLength;
+
+ return string.Create(charLength, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
+ {
+ Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars);
+ });
+ }
+
private string ReadStringUtf8Cached(int length)
{
var slice = _buffer.AsSpan(_position, length);
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
index dabbfd5..ae76e81 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
@@ -1,5 +1,6 @@
using System;
using System.Buffers;
+using System.Buffers.Binary;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Frozen;
@@ -96,10 +97,16 @@ public static partial class AcBinaryDeserializer
readers[BinaryTypeCode.Float64] = static (ctx, _, _) => ctx.ReadDoubleUnsafe();
readers[BinaryTypeCode.Decimal] = static (ctx, _, _) => ctx.ReadDecimalUnsafe();
readers[BinaryTypeCode.Char] = static (ctx, _, _) => ctx.ReadCharUnsafe();
- readers[BinaryTypeCode.String] = static (ctx, _, _) => ReadPlainString(ctx);
+ // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode.
+ // FastWire mode dispatches the StringSmall (=91) marker through the same handler — see ReadStringSmall.
+ readers[BinaryTypeCode.StringSmall] = static (ctx, _, _) => ReadStringSmall(ctx);
+ readers[BinaryTypeCode.StringMedium] = static (ctx, _, _) => ReadStringMedium(ctx);
+ readers[BinaryTypeCode.StringBig] = static (ctx, _, _) => ReadStringBig(ctx);
readers[BinaryTypeCode.StringInterned] = static (ctx, _, _) => ctx.GetInternedString((int)ctx.ReadVarUInt());
readers[BinaryTypeCode.StringEmpty] = static (_, _, _) => string.Empty;
- readers[BinaryTypeCode.StringInternFirst] = static (ctx, _, _) => ReadAndRegisterInternedString(ctx);
+ // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path)
+ readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _, _) => ReadAndRegisterInternedStringSmall(ctx);
+ readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _, _) => ReadAndRegisterInternedStringMedium(ctx);
readers[BinaryTypeCode.StringAscii] = static (ctx, _, _) => ReadPlainStringAscii(ctx);
readers[BinaryTypeCode.DateTime] = static (ctx, _, _) => ctx.ReadDateTimeUnsafe();
readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _, _) => ctx.ReadDateTimeOffsetUnsafe();
@@ -119,12 +126,8 @@ public static partial class AcBinaryDeserializer
readers[BinaryTypeCode.Dictionary] = ReadDictionary;
readers[BinaryTypeCode.ByteArray] = static (ctx, _, _) => ReadByteArray(ctx);
- // Register FixStr readers
- for (var code = BinaryTypeCode.FixStrBase; code <= BinaryTypeCode.FixStrMax; code++)
- {
- var length = BinaryTypeCode.DecodeFixStrLength(code);
- readers[code] = CreateFixStrReader(length);
- }
+ // V4N5 cleanup (2026-05-06): FixStr (UTF-8 short non-ASCII, 103..134) range REMOVED.
+ // Non-ASCII short strings now use StringSmall tier marker (registered above).
// Register FixStrAscii readers (135..166) — pure-ASCII short-string fast path.
// The marker IS the validity contract — reader byte→char widens without UTF-8 decode.
@@ -142,16 +145,8 @@ public static partial class AcBinaryDeserializer
}
- ///
- /// Creates a reader for FixStr with the given length.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static TypeReader CreateFixStrReader(int length) where TInput : struct, IBinaryInputBase
- {
- if (length == 0) return static (_, _, _) => string.Empty;
-
- return (ctx, _, _) => ctx.ReadStringUtf8(length);
- }
+ // V4N5 cleanup (2026-05-06): CreateFixStrReader removed — non-ASCII short strings now use
+ // StringSmall tier reader (see ReadStringSmall below).
///
/// Creates a reader for FixStrAscii with the given byte length (also char count, ASCII = 1:1).
@@ -1041,42 +1036,42 @@ public static partial class AcBinaryDeserializer
break;
case PropertyAccessorType.String:
- if (BinaryTypeCode.IsFixStr(typeCode))
- {
- var length = BinaryTypeCode.DecodeFixStrLength(typeCode);
- propInfo.SetValue(target, length == 0 ? string.Empty : context.ReadStringUtf8(length));
- return true;
- }
+ // FixStrAscii is a range (135-166), can't go in switch — keep as range-check first.
+ // Hot path on ASCII property names; the marker carries the length, byte→char widen only.
if (BinaryTypeCode.IsFixStrAscii(typeCode))
{
var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
propInfo.SetValue(target, length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length));
return true;
}
- if (typeCode == BinaryTypeCode.String)
+ // Single-value markers — switch lowers to a JIT/AOT jump table for O(1) dispatch
+ // (vs. sequential if-chain that branches per non-matching marker).
+ switch (typeCode)
{
- propInfo.SetValue(target, ReadPlainString(context));
- return true;
- }
- if (typeCode == BinaryTypeCode.StringAscii)
- {
- propInfo.SetValue(target, ReadPlainStringAscii(context));
- return true;
- }
- if (typeCode == BinaryTypeCode.StringEmpty)
- {
- propInfo.SetValue(target, string.Empty);
- return true;
- }
- if (typeCode == BinaryTypeCode.StringInterned)
- {
- propInfo.SetValue(target, context.GetInternedString((int)context.ReadVarUInt()));
- return true;
- }
- if (typeCode == BinaryTypeCode.StringInternFirst)
- {
- propInfo.SetValue(target, ReadAndRegisterInternedString(context));
- return true;
+ case BinaryTypeCode.StringSmall:
+ propInfo.SetValue(target, ReadStringSmall(context));
+ return true;
+ case BinaryTypeCode.StringMedium:
+ propInfo.SetValue(target, ReadStringMedium(context));
+ return true;
+ case BinaryTypeCode.StringBig:
+ propInfo.SetValue(target, ReadStringBig(context));
+ return true;
+ case BinaryTypeCode.StringAscii:
+ propInfo.SetValue(target, ReadPlainStringAscii(context));
+ return true;
+ case BinaryTypeCode.StringEmpty:
+ propInfo.SetValue(target, string.Empty);
+ return true;
+ case BinaryTypeCode.StringInterned:
+ propInfo.SetValue(target, context.GetInternedString((int)context.ReadVarUInt()));
+ return true;
+ case BinaryTypeCode.StringInternFirstSmall:
+ propInfo.SetValue(target, ReadAndRegisterInternedStringSmall(context));
+ return true;
+ case BinaryTypeCode.StringInternFirstMedium:
+ propInfo.SetValue(target, ReadAndRegisterInternedStringMedium(context));
+ return true;
}
break;
}
@@ -1115,19 +1110,13 @@ public static partial class AcBinaryDeserializer
// Handle null
if (typeCode == BinaryTypeCode.Null) return null;
- // Handle FixStr (short strings with length in type code)
- if (BinaryTypeCode.IsFixStr(typeCode))
- {
- var length = BinaryTypeCode.DecodeFixStrLength(typeCode);
- return length == 0 ? string.Empty : context.ReadStringUtf8(length);
- }
-
// Handle FixStrAscii (short ASCII strings — byte→char widen, no UTF-8 decode)
if (BinaryTypeCode.IsFixStrAscii(typeCode))
{
var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
return length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length);
}
+ // H2Q6: non-ASCII short strings now use StringSmall tier (handled below via TypeReaderTable dispatch).
var reader = TypeReaderTable.Readers[typeCode];
if (reader != null)
@@ -1141,7 +1130,11 @@ public static partial class AcBinaryDeserializer
}
///
- /// Sima string olvas�sa - NEM regisztr�l az intern t�bl�ba.
+ /// Body-only string read for marker-less paths (polymorphism: assembly-qualified type-name).
+ /// Wire format: [VarUInt utf8Len][UTF-8 bytes] — caller already consumed any marker.
+ /// Used by ReadObjectWithTypeName / ReadObjectWithTypeNameRefFirst after their
+ /// outer marker has been read; symmetric to
+ /// on the writer side.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadPlainString(BinaryDeserializationContext context)
@@ -1152,6 +1145,58 @@ public static partial class AcBinaryDeserializer
return context.ReadStringUtf8(length);
}
+ ///
+ /// H2Q6 StringSmall reader (Compact mode): wire [charLen:8][utf8Len:8][UTF-8 bytes] after the
+ /// marker has been consumed. 1-pass decode (no CountUtf8Chars). FastWire mode uses the same
+ /// marker (=91) but a different layout — handled via a VarUInt length read followed by context.ReadStringUtf8
+ /// when the deserializer is in FastWire mode.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static string ReadStringSmall(BinaryDeserializationContext context)
+ where TInput : struct, IBinaryInputBase
+ {
+ if (context.FastWire)
+ {
+ // Mode-shared marker: FastWire payload is [VarUInt charCount][UTF-16 raw bytes]. NOTE(review): no zero-length guard here, unlike the generated reader — confirm ReadStringUtf8 tolerates 0.
+ var charLenF = (int)context.ReadVarUInt();
+ return context.ReadStringUtf8(charLenF);
+ }
+
+ // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes]; low byte of the header = charLen, high byte = utf8Len
+ var header = context.ReadTwoBytesUnsafe();
+ var charLength = (byte)header;
+ var byteLength = (byte)(header >> 8);
+ return context.ReadStringUtf8WithCharLen(charLength, byteLength);
+ }
+
+ ///
+ /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]. 1-pass decode.
+ /// Header read in a single uint load (vs 2 ushort loads): low 16 bits = charLen, high 16 bits = utf8Len.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static string ReadStringMedium(BinaryDeserializationContext context)
+ where TInput : struct, IBinaryInputBase
+ {
+ var packed = context.ReadUInt32Unsafe();
+ var charLength = (ushort)packed;
+ var byteLength = (ushort)(packed >> 16);
+ return context.ReadStringUtf8WithCharLen(charLength, byteLength);
+ }
+
+ ///
+ /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes]. 1-pass decode.
+ /// Header read in a single ulong load (vs 2 uint loads): low 32 bits = charLen, high 32 bits = utf8Len.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static string ReadStringBig(BinaryDeserializationContext context)
+ where TInput : struct, IBinaryInputBase
+ {
+ var packed = context.ReadUInt64Unsafe();
+ var charLength = (int)(uint)packed;
+ var byteLength = (int)(uint)(packed >> 32);
+ return context.ReadStringUtf8WithCharLen(charLength, byteLength);
+ }
+
///
/// Reads a long ASCII string payload (after the StringAscii marker has been consumed).
/// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode.
@@ -1166,20 +1211,51 @@ public static partial class AcBinaryDeserializer
}
///
- /// Read interned string (StringInternFirst marker) and register in cache at specified index.
- /// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes]
+ /// H2Q6 StringInternFirstSmall reader: wire [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]
+ /// after the marker has been consumed. Registers the decoded string in the intern cache.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static string ReadAndRegisterInternedString(BinaryDeserializationContext context)
+ private static string ReadAndRegisterInternedStringSmall(BinaryDeserializationContext context)
where TInput : struct, IBinaryInputBase
{
- // First StringInternFirst marker proves payload uses string interning →
- // plain String entries appear only once, so _stringCache would never hit
+ // First interning marker proves payload uses string interning → plain String entries
+ // appear only once, so _stringCache would never hit on them.
context.DisableStringCaching();
var cacheIndex = (int)context.ReadVarUInt();
- var length = (int)context.ReadVarUInt();
- if (length == 0) return string.Empty;
- var str = context.ReadStringUtf8(length);
+ var header = context.ReadTwoBytesUnsafe();
+ var charLength = (byte)header;
+ var byteLength = (byte)(header >> 8);
+ if (byteLength == 0)
+ {
+ context.RegisterInternedValueAt(cacheIndex, string.Empty);
+ return string.Empty;
+ }
+ var str = context.ReadStringUtf8WithCharLen(charLength, byteLength);
+ context.RegisterInternedValueAt(cacheIndex, str);
+ return str;
+ }
+
+ ///
+ /// H2Q6 StringInternFirstMedium reader: wire [cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes].
+ /// Disables string caching, then registers the decoded string (string.Empty when utf8Len is 0) at cacheIdx and returns it.
+ /// (Big tier never engages on the interning path — see H2Q6 layout comment.)
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static string ReadAndRegisterInternedStringMedium(BinaryDeserializationContext context)
+ where TInput : struct, IBinaryInputBase
+ {
+ context.DisableStringCaching();
+ var cacheIndex = (int)context.ReadVarUInt();
+ // Packed charLen:16 | utf8Len:16 header read in a single uint load (low half = charLen, high half = utf8Len)
+ var packed = context.ReadUInt32Unsafe();
+ var charLength = (ushort)packed;
+ var byteLength = (ushort)(packed >> 16);
+ if (byteLength == 0)
+ {
+ context.RegisterInternedValueAt(cacheIndex, string.Empty);
+ return string.Empty;
+ }
+ var str = context.ReadStringUtf8WithCharLen(charLength, byteLength);
context.RegisterInternedValueAt(cacheIndex, str);
return str;
}
@@ -2032,16 +2108,7 @@ public static partial class AcBinaryDeserializer
if (BinaryTypeCode.IsTinyInt(typeCode)) return;
- // Handle FixStr (short strings)
- if (BinaryTypeCode.IsFixStr(typeCode))
- {
- var length = BinaryTypeCode.DecodeFixStrLength(typeCode);
- if (length > 0)
- context.Skip(length);
- return;
- }
-
- // Handle FixStrAscii (short ASCII strings — same skip layout as FixStr, just different marker range)
+ // Handle FixStrAscii (short ASCII strings — marker carries length, ASCII payload)
if (BinaryTypeCode.IsFixStrAscii(typeCode))
{
var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
@@ -2049,6 +2116,7 @@ public static partial class AcBinaryDeserializer
context.Skip(length);
return;
}
+ // H2Q6: non-ASCII short strings now use StringSmall tier (handled in switch below).
switch (typeCode)
{
@@ -2094,17 +2162,44 @@ public static partial class AcBinaryDeserializer
case BinaryTypeCode.Decimal:
context.Skip(16);
return;
- case BinaryTypeCode.String:
case BinaryTypeCode.StringAscii:
- // Same skip layout: [VarUInt byteCount][bytes]. ASCII vs UTF-8 distinction is content-only.
+ // Skip layout: [VarUInt byteCount][bytes]
SkipPlainString(context);
return;
+ case BinaryTypeCode.StringSmall:
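+ // H2Q6 Small tier (Compact layout): [charLen:8][utf8Len:8][bytes] — skip 2 byte header + utf8Len bytes. NOTE(review): FastWire reuses marker 91 with a [VarUInt charCount][UTF-16 raw bytes] body (see ReadStringSmall) — confirm this skip path is never reached in FastWire mode, or branch on context.FastWire.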
+ {
+ var header = context.ReadTwoBytesUnsafe();
+ var utf8Len = (byte)(header >> 8);
+ if (utf8Len > 0) context.Skip(utf8Len);
+ }
+ return;
+ case BinaryTypeCode.StringMedium:
+ // H2Q6 Medium tier: [charLen:16][utf8Len:16][bytes] — single uint read
+ {
+ var packed = context.ReadUInt32Unsafe();
+ var utf8Len = (int)(packed >> 16);
+ if (utf8Len > 0) context.Skip(utf8Len);
+ }
+ return;
+ case BinaryTypeCode.StringBig:
+ // H2Q6 Big tier: [charLen:32][utf8Len:32][bytes] — single ulong read
+ {
+ var packed = context.ReadUInt64Unsafe();
+ var utf8Len = (int)(uint)(packed >> 32);
+ if (utf8Len > 0) context.Skip(utf8Len);
+ }
+ return;
case BinaryTypeCode.StringInterned:
context.ReadVarUInt();
return;
- case BinaryTypeCode.StringInternFirst:
- // First occurrence - must register even when skipping
- SkipAndRegisterInternedString(context);
+ case BinaryTypeCode.StringInternFirstSmall:
+ // H2Q6 interning Small: [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] — register decoded string
+ SkipAndRegisterInternedStringSmall(context);
+ return;
+ case BinaryTypeCode.StringInternFirstMedium:
+ // H2Q6 interning Medium: [cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]
+ SkipAndRegisterInternedStringMedium(context);
return;
case BinaryTypeCode.ByteArray:
var byteLen = (int)context.ReadVarUInt();
@@ -2157,17 +2252,44 @@ public static partial class AcBinaryDeserializer
}
///
- /// Skip an interned string (StringInternFirst) - must still read cacheIndex and register in cache.
- /// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes]
+ /// Skip an H2Q6 interning Small first-occurrence — still register in cache for subsequent ref reads.
+ /// Wire format: [StringInternFirstSmall][cacheIdx:VarUInt][charLen:8][utf8Len:8][UTF-8 bytes]
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static void SkipAndRegisterInternedString(BinaryDeserializationContext context)
+ private static void SkipAndRegisterInternedStringSmall(BinaryDeserializationContext context)
where TInput : struct, IBinaryInputBase
{
var cacheIndex = (int)context.ReadVarUInt();
- var byteLen = (int)context.ReadVarUInt();
- if (byteLen == 0) return;
- var str = context.ReadStringUtf8(byteLen);
+ var header = context.ReadTwoBytesUnsafe();
+ var charLen = (byte)header;
+ var byteLen = (byte)(header >> 8);
+ if (byteLen == 0)
+ {
+ context.RegisterInternedValueAt(cacheIndex, string.Empty);
+ return;
+ }
+ var str = context.ReadStringUtf8WithCharLen(charLen, byteLen);
+ context.RegisterInternedValueAt(cacheIndex, str);
+ }
+
+ ///
+ /// Skip an H2Q6 interning Medium first-occurrence — the payload is still decoded (not byte-skipped) and registered in the cache for subsequent ref reads.
+ /// Wire format: [StringInternFirstMedium][cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]; a zero utf8Len registers string.Empty.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void SkipAndRegisterInternedStringMedium(BinaryDeserializationContext context)
+ where TInput : struct, IBinaryInputBase
+ {
+ var cacheIndex = (int)context.ReadVarUInt();
+ var packed = context.ReadUInt32Unsafe();
+ var charLen = (ushort)packed;
+ var byteLen = (ushort)(packed >> 16);
+ if (byteLen == 0)
+ {
+ context.RegisterInternedValueAt(cacheIndex, string.Empty);
+ return;
+ }
+ var str = context.ReadStringUtf8WithCharLen(charLen, byteLen);
context.RegisterInternedValueAt(cacheIndex, str);
}
diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
index ce9af11..c6c868f 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
@@ -1,5 +1,6 @@
using System;
using System.Buffers;
+using System.Buffers.Binary;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Numerics;
@@ -715,30 +716,42 @@ public static partial class AcBinarySerializer
}
///
- /// Writes a non-empty string with marker-dispatch: detects ASCII vs UTF-8 in-place from the
- /// encoder's byte count and emits the appropriate wire marker (FixStrAscii,
- /// FixStr, StringAscii, or String). The reader uses the marker as an
- /// ASCII-validity contract — pure-ASCII payloads skip UTF-8 decode entirely (byte→char widen).
+ /// Writes a non-empty string with marker-dispatch: detects ASCII vs non-ASCII in-place from
+ /// the encoder's byte count, then emits the appropriate wire marker:
+ ///
+ /// - ASCII ≤ 31 byte → FixStrAscii (1-byte header, length in marker)
+ /// - ASCII > 31 byte → StringAscii (1+VarUInt header)
+ /// - Non-ASCII utf8Len ≤ 255 → StringSmall (3-byte header: marker + charLen:8 + utf8Len:8)
+ /// - Non-ASCII utf8Len ≤ 65535 → StringMedium (5-byte header: marker + charLen:16 + utf8Len:16)
+ /// - Non-ASCII utf8Len > 65535 → StringBig (9-byte header: marker + charLen:32 + utf8Len:32)
+ ///
///
///
- /// Layout (Compact wire): [marker: 1 byte][optional VarUInt byteCount][encoded bytes]
- /// — VarUInt is omitted for FixStr/FixStrAscii (length is encoded in the marker).
+ /// H2Q6 wire format v3 — non-ASCII tiers carry both charLen and utf8Len in the header,
+ /// enabling 1-pass deserialize (no CountUtf8Chars Pass 1). ASCII path unchanged from M3R7.
///
- /// ASCII detection is free: bytesWritten == charLength after a UTF-8 encode is a
- /// necessary AND sufficient condition for the input being pure ASCII (every UTF-16 char
- /// < 0x80 produces exactly 1 UTF-8 byte; non-ASCII chars always produce 2-4 bytes).
+ /// Optimistic encode position is chosen by tier-prediction from charLength
+ /// (worst-case 4 byte/char): ≤ 63 char → Small (3-byte header reserved); ≤ 16383 char → Medium
+ /// (5-byte header reserved); else Big (9-byte). After encoding, bytesWritten determines
+ /// the actual tier and the body is left-shifted only if the actual header is smaller than
+ /// reserved (rare on Hungarian text — short Hungarian content stays in Small tier with 0 shift).
///
- /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings are
- /// handled by the higher-level WriteString via the StringEmpty marker.
+ /// FastWire mode: re-uses the StringSmall marker value (91) as a generic
+ /// "string marker" — body layout differs (UTF-16 raw + VarUInt charCount) and the reader
+ /// dispatches by serializer mode, NOT by re-interpreting the marker. The 91 value is
+ /// mode-shared because the wire envelope is mode-tagged at the header level.
+ ///
+ /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings
+ /// are handled by the higher-level WriteString via the StringEmpty marker.
///
public void WriteStringWithDispatch(string value)
{
if (FastWire)
{
- // FastWire: char count (VarUInt) + raw UTF-16 memcopy. ASCII detection adds no value
- // here — the wire size is identical (2 bytes/char) and the read path is memcpy-based,
- // so the encoder/decoder UTF-8 cost (which the ASCII marker would skip) doesn't apply.
- WriteByte(BinaryTypeCode.String);
+ // FastWire: [StringSmall marker][VarUInt charCount][UTF-16 raw bytes]
+ // Marker value 91 is mode-shared (Compact StringSmall vs FastWire string marker);
+ // reader dispatches by deserializer mode, NOT by re-interpreting the marker.
+ WriteByte(BinaryTypeCode.StringSmall);
var charLenF = value.Length;
var byteLenF = charLenF * 2;
WriteVarUInt((uint)charLenF);
@@ -748,134 +761,186 @@ public static partial class AcBinarySerializer
return;
}
+ // Compact mode — H2Q6 post-encode tier dispatch (wire-optimal).
+ //
+ // Two-step tier logic:
+ // 1. reserveHeader (from charLength, worst-case 4 byte/char): bounds the buffer allocation
+ // AND the encode position. Tight reserve (3/5/9) avoids large memmove on the hot path.
+ // 2. actualHeader (from bytesWritten after encode): chooses the smallest fitting tier.
+ // A mostly-ASCII string in the 64-16383 char band gets Small (3 byte header) even though
+ // reserve was Medium (5 byte) — body is left-shifted by 2 bytes to compact.
+ //
+ // Why post-encode tier choice (vs. pre-chosen): mostly-ASCII content (English description fields,
+ // log/error messages, URL paths) at 64+ char would otherwise pay +2 byte/string for Medium
+ // header when Small fits. Production payloads include both Magyar/CJK multi-byte AND ASCII-
+ // dominated strings; wire-size narrative ("smallest") matters across the realistic mix.
+ //
+ // ASCII override (bytesWritten == charLength) emits FixStrAscii / StringAscii with their own
+ // compact headers (1 byte / 1+VarUInt) — body shifted left from the encode position.
var charLength = value.Length;
+ var maxBytes = charLength * 4;
- // Hot-path split: encode position is chosen to MINIMIZE post-encode shifts.
- //
- // • charLength ≤ 31 → MIGHT be FixStr (bytesWritten ≤ 31) or long String (multibyte
- // expansion). Encode optimistically at savedPos+1 (FixStr position). FixStr hit ⇒ 0 shift,
- // only marker byte write. Long-fallback (rare, requires Hungarian/CJK chars in a
- // short-char string AND post-expand size > 31) ⇒ shift bytes RIGHT by 1 (since the
- // long lane needs 1 VarUInt byte after the marker; charLength ≤ 31 ⇒ maxBytes ≤ 124
- // ⇒ VarUInt size = 1).
- //
- // • charLength > 31 → ALWAYS long String (bytesWritten ≥ charLength > 31). Use full
- // D-2 layout [marker][reserveVarUInt][bytes], encode at savedPos+1+reserveVarUInt.
- // Backfill compacts only when actual VarUInt size < reserved (rare).
- if (charLength <= BinaryTypeCode.FixStrMaxLength)
+ int reserveHeader;
+ if (charLength <= 63) reserveHeader = 3;
+ else if (charLength <= 16383) reserveHeader = 5;
+ else reserveHeader = 9;
+
+ EnsureCapacity(reserveHeader + maxBytes);
+
+ var savedPos = _position;
+ var encodeStart = savedPos + reserveHeader;
+ var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
+
+ if (bytesWritten == charLength)
{
- var maxBytesShort = charLength * 4; // ≤ 124, fits in 1-byte VarUInt
- EnsureCapacity(2 + maxBytesShort); // marker + 1-byte VarUInt + bytes (worst case)
-
- var savedPosShort = _position;
- var bytesWrittenShort = Utf8Transcoder.EncodeUtf8SinglePass(
- value.AsSpan(),
- _buffer.AsSpan(savedPosShort + 1, maxBytesShort));
- var isAsciiShort = bytesWrittenShort == charLength;
-
- if (bytesWrittenShort <= BinaryTypeCode.FixStrMaxLength)
+ // ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header
+ if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength)
{
- // Hot path: FixStr hit → bytes already at savedPos+1, no shift.
- _buffer[savedPosShort] = isAsciiShort
- ? BinaryTypeCode.EncodeFixStrAscii(bytesWrittenShort)
- : BinaryTypeCode.EncodeFixStr(bytesWrittenShort);
- _position = savedPosShort + 1 + bytesWrittenShort;
+ var shift = reserveHeader - 1;
+ _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
+ _buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten);
+ _position = savedPos + 1 + bytesWritten;
}
else
{
- // Cold: multibyte expansion pushed bytes > 31 → become long String/StringAscii.
- // Shift bytes right by 1 to insert the 1-byte VarUInt slot.
- _buffer.AsSpan(savedPosShort + 1, bytesWrittenShort)
- .CopyTo(_buffer.AsSpan(savedPosShort + 2, bytesWrittenShort));
- _buffer[savedPosShort] = isAsciiShort ? BinaryTypeCode.StringAscii : BinaryTypeCode.String;
- _position = savedPosShort + 1;
- WriteVarUIntUnsafe((uint)bytesWrittenShort);
- _position += bytesWrittenShort;
+ var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
+ var asciiHeader = 1 + actualVarUIntSize;
+ var shift = reserveHeader - asciiHeader;
+ if (shift > 0)
+ _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
+ _buffer[savedPos] = BinaryTypeCode.StringAscii;
+ _position = savedPos + 1;
+ WriteVarUIntUnsafe((uint)bytesWritten);
+ _position += bytesWritten;
}
-
- return;
- }
-
- // Long path: charLength > 31 ⇒ bytesWritten > 31 ⇒ always String / StringAscii.
- // D-2 layout [marker:1][VarUInt slot:reserveVarUInt][bytes], encode at savedPos+1+reserveVarUInt.
- var maxBytes = charLength * 4;
- var reserveVarUInt = VarUIntSize((uint)maxBytes);
-
- EnsureCapacity(1 + reserveVarUInt + maxBytes);
-
- var savedPos = _position;
- var encodeStart = savedPos + 1 + reserveVarUInt;
- var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
- var isAscii = bytesWritten == charLength;
-
- _buffer[savedPos] = isAscii ? BinaryTypeCode.StringAscii : BinaryTypeCode.String;
- var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
- if (actualVarUIntSize < reserveVarUInt)
- {
- var shift = reserveVarUInt - actualVarUIntSize;
- _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
- }
- _position = savedPos + 1;
- WriteVarUIntUnsafe((uint)bytesWritten);
- _position += bytesWritten;
- }
-
- public void WriteFixStr(string value)
- {
- var length = value.Length;
- EnsureCapacity(1 + length);
- _buffer[_position++] = BinaryTypeCode.EncodeFixStr(length);
- Ascii.FromUtf16(value.AsSpan(), _buffer.AsSpan(_position, length), out _);
- _position += length;
- }
-
- public void WriteFixStrDirect(string value)
- {
- var length = value.Length;
- EnsureCapacity(1 + length);
-
- var destSpan = _buffer.AsSpan(_position + 1, length);
- var status = Ascii.FromUtf16(value.AsSpan(), destSpan, out var bytesWritten);
-
- if (status == OperationStatus.Done && bytesWritten == length)
- {
- _buffer[_position] = BinaryTypeCode.EncodeFixStr(length);
- _position += 1 + length;
}
else
{
- _buffer[_position++] = BinaryTypeCode.String;
- WriteStringUtf8Internal(value);
+ // Non-ASCII — post-encode tier choice from bytesWritten (smallest fitting tier wins)
+ int actualHeader;
+ byte tierMarker;
+ switch (bytesWritten)
+ {
+ case <= 255:
+ actualHeader = 3;
+ tierMarker = BinaryTypeCode.StringSmall;
+ break;
+ case <= 65535:
+ actualHeader = 5;
+ tierMarker = BinaryTypeCode.StringMedium;
+ break;
+ default:
+ actualHeader = 9;
+ tierMarker = BinaryTypeCode.StringBig;
+ break;
+ }
+
+ var shift = reserveHeader - actualHeader;
+ if (shift > 0)
+ _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
+
+ _buffer[savedPos] = tierMarker;
+ switch (actualHeader)
+ {
+ case 3:
+ {
+ // Pack charLen:8 | utf8Len:8 → single ushort store (vs 2 byte-stores)
+ var packed = (ushort)(charLength | (bytesWritten << 8));
+ Unsafe.WriteUnaligned(ref _buffer[savedPos + 1], packed);
+ break;
+ }
+ case 5:
+ {
+ // Pack charLen:16 | utf8Len:16 → single uint store, LE (vs 2 ushort-stores)
+ var packed = (uint)charLength | ((uint)bytesWritten << 16);
+ Unsafe.WriteUnaligned(ref _buffer[savedPos + 1], packed);
+ break;
+ }
+ default:
+ {
+ // Pack charLen:32 | utf8Len:32 → single ulong store, LE (vs 2 uint-stores)
+ var packed = (ulong)(uint)charLength | ((ulong)(uint)bytesWritten << 32);
+ Unsafe.WriteUnaligned(ref _buffer[savedPos + 1], packed);
+ break;
+ }
+ }
+ _position = savedPos + actualHeader + bytesWritten;
}
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void WriteFixStrBytes(ReadOnlySpan utf8Bytes)
+ ///
+ /// Writes the first-occurrence body of an interned string with H2Q6 tier-marker dispatch.
+ /// Used by the runtime/SGen string-intern write path; subsequent occurrences use cache-index ref.
+ ///
+ ///
+ /// Wire layout per tier:
+ ///
+ /// - StringInternFirstSmall: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] — utf8Len ≤ 255
+ /// - StringInternFirstMedium: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes] — utf8Len ≤ 65535
+ ///
+ ///
+ /// Big tier never engages — MaxStringInternLength is byte-typed in
+ /// AcBinarySerializerOptions (absolute max 255 char × 4 byte/char = 1020 byte fits in Medium).
+ ///
+ /// Tier prediction by charLength: ≤ 63 char → Small (worst-case 252 byte ≤ 255);
+ /// > 63 char → Medium. Body is left-shifted by 2 bytes only when a long mostly-ASCII interning
+ /// string drops back into Small tier (rare).
+ ///
+ public void WriteStringInternFirstWithDispatch(string value, int cacheMapIndex)
{
- var length = utf8Bytes.Length;
- EnsureCapacity(1 + length);
- _buffer[_position++] = BinaryTypeCode.EncodeFixStr(length);
- utf8Bytes.CopyTo(_buffer.AsSpan(_position, length));
- _position += length;
- }
-
- public void WritePreencodedPropertyName(ReadOnlySpan utf8Name)
- {
- WriteByte(BinaryTypeCode.String);
- WriteVarUInt((uint)utf8Name.Length);
- WriteBytes(utf8Name);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private void WriteStringUtf8Internal(string value)
- {
- var byteCount = Utf8NoBom.GetByteCount(value);
- WriteVarUInt((uint)byteCount);
- EnsureCapacity(byteCount);
- Utf8NoBom.GetBytes(value.AsSpan(), _buffer.AsSpan(_position, byteCount));
- _position += byteCount;
+ // Post-encode tier choice (wire-optimal): mostly-ASCII interning string in the 64+ char band
+ // emits Small tier (3 byte) when bytesWritten ≤ 255, instead of Medium (5 byte). Big tier
+ // never engages — MaxStringInternLength byte-typed (max 255 char × 4 byte = 1020 byte fits in Medium).
+ var charLength = value.Length;
+ var maxBytes = charLength * 4;
+ var cacheIdxSize = VarUIntSize((uint)cacheMapIndex);
+
+ // reserveHeader: charLength ≤ 63 → guaranteed Small (252 byte ≤ 255); else Medium-reserve.
+ var reserveHeader = charLength <= 63 ? 3 : 5;
+
+ EnsureCapacity(cacheIdxSize + reserveHeader + maxBytes);
+
+ var savedPos = _position;
+ var encodeStart = savedPos + cacheIdxSize + reserveHeader;
+ var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
+
+ // Choose tier from actual bytesWritten (smallest fits)
+ var actualHeader = bytesWritten <= 255 ? 3 : 5;
+ var tierMarker = actualHeader == 3 ? BinaryTypeCode.StringInternFirstSmall : BinaryTypeCode.StringInternFirstMedium;
+
+ var shift = reserveHeader - actualHeader;
+ if (shift > 0)
+ _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
+
+ // Write [marker][cacheIdx VarUInt][charLen + utf8Len header][bytes]
+ _buffer[savedPos] = tierMarker;
+ _position = savedPos + 1;
+ WriteVarUIntUnsafe((uint)cacheMapIndex);
+
+ if (actualHeader == 3)
+ {
+ // Pack charLen:8 | utf8Len:8 → single ushort store
+ var packed = (ushort)(charLength | (bytesWritten << 8));
+ Unsafe.WriteUnaligned(ref _buffer[_position], packed);
+ _position += 2;
+ }
+ else
+ {
+ // Pack charLen:16 | utf8Len:16 → single uint store, LE
+ var packed = (uint)charLength | ((uint)bytesWritten << 16);
+ Unsafe.WriteUnaligned(ref _buffer[_position], packed);
+ _position += 4;
+ }
+
+ _position += bytesWritten;
}
+ // ─────────────────────────────────────────────────────────────────
+ // V4N5 dead-code cleanup (2026-05-06): WriteFixStr, WriteFixStrDirect, WriteFixStrBytes,
+ // WritePreencodedPropertyName, and WriteStringUtf8Internal removed — these were unreachable
+ // (no core call site, no SourceGenerator template hit, no test, no reflection path).
+ // The hot-path string writes go through WriteStringWithDispatch (M3R7 + H2Q6 marker dispatch).
+ // ─────────────────────────────────────────────────────────────────
#endregion
diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs
index 4f54e76..ad3dfab 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs
@@ -1427,10 +1427,11 @@ public static partial class AcBinarySerializer
ValidateWritePlanString(in planEntry, value);
if (planEntry.IsFirst)
{
- // StringFirst: write interned string + cache index + data (Value holds the string)
- context.WriteByte(BinaryTypeCode.StringInternFirst);
- context.WriteVarUInt((uint)planEntry.CacheMapIndex);
- context.WriteStringUtf8(planEntry.Value ?? value);
+ // H2Q6 v3 wire format — StringFirst with tier-marker dispatch (Small/Medium):
+ // [StringInternFirstSmall][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] if utf8Len ≤ 255
+ // [StringInternFirstMedium][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes] if utf8Len ≤ 65535
+ // 1-pass decode: charLen carried in header, no CountUtf8Chars Pass 1.
+ context.WriteStringInternFirstWithDispatch(planEntry.Value ?? value, planEntry.CacheMapIndex);
}
else
{
diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
index fceaae1..d63d908 100644
--- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
+++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
@@ -63,11 +63,30 @@ internal static class BinaryTypeCode
public const byte Decimal = SlotCount + 25; // 89
public const byte Char = SlotCount + 26; // 90
+ // ============================================================================
+ // String types — H2Q6 layout (post 2026-05-06 marker reorg, wire format v3)
+ // ============================================================================
+ //
+ // Non-ASCII strings use fixed-width header tier markers (NO VarUInt utf8Len),
+ // enabling 1-pass decode (CountUtf8Chars Pass 1 eliminated).
+ //
+ // Tier dispatch (writer chooses smallest fitting tier based on utf8Len):
+ // StringSmall — utf8Len ≤ 255 — header: 1 marker + 1 charLen + 1 utf8Len = 3 byte
+ // StringMedium — utf8Len ≤ 65535 — header: 1 marker + 2 charLen + 2 utf8Len = 5 byte
+ // StringBig — utf8Len > 65535 — header: 1 marker + 4 charLen + 4 utf8Len = 9 byte
+ //
+ // Interning tiers (writer chooses based on utf8Len; Big never engages — MaxStringInternLength
+ // is byte-typed: max 255 char × max 4 byte/char = 1020 byte, which fits in Medium):
+ // StringInternFirstSmall — utf8Len ≤ 255 — header: 1 + cacheIdx-VarUInt + 1 + 1
+ // StringInternFirstMedium — utf8Len ≤ 65535 — header: 1 + cacheIdx-VarUInt + 2 + 2
+ //
+ // ASCII strings continue to use FixStrAscii (135..166) and StringAscii (167) — unchanged from M3R7.
+ //
// String types (SlotCount + 27..30)
- public const byte String = SlotCount + 27; // 91 — Inline UTF8 string (non-interned)
- public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence)
- public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker
- public const byte StringInternFirst = SlotCount + 30; // 94 — First occurrence of interned string
+ public const byte StringSmall = SlotCount + 27; // 91 — Non-ASCII tier 1: [marker:1][charLen:8][utf8Len:8][bytes], utf8Len ≤ 255
+ public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence) — UNCHANGED
+ public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker — UNCHANGED
+ public const byte StringMedium = SlotCount + 30; // 94 — Non-ASCII tier 2: [marker:1][charLen:16][utf8Len:16][bytes], utf8Len ≤ 65535
// Date/Time types (SlotCount + 31..34)
public const byte DateTime = SlotCount + 31; // 95
@@ -85,14 +104,29 @@ internal static class BinaryTypeCode
// Property skip marker (SlotCount + 38)
public const byte PropertySkip = SlotCount + 38; // 102 — Marks a property with default/null value (skipped during serialization)
- // FixStr range (UTF-8 short strings): 103..134 (32 values for byte lengths 0-31)
- // FixStr encoding: FixStrBase + byteLength
- // Saves 1 byte for short strings by combining type + length in single byte.
- // Content semantics: UTF-8 bytes (may be ASCII or multi-byte). The reader-side decoder dispatches
- // on content via the new ASCII variant range below — this range is the "universal short" / UTF-8 lane.
- public const byte FixStrBase = SlotCount + 39; // 103
- public const byte FixStrMax = FixStrBase + 31; // 134
- public const int FixStrMaxLength = 31;
+ // ============================================================================
+ // Reserved/Extended marker range (post 2026-05-06 H2Q6 marker reorg, v3 wire format)
+ // ============================================================================
+ //
+ // Range 103..134 (32 values) was previously the FixStr range (UTF-8 short, byteLength 0-31).
+ // FixStr (non-ASCII) markers REMOVED in H2Q6 — non-ASCII strings now use Small/Medium/Big tiers
+ // for 1-pass decode (eliminated CountUtf8Chars Pass 1).
+ //
+ // CURRENT ALLOCATION (3 of 32 used: 103..105):
+ public const byte StringBig = SlotCount + 39; // 103 — Non-ASCII tier 3: [marker:1][charLen:32][utf8Len:32][bytes], utf8Len > 65535
+ public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]
+ public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]
+
+ // RESERVED (29 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md V4N3 marker address space plan:
+ // 106..121 (16 values): ACCORE-BIN-T-L9Y3 — FixArray short-list count in marker (count 0-15)
+ // 122..126 (5 values): ACCORE-BIN-T-S5L8 — sentinel-length encoding tiers
+ // 127..130 (4 values): ACCORE-BIN-T-S2X9 — markerless schema lane opt-in
+ // 131..134 (4 values): general reserve
+ //
+ // Readers MUST throw "unknown marker" on any value in 106..134 until the corresponding feature
+ // activates within the v3 wire format envelope (no further wire-format break needed).
+ public const byte ReservedRangeMin = SlotCount + 42; // 106 — first reserved value (post-H2Q6 future-feature range)
+ public const byte ReservedRangeMax = SlotCount + 70; // 134 — last reserved value
// FixStrAscii range (ASCII-only short strings): 135..166 (32 values for byte lengths 0-31)
// FixStrAscii encoding: FixStrAsciiBase + byteLength
@@ -105,7 +139,7 @@ internal static class BinaryTypeCode
// Long ASCII string marker: 167
// Layout: [StringAscii] [VarUInt byteCount] [ASCII bytes]
- // Counterpart to String (91) which is the universal/UTF-8 long-string marker.
+ // Counterpart to StringSmall/Medium/Big — but ASCII content (charLen == byteCount, no UTF-8 decode).
// Reader fast-widens via byte→char without UTF-8 decode or IsValid scan.
public const byte StringAscii = SlotCount + 103; // 167
@@ -139,41 +173,32 @@ internal static class BinaryTypeCode
public static bool IsReference(byte typeCode) => typeCode is StringInterned or ObjectRef;
///
- /// Check if type code is any string-related marker — long inline (String / StringAscii),
- /// interning markers (StringInterned, StringInternFirst), empty marker, or any FixStr variant
- /// (UTF-8 or ASCII). Centralized predicate so adding/removing string markers requires updating
- /// only this method, not every dispatch site.
+ /// Check if type code is any string-related marker — H2Q6 non-ASCII tiers (Small/Medium/Big),
+ /// H2Q6 interning tiers (InternFirstSmall/Medium), interning ref (StringInterned), empty marker,
+ /// or any ASCII variant (FixStrAscii / StringAscii). Centralized predicate so adding/removing
+ /// string markers requires updating only this method, not every dispatch site.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsString(byte typeCode)
- => (typeCode is >= String and <= StringInternFirst) // 91..94: String, StringInterned, StringEmpty, StringInternFirst
- || (typeCode is >= FixStrBase and <= StringAscii); // 103..167: FixStr (UTF-8 short) + FixStrAscii (ASCII short) + StringAscii (ASCII long)
+ => (typeCode is >= StringSmall and <= StringMedium) // 91..94: StringSmall, StringInterned, StringEmpty, StringMedium
+ || (typeCode is >= StringBig and <= StringInternFirstMedium) // 103..105: StringBig, StringInternFirstSmall, StringInternFirstMedium
+ || (typeCode is >= FixStrAsciiBase and <= StringAscii); // 135..167: FixStrAscii + StringAscii
///
- /// Check if type code is a FixStr (UTF-8 short string with byte length encoded in type code).
- /// Does NOT match FixStrAscii — use for that, or
- /// for the full ASCII-string range.
+ /// Check if type code is one of the H2Q6 non-ASCII string tier markers (StringSmall / StringMedium / StringBig).
+ /// Excludes interning tier markers (use ) and ASCII markers (use ).
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsFixStr(byte typeCode) => typeCode is >= FixStrBase and <= FixStrMax;
+ public static bool IsStringTier(byte typeCode)
+ => typeCode == StringSmall || typeCode == StringMedium || typeCode == StringBig;
///
- /// Decode FixStr byte length from type code.
+ /// Check if type code is a H2Q6 interning first-occurrence tier marker (StringInternFirstSmall / Medium).
+ /// (Big tier never engages on the interning path — see BinaryTypeCode header comment for rationale.)
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static int DecodeFixStrLength(byte typeCode) => typeCode - FixStrBase;
-
- ///
- /// Encode FixStr type code for given byte length (0-31). Caller asserts UTF-8 content semantics.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static byte EncodeFixStr(int byteLength) => (byte)(FixStrBase + byteLength);
-
- ///
- /// Check if byte length can be encoded as FixStr (UTF-8 short string, 0..31 bytes).
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool CanEncodeAsFixStr(int byteLength) => byteLength is >= 0 and <= 31;
+ public static bool IsStringInternFirst(byte typeCode)
+ => typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium;
///
/// Check if type code is any ASCII string marker — FixStrAscii (short) or StringAscii (long).
diff --git a/AyCode.Core/docs/BINARY/BINARY_FEATURES.md b/AyCode.Core/docs/BINARY/BINARY_FEATURES.md
index 526ba83..e50f01f 100644
--- a/AyCode.Core/docs/BINARY/BINARY_FEATURES.md
+++ b/AyCode.Core/docs/BINARY/BINARY_FEATURES.md
@@ -2,6 +2,18 @@
Advanced serialization features on top of the wire format. Wire format: `BINARY_FORMAT.md` | Options/presets: `BINARY_OPTIONS.md` | Internal architecture: `BINARY_IMPLEMENTATION.md` | Source generation: `BINARY_SGEN.md`.
+## Optimization Policy (LLM)
+
+AcBinary is a **general-purpose serializer**, not a benchmark-only implementation.
+
+When proposing or implementing performance work, optimize for broad real-world workloads and maintain balanced trade-offs across:
+
+- mixed payload shapes (small/medium/large/deep)
+- language distributions (ASCII-heavy, mixed Latin, multi-byte UTF-8 such as CJK)
+- throughput, latency, allocation, and wire size
+
+Do not accept a change solely because one benchmark cell improves. Any optimization should be validated across multiple representative scenarios and must avoid benchmark-specific overfitting.
+
## Compact Encoding Selection
The serializer applies compact encodings automatically:
diff --git a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
index 776c264..31c4430 100644
--- a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
+++ b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
@@ -240,6 +240,48 @@ The serializer writes a 1-byte `PropertySkip` marker for any property whose valu
- **Option flag**: `AcBinarySerializerOptions.OmitDefaults` (default `true` for back-compat); `false` writes every property's full value regardless. Lets consumers opt out for fragile-class-evolution scenarios.
- **Hybrid**: ship doc + flag, default `true`.
+## Wire Format / Cross-platform
+
+### ACCORE-BIN-I-E4N9: Wire format is host-native-endian, NOT canonical little-endian
+
+**Status:** Open
+**Affects:** Serializer + Deserializer — all primitive readers/writers (`Int16/UInt16/Int32/UInt32/Int64/UInt64/Float32/Float64/Decimal/Guid` etc.), H2Q6 string tier headers (`StringSmall/Medium/Big` charLen+utf8Len pack), `WireMode.Fast` UTF-16 raw memcopy, every `Unsafe.WriteUnaligned` / `Unsafe.ReadUnaligned` call site.
+
+The serializer/deserializer write/read multi-byte fields via `Unsafe.WriteUnaligned(ref byte, T)` / `Unsafe.ReadUnaligned(ref byte)`, which use **host-native endianness**. On little-endian hosts (x86, x64, ARM64, WASM) this happens to match the wire-format-canonical little-endian; on big-endian hosts (PowerPC big-endian, MIPS big-endian, IBM-Z / S390x, SPARC big-endian) the bytes are reversed. A wire produced on a big-endian host cannot be read by a little-endian reader (and vice versa).
+
+**Impact:** Cross-platform serialization between hosts of different endianness is currently **silently broken** — payloads decode with byte-reversed integers, floats, and headers. Same-endianness round-trips work correctly. Affects H2Q6 string tier headers (charLen / utf8Len byte-swap), all multi-byte primitives, and `WireMode.Fast` UTF-16 raw payloads.
+
+**Currently supported NuGet release platforms — all little-endian**:
+- Cloud server (x64): Intel, AMD
+- Desktop (x64): Intel, AMD
+- Apple Silicon (ARM64): macOS, iOS, Mac Catalyst
+- Blazor WASM (the WebAssembly core spec mandates little-endian memory byte order)
+
+**Currently NOT supported (would silently emit/read wrong-endian wire on these hosts)**:
+- PowerPC big-endian (legacy IBM Power)
+- MIPS big-endian (legacy embedded)
+- IBM-Z / S390x (mainframe)
+- SPARC big-endian (legacy Sun/Oracle)
+
+**Possible fix directions:**
+
+1. **Document NuGet contract as little-endian-only** — zero implementation cost, current code matches. NuGet readme + `AcBinarySerializer` XML doc-comment explicitly state the LE platform requirement; big-endian is "undefined behavior, wire silently incompatible". Pragmatic for the modern target platforms; matches the current de-facto reality.
+
+2. **Defensive endian-guard at every `Unsafe.*Unaligned` call site**:
+ ```csharp
+ if (BitConverter.IsLittleEndian)
+ Unsafe.WriteUnaligned(ref _buffer[pos], value);
+ else
+ BinaryPrimitives.WriteUInt16LittleEndian(_buffer.AsSpan(pos, 2), value);
+ ```
+ `BitConverter.IsLittleEndian` is `[Intrinsic]` and constant-folds at JIT/AOT — **zero cost on LE hosts** (the else branch is dead-code-eliminated). On big-endian hosts the `BinaryPrimitives.*LittleEndian` family does the byte-swap via `BinaryPrimitives.ReverseEndianness`. Adds conditional code at all primitive r/w sites + H2Q6 headers (~10-15 sites).
+
+3. **Replace all `Unsafe.WriteUnaligned`/`ReadUnaligned` with `BinaryPrimitives.*LittleEndian`** unconditionally — these wrap `Unsafe.WriteUnaligned` + (conditional, JIT-folded on LE) `ReverseEndianness`. Slightly higher Span-creation overhead vs. direct `ref byte` writes; marginal perf cost on LE hosts. Over-engineering vs direction 2.
+
+**Pragmatic recommendation:** Direction 1 (document) is the NuGet release minimum — explicitly state the LE constraint in the readme and XML doc-comments. Direction 2 (full endian-guard) is the comprehensive fix and should be a separate sprint covering ALL primitive r/w sites, H2Q6 string headers, `WireMode.Fast` UTF-16 path, and Float32/Float64/Decimal/Guid edge cases. A full BE-readiness audit is a non-trivial undertaking — only justified when there is concrete BE platform demand from a consumer.
+
+**Related TODO:** worth tracking a follow-up TODO entry if direction 2 is chosen — covers writer + reader + SGen template + new round-trip tests on a BE-emulated host.
+
## Cross-cutting (canonical home: `../XCUT/`)
### ACCORE-XCUT-I-X8Q1: JSON-in-Binary request parameters — cross-ref
diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md
index f32a8f2..6c0a621 100644
--- a/AyCode.Core/docs/BINARY/BINARY_TODO.md
+++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md
@@ -2,6 +2,12 @@
This page covers planned work for the **binary serializer core** (format, SGen, options, deserialization context, buffer writer). Work specific to the **streaming I/O layer** (`AsyncPipeReaderInput` + `AsyncPipeWriterOutput`, multi-message wire framing, sliding-window buffer, producer-consumer synchronization) is tracked separately in [`BINARY_ASYNCPIPE_TODO.md`](BINARY_ASYNCPIPE_TODO.md).
+## Optimization policy reminder (LLM)
+
+AcBinary is a universal serializer. Performance TODO execution must avoid benchmark-only overfitting.
+
+For each optimization item, validate gains on multiple representative workloads (ASCII-heavy, mixed Latin, multi-byte UTF-8; small/medium/large/deep payloads) and evaluate throughput + latency + allocation + wire-size together.
+
## Priority legend
- **P0** blocker · **P1** important · **P2** nice-to-have · **P3** idea
@@ -851,12 +857,66 @@ Writer picks the smallest fitting tier; reader dispatches by marker and reads fi
- Performance evaluation target is non-ASCII-heavy data (ASCII-shortcuts intentionally not primary)
- Wire-format backward compatibility is not required for this development phase
+### Marker layout decision (2026-05-06)
+
+After analysis on the new "all UTF-8 Magyar" benchmark baseline (`2026-05-06_13-10-30.LLM` — Compact +5-25% slower than MemPack on every cell):
+
+**Confirmed**: the previous benchmark's Compact-vs-MemPack advantage was an artifact of ASCII property names hitting the `FixStrAscii` / Latin1-widen fast path; once string property values are also UTF-8 Magyar, the actual hot path (`EncodeUtf8SinglePass` + two-pass `CountUtf8Chars` + `DecodeUtf8SinglePass`) becomes the bottleneck.
+
+**Marker scope decision** — clean split between ASCII fast path and non-ASCII tier dispatch:
+
+**MEGMARAD (retained, unchanged)**:
+- `FixStrAscii` (≤31 byte ASCII) — compact 1-byte header + Latin1 widen, zero UTF-8 decode pipeline
+- `StringAscii` (>31 byte ASCII) — long ASCII fast path, Latin1 widen
+- `StringInterned` — 2nd+ occurrence of interned string (no body, just cache index — not affected by 2-pass problem)
+- `StringEmpty`, `Null` — sentinel markers
+
+**MEGSZŰNIK (replaced by H2Q6 tiers)**:
+- `FixStr` (32 marker values 103-134 — non-ASCII short) → replaced by `StringSmall`
+- `String` (1 marker value 91 — non-ASCII long with VarUInt utf8Len) → replaced by `StringSmall` / `StringMedium` / `StringBig`
+- `StringInternFirst` (1 marker value 94 — VarUInt utf8Len interning) → replaced by `StringInternFirstSmall` / `StringInternFirstMedium`
+
+**ÚJ (new) markers** (5 total):
+- `StringSmall` — non-ASCII, `[marker:1][charLen:8][utf8Len:8][bytes]`, utf8Len ≤ 255
+- `StringMedium` — non-ASCII, `[marker:1][charLen:16][utf8Len:16][bytes]`, utf8Len ≤ 65535
+- `StringBig` — non-ASCII, `[marker:1][charLen:32][utf8Len:32][bytes]`, utf8Len > 65535
+- `StringInternFirstSmall` — `[marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]`
+- `StringInternFirstMedium` — `[marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]`
+
+**Trade-off justification**:
+- Wire cost on short non-ASCII strings: +2 byte/string header (3 vs 1) → ~0.07-0.36% wire growth on Repeated cell (10 short Magyar string × 2 byte / 28 KB)
+- CPU saving: `CountUtf8Chars` Pass 1 eliminated on every non-ASCII string decode → directly attacks the +25% Deser baseline gap
+- The 2-byte hybrid `FixStr` (non-ASCII) variant (1 byte marker + 1 byte charLen) was considered but **rejected**: marginal wire saving (-1 byte vs StringSmall) does not justify the +1 marker complexity given the tiny absolute wire impact on the Repeated cell. Cleaner to have ASCII-vs-non-ASCII at the marker level (FixStrAscii vs StringSmall/Medium/Big).
+
+**Interning tier sizing rationale**:
+- `MaxStringInternLength` is `byte`-typed (`AcBinarySerializerOptions.cs:125`, default 64, absolute max 255 char)
+- Worst-case: 255 char × 4 byte/char (emoji-only) = 1020 byte → fits in Medium tier (utf8Len ≤ 65535)
+- Realistic Magyar/CJK: 64 char × 2-3 byte = 128-192 byte → Small tier
+- **Big tier never engages on the interning path** — only Small + Medium needed (+2 markers, not +3)
+
+### Marker address space reservation (post-H2Q6)
+
+The marker reorg frees **34 marker values** (32 `FixStr` non-ASCII + `String` + `StringInternFirst`). After allocating 5 for H2Q6, **29 values remain free**. Strategic reservation plan to prevent ad-hoc consumption and minimize future wire-format breaks:
+
+| Reserved range | Count | Future feature | Status |
+|---|---|---|---|
+| `StringSmall` / `StringMedium` / `StringBig` | 3 | H2Q6 Compact tiers | **active (this entry)** |
+| `StringInternFirstSmall` / `StringInternFirstMedium` | 2 | H2Q6 interning tiers | **active (this entry)** |
+| `FixArrayBase..FixArrayMax` | 16 | `ACCORE-BIN-T-L9Y3` (FixArray short-list count in marker) | reserved, future |
+| Sentinel-length string tier markers | ~5 | `ACCORE-BIN-T-S5L8` (sentinel-length encoding) | reserved, future |
+| Markerless schema lane | ~4 | `ACCORE-BIN-T-S2X9` (markerless schema lane opt-in) | reserved, future |
+| General reserve | 4-8 | unallocated | reserve |
+
+**Wire-format version bump**: v2 → v3 at H2Q6 landing. The reserved-but-unimplemented marker values are documented but not yet decoded — readers throw `unknown marker` if wire contains them. Future activation of `FixArray` / sentinel-length / markerless schema lane within the **same v3 wire format** is non-breaking for already-deployed v3 consumers (they reject unknown markers cleanly; producers opt in to emit them).
+
### Acceptance
-- New string markers implemented for Small/Medium/Big tiers
+- New string markers implemented for Small/Medium/Big tiers + InternFirstSmall/InternFirstMedium tiers
- Deserialize path for these markers performs single-pass decode without `CountUtf8Chars`
-- Existing round-trip tests pass, plus new boundary tests for tier transitions
-- Benchmark report includes before/after for Compact mode on non-ASCII dataset (Ser/Deser/RT + Size)
+- 29 freed marker values strategically reserved per the address-space reservation table; documented in `BinaryTypeCode.cs` with `// Reserved for ACCORE-BIN-T-XXXX (future)` comments
+- Wire-format version bump v2 → v3 documented in `BINARY_FORMAT.md`
+- Existing round-trip tests pass, plus new boundary tests for tier transitions (utf8Len = 254/255/256/65534/65535/65536) and interning tier transitions
+- Benchmark report includes before/after for Compact mode on non-ASCII dataset (Ser/Deser/RT + Size) vs the `2026-05-06_13-10-30.LLM` baseline
## ACCORE-BIN-T-S5L8: Sentinel-length encoding for strings (wire-size optimization, both modes)
**Priority:** P3 · **Type:** Wire-format optimization · **Related:** `AcBinarySerializer.WriteString`, `AcBinaryDeserializer.ReadValue` string dispatch
@@ -1211,3 +1271,66 @@ The pair forms a closed dead loop (`WriteFixStrDirect` → `WriteStringUtf8Inter
- Pre-NuGet release housekeeping pass
- Or: any future refactor that touches `BinarySerializationContext` string-write methods (then decide rather than leave the dead pair behind)
+## ACCORE-BIN-T-L9Y3: FixArray marker tier — short-list count encoded in marker
+**Priority:** P3 · **Type:** Wire-format optimization · **Status:** Open · **Related:** `Array` (66) marker, `VarUInt itemCount`, `ACCORE-BIN-T-H2Q6` marker reservation
+
+Analogous to `FixStr` — a short list count (0-15) is encoded in the marker, eliminating the `VarUInt itemCount` byte for typical DTO collections (Tags, Categories, Items, Properties, Variations, etc. — any list whose size statistically lands in the 0-15 range).
+
+### Wire format
+
+**Current**: `[Array marker:1][VarUInt itemCount][items]` — header 2-6 bytes
+**FixArray**: `[FixArrayBase + N marker:1][items]` — header 1 byte (N = item count, 0-15)
+
+Writer dispatch (in `WriteArray` / scan-pass list-writer equivalents):
+- `itemCount ≤ 15` → `FixArrayBase + itemCount` marker (1 byte total header)
+- `itemCount > 15` → existing `Array` marker + `VarUInt` count (2-6 bytes total header)
+
+### Marker reservation
+
+**16 marker values** pre-reserved in the post-H2Q6 marker layout (see `ACCORE-BIN-T-H2Q6` "Marker address space reservation" table). The reservation guarantees that activating FixArray does NOT require another wire-format-version bump after H2Q6 lands at v3 — producers opt in to emit FixArray markers within the same v3 envelope, consumers extend their dispatch to decode them.
+
+Activation steps when implementing:
+
+1. Allocate `FixArrayBase` (16 contiguous values from the H2Q6-freed range)
+2. Add `IsFixArray(byte marker)`, `DecodeFixArrayCount(byte marker)`, `EncodeFixArray(int count)` helpers in `BinaryTypeCode.cs`
+3. Writer: branch in `WriteArray` and equivalent ScanPass list-writers, emit FixArray for `count ≤ 15`
+4. Reader: extend marker dispatch in `ReadValue` / `SkipValue` / `ReadArray`
+5. SGen: regenerate readers/writers with `IsFixArray` dispatch in the array-typed property paths
+6. Round-trip tests for boundary `itemCount` values: 0, 1, 14, 15, 16, 17 (last tier transition)
+
+### Why P3
+
+- **Wire saving**: -1 byte per short list. Realistic per-cell estimates:
+ - **Repeated** (10 OrderItem, ~50 lists overall): ~50 bytes / 28 KB = **~0.18%** wire reduction (marginal)
+ - **Large** (5×5×5×10 nested, ~6000 lists): ~6 KB / 118 KB = **~5%** wire reduction ✓
+ - **Medium**: ~500 bytes / 21 KB = **~2.4%** wire reduction
+ - **Deep** (2×4×4×8 nested): similar to Medium, ~2-3% wire reduction
+- **CPU saving**: marginal (~1-2 ns/list — `VarUInt` short-loop replaced by 1-byte marker decode). NOT a hot-path mover for the current Repeated-cell baseline gap.
+- **Release-narrative value**: complements the post-H2Q6 wire-size advantage, particularly on deep-nested structures (Large benchmark). Sharpens the "smallest AND fastest" claim once the CPU gap closes via V4N2 Phase 3 + V4N4.
+
+### Why not P2/P1 — and why not now
+
+- The current `2026-05-06_13-10-30.LLM` baseline's primary problem is **CPU** (Compact +5-25% slower than MemPack on every cell), NOT wire size. FixArray addresses wire size, marginal CPU.
+- Activation **after** H2Q6 + V4N2 Phase 3 + V4N4 is the natural sequence: CPU gap closes first, then wire-saver features sharpen the release narrative.
+- The marker reservation lets us defer activation indefinitely without losing the address-space slot.
+
+### Acceptance
+
+- 16 marker values allocated in `BinaryTypeCode.cs` (`FixArrayBase..FixArrayMax`) with `IsFixArray`, `DecodeFixArrayCount`, `EncodeFixArray` helpers
+- Writer + reader dispatch with boundary tests (count = 0, 1, 14, 15, 16, 17)
+- SGen-regenerated readers/writers correctly dispatch via `IsFixArray` for array-typed properties
+- Round-trip tests pass, no Ser/Deser regression vs current `Array` path
+- Wire-size benchmark: wire-size reduction of at least 2% on Medium, at least 3% on Deep, at least 4% on Large, with no regression on any cell
+- Documentation update in `BINARY_FORMAT.md` (new marker range + dispatch rules)
+
+### Trigger
+
+- After `ACCORE-BIN-T-H2Q6` lands (marker reservation must be active first)
+- After CPU gap closes (V4N2 Phase 3 + V4N4) — wire-saver value clearer once "fast" is settled
+- Pre-NuGet release housekeeping for the wire-size narrative (along with `S5L8` / `S2X9` if their scope justifies)
+
+### Future extension (not part of this entry)
+
+- **`FixDict` analog** — same pattern for `Dictionary` marker (67) with `kvCount` 0-15. Worth considering only if a benchmark workload demonstrates dictionary-heavy structures; the current bench data (Order DTOs) does not. **Defer until evidence.**
+- **`FixArray 0-31`** — wider count range (32 markers). Marginal additional saving (lists with 16-31 elements are rare); would consume nearly all freed marker space, leaving no slack for `S5L8`/`S2X9`. **Reject unless evidence warrants.**
+