diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs
index e68326c..8e95d00 100644
--- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs
+++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs
@@ -207,21 +207,20 @@ public partial class AcBinarySourceGenerator
///
/// Emits inline string read from type code. Handles all H2Q6 (v3 wire format) string markers:
- /// FixStrAscii (ASCII short, 135-166), StringAscii (ASCII long, 167),
- /// StringSmall/Medium/Big (non-ASCII tiers, 91/94/103),
+ /// FixStr (short-form universal, 135-166), String (long-form universal, 167),
+ /// StringUtf16 (FastWire marker, 91),
/// StringInternFirstSmall/Medium (interning tiers, 104/105),
/// StringInterned (cache ref, 92), StringEmpty (93), Null.
///
- /// FixStrAscii is checked first as the hot path for short ASCII property names; non-ASCII
+ /// FixStr is checked first as the hot path for short strings; non-ASCII
/// tier markers carry both charLen and utf8Len in fixed-width headers (1-pass decode).
///
private static void EmitReadString(StringBuilder sb, string a, string tc, string i, bool enableInternString)
{
- // FixStrAscii is the hot path — most short strings (property names) are ASCII.
- sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStrAscii({tc}))");
+ // FixStr is the hot path — short-form universal marker with charLength in the marker.
+ sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStr({tc}))");
sb.AppendLine($"{i}{{");
- sb.AppendLine($"{i} var falen = BinaryTypeCode.DecodeFixStrAsciiLength({tc});");
- sb.AppendLine($"{i} {a} = falen == 0 ? string.Empty : context.ReadAsciiBytesAsString(falen);");
+ sb.AppendLine($"{i} {a} = context.ReadUniversalFixStr({tc});");
sb.AppendLine($"{i}}}");
// Switch gives O(1) dispatch via JIT jump table for the remaining markers.
sb.AppendLine($"{i}else switch ({tc})");
@@ -235,24 +234,16 @@ public partial class AcBinarySourceGenerator
sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());");
sb.AppendLine($"{i} break;");
}
- // H2Q6 string-tier markers + StringAscii. Wire-decode body is shared with the runtime path
- // (TypeReaderTable + cross-type populate) — see context.ReadStringSmall/Medium/Big, ReadPlainStringAscii.
+ // StringUtf16 marker + String. Wire-decode body is shared with the runtime path
+ // (TypeReaderTable + cross-type populate) — see context.ReadStringUtf16Marker()
+ // and ReadUniversalLongString.
// These markers are feature-independent: writer emits them on any string property regardless of
// intern setting (intern is opt-in per-property via [AcStringIntern] + InternBit).
- sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:");
- // FastWire mode reuses the StringSmall (=91) marker but with a different body — emit
- // inline ternary so call sites that can run in either mode (Dictionary key/value, runtime
- // cross-type populate) dispatch without an extra method-frame.
- sb.AppendLine($"{i} {a} = context.FastWire ? context.ReadStringSmallFastWire() : context.ReadStringSmallCompact();");
+ sb.AppendLine($"{i} case BinaryTypeCode.StringUtf16:");
+ sb.AppendLine($"{i} {a} = context.ReadStringUtf16Marker();");
sb.AppendLine($"{i} break;");
- sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:");
- sb.AppendLine($"{i} {a} = context.ReadStringMedium();");
- sb.AppendLine($"{i} break;");
- sb.AppendLine($"{i} case BinaryTypeCode.StringBig:");
- sb.AppendLine($"{i} {a} = context.ReadStringBig();");
- sb.AppendLine($"{i} break;");
- sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:");
- sb.AppendLine($"{i} {a} = context.ReadPlainStringAscii();");
+ sb.AppendLine($"{i} case BinaryTypeCode.String:");
+ sb.AppendLine($"{i} {a} = context.ReadUniversalLongString();");
sb.AppendLine($"{i} break;");
// Interning first-occurrence cases — see comment above.
if (enableInternString)
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
index c0d47f3..c63140f 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs
@@ -625,19 +625,12 @@ public static partial class AcBinaryDeserializer
}
///
- /// H2Q6 StringSmall reader — Compact-mode-only body: wire [charLen:8][utf8Len:8][UTF-8 bytes]
- /// after the marker has been consumed. 1-pass decode (no CountUtf8Chars).
- /// Call this directly when the call site has ALREADY established FastWire == false
- /// (e.g. hot path, where the SGen-emit caller short-circuits
- /// FastWire on a separate ag via ReadStringUtf16Markerless). Skips the redundant
- /// FastWire branch — call sites that may run in either mode inline
- /// FastWire ? ReadStringSmallFastWire() : ReadStringSmallCompact() ternary instead of a
- /// shared dispatcher (no method-frame overhead).
+ /// Legacy compact StringSmall reader retained only for backward-compat payloads.
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal string ReadStringSmallCompact()
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ internal string ReadStringSmallCompactLegacy()
{
- System.Diagnostics.Debug.Assert(!FastWire, "ReadStringSmallCompact called with FastWire=true — call sites that may run in FastWire mode must inline the `FastWire ? ReadStringSmallFastWire() : ReadStringSmallCompact()` ternary.");
+ System.Diagnostics.Debug.Assert(!FastWire, "ReadStringSmallCompactLegacy called with FastWire=true.");
// H2Q6 StringSmall body: [charLen:8][utf8Len:8][UTF-8 bytes]
var header = ReadTwoBytesUnsafe();
@@ -647,84 +640,28 @@ public static partial class AcBinaryDeserializer
}
///
- /// H2Q6 StringSmall reader — FastWire-mode-only body: wire [charLen:int32 LE][UTF-16 raw bytes]
+ /// StringUtf16 reader — FastWire-mode-only body: wire [charLen:int32 LE][UTF-16 raw bytes]
/// after the (mode-shared) marker has been consumed. Engaged only on the runtime
/// path when FastWire==true and the declared target
/// type is NOT string (the string-typed FastWire short-circuit in
/// bypasses the marker entirely via ReadStringUtf16Markerless).
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal string ReadStringSmallFastWire()
+ internal string ReadStringUtf16FastWire()
{
- // Mode-shared marker (=91) FastWire payload — fix-int charLen (matches MemPack WriteUtf16 shape).
+ // StringUtf16 (=91) payload — fix-int charLen (matches MemPack WriteUtf16 shape).
var charLenF = ReadInt32Unsafe();
return ReadStringUtf16(charLenF);
}
- // No combined ReadStringSmall() dispatcher — every call site already has the FastWire flag
- // in scope (compile-time invariant on the SGen-emit hot path; runtime field check on the
- // dispatcher-callers). Call sites inline the ternary `FastWire ? ReadStringSmallFastWire()
- // : ReadStringSmallCompact()` when they need mode-awareness, saving a method-call frame.
-
///
- /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes] after the marker
- /// has been consumed. 1-pass decode. Header read in a single uint load (vs 2 ushort loads). Shared
- /// by runtime dispatch + SGen-emit.
- ///
- [MethodImpl(MethodImplOptions.AggressiveOptimization)]
- internal string ReadStringMedium()
- {
- var packed = ReadUInt32Unsafe();
- var charLength = (ushort)packed;
- var byteLength = (ushort)(packed >> 16);
- return ReadStringUtf8WithCharLen(charLength, byteLength);
- }
-
- ///
- /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes] after the marker
- /// has been consumed. 1-pass decode. Header read in a single ulong load (vs 2 uint loads). Includes
- /// a corrupted-wire guard for negative casts from uint values > Int32.MaxValue. Shared by
- /// runtime dispatch + SGen-emit.
- ///
- [MethodImpl(MethodImplOptions.AggressiveOptimization)]
- internal string ReadStringBig()
- {
- var packed = ReadUInt64Unsafe();
- var charLength = (int)(uint)packed;
- var byteLength = (int)(uint)(packed >> 32);
-
-#if DEBUG
- // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values
- // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int).
- // Predict-friendly: always false on a valid wire.
- if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength);
-#endif
-
- return ReadStringUtf8WithCharLen(charLength, byteLength);
- }
-
- ///
- /// Throw helper for the corrupted-wire guard in . NoInlining
- /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body.
- ///
- [MethodImpl(MethodImplOptions.NoInlining)]
- private void ThrowCorruptedBigWire(int charLength, int byteLength) =>
- throw new AcBinaryDeserializationException(
- $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " +
- $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.",
- -1);
-
- ///
- /// Reads a long ASCII string payload (after the StringAscii marker has been consumed).
- /// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode. Shared
- /// by runtime dispatch + SGen-emit.
+ /// Unified reader for marker (91).
+ /// FastWire path reads UTF-16 payload; non-FastWire path keeps legacy compact payload compatibility.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal string ReadPlainStringAscii()
+ internal string ReadStringUtf16Marker()
{
- var length = (int)ReadVarUInt();
- if (length == 0) return string.Empty;
- return ReadAsciiBytesAsString(length);
+ return FastWire ? ReadStringUtf16FastWire() : ReadStringSmallCompactLegacy();
}
///
@@ -753,6 +690,67 @@ public static partial class AcBinaryDeserializer
return str;
}
+ /// <summary>
+ /// Reads a short-form (FixStr) string given its already-read marker byte: the header is decoded
+ /// by ReadUniversalFixStrHeader and the payload is materialized by ReadStringByUnsignedExcess.
+ /// </summary>
+ /// <param name="marker">The FixStr marker byte; the char length is decoded from it.</param>
+ /// <returns>The decoded string.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal string ReadUniversalFixStr(byte marker)
+ {
+     int chars;
+     uint extraBytes;
+     ReadUniversalFixStrHeader(marker, out chars, out extraBytes);
+     return ReadStringByUnsignedExcess(chars, extraBytes);
+ }
+
+ /// <summary>
+ /// Reads a long-form string body: the header is decoded by ReadUniversalLongStringHeader and the
+ /// payload is materialized by ReadStringByUnsignedExcess.
+ /// </summary>
+ /// <returns>The decoded string.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal string ReadUniversalLongString()
+ {
+     int chars;
+     uint extraBytes;
+     ReadUniversalLongStringHeader(out chars, out extraBytes);
+     return ReadStringByUnsignedExcess(chars, extraBytes);
+ }
+
+ /// <summary>
+ /// Decodes the short-form (FixStr) string header: the char length is taken from the marker and
+ /// the excess is read as the single byte that follows.
+ /// </summary>
+ /// <param name="marker">The FixStr marker byte passed to BinaryTypeCode.DecodeFixStrLength.</param>
+ /// <param name="charLength">Receives the char length decoded from the marker.</param>
+ /// <param name="excess">Receives the excess byte; ReadStringByUnsignedExcess adds it to charLength to obtain the payload byte length.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal void ReadUniversalFixStrHeader(byte marker, out int charLength, out uint excess)
+ {
+ charLength = BinaryTypeCode.DecodeFixStrLength(marker);
+ // The excess byte is read unconditionally, whatever the decoded char length is.
+ excess = ReadByte();
+ }
+
+ /// <summary>
+ /// Decodes the long-form string header: a VarUInt char length followed by an excess slot whose
+ /// width is chosen by BinaryTypeCode.GetUniversalStringExcessSlotSize for that char length.
+ /// </summary>
+ /// <param name="charLength">Receives the char length read as a VarUInt.</param>
+ /// <param name="excess">Receives the excess value read from the slot.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal void ReadUniversalLongStringHeader(out int charLength, out uint excess)
+ {
+     charLength = (int)ReadVarUInt();
+
+     // The slot width depends on charLength only, so no width tag is read from the input.
+     excess = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength) switch
+     {
+         1 => ReadByte(),
+         2 => ReadUInt16Unsafe(),
+         _ => ReadVarUInt32Unchecked(),
+     };
+ }
+
+ /// <summary>
+ /// Materializes a string from a decoded universal header. The payload byte length is
+ /// charLength + excess: an excess of 0 is read through ReadAsciiBytesAsString, any other value
+ /// through ReadStringUtf8WithCharLen.
+ /// </summary>
+ /// <param name="charLength">Char length taken from the header.</param>
+ /// <param name="excess">Number of payload bytes beyond charLength.</param>
+ /// <returns>The decoded string; string.Empty when both values are 0.</returns>
+ /// <exception cref="AcBinaryDeserializationException">
+ /// charLength is 0 with a non-zero excess, or charLength + excess exceeds int.MaxValue.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private string ReadStringByUnsignedExcess(int charLength, uint excess)
+ {
+     if (charLength == 0)
+     {
+         if (excess == 0) return string.Empty;
+         throw new AcBinaryDeserializationException($"Invalid string header: charLength=0 requires excess=0, got {excess}.", _position);
+     }
+
+     if (excess == 0)
+     {
+         return ReadAsciiBytesAsString(charLength);
+     }
+
+     // Checked in every build configuration (previously DEBUG-only): callers decode both values
+     // from the payload header, and an unchecked charLength + (int)excess would wrap into a
+     // bogus byteLength on a corrupted payload.
+     if (excess > (uint)(int.MaxValue - charLength))
+         throw new AcBinaryDeserializationException($"Invalid string header: byteLength overflow (charLength={charLength}, excess={excess}).", _position);
+
+     var byteLength = charLength + (int)excess;
+     return ReadStringUtf8WithCharLen(charLength, byteLength);
+ }
+
+ /// <summary>
+ /// Reads the widest excess slot by delegating to ReadUInt32Unsafe.
+ /// NOTE(review): despite "VarUInt" in the name, no variable-length decode happens here —
+ /// presumably a fixed-width 32-bit read; confirm against the writer before renaming.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private uint ReadVarUInt32Unchecked() => ReadUInt32Unsafe();
+
///
/// H2Q6 StringInternFirstMedium reader: wire [cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes].
/// Registers the decoded string in the intern cache and returns it. (Big tier never engages on the
@@ -797,49 +795,17 @@ public static partial class AcBinaryDeserializer
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool TryReadStringProperty(byte tc, out string? value)
{
- // Hot-path invariant: SGen-emit + property-marker callers MUST short-circuit FastWire on a
- // separate ag (markerless decode) — so by the time the marker byte reaches this switch,
- // FastWire is guaranteed false (the StringSmall body is the Compact-mode decode).
+ // Hot-path invariant: property marker paths remain mode-aware for legacy payload compatibility.
value = null;
int charLength;
- int byteLength;
-
+ uint excess;
switch (tc)
{
- case BinaryTypeCode.StringSmall:
- {
- // [charLen:8][utf8Len:8]
- var header = ReadTwoBytesUnsafe();
- charLength = (byte)header;
- byteLength = (byte)(header >> 8);
- break;
- }
- case BinaryTypeCode.StringMedium:
- {
- // [charLen:16 LE][utf8Len:16 LE] — single uint load
- var packed = ReadUInt32Unsafe();
- charLength = (ushort)packed;
- byteLength = (ushort)(packed >> 16);
- break;
- }
- case BinaryTypeCode.StringBig:
- {
- // [charLen:32 LE][utf8Len:32 LE] — single ulong load + corrupted-wire guard
- var packed = ReadUInt64Unsafe();
- charLength = (int)(uint)packed;
- byteLength = (int)(uint)(packed >> 32);
-
-#if DEBUG
- if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength);
-#endif
-
- break;
- }
- case BinaryTypeCode.StringAscii:
- // Long ASCII: [VarUInt byteLen]. byteLength = -1 sentinel → routes to the ASCII tail.
- charLength = (int)ReadVarUInt();
- byteLength = -1;
-
+ case BinaryTypeCode.StringUtf16:
+ value = ReadStringUtf16Marker();
+ return true;
+ case BinaryTypeCode.String:
+ ReadUniversalLongStringHeader(out charLength, out excess);
break;
case BinaryTypeCode.Null:
return true;
@@ -848,11 +814,10 @@ public static partial class AcBinaryDeserializer
return true;
default:
- // FixStrAscii (short ASCII — property codes, IDs, names): the marker carries the length.
- if (BinaryTypeCode.IsFixStrAscii(tc))
+ // FixStr (short universal string): marker carries char length.
+ if (BinaryTypeCode.IsFixStr(tc))
{
- charLength = BinaryTypeCode.DecodeFixStrAsciiLength(tc);
- byteLength = -1; // ASCII sentinel
+ ReadUniversalFixStrHeader(tc, out charLength, out excess);
break;
}
// Interning marker, PropertySkip, or unknown — caller continues via short-circuit ||
@@ -860,16 +825,14 @@ public static partial class AcBinaryDeserializer
return false;
}
- // Single per-family decode site. ASCII (byteLength < 0): charLength IS the byte count
- // (1:1 widen, no UTF-8 decode). UTF-8 tiers: 1-pass decode with both lengths from the wire.
- value = byteLength < 0 ? ReadAsciiBytesAsString(charLength) : ReadStringUtf8WithCharLen(charLength, byteLength);
+ value = ReadStringByUnsignedExcess(charLength, excess);
return true;
}
///
/// Interning-marker companion to — dispatches the 3 interning
/// markers only (StringInterned, StringInternFirstSmall, StringInternFirstMedium). Every other
- /// string marker (FixStrAscii, StringAscii, StringSmall/Medium/Big, Null, StringEmpty) is handled
+ /// string marker (FixStr, String, StringUtf16, Null, StringEmpty) is handled
/// by ; this method is emitted into generated readers ONLY for
/// types whose string-interning feature flag is enabled — non-interning types skip it entirely
/// (the writer never produces interning markers for them, so
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
index abb03d6..9bf890c 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs
@@ -97,18 +97,15 @@ public static partial class AcBinaryDeserializer
readers[BinaryTypeCode.Float64] = static (ctx, _) => ctx.ReadDoubleUnsafe();
readers[BinaryTypeCode.Decimal] = static (ctx, _) => ctx.ReadDecimalUnsafe();
readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe();
- // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode.
- // FastWire mode reuses the StringSmall (=91) marker but with a different body — inline ternary
- // dispatches by ctx.FastWire (no method-frame overhead vs the old ReadStringSmall dispatcher).
- readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ctx.FastWire ? ctx.ReadStringSmallFastWire() : ctx.ReadStringSmallCompact();
- readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ctx.ReadStringMedium();
- readers[BinaryTypeCode.StringBig] = static (ctx, _) => ctx.ReadStringBig();
+ // Marker 91 now represents StringUtf16. Decoding is centralized in context.ReadStringUtf16Marker()
+ // which keeps FastWire payload and compact legacy-compat behavior in one place.
+ readers[BinaryTypeCode.StringUtf16] = static (ctx, _) => ctx.ReadStringUtf16Marker();
readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt());
readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty;
// H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path)
readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall();
readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium();
- readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ctx.ReadPlainStringAscii();
+ readers[BinaryTypeCode.String] = static (ctx, _) => ctx.ReadUniversalLongString();
readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe();
readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe();
readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe();
@@ -130,12 +127,12 @@ public static partial class AcBinaryDeserializer
// V4N5 cleanup (2026-05-06): FixStr (UTF-8 short non-ASCII, 103..134) range REMOVED.
// Non-ASCII short strings now use StringSmall tier marker (registered above).
- // Register FixStrAscii readers (135..166) — pure-ASCII short-string fast path.
- // The marker IS the validity contract — reader byte→char widens without UTF-8 decode.
- for (var code = BinaryTypeCode.FixStrAsciiBase; code <= BinaryTypeCode.FixStrAsciiMax; code++)
+ // Register FixStr readers (135..166) — universal short-form markers in the new
+ // unsigned-excess layout; each marker in the range encodes its own charLength.
+ for (var code = BinaryTypeCode.FixStrBase; code <= BinaryTypeCode.FixStrMax; code++)
{
- var length = BinaryTypeCode.DecodeFixStrAsciiLength(code);
- readers[code] = CreateFixStrAsciiReader(length);
+ var length = BinaryTypeCode.DecodeFixStrLength(code);
+ readers[code] = CreateFixStrReader(code, length);
}
// Register FixObj slot readers (0..SlotCount-1)
@@ -146,20 +143,19 @@ public static partial class AcBinaryDeserializer
}
- // V4N5 cleanup (2026-05-06): CreateFixStrReader removed — non-ASCII short strings now use
- // StringSmall tier reader (see ReadStringSmallCompact + ReadStringSmallFastWire in
- // BinaryDeserializationContext.Read.cs).
+ // V4N5 cleanup note updated: compact string payloads now route through FixStr/String universals;
+ // marker 91 is StringUtf16 (FastWire) with compact legacy compatibility.
///
- /// Creates a reader for FixStrAscii with the given byte length (also char count, ASCII = 1:1).
- /// Skips UTF-8 decode — byte→char widen only. Marker enforces ASCII validity.
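+ /// Creates a reader for the short-form universal string marker. Char length comes from the marker;
+ /// the payload path is chosen by the unsigned excess byte read in ReadUniversalFixStr. NOTE(review): the length == 0 shortcut returns without consuming that byte — confirm FixStr(0) is never written.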
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static TypeReader CreateFixStrAsciiReader(int length) where TInput : struct, IBinaryInputBase
+ private static TypeReader CreateFixStrReader(byte marker, int length) where TInput : struct, IBinaryInputBase
{
if (length == 0) return static (_, _) => string.Empty;
- return (ctx, _) => ctx.ReadAsciiBytesAsString(length);
+ return (ctx, _) => ctx.ReadUniversalFixStr(marker);
}
///
@@ -1038,30 +1034,22 @@ public static partial class AcBinaryDeserializer
break;
case PropertyAccessorType.String:
- // FixStrAscii is a range (135-166), can't go in switch — keep as range-check first.
- // Hot path on ASCII property names; the marker carries the length, byte→char widen only.
- if (BinaryTypeCode.IsFixStrAscii(typeCode))
+ // FixStr is a range (135-166), can't go in switch — keep as range-check first.
+ // Universal short-form marker: marker carries char length and slot selects payload path.
+ if (BinaryTypeCode.IsFixStr(typeCode))
{
- var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
- propInfo.SetValue(target, length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length));
+ propInfo.SetValue(target, context.ReadUniversalFixStr(typeCode));
return true;
}
// Single-value markers — switch lowers to a JIT/AOT jump table for O(1) dispatch
// (vs. sequential if-chain that branches per non-matching marker).
switch (typeCode)
{
- case BinaryTypeCode.StringSmall:
- // FastWire reuses StringSmall (=91) marker — inline mode dispatch (no method-frame overhead).
- propInfo.SetValue(target, context.FastWire ? context.ReadStringSmallFastWire() : context.ReadStringSmallCompact());
+ case BinaryTypeCode.StringUtf16:
+ propInfo.SetValue(target, context.ReadStringUtf16Marker());
return true;
- case BinaryTypeCode.StringMedium:
- propInfo.SetValue(target, context.ReadStringMedium());
- return true;
- case BinaryTypeCode.StringBig:
- propInfo.SetValue(target, context.ReadStringBig());
- return true;
- case BinaryTypeCode.StringAscii:
- propInfo.SetValue(target, context.ReadPlainStringAscii());
+ case BinaryTypeCode.String:
+ propInfo.SetValue(target, context.ReadUniversalLongString());
return true;
case BinaryTypeCode.StringEmpty:
propInfo.SetValue(target, string.Empty);
@@ -1123,11 +1111,10 @@ public static partial class AcBinaryDeserializer
// Handle null
if (typeCode == BinaryTypeCode.Null) return null;
- // Handle FixStrAscii (short ASCII strings — byte→char widen, no UTF-8 decode)
- if (BinaryTypeCode.IsFixStrAscii(typeCode))
+ // Handle short-form universal marker (FixStr range reused with an unsigned excess byte).
+ if (BinaryTypeCode.IsFixStr(typeCode))
{
- var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
- return length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length);
+ return context.ReadUniversalFixStr(typeCode);
}
// H2Q6: non-ASCII short strings now use StringSmall tier (handled below via TypeReaderTable dispatch).
@@ -1151,7 +1138,7 @@ public static partial class AcBinaryDeserializer
return length == 0 ? string.Empty : context.ReadStringUtf8(length);
}
- // ReadStringSmall / Medium / Big / PlainStringAscii and ReadAndRegisterInternedStringSmall / Medium
+ // StringUtf16 / ReadUniversalLongString and ReadAndRegisterInternedStringSmall / Medium
// (+ the cold ThrowCorruptedBigWire helper) all moved to BinaryDeserializationContext as instance
// methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty
// cross-type path, and the SGen-emitted string-property switch. See
@@ -1998,12 +1985,14 @@ public static partial class AcBinaryDeserializer
if (BinaryTypeCode.IsTinyInt(typeCode)) return;
- // Handle FixStrAscii (short ASCII strings — marker carries length, ASCII payload)
- if (BinaryTypeCode.IsFixStrAscii(typeCode))
+ // Handle FixStr (short universal strings — marker carries char length)
+ if (BinaryTypeCode.IsFixStr(typeCode))
{
- var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode);
- if (length > 0)
- context.Skip(length);
+ var charLength = BinaryTypeCode.DecodeFixStrLength(typeCode);
+ var excess = context.ReadByte();
+ var byteLength = charLength + excess;
+ if (byteLength > 0)
+ context.Skip(byteLength);
return;
}
// H2Q6: non-ASCII short strings now use StringSmall tier (handled in switch below).
@@ -2052,32 +2041,37 @@ public static partial class AcBinaryDeserializer
case BinaryTypeCode.Decimal:
context.Skip(16);
return;
- case BinaryTypeCode.StringAscii:
- // Skip layout: [VarUInt byteCount][bytes]
- SkipPlainString(context);
- return;
- case BinaryTypeCode.StringSmall:
- // H2Q6 Small tier: [charLen:8][utf8Len:8][bytes] — skip 2 byte header + utf8Len bytes
+ case BinaryTypeCode.String:
+ // Skip layout: [VarUInt charLength][unsigned excess slot][bytes]
{
- var header = context.ReadTwoBytesUnsafe();
- var utf8Len = (byte)(header >> 8);
- if (utf8Len > 0) context.Skip(utf8Len);
+ var charLength = (int)context.ReadVarUInt();
+ var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength);
+ var excess = slotSize switch
+ {
+ 1 => context.ReadByte(),
+ 2 => context.ReadUInt16Unsafe(),
+ _ => context.ReadUInt32Unsafe()
+ };
+ var byteLength = charLength + (int)excess;
+ if (byteLength > 0) context.Skip(byteLength);
}
return;
- case BinaryTypeCode.StringMedium:
- // H2Q6 Medium tier: [charLen:16][utf8Len:16][bytes] — single uint read
+ case BinaryTypeCode.StringUtf16:
+ // FastWire payload: [charLen:int32 LE][UTF-16 raw bytes].
+ // For compact backward-compat payloads this marker may still carry legacy StringSmall shape;
+ // skip-path remains dual-mode based on context.FastWire.
{
- var packed = context.ReadUInt32Unsafe();
- var utf8Len = (int)(packed >> 16);
- if (utf8Len > 0) context.Skip(utf8Len);
- }
- return;
- case BinaryTypeCode.StringBig:
- // H2Q6 Big tier: [charLen:32][utf8Len:32][bytes] — single ulong read
- {
- var packed = context.ReadUInt64Unsafe();
- var utf8Len = (int)(uint)(packed >> 32);
- if (utf8Len > 0) context.Skip(utf8Len);
+ if (context.FastWire)
+ {
+ var charLen = context.ReadInt32Unsafe();
+ if (charLen > 0) context.Skip(charLen * 2);
+ }
+ else
+ {
+ var header = context.ReadTwoBytesUnsafe();
+ var utf8Len = (byte)(header >> 8);
+ if (utf8Len > 0) context.Skip(utf8Len);
+ }
}
return;
case BinaryTypeCode.StringInterned:
diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
index df66d8d..2bec662 100644
--- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
+++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs
@@ -833,7 +833,8 @@ public static partial class AcBinarySerializer
//
// Span.CopyTo is overlap-safe via Buffer.Memmove on byte arrays.
var charLength = value.Length;
- var maxBytes = charLength * 4;
+ // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit.
+ var maxBytes = charLength * 3;
var reserveSize = VarUIntSize((uint)maxBytes);
EnsureCapacity(reserveSize + maxBytes);
@@ -856,127 +857,88 @@ public static partial class AcBinarySerializer
}
///
- /// Writes a non-empty string with marker-dispatch — emits the appropriate wire marker:
- ///
- /// - ASCII ≤ 31 byte → FixStrAscii (1-byte header, length in marker)
- /// - ASCII > 31 byte → StringAscii (1+VarUInt header)
- /// - Non-ASCII utf8Len ≤ 255 → StringSmall (3-byte header: marker + charLen:8 + utf8Len:8)
- /// - Non-ASCII utf8Len ≤ 65535 → StringMedium (5-byte header: marker + charLen:16 + utf8Len:16)
- /// - Non-ASCII utf8Len > 65535 → StringBig (9-byte header: marker + charLen:32 + utf8Len:32)
- ///
+ /// Writes a non-empty UTF-8 string in a shift-free layout with an unsigned excess slot.
///
///
- /// H2Q6 wire format v3 — non-ASCII tiers carry both charLen and utf8Len in the header,
- /// enabling 1-pass deserialize (no CountUtf8Chars Pass 1). Wire output is unchanged.
+ /// Header is fully determined before encode:
+ ///
+ /// - charLength <= 31: [FixStr(marker carries charLength)][unsigned excess:1]
+ /// - charLength > 31: [String][VarUInt(charLength)][unsigned excess:1|2|4]
+ ///
+ /// Body is UTF-8-encoded exactly once to the final destination (encodeStart) — no post-encode
+ /// body shift/copy. On this path, excess = bytesWritten - charLength is non-negative: 0 for pure
+ /// ASCII, greater than 0 once any char needs more than one UTF-8 byte. UTF-16 payloads stay on the existing
+ /// FastWire path for now; the signed-negative slot encoding is intentionally not activated in this method.
///
- /// ASCII-predict, single encode pass. The body is UTF-8-encoded once with
- /// Utf8.FromUtf16 straight onto the ASCII-optimistic offset savedPos + asciiHeader,
- /// where asciiHeader is the EXACT header an all-ASCII string needs — FixStrAscii = 1 byte,
- /// StringAscii = 1 + VarUInt(charLength) (ASCII ⇒ utf8Len == charLength, so the VarUInt
- /// width is known pre-encode). bytesWritten == charLength ⇒ pure ASCII ⇒ the body is already
- /// at its final offset → zero body-shift (the common case). A non-ASCII string needs the
- /// larger 3/5/9 tier header, so shifts the body right by a few
- /// bytes — the same single memcpy, moved off the common path onto the rare one. Never encodes twice.
- ///
- /// The prior design reserved the non-ASCII header (3/5/9) up-front and left-shifted the body
- /// on every ASCII string — penalising the common case to spare the rare one. This reverses it.
- ///
- /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings are
- /// handled by the higher-level WriteString via the StringEmpty marker. FastWire never
- /// reaches here — callers take the markerless UTF-16 path via WriteStringUtf16Markerless first.
+ /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings are handled by
+ /// the higher-level WriteString via the StringEmpty marker.
///
- // Hot/cold split (mirrors the reader-side TryReadStringProperty/TryReadStringColdPath, K9M3): the
- // AggressiveInlining hot entry keeps the encode + the zero-shift ASCII header inline; the rarer
- // non-ASCII tiers (Small/Medium/Big) — which need a body right-shift — move to the [NoInlining]
- // WriteStringNonAsciiTail. WriteStringWithDispatch is the shared string-write chokepoint — SGen
- // WriteProperties AND runtime WritePropertyOrSkip / TryWritePrimitive all funnel here.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void WriteStringWithDispatch(string value)
{
var charLength = value.Length;
#if DEBUG
- // Overflow guard (O7G2) — predict-friendly (always false on realistic input). NoInlining throw helper.
- if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
+ System.Diagnostics.Debug.Assert(charLength > 0, "WriteStringWithDispatch expects non-empty string; empty is handled by StringEmpty marker in WriteString.");
#endif
- var maxBytes = charLength * 4;
+ // Overflow guard (O7G2) — predict-friendly (always false on realistic input). NoInlining throw helper.
+ if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
- // ASCII-optimistic reserve: the EXACT header an all-ASCII string needs (FixStrAscii = 1,
- // StringAscii = 1 + VarUInt(charLength)). Capacity covers the non-ASCII Big-tier worst case
- // (9-byte header) so the right-shift in WriteStringNonAsciiTail never re-grows.
- var asciiHeader = charLength <= BinaryTypeCode.FixStrAsciiMaxLength ? 1 : 1 + VarUIntSize((uint)charLength);
- EnsureCapacity(9 + maxBytes);
+ // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit.
+ var maxBytes = charLength * 3;
+ var isFixStr = charLength <= BinaryTypeCode.FixStrMaxLength;
+ // IMPORTANT: the slot VALUE (excess) is not known before UTF-8 encode, but the slot SIZE is.
+ // We reserve the slot by width (1/2/4) from charLength, so encodeStart is final and no body shift is needed.
+ var slotSize = isFixStr ? 1 : BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength);
+ var varUIntSize = isFixStr ? 0 : VarUIntSize((uint)charLength);
+ var headerSize = isFixStr ? 2 : 1 + varUIntSize + slotSize;
- var encodeStart = _position + asciiHeader;
+ EnsureCapacity(headerSize + maxBytes);
- // Single UTF-8 encode (handles ASCII and non-ASCII alike) onto the ASCII-optimistic offset.
- System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
+ var headerPos = _position;
+ var slotPos = isFixStr ? headerPos + 1 : headerPos + 1 + varUIntSize;
+ var encodeStart = headerPos + headerSize;
- if (bytesWritten == charLength)
+ if (isFixStr)
{
- // Pure ASCII — body already at its final offset, header is exactly asciiHeader → zero shift.
- if (asciiHeader == 1)
- {
- BufferAt(_position) = BinaryTypeCode.EncodeFixStrAscii(charLength);
- }
- else
- {
- BufferAt(_position) = BinaryTypeCode.StringAscii;
-
- _position++;
- WriteVarUIntUnsafe((uint)charLength); // exactly fills [savedPos+1, encodeStart)
- }
-
- _position = encodeStart + charLength;
- return;
+ // Universal short-form string marker with unsigned excess slot.
+ BufferAt(headerPos) = BinaryTypeCode.EncodeFixStr(charLength);
+ }
+ else
+ {
+ // Universal long-form string marker + VarUInt(charLength) + unsigned excess slot.
+ BufferAt(headerPos) = BinaryTypeCode.String;
+ _position = headerPos + 1;
+ WriteVarUIntUnsafe((uint)charLength);
}
- switch (bytesWritten)
- {
- case <= 255:
- {
- // Small tier: 3-byte header [marker:1][charLen:8][utf8Len:8]
- var shift = 3 - asciiHeader;
- if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten));
+ var status = System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
+ var excess = bytesWritten - charLength;
- BufferAt(_position) = BinaryTypeCode.StringSmall;
- Unsafe.WriteUnaligned(ref BufferAt(++_position), (ushort)(charLength | (bytesWritten << 8)));
+ if (status != OperationStatus.Done) ThrowStringEncodingFailed(status);
- _position = _position + 2 + bytesWritten;
- return;
- }
- case <= 65535:
- {
- // Medium tier: 5-byte header [marker:1][charLen:16][utf8Len:16]
- var shift = 5 - asciiHeader;
- if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten));
+#if DEBUG
+ // With status == Done, every UTF-16 code unit yields at least one UTF-8 byte, so bytesWritten >= charLength (excess is never negative).
+ System.Diagnostics.Debug.Assert(excess >= 0, "WriteStringWithDispatch invariant broken: UTF-8 path produced negative excess.");
+#endif
- BufferAt(_position) = BinaryTypeCode.StringMedium;
- Unsafe.WriteUnaligned(ref BufferAt(++_position), (uint)charLength | ((uint)bytesWritten << 16));
+ // UTF16 branch remains on the existing FastWire path for now.
+ // Current universal slot is unsigned (ASCII=0, UTF8>0). If UTF16-via-slot is introduced later,
+ // the discriminator design must be revisited (separate flag/marker or signed slot variant).
- _position = _position + 4 + bytesWritten;
- return;
- }
- default:
- {
- WriteStringBigTierColdPath(encodeStart, charLength, bytesWritten, 9 - asciiHeader);
- return;
- }
- }
+ if (slotSize == 1) Unsafe.WriteUnaligned(ref BufferAt(slotPos), unchecked((byte)excess));
+ else if (slotSize == 2) Unsafe.WriteUnaligned(ref BufferAt(slotPos), unchecked((ushort)excess));
+ else Unsafe.WriteUnaligned(ref BufferAt(slotPos), excess);
+
+ _position = encodeStart + bytesWritten;
}
[MethodImpl(MethodImplOptions.NoInlining)]
- private void WriteStringBigTierColdPath(int encodeStart, int charLength, int bytesWritten, int shift)
- {
- // Big tier: 9-byte header [marker:1][charLen:32][utf8Len:32]
- if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten));
-
- BufferAt(_position) = BinaryTypeCode.StringBig;
- Unsafe.WriteUnaligned(ref BufferAt(++_position), (uint)charLength | ((ulong)(uint)bytesWritten << 32));
-
- _position = _position + 8 + bytesWritten;
- }
+ private static void ThrowStringEncodingFailed(OperationStatus status) =>
+ throw new InvalidOperationException(
+ $"String UTF-8 encode failed in WriteStringWithDispatch: status={status}. " +
+ "This indicates an unexpected encoder failure (e.g. destination sizing or invalid input state)." );
///
/// Writes the first-occurrence body of an interned string with H2Q6 tier-marker dispatch.
diff --git a/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs b/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs
index 9a81058..e2afbee 100644
--- a/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs
+++ b/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs
@@ -17,7 +17,7 @@ public struct ArrayBinaryOutput : IBinaryOutputBase, IDisposable
private const int MaxKeepBufferSize = 32 * 1024; // 32KB — below this, keep for reuse
private readonly int _initialCapacity;
- private byte[] _rentedBuffer;
+ private byte[]? _rentedBuffer;
public ArrayBinaryOutput(int initialCapacity = 65535)
{
@@ -75,13 +75,13 @@ public struct ArrayBinaryOutput : IBinaryOutputBase, IDisposable
return result;
}
- /// Copies the written data to an IBufferWriter (single memcpy).
- public void WriteTo(IBufferWriter writer, byte[] buffer, int position)
- {
- var span = writer.GetSpan(position);
- buffer.AsSpan(0, position).CopyTo(span);
- writer.Advance(position);
- }
+ ///// Copies the written data to an IBufferWriter (single memcpy).
+ //public void WriteTo(IBufferWriter writer, byte[] buffer, int position)
+ //{
+ // var span = writer.GetSpan(position);
+ // buffer.AsSpan(0, position).CopyTo(span);
+ // writer.Advance(position);
+ //}
//TODO: miért nem static a DetachResult?
///
diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
index acfe249..6ea4f15 100644
--- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
+++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs
@@ -64,29 +64,19 @@ internal static class BinaryTypeCode
public const byte Char = SlotCount + 26; // 90
// ============================================================================
- // String types — H2Q6 layout (post 2026-05-06 marker reorg, wire format v3)
+ // String types
// ============================================================================
//
- // Non-ASCII strings use fixed-width header tier markers (NO VarUInt utf8Len),
- // enabling 1-pass decode (CountUtf8Chars Pass 1 eliminated).
+ // Marker 91 is reserved for FastWire UTF-16 payloads:
+ // [StringUtf16][charLen:int32 LE][UTF-16 raw bytes]
//
- // Tier dispatch (writer chooses smallest fitting tier based on utf8Len):
- // StringSmall — utf8Len ≤ 255 — header: 1 marker + 1 charLen + 1 utf8Len = 3 byte
- // StringMedium — utf8Len ≤ 65535 — header: 1 marker + 2 charLen + 2 utf8Len = 5 byte
- // StringBig — utf8Len > 65535 — header: 1 marker + 4 charLen + 4 utf8Len = 9 byte
+ // Universal compact-mode strings use FixStr (135..166) + String (167):
+ // [FixStr][excess:1][UTF-8 bytes] / [String][VarUInt charLen][excess:1|2|4][UTF-8 bytes] (excess = utf8Len - charLen, unsigned)
//
- // Interning tiers (writer chooses based on utf8Len; Big never engages — MaxStringInternLength
- // is byte-typed (max 255 char × max 4 byte/char = 1020 byte fits in Medium):
- // StringInternFirstSmall — utf8Len ≤ 255 — header: 1 + cacheIdx-VarUInt + 1 + 1
- // StringInternFirstMedium — utf8Len ≤ 65535 — header: 1 + cacheIdx-VarUInt + 2 + 2
- //
- // ASCII strings continue to use FixStrAscii (135..166) and StringAscii (167) — unchanged from M3R7.
- //
- // String types (SlotCount + 27..30)
- public const byte StringSmall = SlotCount + 27; // 91 — Non-ASCII tier 1: [marker:1][charLen:8][utf8Len:8][bytes], utf8Len ≤ 255
- public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence) — UNCHANGED
- public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker — UNCHANGED
- public const byte StringMedium = SlotCount + 30; // 94 — Non-ASCII tier 2: [marker:1][charLen:16][utf8Len:16][bytes], utf8Len ≤ 65535
+ // Interning tiers keep dedicated markers.
+ public const byte StringUtf16 = SlotCount + 27; // 91 — FastWire UTF-16 marker payload
+ public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence)
+ public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker
// Date/Time types (SlotCount + 31..34)
public const byte DateTime = SlotCount + 31; // 95
@@ -112,8 +102,7 @@ internal static class BinaryTypeCode
// FixStr (non-ASCII) markers REMOVED in H2Q6 — non-ASCII strings now use Small/Medium/Big tiers
// for 1-pass decode (eliminated CountUtf8Chars Pass 1).
//
- // CURRENT ALLOCATION (5 of 32 used):
- public const byte StringBig = SlotCount + 39; // 103 — Non-ASCII tier 3: [marker:1][charLen:32][utf8Len:32][bytes], utf8Len > 65535
+ // CURRENT ALLOCATION:
public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]
public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]
@@ -129,20 +118,21 @@ internal static class BinaryTypeCode
public const byte ReservedRangeMin = SlotCount + 42; // 106 — first reserved value (post-H2Q6 future-feature range)
public const byte ReservedRangeMax = SlotCount + 70; // 134 — last reserved value
- // FixStrAscii range (ASCII-only short strings): 135..166 (32 values for byte lengths 0-31)
- // FixStrAscii encoding: FixStrAsciiBase + byteLength
- // Content semantics: pure ASCII bytes (every byte < 0x80). Reader can use byte→char widening
- // without UTF-8 decode or ASCII validation — the marker itself is the validation contract.
- // Writer emits this when it can prove the content is ASCII (e.g., GetBytes returns byteCount == charLength).
- public const byte FixStrAsciiBase = SlotCount + 71; // 135
- public const byte FixStrAsciiMax = FixStrAsciiBase + 31; // 166
- public const int FixStrAsciiMaxLength = 31;
+ // FixStr range (short universal string marker): 135..166 (32 values for char lengths 0-31)
+ // Encoding: FixStrBase + charLength
+ public const byte FixStrBase = SlotCount + 71; // 135
+ public const byte FixStrMax = FixStrBase + 31; // 166
+ public const int FixStrMaxLength = 31;
+ // Backward-compatible aliases (old naming)
+ public const byte FixStrAsciiBase = FixStrBase;
+ public const byte FixStrAsciiMax = FixStrMax;
+ public const int FixStrAsciiMaxLength = FixStrMaxLength;
- // Long ASCII string marker: 167
- // Layout: [StringAscii] [VarUInt byteCount] [ASCII bytes]
- // Counterpart to StringSmall/Medium/Big — but ASCII content (charLen == byteCount, no UTF-8 decode).
- // Reader fast-widens via byte→char without UTF-8 decode or IsValid scan.
- public const byte StringAscii = SlotCount + 103; // 167
+ // Long universal string marker: 167
+ // Layout: [String] [VarUInt charLength] [excess slot] [bytes]
+ public const byte String = SlotCount + 103; // 167
+ // Backward-compatible alias (old naming)
+ public const byte StringAscii = String;
// Reserved slot block: 168..175 (8 slots) for future string-related markers
// (e.g., StringCompressed, StringEncoded, StringMixedAscii, etc.). Keeping the 135..167 range
@@ -191,17 +181,18 @@ internal static class BinaryTypeCode
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsString(byte typeCode)
- => (typeCode is >= StringSmall and <= StringMedium) // 91..94: StringSmall, StringInterned, StringEmpty, StringMedium
- || (typeCode is >= StringBig and <= StringInternFirstMedium) // 103..105: StringBig, StringInternFirstSmall, StringInternFirstMedium
- || (typeCode is >= FixStrAsciiBase and <= StringAscii); // 135..167: FixStrAscii + StringAscii
+ => typeCode == StringUtf16
+ || typeCode == StringInterned
+ || typeCode == StringEmpty
+ || typeCode == StringInternFirstSmall
+ || typeCode == StringInternFirstMedium
+ || (typeCode is >= FixStrBase and <= String); // 135..167: FixStr + String
///
- /// Check if type code is one of the H2Q6 non-ASCII string tier markers (StringSmall / StringMedium / StringBig).
- /// Excludes interning tier markers (use ) and ASCII markers (use ).
+ /// Check if type code is the FastWire UTF-16 string marker.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsStringTier(byte typeCode)
- => typeCode == StringSmall || typeCode == StringMedium || typeCode == StringBig;
+ public static bool IsStringUtf16(byte typeCode) => typeCode == StringUtf16;
///
/// Check if type code is a H2Q6 interning first-occurrence tier marker (StringInternFirstSmall / Medium).
@@ -212,36 +203,69 @@ internal static class BinaryTypeCode
=> typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium;
///
- /// Check if type code is any ASCII string marker — FixStrAscii (short) or StringAscii (long).
+ /// Check if type code is any universal string marker — FixStr (short) or String (long).
/// Single contiguous range (135..167) for branch-friendly dispatch on the reader hot path.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsAsciiString(byte typeCode) => typeCode is >= FixStrAsciiBase and <= StringAscii;
+ public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= String;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool IsAsciiString(byte typeCode) => IsStringUniversalMarker(typeCode);
///
- /// Check if type code is a FixStrAscii (ASCII short string with byte length encoded in type code).
+ /// Check if type code is a FixStr (short universal marker with char length encoded in type code).
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool IsFixStrAscii(byte typeCode) => typeCode is >= FixStrAsciiBase and <= FixStrAsciiMax;
+ public static bool IsFixStr(byte typeCode) => typeCode is >= FixStrBase and <= FixStrMax;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool IsFixStrAscii(byte typeCode) => IsFixStr(typeCode);
///
- /// Decode FixStrAscii byte length from type code. Length is also the char count (1 byte = 1 char for ASCII).
+ /// Decode FixStr char length from type code.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static int DecodeFixStrAsciiLength(byte typeCode) => typeCode - FixStrAsciiBase;
+ public static int DecodeFixStrLength(byte typeCode) => typeCode - FixStrBase;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int DecodeFixStrAsciiLength(byte typeCode) => DecodeFixStrLength(typeCode);
///
- /// Encode FixStrAscii type code for given byte length (0-31). Caller asserts ASCII content semantics
- /// (every byte less than 0x80). Misuse on non-ASCII content corrupts decode.
+ /// Encode FixStr type code for given char length (0-31).
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static byte EncodeFixStrAscii(int byteLength) => (byte)(FixStrAsciiBase + byteLength);
+ public static byte EncodeFixStr(int charLength) => (byte)(FixStrBase + charLength);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte EncodeFixStrAscii(int charLength) => EncodeFixStr(charLength);
///
- /// Check if byte length can be encoded as FixStrAscii (ASCII short string, 0..31 bytes).
+ /// Check if char length can be encoded as FixStr (short string, 0..31 chars).
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static bool CanEncodeAsFixStrAscii(int byteLength) => byteLength is >= 0 and <= 31;
+ public static bool CanEncodeAsFixStr(int charLength) => charLength is >= 0 and <= 31;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool CanEncodeAsFixStrAscii(int charLength) => CanEncodeAsFixStr(charLength);
+
+ ///
+ /// Universal string excess-slot width selector shared by writer and reader.
+ ///
+ /// Current wire contract uses UNSIGNED excess (ASCII=0, UTF8>0), so the slot thresholds are:
+ /// 1-byte: max excess 255 => charLength <= 127 (worst-case excess = 2 * charLength)
+ /// 2-byte: max excess 65535 => charLength <= 32767
+ /// 4-byte: fallback
+ ///
+ /// IMPORTANT: if the protocol switches back to SIGNED excess (e.g., UTF16 discriminator via negative
+ /// values), these thresholds MUST be reduced here as well (typically 63 / 16383 / 4-byte fallback).
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int GetUniversalStringExcessSlotSize(int charLength)
+ {
+ if (charLength <= 127) return 1;
+ if (charLength <= 32767) return 2;
+ return 4;
+ }
///
/// Check if type code is a tiny int (single byte int32 encoding).
diff --git a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
index c0e939c..5842634 100644
--- a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
+++ b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md
@@ -156,6 +156,52 @@ Assigning a `BufferWriterBinaryOutput` value creates an independent copy. State
A single instance must not use context + standalone modes simultaneously — buffer states desynchronize. One mode per lifecycle phase; `FlushAndReset()` as boundary between modes.
+### ACCORE-BIN-I-Q4P7: ArrayBinaryOutput.DetachResult ownership transfer missing → pooled buffer double-return
+
+**Status:** Open · **Severity:** Critical (latent — silent cross-talk corruption)
+**Affects:** `ArrayBinaryOutput.DetachResult`, `ArrayBinaryOutput.Reset`, `ArrayBinaryOutput.Dispose`
+
+`DetachResult` returns `new BinarySerializationResult(resultBuffer, ..., pooled: true)`, which transfers buffer ownership to the result object (caller disposes result → buffer returned to `ArrayPool`). But `_rentedBuffer` keeps referencing the same array after detach. Later `Reset` (large-buffer branch) or `Dispose` returns `_rentedBuffer` again, causing a double-return of the same array to `ArrayPool<byte>.Shared`.
+
+**Impact:** Silent, intermittent data corruption. ArrayPool can hand out the same physical array to multiple renters after double-return, enabling cross-talk between unrelated serialization operations.
+
+**Why this is active in default config:** ctor default `initialCapacity=65535`, while `MaxKeepBufferSize=32KB`; the detached default buffer is considered "large", so `Reset` naturally enters the return-to-pool path.
+
+**Fix direction:** Treat `DetachResult` as a strict ownership transfer boundary. After detach, `_rentedBuffer` must no longer point to the detached array. Possible implementation variants:
+- **Eager replacement:** rent a replacement buffer immediately in `DetachResult`.
+- **Lazy replacement:** set `_rentedBuffer = null` in `DetachResult`, and perform lazy-rent in `Initialize` (`_rentedBuffer ??= ArrayPool<byte>.Shared.Rent(_initialCapacity)`).
+
+Lazy replacement avoids redundant rent/return churn when `DetachResult` is followed by `Reset` or `Dispose`, while preserving single-owner semantics (the detached `BinarySerializationResult` remains the only owner until it calls `Return` in `Dispose`). Ensure exactly one return path per rented array.
+
+### ACCORE-BIN-I-R2D6: ArrayBinaryOutput.Reset may set `_rentedBuffer` to null while `Initialize` assumes non-null
+
+**Status:** Open
+**Affects:** `ArrayBinaryOutput.Reset`, `ArrayBinaryOutput.Initialize`
+
+`Reset` currently does:
+
+`_rentedBuffer = nextCapacity == _initialCapacity ? null : ArrayPool<byte>.Shared.Rent(nextCapacity);`
+
+So `_rentedBuffer` can become null after returning a large buffer, while `Initialize` unconditionally reads `_rentedBuffer` and `_rentedBuffer.Length`.
+
+**Impact:** Deterministic `NullReferenceException` on the next initialization path when the null branch is taken.
+
+**Fix direction:** Keep `_rentedBuffer` always non-null by renting `_initialCapacity` in that branch, or add lazy-rent null handling in `Initialize` before dereference.
+
+### ACCORE-BIN-I-V8N4: BinarySerializationResult accessors remain usable after Dispose → pooled-buffer use-after-return
+
+**Status:** Open · **Severity:** Critical (latent — silent data corruption)
+**Affects:** `BinarySerializationResult.Buffer`, `BinarySerializationResult.Span`, `BinarySerializationResult.Memory`, `BinarySerializationResult.Dispose`
+
+`BinarySerializationResult.Dispose` returns the underlying array to `ArrayPool` when `pooled=true`, but public accessors (`Buffer` / `Span` / `Memory`) remain callable without a disposed guard. After dispose, the same array may already be re-rented and mutated by unrelated operations; reading the old result then becomes use-after-return on pooled memory.
+
+**Impact:** Silent, non-deterministic cross-talk corruption. Consumers may observe stale/foreign bytes through `Span` / `Memory` / `Buffer` with no exception signal.
+
+**Possible fix directions:**
+- Add `_disposed` guard to all accessors (`ObjectDisposedException` after dispose).
+- Optionally scrub/neutralize post-dispose state (e.g., replace exposed buffer reference with `Array.Empty<byte>()`) to reduce accidental reuse risk.
+- Clarify API ownership contract in docs: disposed result is terminal and must not be accessed.
+
## Configuration / Options
### ACCORE-BIN-I-L8N5: AcBinarySerializerOptions thread-safety — mutable properties on shared instances