diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs index e68326c..8e95d00 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs @@ -207,21 +207,20 @@ public partial class AcBinarySourceGenerator /// /// Emits inline string read from type code. Handles all H2Q6 (v3 wire format) string markers: - /// FixStrAscii (ASCII short, 135-166), StringAscii (ASCII long, 167), - /// StringSmall/Medium/Big (non-ASCII tiers, 91/94/103), + /// FixStr (short-form universal, 135-166), String (long-form universal, 167), + /// StringUtf16 (FastWire marker, 91), /// StringInternFirstSmall/Medium (interning tiers, 104/105), /// StringInterned (cache ref, 92), StringEmpty (93), Null. /// - /// FixStrAscii is checked first as the hot path for short ASCII property names; non-ASCII + /// FixStr is checked first as the hot path for short strings; non-ASCII /// tier markers carry both charLen and utf8Len in fixed-width headers (1-pass decode). /// private static void EmitReadString(StringBuilder sb, string a, string tc, string i, bool enableInternString) { - // FixStrAscii is the hot path — most short strings (property names) are ASCII. - sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStrAscii({tc}))"); + // FixStr is the hot path — short-form universal marker with charLength in the marker. + sb.AppendLine($"{i}if (BinaryTypeCode.IsFixStr({tc}))"); sb.AppendLine($"{i}{{"); - sb.AppendLine($"{i} var falen = BinaryTypeCode.DecodeFixStrAsciiLength({tc});"); - sb.AppendLine($"{i} {a} = falen == 0 ? string.Empty : context.ReadAsciiBytesAsString(falen);"); + sb.AppendLine($"{i} {a} = context.ReadUniversalFixStr({tc});"); sb.AppendLine($"{i}}}"); // Switch gives O(1) dispatch via JIT jump table for the remaining markers. 
sb.AppendLine($"{i}else switch ({tc})"); @@ -235,24 +234,16 @@ public partial class AcBinarySourceGenerator sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());"); sb.AppendLine($"{i} break;"); } - // H2Q6 string-tier markers + StringAscii. Wire-decode body is shared with the runtime path - // (TypeReaderTable + cross-type populate) — see context.ReadStringSmall/Medium/Big, ReadPlainStringAscii. + // StringUtf16 marker + String. Wire-decode body is shared with the runtime path + // (TypeReaderTable + cross-type populate) — see context.ReadStringUtf16Marker() + // and ReadUniversalLongString. // These markers are feature-independent: writer emits them on any string property regardless of // intern setting (intern is opt-in per-property via [AcStringIntern] + InternBit). - sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:"); - // FastWire mode reuses the StringSmall (=91) marker but with a different body — emit - // inline ternary so call sites that can run in either mode (Dictionary key/value, runtime - // cross-type populate) dispatch without an extra method-frame. - sb.AppendLine($"{i} {a} = context.FastWire ? 
context.ReadStringSmallFastWire() : context.ReadStringSmallCompact();"); + sb.AppendLine($"{i} case BinaryTypeCode.StringUtf16:"); + sb.AppendLine($"{i} {a} = context.ReadStringUtf16Marker();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:"); - sb.AppendLine($"{i} {a} = context.ReadStringMedium();"); - sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} case BinaryTypeCode.StringBig:"); - sb.AppendLine($"{i} {a} = context.ReadStringBig();"); - sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:"); - sb.AppendLine($"{i} {a} = context.ReadPlainStringAscii();"); + sb.AppendLine($"{i} case BinaryTypeCode.String:"); + sb.AppendLine($"{i} {a} = context.ReadUniversalLongString();"); sb.AppendLine($"{i} break;"); // Interning first-occurrence cases — see comment above. if (enableInternString) diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index c0d47f3..c63140f 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -625,19 +625,12 @@ public static partial class AcBinaryDeserializer } /// - /// H2Q6 StringSmall reader — Compact-mode-only body: wire [charLen:8][utf8Len:8][UTF-8 bytes] - /// after the marker has been consumed. 1-pass decode (no CountUtf8Chars). - /// Call this directly when the call site has ALREADY established FastWire == false - /// (e.g. hot path, where the SGen-emit caller short-circuits - /// FastWire on a separate ag via ReadStringUtf16Markerless). Skips the redundant - /// FastWire branch — call sites that may run in either mode inline - /// FastWire ? ReadStringSmallFastWire() : ReadStringSmallCompact() ternary instead of a - /// shared dispatcher (no method-frame overhead). 
+ /// Legacy compact StringSmall reader retained only for backward-compat payloads. /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal string ReadStringSmallCompact() + [MethodImpl(MethodImplOptions.NoInlining)] + internal string ReadStringSmallCompactLegacy() { - System.Diagnostics.Debug.Assert(!FastWire, "ReadStringSmallCompact called with FastWire=true — call sites that may run in FastWire mode must inline the `FastWire ? ReadStringSmallFastWire() : ReadStringSmallCompact()` ternary."); + System.Diagnostics.Debug.Assert(!FastWire, "ReadStringSmallCompactLegacy called with FastWire=true."); // H2Q6 StringSmall body: [charLen:8][utf8Len:8][UTF-8 bytes] var header = ReadTwoBytesUnsafe(); @@ -647,84 +640,28 @@ public static partial class AcBinaryDeserializer } /// - /// H2Q6 StringSmall reader — FastWire-mode-only body: wire [charLen:int32 LE][UTF-16 raw bytes] + /// StringUtf16 reader — FastWire-mode-only body: wire [charLen:int32 LE][UTF-16 raw bytes] /// after the (mode-shared) marker has been consumed. Engaged only on the runtime /// path when FastWire==true and the declared target /// type is NOT string (the string-typed FastWire short-circuit in /// bypasses the marker entirely via ReadStringUtf16Markerless). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal string ReadStringSmallFastWire() + internal string ReadStringUtf16FastWire() { - // Mode-shared marker (=91) FastWire payload — fix-int charLen (matches MemPack WriteUtf16 shape). + // StringUtf16 (=91) payload — fix-int charLen (matches MemPack WriteUtf16 shape). var charLenF = ReadInt32Unsafe(); return ReadStringUtf16(charLenF); } - // No combined ReadStringSmall() dispatcher — every call site already has the FastWire flag - // in scope (compile-time invariant on the SGen-emit hot path; runtime field check on the - // dispatcher-callers). Call sites inline the ternary `FastWire ? 
ReadStringSmallFastWire() - // : ReadStringSmallCompact()` when they need mode-awareness, saving a method-call frame. - /// - /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes] after the marker - /// has been consumed. 1-pass decode. Header read in a single uint load (vs 2 ushort loads). Shared - /// by runtime dispatch + SGen-emit. - /// - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal string ReadStringMedium() - { - var packed = ReadUInt32Unsafe(); - var charLength = (ushort)packed; - var byteLength = (ushort)(packed >> 16); - return ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes] after the marker - /// has been consumed. 1-pass decode. Header read in a single ulong load (vs 2 uint loads). Includes - /// a corrupted-wire guard for negative casts from uint values > Int32.MaxValue. Shared by - /// runtime dispatch + SGen-emit. - /// - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal string ReadStringBig() - { - var packed = ReadUInt64Unsafe(); - var charLength = (int)(uint)packed; - var byteLength = (int)(uint)(packed >> 32); - -#if DEBUG - // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values - // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). - // Predict-friendly: always false on a valid wire. - if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); -#endif - - return ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// Throw helper for the corrupted-wire guard in . NoInlining - /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. 
- /// - [MethodImpl(MethodImplOptions.NoInlining)] - private void ThrowCorruptedBigWire(int charLength, int byteLength) => - throw new AcBinaryDeserializationException( - $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + - $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", - -1); - - /// - /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). - /// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode. Shared - /// by runtime dispatch + SGen-emit. + /// Unified reader for marker (91). + /// FastWire path reads UTF-16 payload; non-FastWire path keeps legacy compact payload compatibility. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal string ReadPlainStringAscii() + internal string ReadStringUtf16Marker() { - var length = (int)ReadVarUInt(); - if (length == 0) return string.Empty; - return ReadAsciiBytesAsString(length); + return FastWire ? 
ReadStringUtf16FastWire() : ReadStringSmallCompactLegacy(); } /// @@ -753,6 +690,67 @@ public static partial class AcBinaryDeserializer return str; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadUniversalFixStr(byte marker) + { + ReadUniversalFixStrHeader(marker, out var charLength, out var excess); + return ReadStringByUnsignedExcess(charLength, excess); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadUniversalLongString() + { + ReadUniversalLongStringHeader(out var charLength, out var excess); + return ReadStringByUnsignedExcess(charLength, excess); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void ReadUniversalFixStrHeader(byte marker, out int charLength, out uint excess) + { + charLength = BinaryTypeCode.DecodeFixStrLength(marker); + excess = ReadByte(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void ReadUniversalLongStringHeader(out int charLength, out uint excess) + { + charLength = (int)ReadVarUInt(); + var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); + + if (slotSize == 1) excess = ReadByte(); + else if (slotSize == 2) excess = ReadUInt16Unsafe(); + else excess = ReadVarUInt32Unchecked(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private string ReadStringByUnsignedExcess(int charLength, uint excess) + { + if (charLength == 0) + { + if (excess == 0) return string.Empty; + throw new AcBinaryDeserializationException($"Invalid string header: charLength=0 requires excess=0, got {excess}.", _position); + } + + if (excess == 0) + { + return ReadAsciiBytesAsString(charLength); + } + +#if DEBUG + if (excess > (uint)(int.MaxValue - charLength)) + throw new AcBinaryDeserializationException($"Invalid string header: byteLength overflow (charLength={charLength}, excess={excess}).", _position); +#endif + + var byteLength = charLength + (int)excess; + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private uint ReadVarUInt32Unchecked() + { + return ReadUInt32Unsafe(); + } + /// /// H2Q6 StringInternFirstMedium reader: wire [cacheIdx:VarUInt][charLen:16 LE][utf8Len:16 LE][bytes]. /// Registers the decoded string in the intern cache and returns it. (Big tier never engages on the @@ -797,49 +795,17 @@ public static partial class AcBinaryDeserializer [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool TryReadStringProperty(byte tc, out string? value) { - // Hot-path invariant: SGen-emit + property-marker callers MUST short-circuit FastWire on a - // separate ag (markerless decode) — so by the time the marker byte reaches this switch, - // FastWire is guaranteed false (the StringSmall body is the Compact-mode decode). + // Hot-path invariant: property marker paths remain mode-aware for legacy payload compatibility. value = null; int charLength; - int byteLength; - + uint excess; switch (tc) { - case BinaryTypeCode.StringSmall: - { - // [charLen:8][utf8Len:8] - var header = ReadTwoBytesUnsafe(); - charLength = (byte)header; - byteLength = (byte)(header >> 8); - break; - } - case BinaryTypeCode.StringMedium: - { - // [charLen:16 LE][utf8Len:16 LE] — single uint load - var packed = ReadUInt32Unsafe(); - charLength = (ushort)packed; - byteLength = (ushort)(packed >> 16); - break; - } - case BinaryTypeCode.StringBig: - { - // [charLen:32 LE][utf8Len:32 LE] — single ulong load + corrupted-wire guard - var packed = ReadUInt64Unsafe(); - charLength = (int)(uint)packed; - byteLength = (int)(uint)(packed >> 32); - -#if DEBUG - if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); -#endif - - break; - } - case BinaryTypeCode.StringAscii: - // Long ASCII: [VarUInt byteLen]. byteLength = -1 sentinel → routes to the ASCII tail. 
- charLength = (int)ReadVarUInt(); - byteLength = -1; - + case BinaryTypeCode.StringUtf16: + value = ReadStringUtf16Marker(); + return true; + case BinaryTypeCode.String: + ReadUniversalLongStringHeader(out charLength, out excess); break; case BinaryTypeCode.Null: return true; @@ -848,11 +814,10 @@ public static partial class AcBinaryDeserializer return true; default: - // FixStrAscii (short ASCII — property codes, IDs, names): the marker carries the length. - if (BinaryTypeCode.IsFixStrAscii(tc)) + // FixStr (short universal string): marker carries char length. + if (BinaryTypeCode.IsFixStr(tc)) { - charLength = BinaryTypeCode.DecodeFixStrAsciiLength(tc); - byteLength = -1; // ASCII sentinel + ReadUniversalFixStrHeader(tc, out charLength, out excess); break; } // Interning marker, PropertySkip, or unknown — caller continues via short-circuit || @@ -860,16 +825,14 @@ public static partial class AcBinaryDeserializer return false; } - // Single per-family decode site. ASCII (byteLength < 0): charLength IS the byte count - // (1:1 widen, no UTF-8 decode). UTF-8 tiers: 1-pass decode with both lengths from the wire. - value = byteLength < 0 ? ReadAsciiBytesAsString(charLength) : ReadStringUtf8WithCharLen(charLength, byteLength); + value = ReadStringByUnsignedExcess(charLength, excess); return true; } /// /// Interning-marker companion to — dispatches the 3 interning /// markers only (StringInterned, StringInternFirstSmall, StringInternFirstMedium). 
Every other - /// string marker (FixStrAscii, StringAscii, StringSmall/Medium/Big, Null, StringEmpty) is handled + /// string marker (FixStr, String, StringUtf16, Null, StringEmpty) is handled /// by ; this method is emitted into generated readers ONLY for /// types whose string-interning feature flag is enabled — non-interning types skip it entirely /// (the writer never produces interning markers for them, so diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index abb03d6..9bf890c 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -97,18 +97,15 @@ public static partial class AcBinaryDeserializer readers[BinaryTypeCode.Float64] = static (ctx, _) => ctx.ReadDoubleUnsafe(); readers[BinaryTypeCode.Decimal] = static (ctx, _) => ctx.ReadDecimalUnsafe(); readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe(); - // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode. - // FastWire mode reuses the StringSmall (=91) marker but with a different body — inline ternary - // dispatches by ctx.FastWire (no method-frame overhead vs the old ReadStringSmall dispatcher). - readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ctx.FastWire ? ctx.ReadStringSmallFastWire() : ctx.ReadStringSmallCompact(); - readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ctx.ReadStringMedium(); - readers[BinaryTypeCode.StringBig] = static (ctx, _) => ctx.ReadStringBig(); + // Marker 91 now represents StringUtf16. Decoding is centralized in context.ReadStringUtf16Marker() + // which keeps FastWire payload and compact legacy-compat behavior in one place. 
+ readers[BinaryTypeCode.StringUtf16] = static (ctx, _) => ctx.ReadStringUtf16Marker(); readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt()); readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty; // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); - readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ctx.ReadPlainStringAscii(); + readers[BinaryTypeCode.String] = static (ctx, _) => ctx.ReadUniversalLongString(); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); @@ -130,12 +127,12 @@ public static partial class AcBinaryDeserializer // V4N5 cleanup (2026-05-06): FixStr (UTF-8 short non-ASCII, 103..134) range REMOVED. // Non-ASCII short strings now use StringSmall tier marker (registered above). - // Register FixStrAscii readers (135..166) — pure-ASCII short-string fast path. - // The marker IS the validity contract — reader byte→char widens without UTF-8 decode. - for (var code = BinaryTypeCode.FixStrAsciiBase; code <= BinaryTypeCode.FixStrAsciiMax; code++) + // Register FixStr readers (135..166) — universal short-form markers in the new + // signed-excess layout; the per-marker charLength lives in the marker nibble/range. 
+ for (var code = BinaryTypeCode.FixStrBase; code <= BinaryTypeCode.FixStrMax; code++) { - var length = BinaryTypeCode.DecodeFixStrAsciiLength(code); - readers[code] = CreateFixStrAsciiReader(length); + var length = BinaryTypeCode.DecodeFixStrLength(code); + readers[code] = CreateFixStrReader(code, length); } // Register FixObj slot readers (0..SlotCount-1) @@ -146,20 +143,19 @@ public static partial class AcBinaryDeserializer } - // V4N5 cleanup (2026-05-06): CreateFixStrReader removed — non-ASCII short strings now use - // StringSmall tier reader (see ReadStringSmallCompact + ReadStringSmallFastWire in - // BinaryDeserializationContext.Read.cs). + // V4N5 cleanup note updated: compact string payloads now route through FixStr/String universals; + // marker 91 is StringUtf16 (FastWire) with compact legacy compatibility. /// - /// Creates a reader for FixStrAscii with the given byte length (also char count, ASCII = 1:1). - /// Skips UTF-8 decode — byte→char widen only. Marker enforces ASCII validity. + /// Creates a reader for the short-form universal string marker. Char length comes from the marker; + /// payload codec is selected by the signed excess slot read by ReadUniversalFixStr. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static TypeReader CreateFixStrAsciiReader(int length) where TInput : struct, IBinaryInputBase + private static TypeReader CreateFixStrReader(byte marker, int length) where TInput : struct, IBinaryInputBase { if (length == 0) return static (_, _) => string.Empty; - return (ctx, _) => ctx.ReadAsciiBytesAsString(length); + return (ctx, _) => ctx.ReadUniversalFixStr(marker); } /// @@ -1038,30 +1034,22 @@ public static partial class AcBinaryDeserializer break; case PropertyAccessorType.String: - // FixStrAscii is a range (135-166), can't go in switch — keep as range-check first. - // Hot path on ASCII property names; the marker carries the length, byte→char widen only. 
- if (BinaryTypeCode.IsFixStrAscii(typeCode)) + // FixStr is a range (135-166), can't go in switch — keep as range-check first. + // Universal short-form marker: marker carries char length and slot selects payload path. + if (BinaryTypeCode.IsFixStr(typeCode)) { - var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode); - propInfo.SetValue(target, length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length)); + propInfo.SetValue(target, context.ReadUniversalFixStr(typeCode)); return true; } // Single-value markers — switch lowers to a JIT/AOT jump table for O(1) dispatch // (vs. sequential if-chain that branches per non-matching marker). switch (typeCode) { - case BinaryTypeCode.StringSmall: - // FastWire reuses StringSmall (=91) marker — inline mode dispatch (no method-frame overhead). - propInfo.SetValue(target, context.FastWire ? context.ReadStringSmallFastWire() : context.ReadStringSmallCompact()); + case BinaryTypeCode.StringUtf16: + propInfo.SetValue(target, context.ReadStringUtf16Marker()); return true; - case BinaryTypeCode.StringMedium: - propInfo.SetValue(target, context.ReadStringMedium()); - return true; - case BinaryTypeCode.StringBig: - propInfo.SetValue(target, context.ReadStringBig()); - return true; - case BinaryTypeCode.StringAscii: - propInfo.SetValue(target, context.ReadPlainStringAscii()); + case BinaryTypeCode.String: + propInfo.SetValue(target, context.ReadUniversalLongString()); return true; case BinaryTypeCode.StringEmpty: propInfo.SetValue(target, string.Empty); @@ -1123,11 +1111,10 @@ public static partial class AcBinaryDeserializer // Handle null if (typeCode == BinaryTypeCode.Null) return null; - // Handle FixStrAscii (short ASCII strings — byte→char widen, no UTF-8 decode) - if (BinaryTypeCode.IsFixStrAscii(typeCode)) + // Handle short-form universal marker (FixStr range reused with signed excess slot). 
+ if (BinaryTypeCode.IsFixStr(typeCode)) { - var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode); - return length == 0 ? string.Empty : context.ReadAsciiBytesAsString(length); + return context.ReadUniversalFixStr(typeCode); } // H2Q6: non-ASCII short strings now use StringSmall tier (handled below via TypeReaderTable dispatch). @@ -1151,7 +1138,7 @@ public static partial class AcBinaryDeserializer return length == 0 ? string.Empty : context.ReadStringUtf8(length); } - // ReadStringSmall / Medium / Big / PlainStringAscii and ReadAndRegisterInternedStringSmall / Medium + // StringUtf16 / ReadUniversalLongString and ReadAndRegisterInternedStringSmall / Medium // (+ the cold ThrowCorruptedBigWire helper) all moved to BinaryDeserializationContext as instance // methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty // cross-type path, and the SGen-emitted string-property switch. See @@ -1998,12 +1985,14 @@ public static partial class AcBinaryDeserializer if (BinaryTypeCode.IsTinyInt(typeCode)) return; - // Handle FixStrAscii (short ASCII strings — marker carries length, ASCII payload) - if (BinaryTypeCode.IsFixStrAscii(typeCode)) + // Handle FixStr (short universal strings — marker carries char length) + if (BinaryTypeCode.IsFixStr(typeCode)) { - var length = BinaryTypeCode.DecodeFixStrAsciiLength(typeCode); - if (length > 0) - context.Skip(length); + var charLength = BinaryTypeCode.DecodeFixStrLength(typeCode); + var excess = context.ReadByte(); + var byteLength = charLength + excess; + if (byteLength > 0) + context.Skip(byteLength); return; } // H2Q6: non-ASCII short strings now use StringSmall tier (handled in switch below). 
@@ -2052,32 +2041,37 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.Decimal: context.Skip(16); return; - case BinaryTypeCode.StringAscii: - // Skip layout: [VarUInt byteCount][bytes] - SkipPlainString(context); - return; - case BinaryTypeCode.StringSmall: - // H2Q6 Small tier: [charLen:8][utf8Len:8][bytes] — skip 2 byte header + utf8Len bytes + case BinaryTypeCode.String: + // Skip layout: [VarUInt charLength][unsigned excess slot][bytes] { - var header = context.ReadTwoBytesUnsafe(); - var utf8Len = (byte)(header >> 8); - if (utf8Len > 0) context.Skip(utf8Len); + var charLength = (int)context.ReadVarUInt(); + var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); + var excess = slotSize switch + { + 1 => context.ReadByte(), + 2 => context.ReadUInt16Unsafe(), + _ => context.ReadUInt32Unsafe() + }; + var byteLength = charLength + (int)excess; + if (byteLength > 0) context.Skip(byteLength); } return; - case BinaryTypeCode.StringMedium: - // H2Q6 Medium tier: [charLen:16][utf8Len:16][bytes] — single uint read + case BinaryTypeCode.StringUtf16: + // FastWire payload: [charLen:int32 LE][UTF-16 raw bytes]. + // For compact backward-compat payloads this marker may still carry legacy StringSmall shape; + // skip-path remains dual-mode based on context.FastWire. 
{ - var packed = context.ReadUInt32Unsafe(); - var utf8Len = (int)(packed >> 16); - if (utf8Len > 0) context.Skip(utf8Len); - } - return; - case BinaryTypeCode.StringBig: - // H2Q6 Big tier: [charLen:32][utf8Len:32][bytes] — single ulong read - { - var packed = context.ReadUInt64Unsafe(); - var utf8Len = (int)(uint)(packed >> 32); - if (utf8Len > 0) context.Skip(utf8Len); + if (context.FastWire) + { + var charLen = context.ReadInt32Unsafe(); + if (charLen > 0) context.Skip(charLen * 2); + } + else + { + var header = context.ReadTwoBytesUnsafe(); + var utf8Len = (byte)(header >> 8); + if (utf8Len > 0) context.Skip(utf8Len); + } } return; case BinaryTypeCode.StringInterned: diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index df66d8d..2bec662 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -833,7 +833,8 @@ public static partial class AcBinarySerializer // // Span.CopyTo is overlap-safe via Buffer.Memmove on byte arrays. var charLength = value.Length; - var maxBytes = charLength * 4; + // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit. 
+ var maxBytes = charLength * 3; var reserveSize = VarUIntSize((uint)maxBytes); EnsureCapacity(reserveSize + maxBytes); @@ -856,127 +857,88 @@ public static partial class AcBinarySerializer } /// - /// Writes a non-empty string with marker-dispatch — emits the appropriate wire marker: - /// - /// ASCII ≤ 31 byte → FixStrAscii (1-byte header, length in marker) - /// ASCII > 31 byte → StringAscii (1+VarUInt header) - /// Non-ASCII utf8Len ≤ 255 → StringSmall (3-byte header: marker + charLen:8 + utf8Len:8) - /// Non-ASCII utf8Len ≤ 65535 → StringMedium (5-byte header: marker + charLen:16 + utf8Len:16) - /// Non-ASCII utf8Len > 65535 → StringBig (9-byte header: marker + charLen:32 + utf8Len:32) - /// + /// Writes a non-empty UTF-8 string in a shift-free layout with an unsigned excess slot. /// /// - /// H2Q6 wire format v3 — non-ASCII tiers carry both charLen and utf8Len in the header, - /// enabling 1-pass deserialize (no CountUtf8Chars Pass 1). Wire output is unchanged. + /// Header is fully determined before encode: + /// + /// charLength <= 31: [FixStr(marker carries charLength)][unsigned excess:1] + /// charLength > 31: [String][VarUInt(charLength)][unsigned excess:1|2|4] + /// + /// Body is UTF-8-encoded exactly once to the final destination (encodeStart) — no post-encode + /// body shift/copy. For the current path, excess = bytesWritten - charLength is expected to be + /// non-negative (ASCII=0, UTF-8=>0). UTF-16 signed-negative slot usage remains on the existing FastWire + /// path for now and is intentionally not activated in this method. /// - /// ASCII-predict, single encode pass. The body is UTF-8-encoded once with - /// Utf8.FromUtf16 straight onto the ASCII-optimistic offset savedPos + asciiHeader, - /// where asciiHeader is the EXACT header an all-ASCII string needs — FixStrAscii = 1 byte, - /// StringAscii = 1 + VarUInt(charLength) (ASCII ⇒ utf8Len == charLength, so the VarUInt - /// width is known pre-encode). 
bytesWritten == charLength ⇒ pure ASCII ⇒ the body is already - /// at its final offset → zero body-shift (the common case). A non-ASCII string needs the - /// larger 3/5/9 tier header, so shifts the body right by a few - /// bytes — the same single memcpy, moved off the common path onto the rare one. Never encodes twice. - /// - /// The prior design reserved the non-ASCII header (3/5/9) up-front and left-shifted the body - /// on every ASCII string — penalising the common case to spare the rare one. This reverses it. - /// - /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings are - /// handled by the higher-level WriteString via the StringEmpty marker. FastWire never - /// reaches here — callers take the markerless UTF-16 path via WriteStringUtf16Markerless first. + /// Caller MUST guarantee non-empty input (value.Length > 0) — empty strings are handled by + /// the higher-level WriteString via the StringEmpty marker. /// - // Hot/cold split (mirrors the reader-side TryReadStringProperty/TryReadStringColdPath, K9M3): the - // AggressiveInlining hot entry keeps the encode + the zero-shift ASCII header inline; the rarer - // non-ASCII tiers (Small/Medium/Big) — which need a body right-shift — move to the [NoInlining] - // WriteStringNonAsciiTail. WriteStringWithDispatch is the shared string-write chokepoint — SGen - // WriteProperties AND runtime WritePropertyOrSkip / TryWritePrimitive all funnel here. [MethodImpl(MethodImplOptions.AggressiveInlining)] public void WriteStringWithDispatch(string value) { var charLength = value.Length; #if DEBUG - // Overflow guard (O7G2) — predict-friendly (always false on realistic input). NoInlining throw helper. 
- if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); + System.Diagnostics.Debug.Assert(charLength > 0, "WriteStringWithDispatch expects non-empty string; empty is handled by StringEmpty marker in WriteString."); #endif - var maxBytes = charLength * 4; + // Overflow guard (O7G2) — predict-friendly (always false on realistic input). NoInlining throw helper. + if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); - // ASCII-optimistic reserve: the EXACT header an all-ASCII string needs (FixStrAscii = 1, - // StringAscii = 1 + VarUInt(charLength)). Capacity covers the non-ASCII Big-tier worst case - // (9-byte header) so the right-shift in WriteStringNonAsciiTail never re-grows. - var asciiHeader = charLength <= BinaryTypeCode.FixStrAsciiMaxLength ? 1 : 1 + VarUIntSize((uint)charLength); - EnsureCapacity(9 + maxBytes); + // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit. + var maxBytes = charLength * 3; + var isFixStr = charLength <= BinaryTypeCode.FixStrMaxLength; + // IMPORTANT: the slot VALUE (excess) is not known before UTF-8 encode, but the slot SIZE is. + // We reserve the slot by width (1/2/4) from charLength, so encodeStart is final and no body shift is needed. + var slotSize = isFixStr ? 1 : BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); + var varUIntSize = isFixStr ? 0 : VarUIntSize((uint)charLength); + var headerSize = isFixStr ? 2 : 1 + varUIntSize + slotSize; - var encodeStart = _position + asciiHeader; + EnsureCapacity(headerSize + maxBytes); - // Single UTF-8 encode (handles ASCII and non-ASCII alike) onto the ASCII-optimistic offset. - System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); + var headerPos = _position; + var slotPos = isFixStr ? 
headerPos + 1 : headerPos + 1 + varUIntSize; + var encodeStart = headerPos + headerSize; - if (bytesWritten == charLength) + if (isFixStr) { - // Pure ASCII — body already at its final offset, header is exactly asciiHeader → zero shift. - if (asciiHeader == 1) - { - BufferAt(_position) = BinaryTypeCode.EncodeFixStrAscii(charLength); - } - else - { - BufferAt(_position) = BinaryTypeCode.StringAscii; - - _position++; - WriteVarUIntUnsafe((uint)charLength); // exactly fills [savedPos+1, encodeStart) - } - - _position = encodeStart + charLength; - return; + // Universal short-form string marker with unsigned excess slot. + BufferAt(headerPos) = BinaryTypeCode.EncodeFixStr(charLength); + } + else + { + // Universal long-form string marker + VarUInt(charLength) + unsigned excess slot. + BufferAt(headerPos) = BinaryTypeCode.String; + _position = headerPos + 1; + WriteVarUIntUnsafe((uint)charLength); } - switch (bytesWritten) - { - case <= 255: - { - // Small tier: 3-byte header [marker:1][charLen:8][utf8Len:8] - var shift = 3 - asciiHeader; - if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten)); + var status = System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); + var excess = bytesWritten - charLength; - BufferAt(_position) = BinaryTypeCode.StringSmall; - Unsafe.WriteUnaligned(ref BufferAt(++_position), (ushort)(charLength | (bytesWritten << 8))); + if (status != OperationStatus.Done) ThrowStringEncodingFailed(status); - _position = _position + 2 + bytesWritten; - return; - } - case <= 65535: - { - // Medium tier: 5-byte header [marker:1][charLen:16][utf8Len:16] - var shift = 5 - asciiHeader; - if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten)); +#if DEBUG + // With status==Done, UTF-8 path mathematically implies bytesWritten >= charLength. 
+ System.Diagnostics.Debug.Assert(excess >= 0, "WriteStringWithDispatch invariant broken: UTF-8 path produced negative excess."); +#endif - BufferAt(_position) = BinaryTypeCode.StringMedium; - Unsafe.WriteUnaligned(ref BufferAt(++_position), (uint)charLength | ((uint)bytesWritten << 16)); + // UTF16 branch remains on the existing FastWire path for now. + // Current universal slot is unsigned (ASCII=0, UTF8>0). If UTF16-via-slot is introduced later, + // the discriminator design must be revisited (separate flag/marker or signed slot variant). - _position = _position + 4 + bytesWritten; - return; - } - default: - { - WriteStringBigTierColdPath(encodeStart, charLength, bytesWritten, 9 - asciiHeader); - return; - } - } + if (slotSize == 1) Unsafe.WriteUnaligned(ref BufferAt(slotPos), unchecked((byte)excess)); + else if (slotSize == 2) Unsafe.WriteUnaligned(ref BufferAt(slotPos), unchecked((ushort)excess)); + else Unsafe.WriteUnaligned(ref BufferAt(slotPos), excess); + + _position = encodeStart + bytesWritten; } [MethodImpl(MethodImplOptions.NoInlining)] - private void WriteStringBigTierColdPath(int encodeStart, int charLength, int bytesWritten, int shift) - { - // Big tier: 9-byte header [marker:1][charLen:32][utf8Len:32] - if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart + shift, bytesWritten)); - - BufferAt(_position) = BinaryTypeCode.StringBig; - Unsafe.WriteUnaligned(ref BufferAt(++_position), (uint)charLength | ((ulong)(uint)bytesWritten << 32)); - - _position = _position + 8 + bytesWritten; - } + private static void ThrowStringEncodingFailed(OperationStatus status) => + throw new InvalidOperationException( + $"String UTF-8 encode failed in WriteStringWithDispatch: status={status}. " + + "This indicates an unexpected encoder failure (e.g. destination sizing or invalid input state)." ); /// /// Writes the first-occurrence body of an interned string with H2Q6 tier-marker dispatch. 
diff --git a/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs b/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs index 9a81058..e2afbee 100644 --- a/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs +++ b/AyCode.Core/Serializers/Binaries/ArrayBinaryOutput.cs @@ -17,7 +17,7 @@ public struct ArrayBinaryOutput : IBinaryOutputBase, IDisposable private const int MaxKeepBufferSize = 32 * 1024; // 32KB — below this, keep for reuse private readonly int _initialCapacity; - private byte[] _rentedBuffer; + private byte[]? _rentedBuffer; public ArrayBinaryOutput(int initialCapacity = 65535) { @@ -75,13 +75,13 @@ public struct ArrayBinaryOutput : IBinaryOutputBase, IDisposable return result; } - /// Copies the written data to an IBufferWriter (single memcpy). - public void WriteTo(IBufferWriter writer, byte[] buffer, int position) - { - var span = writer.GetSpan(position); - buffer.AsSpan(0, position).CopyTo(span); - writer.Advance(position); - } + ///// Copies the written data to an IBufferWriter (single memcpy). + //public void WriteTo(IBufferWriter writer, byte[] buffer, int position) + //{ + // var span = writer.GetSpan(position); + // buffer.AsSpan(0, position).CopyTo(span); + // writer.Advance(position); + //} //TODO: miért nem static a DetachResult? 
/// diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs index acfe249..6ea4f15 100644 --- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs +++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs @@ -64,29 +64,19 @@ internal static class BinaryTypeCode public const byte Char = SlotCount + 26; // 90 // ============================================================================ - // String types — H2Q6 layout (post 2026-05-06 marker reorg, wire format v3) + // String types // ============================================================================ // - // Non-ASCII strings use fixed-width header tier markers (NO VarUInt utf8Len), - // enabling 1-pass decode (CountUtf8Chars Pass 1 eliminated). + // Marker 91 is reserved for FastWire UTF-16 payloads: + // [StringUtf16][charLen:int32 LE][UTF-16 raw bytes] // - // Tier dispatch (writer chooses smallest fitting tier based on utf8Len): - // StringSmall — utf8Len ≤ 255 — header: 1 marker + 1 charLen + 1 utf8Len = 3 byte - // StringMedium — utf8Len ≤ 65535 — header: 1 marker + 2 charLen + 2 utf8Len = 5 byte - // StringBig — utf8Len > 65535 — header: 1 marker + 4 charLen + 4 utf8Len = 9 byte + // Universal compact-mode strings use FixStr (135..166) + String (167): + // [FixStr] / [String][VarUInt charLen][unsigned excess slot][UTF-8 bytes] // - // Interning tiers (writer chooses based on utf8Len; Big never engages — MaxStringInternLength - // is byte-typed (max 255 char × max 4 byte/char = 1020 byte fits in Medium): - // StringInternFirstSmall — utf8Len ≤ 255 — header: 1 + cacheIdx-VarUInt + 1 + 1 - // StringInternFirstMedium — utf8Len ≤ 65535 — header: 1 + cacheIdx-VarUInt + 2 + 2 - // - // ASCII strings continue to use FixStrAscii (135..166) and StringAscii (167) — unchanged from M3R7. 
- // - // String types (SlotCount + 27..30) - public const byte StringSmall = SlotCount + 27; // 91 — Non-ASCII tier 1: [marker:1][charLen:8][utf8Len:8][bytes], utf8Len ≤ 255 - public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence) — UNCHANGED - public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker — UNCHANGED - public const byte StringMedium = SlotCount + 30; // 94 — Non-ASCII tier 2: [marker:1][charLen:16][utf8Len:16][bytes], utf8Len ≤ 65535 + // Interning tiers keep dedicated markers. + public const byte StringUtf16 = SlotCount + 27; // 91 — FastWire UTF-16 marker payload + public const byte StringInterned = SlotCount + 28; // 92 — Reference to interned string by index (2+ occurrence) + public const byte StringEmpty = SlotCount + 29; // 93 — Empty string marker // Date/Time types (SlotCount + 31..34) public const byte DateTime = SlotCount + 31; // 95 @@ -112,8 +102,7 @@ internal static class BinaryTypeCode // FixStr (non-ASCII) markers REMOVED in H2Q6 — non-ASCII strings now use Small/Medium/Big tiers // for 1-pass decode (eliminated CountUtf8Chars Pass 1). 
// - // CURRENT ALLOCATION (5 of 32 used): - public const byte StringBig = SlotCount + 39; // 103 — Non-ASCII tier 3: [marker:1][charLen:32][utf8Len:32][bytes], utf8Len > 65535 + // CURRENT ALLOCATION: public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes] @@ -129,20 +118,21 @@ internal static class BinaryTypeCode public const byte ReservedRangeMin = SlotCount + 42; // 106 — first reserved value (post-H2Q6 future-feature range) public const byte ReservedRangeMax = SlotCount + 70; // 134 — last reserved value - // FixStrAscii range (ASCII-only short strings): 135..166 (32 values for byte lengths 0-31) - // FixStrAscii encoding: FixStrAsciiBase + byteLength - // Content semantics: pure ASCII bytes (every byte < 0x80). Reader can use byte→char widening - // without UTF-8 decode or ASCII validation — the marker itself is the validation contract. - // Writer emits this when it can prove the content is ASCII (e.g., GetBytes returns byteCount == charLength). 
- public const byte FixStrAsciiBase = SlotCount + 71; // 135 - public const byte FixStrAsciiMax = FixStrAsciiBase + 31; // 166 - public const int FixStrAsciiMaxLength = 31; + // FixStr range (short universal string marker): 135..166 (32 values for char lengths 0-31) + // Encoding: FixStrBase + charLength + public const byte FixStrBase = SlotCount + 71; // 135 + public const byte FixStrMax = FixStrBase + 31; // 166 + public const int FixStrMaxLength = 31; + // Backward-compatible aliases (old naming) + public const byte FixStrAsciiBase = FixStrBase; + public const byte FixStrAsciiMax = FixStrMax; + public const int FixStrAsciiMaxLength = FixStrMaxLength; - // Long ASCII string marker: 167 - // Layout: [StringAscii] [VarUInt byteCount] [ASCII bytes] - // Counterpart to StringSmall/Medium/Big — but ASCII content (charLen == byteCount, no UTF-8 decode). - // Reader fast-widens via byte→char without UTF-8 decode or IsValid scan. - public const byte StringAscii = SlotCount + 103; // 167 + // Long universal string marker: 167 + // Layout: [String] [VarUInt charLength] [excess slot] [bytes] + public const byte String = SlotCount + 103; // 167 + // Backward-compatible alias (old naming) + public const byte StringAscii = String; // Reserved slot block: 168..175 (8 slots) for future string-related markers // (e.g., StringCompressed, StringEncoded, StringMixedAscii, etc.). 
Keeping the 135..167 range @@ -191,17 +181,18 @@ internal static class BinaryTypeCode /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsString(byte typeCode) - => (typeCode is >= StringSmall and <= StringMedium) // 91..94: StringSmall, StringInterned, StringEmpty, StringMedium - || (typeCode is >= StringBig and <= StringInternFirstMedium) // 103..105: StringBig, StringInternFirstSmall, StringInternFirstMedium - || (typeCode is >= FixStrAsciiBase and <= StringAscii); // 135..167: FixStrAscii + StringAscii + => typeCode == StringUtf16 + || typeCode == StringInterned + || typeCode == StringEmpty + || typeCode == StringInternFirstSmall + || typeCode == StringInternFirstMedium + || (typeCode is >= FixStrBase and <= String); // 135..167: FixStr + String /// - /// Check if type code is one of the H2Q6 non-ASCII string tier markers (StringSmall / StringMedium / StringBig). - /// Excludes interning tier markers (use ) and ASCII markers (use ). + /// Check if type code is the FastWire UTF-16 string marker. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsStringTier(byte typeCode) - => typeCode == StringSmall || typeCode == StringMedium || typeCode == StringBig; + public static bool IsStringUtf16(byte typeCode) => typeCode == StringUtf16; /// /// Check if type code is a H2Q6 interning first-occurrence tier marker (StringInternFirstSmall / Medium). @@ -212,36 +203,69 @@ internal static class BinaryTypeCode => typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium; /// - /// Check if type code is any ASCII string marker — FixStrAscii (short) or StringAscii (long). + /// Check if type code is any universal string marker — FixStr (short) or String (long). /// Single contiguous range (135..167) for branch-friendly dispatch on the reader hot path. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsAsciiString(byte typeCode) => typeCode is >= FixStrAsciiBase and <= StringAscii; + public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= String; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsAsciiString(byte typeCode) => IsStringUniversalMarker(typeCode); /// - /// Check if type code is a FixStrAscii (ASCII short string with byte length encoded in type code). + /// Check if type code is a FixStr (short universal marker with char length encoded in type code). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsFixStrAscii(byte typeCode) => typeCode is >= FixStrAsciiBase and <= FixStrAsciiMax; + public static bool IsFixStr(byte typeCode) => typeCode is >= FixStrBase and <= FixStrMax; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsFixStrAscii(byte typeCode) => IsFixStr(typeCode); /// - /// Decode FixStrAscii byte length from type code. Length is also the char count (1 byte = 1 char for ASCII). + /// Decode FixStr char length from type code. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int DecodeFixStrAsciiLength(byte typeCode) => typeCode - FixStrAsciiBase; + public static int DecodeFixStrLength(byte typeCode) => typeCode - FixStrBase; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int DecodeFixStrAsciiLength(byte typeCode) => DecodeFixStrLength(typeCode); /// - /// Encode FixStrAscii type code for given byte length (0-31). Caller asserts ASCII content semantics - /// (every byte less than 0x80). Misuse on non-ASCII content corrupts decode. + /// Encode FixStr type code for given char length (0-31). 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static byte EncodeFixStrAscii(int byteLength) => (byte)(FixStrAsciiBase + byteLength); + public static byte EncodeFixStr(int charLength) => (byte)(FixStrBase + charLength); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static byte EncodeFixStrAscii(int charLength) => EncodeFixStr(charLength); /// - /// Check if byte length can be encoded as FixStrAscii (ASCII short string, 0..31 bytes). + /// Check if char length can be encoded as FixStr (short string, 0..31 chars). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool CanEncodeAsFixStrAscii(int byteLength) => byteLength is >= 0 and <= 31; + public static bool CanEncodeAsFixStr(int charLength) => charLength is >= 0 and <= 31; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool CanEncodeAsFixStrAscii(int charLength) => CanEncodeAsFixStr(charLength); + + /// + /// Universal string excess-slot width selector shared by writer and reader. + /// + /// Current wire contract uses UNSIGNED excess (ASCII=0, UTF8>0), so the slot thresholds are: + /// 1-byte: max excess 255 => charLength <= 127 (worst-case excess = 2 * charLength) + /// 2-byte: max excess 65535 => charLength <= 32767 + /// 4-byte: fallback + /// + /// IMPORTANT: if the protocol switches back to SIGNED excess (e.g., UTF16 discriminator via negative + /// values), these thresholds MUST be reduced here as well (typically 63 / 16383 / 4-byte fallback). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetUniversalStringExcessSlotSize(int charLength) + { + if (charLength <= 127) return 1; + if (charLength <= 32767) return 2; + return 4; + } /// /// Check if type code is a tiny int (single byte int32 encoding). 
diff --git a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md index c0e939c..5842634 100644 --- a/AyCode.Core/docs/BINARY/BINARY_ISSUES.md +++ b/AyCode.Core/docs/BINARY/BINARY_ISSUES.md @@ -156,6 +156,52 @@ Assigning a `BufferWriterBinaryOutput` value creates an independent copy. State A single instance must not use context + standalone modes simultaneously — buffer states desynchronize. One mode per lifecycle phase; `FlushAndReset()` as boundary between modes. +### ACCORE-BIN-I-Q4P7: ArrayBinaryOutput.DetachResult ownership transfer missing → pooled buffer double-return + +**Status:** Open · **Severity:** Critical (latent — silent cross-talk corruption) +**Affects:** `ArrayBinaryOutput.DetachResult`, `ArrayBinaryOutput.Reset`, `ArrayBinaryOutput.Dispose` + +`DetachResult` returns `new BinarySerializationResult(resultBuffer, ..., pooled: true)`, which transfers buffer ownership to the result object (caller disposes result → buffer returned to `ArrayPool`). But `_rentedBuffer` keeps referencing the same array after detach. Later `Reset` (large-buffer branch) or `Dispose` returns `_rentedBuffer` again, causing a double-return of the same array to `ArrayPool<byte>.Shared`. + +**Impact:** Silent, intermittent data corruption. ArrayPool can hand out the same physical array to multiple renters after double-return, enabling cross-talk between unrelated serialization operations. + +**Why this is active in default config:** ctor default `initialCapacity=65535`, while `MaxKeepBufferSize=32KB`; the detached default buffer is considered "large", so `Reset` naturally enters the return-to-pool path. + +**Fix direction:** Treat `DetachResult` as a strict ownership transfer boundary. After detach, `_rentedBuffer` must no longer point to the detached array. Possible implementation variants: +- **Eager replacement:** rent a replacement buffer immediately in `DetachResult`.
+- **Lazy replacement:** set `_rentedBuffer = null` in `DetachResult`, and perform lazy-rent in `Initialize` (`_rentedBuffer ??= ArrayPool<byte>.Shared.Rent(_initialCapacity)`). + +Lazy replacement avoids redundant rent/return churn when `DetachResult` is followed by `Reset` or `Dispose`, while preserving single-owner semantics (the detached `BinarySerializationResult` remains the only owner until it calls `Return` in `Dispose`). Ensure exactly one return path per rented array. + +### ACCORE-BIN-I-R2D6: ArrayBinaryOutput.Reset may set `_rentedBuffer` to null while `Initialize` assumes non-null + +**Status:** Open +**Affects:** `ArrayBinaryOutput.Reset`, `ArrayBinaryOutput.Initialize` + +`Reset` currently does: + +`_rentedBuffer = nextCapacity == _initialCapacity ? null : ArrayPool<byte>.Shared.Rent(nextCapacity);` + +So `_rentedBuffer` can become null after returning a large buffer, while `Initialize` unconditionally reads `_rentedBuffer` and `_rentedBuffer.Length`. + +**Impact:** Deterministic `NullReferenceException` on the next initialization path when the null branch is taken. + +**Fix direction:** Keep `_rentedBuffer` always non-null by renting `_initialCapacity` in that branch, or add lazy-rent null handling in `Initialize` before dereference. + +### ACCORE-BIN-I-V8N4: BinarySerializationResult accessors remain usable after Dispose → pooled-buffer use-after-return + +**Status:** Open · **Severity:** Critical (latent — silent data corruption) +**Affects:** `BinarySerializationResult.Buffer`, `BinarySerializationResult.Span`, `BinarySerializationResult.Memory`, `BinarySerializationResult.Dispose` + +`BinarySerializationResult.Dispose` returns the underlying array to `ArrayPool` when `pooled=true`, but public accessors (`Buffer` / `Span` / `Memory`) remain callable without a disposed guard. After dispose, the same array may already be re-rented and mutated by unrelated operations; reading the old result then becomes use-after-return on pooled memory.
+ +**Impact:** Silent, non-deterministic cross-talk corruption. Consumers may observe stale/foreign bytes through `Span` / `Memory` / `Buffer` with no exception signal. + +**Possible fix directions:** +- Add `_disposed` guard to all accessors (`ObjectDisposedException` after dispose). +- Optionally scrub/neutralize post-dispose state (e.g., replace exposed buffer reference with `Array.Empty<byte>()`) to reduce accidental reuse risk. +- Clarify API ownership contract in docs: disposed result is terminal and must not be accessed. + ## Configuration / Options ### ACCORE-BIN-I-L8N5: AcBinarySerializerOptions thread-safety — mutable properties on shared instances