From 853aa23e37adb3ebcc8fea75ba1e307f9886ffa7 Mon Sep 17 00:00:00 2001 From: Loretta Date: Fri, 15 May 2026 10:43:49 +0200 Subject: [PATCH] Refactor string deserialization logic to context methods Moved StringSmall/Medium/Big/Ascii readers from static helpers in AcBinaryDeserializer to instance methods on BinaryDeserializationContext. Updated all call sites (runtime, SGen, type reader table) to use the new methods. Improved documentation, clarified wire format handling, and added a corrupted-wire guard for StringBig. Removes duplication and centralizes string wire-decode logic. --- .../AcBinarySourceGenerator.cs | 46 ++------ ...lizer.BinaryDeserializationContext.Read.cs | 82 ++++++++++++++ .../Binaries/AcBinaryDeserializer.cs | 103 ++---------------- 3 files changed, 99 insertions(+), 132 deletions(-) diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs index 1cb0719..78c77a2 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs @@ -1963,53 +1963,21 @@ public class AcBinarySourceGenerator : IIncrementalGenerator sb.AppendLine($"{i} case BinaryTypeCode.StringInterned:"); sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());"); sb.AppendLine($"{i} break;"); - // H2Q6 StringSmall — non-ASCII utf8Len ≤ 255 — wire: [charLen:8][utf8Len:8][bytes], 1-pass decode. - // FastWire mode shares the marker value (=91); reader dispatches by mode. + // H2Q6 string-tier markers + StringAscii + interning tiers. Wire-decode body is shared with + // the runtime path (TypeReaderTable + cross-type populate) — see context.ReadStringSmall/Medium/Big, + // ReadPlainStringAscii, ReadAndRegisterInternedStringSmall/Medium. 
sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} if (context.FastWire)"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes."); - sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles"); - sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)"); - sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();"); - sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);"); - sb.AppendLine($"{i} }}"); - sb.AppendLine($"{i} else"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var sshdr = context.ReadTwoBytesUnsafe();"); - sb.AppendLine($"{i} var sscharLen = (byte)sshdr;"); - sb.AppendLine($"{i} var ssbyteLen = (byte)(sshdr >> 8);"); - sb.AppendLine($"{i} {a} = ssbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sscharLen, ssbyteLen);"); - sb.AppendLine($"{i} }}"); + sb.AppendLine($"{i} {a} = context.ReadStringSmall();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 StringMedium — utf8Len ≤ 65535 — single uint read packs charLen:16 + utf8Len:16 sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var smpacked = context.ReadUInt32Unsafe();"); - sb.AppendLine($"{i} var smcharLen = (ushort)smpacked;"); - sb.AppendLine($"{i} var smbyteLen = (ushort)(smpacked >> 16);"); - sb.AppendLine($"{i} {a} = smbyteLen == 0 ? 
string.Empty : context.ReadStringUtf8WithCharLen(smcharLen, smbyteLen);"); + sb.AppendLine($"{i} {a} = context.ReadStringMedium();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 StringBig — utf8Len > 65535 — single ulong read packs charLen:32 + utf8Len:32 sb.AppendLine($"{i} case BinaryTypeCode.StringBig:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var sbpacked = context.ReadUInt64Unsafe();"); - sb.AppendLine($"{i} var sbcharLen = (int)(uint)sbpacked;"); - sb.AppendLine($"{i} var sbbyteLen = (int)(uint)(sbpacked >> 32);"); - sb.AppendLine($"{i} {a} = sbbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sbcharLen, sbbyteLen);"); + sb.AppendLine($"{i} {a} = context.ReadStringBig();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var salen = (int)context.ReadVarUInt();"); - sb.AppendLine($"{i} {a} = salen == 0 ? string.Empty : context.ReadAsciiBytesAsString(salen);"); + sb.AppendLine($"{i} {a} = context.ReadPlainStringAscii();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 interning — Small / Medium tiers. Wire-decode body is shared with the runtime path - // (TypeReaderTable + cross-type populate) — see context.ReadAndRegisterInternedStringSmall/Medium. 
sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:"); sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringSmall();"); sb.AppendLine($"{i} break;"); diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index 93752a3..d831c78 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -619,6 +619,88 @@ public static partial class AcBinaryDeserializer return value; } + /// + /// H2Q6 StringSmall reader (Compact mode): wire [charLen:8][utf8Len:8][UTF-8 bytes] after the + /// marker has been consumed. 1-pass decode (no CountUtf8Chars). FastWire mode reuses the same + /// marker value (=91) but a different layout — [charLen:int32 LE][UTF-16 raw bytes]; this method + /// dispatches by FastWire flag. Single source of wire-decode shared by runtime TypeReaderTable, + /// cross-type populate, AND SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringSmall() + { + if (FastWire) + { + // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]. + // Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. + var charLenF = ReadInt32Unsafe(); + return ReadStringUtf16(charLenF); + } + + // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] + var header = ReadTwoBytesUnsafe(); + var charLength = (byte)header; + var byteLength = (byte)(header >> 8); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes] after the marker + /// has been consumed. 1-pass decode. Header read in a single uint load (vs 2 ushort loads). Shared + /// by runtime dispatch + SGen-emit. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringMedium() + { + var packed = ReadUInt32Unsafe(); + var charLength = (ushort)packed; + var byteLength = (ushort)(packed >> 16); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes] after the marker + /// has been consumed. 1-pass decode. Header read in a single ulong load (vs 2 uint loads). Includes + /// a corrupted-wire guard for negative casts from uint values > Int32.MaxValue. Shared by + /// runtime dispatch + SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringBig() + { + var packed = ReadUInt64Unsafe(); + var charLength = (int)(uint)packed; + var byteLength = (int)(uint)(packed >> 32); + // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values + // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). + // Predict-friendly: always false on a valid wire. + if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// Throw helper for the corrupted-wire guard in <see cref="ReadStringBig"/>. NoInlining + /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. + /// + [MethodImpl(MethodImplOptions.NoInlining)] + private void ThrowCorruptedBigWire(int charLength, int byteLength) => + throw new AcBinaryDeserializationException( + $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + + $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", + -1); + + /// + /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). + /// Wire format: [VarUInt byteCount][ASCII bytes]. 
Byte→char widen, no UTF-8 decode. Shared + /// by runtime dispatch + SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadPlainStringAscii() + { + var length = (int)ReadVarUInt(); + if (length == 0) return string.Empty; + return ReadAsciiBytesAsString(length); + } + /// /// H2Q6 StringInternFirstSmall reader: wire [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] /// after the marker has been consumed. Registers the decoded string in the intern cache and returns it. diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 501f86a..4c3e66b 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -99,15 +99,15 @@ public static partial class AcBinaryDeserializer readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe(); // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode. // FastWire mode dispatches the StringSmall (=91) marker through the same handler — see ReadStringSmall. 
- readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ReadStringSmall(ctx); - readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ReadStringMedium(ctx); - readers[BinaryTypeCode.StringBig] = static (ctx, _) => ReadStringBig(ctx); + readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ctx.ReadStringSmall(); + readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ctx.ReadStringMedium(); + readers[BinaryTypeCode.StringBig] = static (ctx, _) => ctx.ReadStringBig(); readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt()); readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty; // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); - readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ReadPlainStringAscii(ctx); + readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ctx.ReadPlainStringAscii(); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); @@ -1049,16 +1049,16 @@ public static partial class AcBinaryDeserializer switch (typeCode) { case BinaryTypeCode.StringSmall: - propInfo.SetValue(target, ReadStringSmall(context)); + propInfo.SetValue(target, context.ReadStringSmall()); return true; case BinaryTypeCode.StringMedium: - propInfo.SetValue(target, ReadStringMedium(context)); + propInfo.SetValue(target, context.ReadStringMedium()); return true; case BinaryTypeCode.StringBig: - propInfo.SetValue(target, ReadStringBig(context)); + propInfo.SetValue(target, context.ReadStringBig()); return true; case BinaryTypeCode.StringAscii: - 
propInfo.SetValue(target, ReadPlainStringAscii(context)); + propInfo.SetValue(target, context.ReadPlainStringAscii()); return true; case BinaryTypeCode.StringEmpty: propInfo.SetValue(target, string.Empty); @@ -1155,91 +1155,8 @@ public static partial class AcBinaryDeserializer return context.ReadStringUtf8(length); } - /// - /// H2Q6 StringSmall reader (Compact mode): wire [charLen:8][utf8Len:8][UTF-8 bytes] after the - /// marker has been consumed. 1-pass decode (no CountUtf8Chars). FastWire mode uses the same - /// marker (=91) but a different layout — handled via - /// when the deserializer is in FastWire mode. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringSmall(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - if (context.FastWire) - { - // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]. - // Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. - // Path used by collection/dictionary element string reads (markered) and runtime path. - // SGen property-level strings take the markerless EmitReadProp path which calls - // `ReadStringUtf16` directly, bypassing the `ReadStringSmall` marker dispatch. - var charLenF = context.ReadInt32Unsafe(); - return context.ReadStringUtf16(charLenF); - } - - // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] - var header = context.ReadTwoBytesUnsafe(); - var charLength = (byte)header; - var byteLength = (byte)(header >> 8); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]. 1-pass decode. - /// Header read in a single uint load (vs 2 ushort loads). 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringMedium(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var packed = context.ReadUInt32Unsafe(); - var charLength = (ushort)packed; - var byteLength = (ushort)(packed >> 16); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes]. 1-pass decode. - /// Header read in a single ulong load (vs 2 uint loads). - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringBig(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var packed = context.ReadUInt64Unsafe(); - var charLength = (int)(uint)packed; - var byteLength = (int)(uint)(packed >> 32); - // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values - // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). - // Predict-friendly: always false on a valid wire. - if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// Throw helper for the corrupted-wire guard in . NoInlining - /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. - /// - [MethodImpl(MethodImplOptions.NoInlining)] - private static void ThrowCorruptedBigWire(int charLength, int byteLength) => - throw new AcBinaryDeserializationException( - $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + - $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", - -1); - - /// - /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). 
- /// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadPlainStringAscii(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var length = (int)context.ReadVarUInt(); - if (length == 0) return string.Empty; - return context.ReadAsciiBytesAsString(length); - } - - // ReadAndRegisterInternedStringSmall / Medium moved to BinaryDeserializationContext as instance + // ReadStringSmall / Medium / Big / PlainStringAscii and ReadAndRegisterInternedStringSmall / Medium + // (+ the cold ThrowCorruptedBigWire helper) all moved to BinaryDeserializationContext as instance // methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty // cross-type path, and the SGen-emitted string-property switch. See // `BinaryDeserializationContext.Read.cs` for the implementations.