diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs index 1cb0719..78c77a2 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs @@ -1963,53 +1963,21 @@ public class AcBinarySourceGenerator : IIncrementalGenerator sb.AppendLine($"{i} case BinaryTypeCode.StringInterned:"); sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());"); sb.AppendLine($"{i} break;"); - // H2Q6 StringSmall — non-ASCII utf8Len ≤ 255 — wire: [charLen:8][utf8Len:8][bytes], 1-pass decode. - // FastWire mode shares the marker value (=91); reader dispatches by mode. + // H2Q6 string-tier markers + StringAscii + interning tiers. Wire-decode body is shared with + // the runtime path (TypeReaderTable + cross-type populate) — see context.ReadStringSmall/Medium/Big, + // ReadPlainStringAscii, ReadAndRegisterInternedStringSmall/Medium. sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} if (context.FastWire)"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes."); - sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles"); - sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)"); - sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();"); - sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);"); - sb.AppendLine($"{i} }}"); - sb.AppendLine($"{i} else"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var sshdr = context.ReadTwoBytesUnsafe();"); - sb.AppendLine($"{i} var sscharLen = (byte)sshdr;"); - sb.AppendLine($"{i} var ssbyteLen = (byte)(sshdr >> 8);"); - sb.AppendLine($"{i} {a} = ssbyteLen == 0 ? 
string.Empty : context.ReadStringUtf8WithCharLen(sscharLen, ssbyteLen);"); - sb.AppendLine($"{i} }}"); + sb.AppendLine($"{i} {a} = context.ReadStringSmall();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 StringMedium — utf8Len ≤ 65535 — single uint read packs charLen:16 + utf8Len:16 sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var smpacked = context.ReadUInt32Unsafe();"); - sb.AppendLine($"{i} var smcharLen = (ushort)smpacked;"); - sb.AppendLine($"{i} var smbyteLen = (ushort)(smpacked >> 16);"); - sb.AppendLine($"{i} {a} = smbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(smcharLen, smbyteLen);"); + sb.AppendLine($"{i} {a} = context.ReadStringMedium();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 StringBig — utf8Len > 65535 — single ulong read packs charLen:32 + utf8Len:32 sb.AppendLine($"{i} case BinaryTypeCode.StringBig:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var sbpacked = context.ReadUInt64Unsafe();"); - sb.AppendLine($"{i} var sbcharLen = (int)(uint)sbpacked;"); - sb.AppendLine($"{i} var sbbyteLen = (int)(uint)(sbpacked >> 32);"); - sb.AppendLine($"{i} {a} = sbbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sbcharLen, sbbyteLen);"); + sb.AppendLine($"{i} {a} = context.ReadStringBig();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:"); - sb.AppendLine($"{i} {{"); - sb.AppendLine($"{i} var salen = (int)context.ReadVarUInt();"); - sb.AppendLine($"{i} {a} = salen == 0 ? string.Empty : context.ReadAsciiBytesAsString(salen);"); + sb.AppendLine($"{i} {a} = context.ReadPlainStringAscii();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} }}"); - // H2Q6 interning — Small / Medium tiers. Wire-decode body is shared with the runtime path - // (TypeReaderTable + cross-type populate) — see context.ReadAndRegisterInternedStringSmall/Medium. 
sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:"); sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringSmall();"); sb.AppendLine($"{i} break;"); diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index 93752a3..d831c78 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -619,6 +619,88 @@ public static partial class AcBinaryDeserializer return value; } + /// + /// H2Q6 StringSmall reader (Compact mode): wire [charLen:8][utf8Len:8][UTF-8 bytes] after the + /// marker has been consumed. 1-pass decode (no CountUtf8Chars). FastWire mode reuses the same + /// marker value (=91) but a different layout — [charLen:int32 LE][UTF-16 raw bytes]; this method + /// dispatches on the FastWire flag. Single source of wire-decode shared by runtime TypeReaderTable, + /// cross-type populate, AND SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringSmall() + { + if (FastWire) + { + // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]. + // Fixed-width int32 charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. + var charLenF = ReadInt32Unsafe(); + return ReadStringUtf16(charLenF); + } + + // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] + var header = ReadTwoBytesUnsafe(); + var charLength = (byte)header; + var byteLength = (byte)(header >> 8); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes] after the marker + /// has been consumed. 1-pass decode. Header read in a single uint load (vs 2 ushort loads). Shared + /// by runtime dispatch + SGen-emit. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringMedium() + { + var packed = ReadUInt32Unsafe(); + var charLength = (ushort)packed; + var byteLength = (ushort)(packed >> 16); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes] after the marker + /// has been consumed. 1-pass decode. Header read in a single ulong load (vs 2 uint loads). Includes + /// a corrupted-wire guard for negative casts from uint values > Int32.MaxValue. Shared by + /// runtime dispatch + SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadStringBig() + { + var packed = ReadUInt64Unsafe(); + var charLength = (int)(uint)packed; + var byteLength = (int)(uint)(packed >> 32); + // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values + // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). + // Branch-predictor-friendly: always false on a valid wire. + if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); + return ReadStringUtf8WithCharLen(charLength, byteLength); + } + + /// + /// Throw helper for the corrupted-wire guard in ReadStringBig. NoInlining + /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. + /// + [MethodImpl(MethodImplOptions.NoInlining)] + private void ThrowCorruptedBigWire(int charLength, int byteLength) => + throw new AcBinaryDeserializationException( + $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + + $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", + -1); + + /// + /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). + /// Wire format: [VarUInt byteCount][ASCII bytes]. 
Byte→char widen, no UTF-8 decode. Shared + /// by runtime dispatch + SGen-emit. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadPlainStringAscii() + { + var length = (int)ReadVarUInt(); + if (length == 0) return string.Empty; + return ReadAsciiBytesAsString(length); + } + /// /// H2Q6 StringInternFirstSmall reader: wire [cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] /// after the marker has been consumed. Registers the decoded string in the intern cache and returns it. diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 501f86a..4c3e66b 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -99,15 +99,15 @@ public static partial class AcBinaryDeserializer readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe(); // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode. // FastWire mode dispatches the StringSmall (=91) marker through the same handler — see ReadStringSmall. 
- readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ReadStringSmall(ctx); - readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ReadStringMedium(ctx); - readers[BinaryTypeCode.StringBig] = static (ctx, _) => ReadStringBig(ctx); + readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ctx.ReadStringSmall(); + readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ctx.ReadStringMedium(); + readers[BinaryTypeCode.StringBig] = static (ctx, _) => ctx.ReadStringBig(); readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt()); readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty; // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); - readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ReadPlainStringAscii(ctx); + readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ctx.ReadPlainStringAscii(); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); @@ -1049,16 +1049,16 @@ public static partial class AcBinaryDeserializer switch (typeCode) { case BinaryTypeCode.StringSmall: - propInfo.SetValue(target, ReadStringSmall(context)); + propInfo.SetValue(target, context.ReadStringSmall()); return true; case BinaryTypeCode.StringMedium: - propInfo.SetValue(target, ReadStringMedium(context)); + propInfo.SetValue(target, context.ReadStringMedium()); return true; case BinaryTypeCode.StringBig: - propInfo.SetValue(target, ReadStringBig(context)); + propInfo.SetValue(target, context.ReadStringBig()); return true; case BinaryTypeCode.StringAscii: - 
propInfo.SetValue(target, ReadPlainStringAscii(context)); + propInfo.SetValue(target, context.ReadPlainStringAscii()); return true; case BinaryTypeCode.StringEmpty: propInfo.SetValue(target, string.Empty); @@ -1155,91 +1155,8 @@ public static partial class AcBinaryDeserializer return context.ReadStringUtf8(length); } - /// - /// H2Q6 StringSmall reader (Compact mode): wire [charLen:8][utf8Len:8][UTF-8 bytes] after the - /// marker has been consumed. 1-pass decode (no CountUtf8Chars). FastWire mode uses the same - /// marker (=91) but a different layout — handled via - /// when the deserializer is in FastWire mode. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringSmall(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - if (context.FastWire) - { - // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]. - // Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. - // Path used by collection/dictionary element string reads (markered) and runtime path. - // SGen property-level strings take the markerless EmitReadProp path which calls - // `ReadStringUtf16` directly, bypassing the `ReadStringSmall` marker dispatch. - var charLenF = context.ReadInt32Unsafe(); - return context.ReadStringUtf16(charLenF); - } - - // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] - var header = context.ReadTwoBytesUnsafe(); - var charLength = (byte)header; - var byteLength = (byte)(header >> 8); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// H2Q6 StringMedium reader: wire [charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]. 1-pass decode. - /// Header read in a single uint load (vs 2 ushort loads). 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringMedium(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var packed = context.ReadUInt32Unsafe(); - var charLength = (ushort)packed; - var byteLength = (ushort)(packed >> 16); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// H2Q6 StringBig reader: wire [charLen:32 LE][utf8Len:32 LE][UTF-8 bytes]. 1-pass decode. - /// Header read in a single ulong load (vs 2 uint loads). - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadStringBig(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var packed = context.ReadUInt64Unsafe(); - var charLength = (int)(uint)packed; - var byteLength = (int)(uint)(packed >> 32); - // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values - // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). - // Predict-friendly: always false on a valid wire. - if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); - return context.ReadStringUtf8WithCharLen(charLength, byteLength); - } - - /// - /// Throw helper for the corrupted-wire guard in . NoInlining - /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. - /// - [MethodImpl(MethodImplOptions.NoInlining)] - private static void ThrowCorruptedBigWire(int charLength, int byteLength) => - throw new AcBinaryDeserializationException( - $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + - $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", - -1); - - /// - /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). 
- /// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadPlainStringAscii(BinaryDeserializationContext context) - where TInput : struct, IBinaryInputBase - { - var length = (int)context.ReadVarUInt(); - if (length == 0) return string.Empty; - return context.ReadAsciiBytesAsString(length); - } - - // ReadAndRegisterInternedStringSmall / Medium moved to BinaryDeserializationContext as instance + // ReadStringSmall / Medium / Big / PlainStringAscii and ReadAndRegisterInternedStringSmall / Medium + // (+ the cold ThrowCorruptedBigWire helper) all moved to BinaryDeserializationContext as instance // methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty // cross-type path, and the SGen-emitted string-property switch. See // `BinaryDeserializationContext.Read.cs` for the implementations.