Refactor string deserialization logic to context methods

Moved StringSmall/Medium/Big/Ascii readers from static helpers in AcBinaryDeserializer to instance methods on BinaryDeserializationContext. Updated all call sites (runtime, SGen, type reader table) to use the new methods. Improved documentation and clarified wire format handling. The existing corrupted-wire guard for StringBig moved with the reader, so SGen-emitted code (which previously decoded StringBig inline without the guard) is now covered by it as well. Removed duplication and centralized string wire-decode logic.
This commit is contained in:
Loretta 2026-05-15 10:43:49 +02:00
parent 8293a6edd1
commit 853aa23e37
3 changed files with 99 additions and 132 deletions

View File

@ -1963,53 +1963,21 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
sb.AppendLine($"{i} case BinaryTypeCode.StringInterned:"); sb.AppendLine($"{i} case BinaryTypeCode.StringInterned:");
sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());"); sb.AppendLine($"{i} {a} = context.GetInternedString((int)context.ReadVarUInt());");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");
// H2Q6 StringSmall — non-ASCII utf8Len ≤ 255 — wire: [charLen:8][utf8Len:8][bytes], 1-pass decode. // H2Q6 string-tier markers + StringAscii + interning tiers. Wire-decode body is shared with
// FastWire mode shares the marker value (=91); reader dispatches by mode. // the runtime path (TypeReaderTable + cross-type populate) — see context.ReadStringSmall/Medium/Big,
// ReadPlainStringAscii, ReadAndRegisterInternedStringSmall/Medium.
sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:"); sb.AppendLine($"{i} case BinaryTypeCode.StringSmall:");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {a} = context.ReadStringSmall();");
sb.AppendLine($"{i} if (context.FastWire)");
sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes.");
sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles");
sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)");
sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();");
sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} else");
sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} var sshdr = context.ReadTwoBytesUnsafe();");
sb.AppendLine($"{i} var sscharLen = (byte)sshdr;");
sb.AppendLine($"{i} var ssbyteLen = (byte)(sshdr >> 8);");
sb.AppendLine($"{i} {a} = ssbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sscharLen, ssbyteLen);");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
// H2Q6 StringMedium — utf8Len ≤ 65535 — single uint read packs charLen:16 + utf8Len:16
sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:"); sb.AppendLine($"{i} case BinaryTypeCode.StringMedium:");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {a} = context.ReadStringMedium();");
sb.AppendLine($"{i} var smpacked = context.ReadUInt32Unsafe();");
sb.AppendLine($"{i} var smcharLen = (ushort)smpacked;");
sb.AppendLine($"{i} var smbyteLen = (ushort)(smpacked >> 16);");
sb.AppendLine($"{i} {a} = smbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(smcharLen, smbyteLen);");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
// H2Q6 StringBig — utf8Len > 65535 — single ulong read packs charLen:32 + utf8Len:32
sb.AppendLine($"{i} case BinaryTypeCode.StringBig:"); sb.AppendLine($"{i} case BinaryTypeCode.StringBig:");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {a} = context.ReadStringBig();");
sb.AppendLine($"{i} var sbpacked = context.ReadUInt64Unsafe();");
sb.AppendLine($"{i} var sbcharLen = (int)(uint)sbpacked;");
sb.AppendLine($"{i} var sbbyteLen = (int)(uint)(sbpacked >> 32);");
sb.AppendLine($"{i} {a} = sbbyteLen == 0 ? string.Empty : context.ReadStringUtf8WithCharLen(sbcharLen, sbbyteLen);");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:"); sb.AppendLine($"{i} case BinaryTypeCode.StringAscii:");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {a} = context.ReadPlainStringAscii();");
sb.AppendLine($"{i} var salen = (int)context.ReadVarUInt();");
sb.AppendLine($"{i} {a} = salen == 0 ? string.Empty : context.ReadAsciiBytesAsString(salen);");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");
sb.AppendLine($"{i} }}");
// H2Q6 interning — Small / Medium tiers. Wire-decode body is shared with the runtime path
// (TypeReaderTable + cross-type populate) — see context.ReadAndRegisterInternedStringSmall/Medium.
sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:"); sb.AppendLine($"{i} case BinaryTypeCode.StringInternFirstSmall:");
sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringSmall();"); sb.AppendLine($"{i} {a} = context.ReadAndRegisterInternedStringSmall();");
sb.AppendLine($"{i} break;"); sb.AppendLine($"{i} break;");

View File

@ -619,6 +619,88 @@ public static partial class AcBinaryDeserializer
return value; return value;
} }
/// <summary>
/// Decodes a string whose <c>StringSmall</c> marker byte has already been consumed.
/// <para>Compact mode payload: <c>[charLen:8][utf8Len:8][UTF-8 bytes]</c>. The char count travels on
/// the wire, so the decode is a single pass (no <c>CountUtf8Chars</c>).</para>
/// <para>FastWire mode reuses the same marker value (=91) with a different payload,
/// <c>[charLen:int32 LE][UTF-16 raw bytes]</c>; the <c>FastWire</c> flag selects the layout.</para>
/// <para>This is the one wire-decode implementation used by the runtime <c>TypeReaderTable</c>,
/// cross-type populate, and SGen-emitted code.</para>
/// </summary>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadStringSmall()
{
    if (!FastWire)
    {
        // Compact layout: both one-byte lengths arrive in a single two-byte read —
        // char count in the low byte, UTF-8 byte count in the high byte.
        var lengths = ReadTwoBytesUnsafe();
        return ReadStringUtf8WithCharLen((byte)lengths, (byte)(lengths >> 8));
    }

    // FastWire layout: fixed-width int32 char count (one 4-byte read, no VarUInt loop)
    // followed by the raw UTF-16 data.
    return ReadStringUtf16(ReadInt32Unsafe());
}
/// <summary>
/// Decodes a string whose <c>StringMedium</c> marker has already been consumed.
/// Payload: <c>[charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]</c>, decoded in one pass. Both 16-bit
/// lengths are fetched with one 32-bit read and then split, instead of two separate ushort reads.
/// Used by runtime dispatch and SGen-emitted code.
/// </summary>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadStringMedium()
{
    var header = ReadUInt32Unsafe();

    // Low 16 bits = char count, high 16 bits = UTF-8 byte count.
    return ReadStringUtf8WithCharLen((ushort)header, (ushort)(header >> 16));
}
/// <summary>
/// Decodes a string whose <c>StringBig</c> marker has already been consumed.
/// Payload: <c>[charLen:32 LE][utf8Len:32 LE][UTF-8 bytes]</c>, decoded in one pass. Both 32-bit
/// lengths are fetched with one 64-bit read and then split, instead of two separate uint reads.
/// A wire value above <c>Int32.MaxValue</c> becomes a negative int after the cast; such headers
/// are rejected via <see cref="ThrowCorruptedBigWire"/>. Used by runtime dispatch and SGen-emitted code.
/// </summary>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadStringBig()
{
    var header = ReadUInt64Unsafe();
    var chars = (int)(uint)header;          // low 32 bits: char count
    var bytes = (int)(uint)(header >> 32);  // high 32 bits: UTF-8 byte count

    // OR-ing the two lengths folds both sign bits into one comparison, so a single
    // branch (never taken on a valid wire) detects a negative value in either field.
    if ((chars | bytes) < 0)
    {
        ThrowCorruptedBigWire(chars, bytes);
    }

    return ReadStringUtf8WithCharLen(chars, bytes);
}
/// <summary>
/// Throw helper for the corrupted-wire guard in <see cref="ReadStringBig"/>. <c>NoInlining</c>
/// keeps the hot-path reader compact — the throw-site stays out of the inlined caller body.
/// Declared <c>static</c> because it reads no instance state, so the cold call does not need
/// to pass <c>this</c>.
/// </summary>
/// <param name="charLength">Char count read from the header (negative when the wire value exceeded <c>Int32.MaxValue</c>).</param>
/// <param name="byteLength">UTF-8 byte count read from the header (negative when the wire value exceeded <c>Int32.MaxValue</c>).</param>
/// <exception cref="AcBinaryDeserializationException">Always thrown.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedBigWire(int charLength, int byteLength) =>
    throw new AcBinaryDeserializationException(
        $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " +
        $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.",
        -1);
/// <summary>
/// Reads a long ASCII string payload (after the <c>StringAscii</c> marker has been consumed).
/// Wire format: <c>[VarUInt byteCount][ASCII bytes]</c>. Byte→char widen, no UTF-8 decode. Shared
/// by runtime dispatch + SGen-emit.
/// </summary>
/// <returns><see cref="string.Empty"/> when the byte count is zero; otherwise the decoded string.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The wire byte count does not fit in a non-negative <c>int</c> (corrupted or crafted payload).
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal string ReadPlainStringAscii()
{
    var length = (int)ReadVarUInt();

    // One hot-path branch covers both non-positive outcomes: zero is the legitimate empty
    // string, negative means the wire value overflowed the int cast (same class of corruption
    // that ReadStringBig guards against). Valid non-empty strings pay no extra test.
    if (length <= 0)
    {
        if (length < 0)
        {
            ThrowCorruptedAsciiWire(length);
        }

        return string.Empty;
    }

    return ReadAsciiBytesAsString(length);
}

/// <summary>
/// Cold throw helper for the negative-length guard in <see cref="ReadPlainStringAscii"/>; kept
/// out of line so the inlined reader stays small.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedAsciiWire(int length) =>
    // NOTE(review): the trailing -1 mirrors the argument ThrowCorruptedBigWire passes — confirm its meaning.
    throw new AcBinaryDeserializationException(
        $"Wire format corruption: StringAscii header has an out-of-range length value (length={length}). " +
        "This indicates a corrupted or maliciously-crafted payload — wire values larger than Int32.MaxValue produce negative ints when cast.",
        -1);
/// <summary> /// <summary>
/// H2Q6 StringInternFirstSmall reader: wire <c>[cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]</c> /// H2Q6 StringInternFirstSmall reader: wire <c>[cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]</c>
/// after the marker has been consumed. Registers the decoded string in the intern cache and returns it. /// after the marker has been consumed. Registers the decoded string in the intern cache and returns it.

View File

@ -99,15 +99,15 @@ public static partial class AcBinaryDeserializer
readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe(); readers[BinaryTypeCode.Char] = static (ctx, _) => ctx.ReadCharUnsafe();
// H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode. // H2Q6 non-ASCII tier readers (Compact mode): fixed-width header [charLen][utf8Len] + 1-pass decode.
// FastWire mode dispatches the StringSmall (=91) marker through the same handler — see ReadStringSmall. // FastWire mode dispatches the StringSmall (=91) marker through the same handler — see ReadStringSmall.
readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ReadStringSmall(ctx); readers[BinaryTypeCode.StringSmall] = static (ctx, _) => ctx.ReadStringSmall();
readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ReadStringMedium(ctx); readers[BinaryTypeCode.StringMedium] = static (ctx, _) => ctx.ReadStringMedium();
readers[BinaryTypeCode.StringBig] = static (ctx, _) => ReadStringBig(ctx); readers[BinaryTypeCode.StringBig] = static (ctx, _) => ctx.ReadStringBig();
readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt()); readers[BinaryTypeCode.StringInterned] = static (ctx, _) => ctx.GetInternedString((int)ctx.ReadVarUInt());
readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty; readers[BinaryTypeCode.StringEmpty] = static (_, _) => string.Empty;
// H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path)
readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall();
readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium();
readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ReadPlainStringAscii(ctx); readers[BinaryTypeCode.StringAscii] = static (ctx, _) => ctx.ReadPlainStringAscii();
readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe();
readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe();
readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe();
@ -1049,16 +1049,16 @@ public static partial class AcBinaryDeserializer
switch (typeCode) switch (typeCode)
{ {
case BinaryTypeCode.StringSmall: case BinaryTypeCode.StringSmall:
propInfo.SetValue(target, ReadStringSmall(context)); propInfo.SetValue(target, context.ReadStringSmall());
return true; return true;
case BinaryTypeCode.StringMedium: case BinaryTypeCode.StringMedium:
propInfo.SetValue(target, ReadStringMedium(context)); propInfo.SetValue(target, context.ReadStringMedium());
return true; return true;
case BinaryTypeCode.StringBig: case BinaryTypeCode.StringBig:
propInfo.SetValue(target, ReadStringBig(context)); propInfo.SetValue(target, context.ReadStringBig());
return true; return true;
case BinaryTypeCode.StringAscii: case BinaryTypeCode.StringAscii:
propInfo.SetValue(target, ReadPlainStringAscii(context)); propInfo.SetValue(target, context.ReadPlainStringAscii());
return true; return true;
case BinaryTypeCode.StringEmpty: case BinaryTypeCode.StringEmpty:
propInfo.SetValue(target, string.Empty); propInfo.SetValue(target, string.Empty);
@ -1155,91 +1155,8 @@ public static partial class AcBinaryDeserializer
return context.ReadStringUtf8(length); return context.ReadStringUtf8(length);
} }
/// <summary> // ReadStringSmall / Medium / Big / PlainStringAscii and ReadAndRegisterInternedStringSmall / Medium
/// H2Q6 StringSmall reader (Compact mode): wire <c>[charLen:8][utf8Len:8][UTF-8 bytes]</c> after the // (+ the cold ThrowCorruptedBigWire helper) all moved to BinaryDeserializationContext as instance
/// marker has been consumed. 1-pass decode (no <c>CountUtf8Chars</c>). FastWire mode uses the same
/// marker (=91) but a different layout — handled via <see cref="BinaryDeserializationContext{T}.ReadStringUtf8"/>
/// when the deserializer is in FastWire mode.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadStringSmall<TInput>(BinaryDeserializationContext<TInput> context)
    where TInput : struct, IBinaryInputBase
{
    if (context.FastWire)
    {
        // The marker value is shared between modes; in FastWire the payload is
        // [charLen:int32 LE][UTF-16 raw bytes]. The char count is a fixed-width int, so it
        // costs one 4-byte read rather than a VarUInt loop. Markered collection/dictionary
        // element strings and the runtime path arrive here; SGen property-level strings use
        // the markerless EmitReadProp path, which calls `ReadStringUtf16` directly.
        return context.ReadStringUtf16(context.ReadInt32Unsafe());
    }

    // Compact mode — [charLen:8][utf8Len:8][bytes]: one two-byte read carries both lengths,
    // char count in the low byte and UTF-8 byte count in the high byte.
    var packedLengths = context.ReadTwoBytesUnsafe();
    var utf16Length = (byte)packedLengths;
    var utf8Length = (byte)(packedLengths >> 8);
    return context.ReadStringUtf8WithCharLen(utf16Length, utf8Length);
}
/// <summary>
/// Decodes a <c>StringMedium</c> payload: <c>[charLen:16 LE][utf8Len:16 LE][UTF-8 bytes]</c>,
/// in one pass. The two 16-bit lengths are loaded together as one uint and then split.
/// </summary>
/// <param name="context">Deserialization context to read the header and bytes from.</param>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadStringMedium<TInput>(BinaryDeserializationContext<TInput> context)
    where TInput : struct, IBinaryInputBase
{
    var header = context.ReadUInt32Unsafe();

    // Low 16 bits = char count, high 16 bits = UTF-8 byte count.
    return context.ReadStringUtf8WithCharLen((ushort)header, (ushort)(header >> 16));
}
/// <summary>
/// Decodes a <c>StringBig</c> payload: <c>[charLen:32 LE][utf8Len:32 LE][UTF-8 bytes]</c>,
/// in one pass. The two 32-bit lengths are loaded together as one ulong and then split.
/// Headers whose lengths turn negative after the int cast are rejected as corrupted.
/// </summary>
/// <param name="context">Deserialization context to read the header and bytes from.</param>
/// <returns>The decoded string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadStringBig<TInput>(BinaryDeserializationContext<TInput> context)
    where TInput : struct, IBinaryInputBase
{
    var header = context.ReadUInt64Unsafe();
    var chars = (int)(uint)header;          // low 32 bits: char count
    var bytes = (int)(uint)(header >> 32);  // high 32 bits: UTF-8 byte count

    // A wire-side uint above Int32.MaxValue casts to a negative int. OR-ing both lengths
    // merges their sign bits, so one comparison (never true on a valid wire) covers both.
    if ((chars | bytes) < 0)
    {
        ThrowCorruptedBigWire(chars, bytes);
    }

    return context.ReadStringUtf8WithCharLen(chars, bytes);
}
/// <summary>
/// Cold path for the corrupted-wire guard in <see cref="ReadStringBig{TInput}"/>. Marked
/// <c>NoInlining</c> so the exception construction stays outside the inlined reader body.
/// </summary>
/// <param name="charLength">Char count as read from the header.</param>
/// <param name="byteLength">UTF-8 byte count as read from the header.</param>
/// <exception cref="AcBinaryDeserializationException">Always thrown.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedBigWire(int charLength, int byteLength)
{
    var message =
        $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " +
        $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.";

    throw new AcBinaryDeserializationException(message, -1);
}
/// <summary>
/// Reads a long ASCII string payload (after the <c>StringAscii</c> marker has been consumed).
/// Wire format: <c>[VarUInt byteCount][ASCII bytes]</c>. Byte→char widen, no UTF-8 decode.
/// </summary>
/// <param name="context">Deserialization context to read the length and bytes from.</param>
/// <returns><see cref="string.Empty"/> when the byte count is zero; otherwise the decoded string.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The wire byte count does not fit in a non-negative <c>int</c> (corrupted or crafted payload).
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadPlainStringAscii<TInput>(BinaryDeserializationContext<TInput> context)
    where TInput : struct, IBinaryInputBase
{
    var length = (int)context.ReadVarUInt();

    // One hot-path branch covers both non-positive outcomes: zero is the legitimate empty
    // string, negative means the wire value overflowed the int cast (same class of corruption
    // that ReadStringBig guards against). Valid non-empty strings pay no extra test.
    if (length <= 0)
    {
        if (length < 0)
        {
            ThrowCorruptedAsciiWire(length);
        }

        return string.Empty;
    }

    return context.ReadAsciiBytesAsString(length);
}

/// <summary>
/// Cold throw helper for the negative-length guard in <c>ReadPlainStringAscii</c>; kept out of
/// line so the inlined reader stays small.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedAsciiWire(int length) =>
    // NOTE(review): the trailing -1 mirrors the argument ThrowCorruptedBigWire passes — confirm its meaning.
    throw new AcBinaryDeserializationException(
        $"Wire format corruption: StringAscii header has an out-of-range length value (length={length}). " +
        "This indicates a corrupted or maliciously-crafted payload — wire values larger than Int32.MaxValue produce negative ints when cast.",
        -1);
// ReadAndRegisterInternedStringSmall / Medium moved to BinaryDeserializationContext as instance
// methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty // methods — single source of wire-decode shared by TypeReaderTable dispatch, PopulateProperty
// cross-type path, and the SGen-emitted string-property switch. See // cross-type path, and the SGen-emitted string-property switch. See
// `BinaryDeserializationContext.Read.cs` for the implementations. // `BinaryDeserializationContext.Read.cs` for the implementations.