Defensive string guards, cleanup, and SGen/RT tests

- Add overflow/corruption guards to string (de)serialization (writer/reader now throw on invalid lengths)
- Remove dead string serialization methods per BINARY_TODO.md audit
- Update BINARY_TODO.md with closure/resolution for H2Q6, O7G2, V4N5, and related entries
- Add MaxStringCharLength constant and update marker reservations in BinaryTypeCode
- Simplify string cache ASCII verification in deserializer
- Add SGen/Runtime round-trip compatibility tests for large/deep data
- Minor code modernization and style improvements
This commit is contained in:
Loretta 2026-05-07 14:33:39 +02:00
parent fa48596dbf
commit 17ef0904d9
6 changed files with 452 additions and 36 deletions

View File

@ -0,0 +1,148 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using AyCode.Core.Serializers;
using AyCode.Core.Serializers.Binaries;
using AyCode.Core.Tests.TestModels;
namespace AyCode.Core.Tests.Serialization;
/// <summary>
/// Cross-mode compatibility tests for the binary serializer: a payload written with
/// <c>UseGeneratedCode = true</c> must be readable with <c>UseGeneratedCode = false</c> and vice versa.
/// Each test runs every "Large*" / "Deep*" benchmark data set through every option set returned by
/// <see cref="GetOptionFactories"/> and verifies the result both via a System.Text.Json snapshot
/// comparison and via a field-by-field walk of the order graph.
/// </summary>
[TestClass]
public class AcBinarySerializerSGenRuntimeCompatibilityTests
{
    // IgnoreCycles lets System.Text.Json serialize the test graph even if it contains back-references.
    private static readonly JsonSerializerOptions StjOptions = new()
    {
        ReferenceHandler = ReferenceHandler.IgnoreCycles
    };

    /// <summary>Writes with <c>UseGeneratedCode = true</c>, reads with <c>UseGeneratedCode = false</c>.</summary>
    [TestMethod]
    public void SerializeWithSGen_DeserializeWithRuntime_LargeAndDeepData_MultipleOptions_RoundTrip()
    {
        AssertCrossModeRoundTrip(serializeWithGeneratedCode: true);
    }

    /// <summary>Writes with <c>UseGeneratedCode = false</c>, reads with <c>UseGeneratedCode = true</c>.</summary>
    [TestMethod]
    public void SerializeWithRuntime_DeserializeWithSGen_LargeAndDeepData_MultipleOptions_RoundTrip()
    {
        AssertCrossModeRoundTrip(serializeWithGeneratedCode: false);
    }

    /// <summary>
    /// Shared body of both tests. Serializes every target data set with one <c>UseGeneratedCode</c>
    /// setting, deserializes with the opposite setting, and asserts that the round-tripped order
    /// is equivalent to the input.
    /// </summary>
    /// <param name="serializeWithGeneratedCode">
    /// Value assigned to <c>UseGeneratedCode</c> on the serialize options; the deserialize options get the negation.
    /// </param>
    private static void AssertCrossModeRoundTrip(bool serializeWithGeneratedCode)
    {
        var dataSets = GetTargetDataSets().ToList();

        // Without this guard an empty filter result would make the test pass without checking anything.
        Assert.AreNotEqual(0, dataSets.Count,
            "No 'Large*' or 'Deep*' data sets were found; the round-trip test would not verify anything.");

        foreach (var dataSet in dataSets)
        {
            // The expected snapshot depends only on the data set, so compute it once per data set.
            var expectedJson = JsonSerializer.Serialize(dataSet.Order, StjOptions);

            foreach (var optionFactory in GetOptionFactories())
            {
                // Two separate factory calls so that toggling UseGeneratedCode on one side
                // is not meant to affect the other side.
                var serializeOptions = optionFactory();
                serializeOptions.UseGeneratedCode = serializeWithGeneratedCode;

                var deserializeOptions = optionFactory();
                deserializeOptions.UseGeneratedCode = !serializeWithGeneratedCode;

                var context =
                    $"Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}";

                var bytes = AcBinarySerializer.Serialize(dataSet.Order, serializeOptions);
                var roundTrip = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, deserializeOptions);
                var actualJson = JsonSerializer.Serialize(roundTrip, StjOptions);

                Assert.AreEqual(expectedJson, actualJson, $"STJ mismatch. {context}");
                AssertOrderEquivalent(dataSet.Order, roundTrip, context);
            }
        }
    }

    /// <summary>
    /// Returns the benchmark data sets whose name starts with "Large" or "Deep".
    /// The comparison is ordinal, so the match is case-sensitive and independent of the current culture.
    /// </summary>
    private static IEnumerable<TestDataSet> GetTargetDataSets()
    {
        return BenchmarkTestDataProvider
            .CreateTestDataSets()
            .Where(x => x.Name.StartsWith("Large", StringComparison.Ordinal)
                        || x.Name.StartsWith("Deep", StringComparison.Ordinal));
    }

    /// <summary>
    /// Yields factories for the option combinations under test:
    /// FastMode + Compact wire, FastMode + Fast wire, and Default + Compact wire.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the factories mutate the object returned by <c>AcBinarySerializerOptions.FastMode</c> /
    /// <c>AcBinarySerializerOptions.Default</c>. This assumes each access returns a fresh instance — confirm;
    /// if a shared instance were returned, the serialize and deserialize options would alias each other.
    /// </remarks>
    private static IEnumerable<Func<AcBinarySerializerOptions>> GetOptionFactories()
    {
        yield return static () =>
        {
            var options = AcBinarySerializerOptions.FastMode;
            options.WireMode = WireMode.Compact;
            return options;
        };

        yield return static () =>
        {
            var options = AcBinarySerializerOptions.FastMode;
            options.WireMode = WireMode.Fast;
            return options;
        };

        yield return static () =>
        {
            var options = AcBinarySerializerOptions.Default;
            options.WireMode = WireMode.Compact;
            return options;
        };
    }

    /// <summary>
    /// Walks the order graph (items → pallets → measurements → points) and asserts that the
    /// identifying fields and collection sizes match at every level.
    /// </summary>
    /// <param name="expected">The original order that was serialized.</param>
    /// <param name="actual">The deserialized order; the assertion fails if it is <c>null</c>.</param>
    /// <param name="context">Data-set / option description included in every failure message.</param>
    private static void AssertOrderEquivalent(TestOrder expected, TestOrder? actual, string context)
    {
        Assert.IsNotNull(actual, context);
        Assert.AreEqual(expected.Id, actual.Id, context);
        Assert.AreEqual(expected.OrderNumber, actual.OrderNumber, context);
        Assert.AreEqual(expected.Status, actual.Status, context);
        Assert.AreEqual(expected.Items.Count, actual.Items.Count, context);

        for (var itemIndex = 0; itemIndex < expected.Items.Count; itemIndex++)
        {
            var expectedItem = expected.Items[itemIndex];
            var actualItem = actual.Items[itemIndex];
            // The element path is appended so a failure identifies the exact node that differs.
            var itemContext = $"{context}, Path=Items[{itemIndex}]";

            Assert.AreEqual(expectedItem.Id, actualItem.Id, itemContext);
            Assert.AreEqual(expectedItem.ProductName, actualItem.ProductName, itemContext);
            Assert.AreEqual(expectedItem.Status, actualItem.Status, itemContext);
            Assert.AreEqual(expectedItem.Pallets.Count, actualItem.Pallets.Count, itemContext);

            for (var palletIndex = 0; palletIndex < expectedItem.Pallets.Count; palletIndex++)
            {
                var expectedPallet = expectedItem.Pallets[palletIndex];
                var actualPallet = actualItem.Pallets[palletIndex];
                var palletContext = $"{itemContext}.Pallets[{palletIndex}]";

                Assert.AreEqual(expectedPallet.Id, actualPallet.Id, palletContext);
                Assert.AreEqual(expectedPallet.PalletCode, actualPallet.PalletCode, palletContext);
                Assert.AreEqual(expectedPallet.Measurements.Count, actualPallet.Measurements.Count, palletContext);

                for (var measurementIndex = 0; measurementIndex < expectedPallet.Measurements.Count; measurementIndex++)
                {
                    var expectedMeasurement = expectedPallet.Measurements[measurementIndex];
                    var actualMeasurement = actualPallet.Measurements[measurementIndex];
                    var measurementContext = $"{palletContext}.Measurements[{measurementIndex}]";

                    Assert.AreEqual(expectedMeasurement.Id, actualMeasurement.Id, measurementContext);
                    Assert.AreEqual(expectedMeasurement.Name, actualMeasurement.Name, measurementContext);
                    Assert.AreEqual(expectedMeasurement.Points.Count, actualMeasurement.Points.Count, measurementContext);

                    for (var pointIndex = 0; pointIndex < expectedMeasurement.Points.Count; pointIndex++)
                    {
                        var expectedPoint = expectedMeasurement.Points[pointIndex];
                        var actualPoint = actualMeasurement.Points[pointIndex];
                        var pointContext = $"{measurementContext}.Points[{pointIndex}]";

                        Assert.AreEqual(expectedPoint.Id, actualPoint.Id, pointContext);
                        Assert.AreEqual(expectedPoint.Label, actualPoint.Label, pointContext);
                    }
                }
            }
        }
    }
}

View File

@ -1,11 +1,6 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
namespace AyCode.Core.Serializers.Binaries;
@ -386,7 +381,7 @@ public static partial class AcBinaryDeserializer
{
if (length == 0)
{
return Array.Empty<byte>();
return [];
}
EnsureAvailable(length);
@ -575,7 +570,7 @@ public static partial class AcBinaryDeserializer
if (_stringCache!.TryGetValue(hash, out var cached))
{
if (cached.Length == length && VerifyAsciiUtf8Match(cached, slice))
if (cached.Length == length && Ascii.Equals(slice, cached))
{
_position += length;
return cached;
@ -588,12 +583,6 @@ public static partial class AcBinaryDeserializer
return value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool VerifyAsciiUtf8Match(string cached, ReadOnlySpan<byte> utf8Bytes)
{
return Ascii.Equals(utf8Bytes, cached);
}
/// <summary>
/// Full-content hash for string caching.
/// CRITICAL: DO NOT SIMPLIFY — prevents hash collisions for similar property names.

View File

@ -1194,9 +1194,24 @@ public static partial class AcBinaryDeserializer
var packed = context.ReadUInt64Unsafe();
var charLength = (int)(uint)packed;
var byteLength = (int)(uint)(packed >> 32);
// Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values
// (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int).
// Predict-friendly: always false on a valid wire.
if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength);
return context.ReadStringUtf8WithCharLen(charLength, byteLength);
}
/// <summary>
/// Throw helper for the corrupted-wire guard in <see cref="ReadStringBig{TInput}"/>. <c>NoInlining</c>
/// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body.
/// Marked <c>DoesNotReturn</c> so flow analysis knows the guarded path terminates here.
/// </summary>
/// <param name="charLength">Character length decoded from the header (negative when the wire value exceeded <c>Int32.MaxValue</c>).</param>
/// <param name="byteLength">Byte length decoded from the header (negative when the wire value exceeded <c>Int32.MaxValue</c>).</param>
/// <exception cref="AcBinaryDeserializationException">Always thrown.</exception>
[System.Diagnostics.CodeAnalysis.DoesNotReturn]
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedBigWire(int charLength, int byteLength) =>
    throw new AcBinaryDeserializationException(
        $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " +
        $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.",
        -1); // NOTE(review): -1 presumably means "position unknown" — confirm against the exception's constructor.
/// <summary>
/// Reads a long ASCII string payload (after the <c>StringAscii</c> marker has been consumed).
/// Wire format: <c>[VarUInt byteCount][ASCII bytes]</c>. Byte→char widen, no UTF-8 decode.

View File

@ -1,15 +1,8 @@
using System;
using System.Buffers;
using System.Buffers.Binary;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
using System.Threading;
using static AyCode.Core.Helpers.JsonUtilities;
namespace AyCode.Core.Serializers.Binaries;
@ -63,8 +56,6 @@ public static partial class AcBinarySerializer
: SerializationContextBase<BinarySerializeTypeMetadata, AcBinarySerializerOptions>, IDisposable
where TOutput : struct, IBinaryOutputBase
{
private static readonly Encoding Utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
private const int PropertyIndexBufferMaxCache = 512;
private const int PropertyStateBufferMaxCache = 512;
@ -746,15 +737,19 @@ public static partial class AcBinarySerializer
/// </remarks>
public void WriteStringWithDispatch(string value)
{
var charLength = value.Length;
// Single overflow guard: catches charLength > MaxStringCharLength where charLength*4 would wrap.
// Predict-friendly (always false on realistic input). NoInlining throw helper keeps the hot path tight.
if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
if (FastWire)
{
// FastWire: [StringSmall marker][VarUInt charCount][UTF-16 raw bytes]
// Marker value 91 is mode-shared (Compact StringSmall vs FastWire string marker);
// reader dispatches by deserializer mode, NOT by re-interpreting the marker.
WriteByte(BinaryTypeCode.StringSmall);
var charLenF = value.Length;
var byteLenF = charLenF * 2;
WriteVarUInt((uint)charLenF);
var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow
WriteVarUInt((uint)charLength);
EnsureCapacity(byteLenF);
MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF));
_position += byteLenF;
@ -777,7 +772,7 @@ public static partial class AcBinarySerializer
//
// ASCII override (bytesWritten == charLength) emits FixStrAscii / StringAscii with their own
// compact headers (1 byte / 1+VarUInt) — body shifted left from the encode position.
var charLength = value.Length;
// (charLength already validated at method entry — charLength * 4 cannot overflow here.)
var maxBytes = charLength * 4;
int reserveHeader;
@ -796,8 +791,8 @@ public static partial class AcBinarySerializer
// ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header
if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength)
{
var shift = reserveHeader - 1;
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
_buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten);
_position = savedPos + 1 + bytesWritten;
}
@ -806,10 +801,12 @@ public static partial class AcBinarySerializer
var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
var asciiHeader = 1 + actualVarUIntSize;
var shift = reserveHeader - asciiHeader;
if (shift > 0)
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
_buffer[savedPos] = BinaryTypeCode.StringAscii;
_position = savedPos + 1;
WriteVarUIntUnsafe((uint)bytesWritten);
_position += bytesWritten;
}
@ -836,8 +833,7 @@ public static partial class AcBinarySerializer
}
var shift = reserveHeader - actualHeader;
if (shift > 0)
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
_buffer[savedPos] = tierMarker;
switch (actualHeader)
@ -892,6 +888,10 @@ public static partial class AcBinarySerializer
// emits Small tier (3 byte) when bytesWritten ≤ 255, instead of Medium (5 byte). Big tier
// never engages — MaxStringInternLength byte-typed (max 255 char × 4 byte = 1020 byte fits in Medium).
var charLength = value.Length;
// Overflow guard (defensive — interning length is byte-typed so this should never trigger,
// but stays consistent with WriteStringWithDispatch and protects against future refactors).
if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
var maxBytes = charLength * 4;
var cacheIdxSize = VarUIntSize((uint)cacheMapIndex);
@ -915,6 +915,7 @@ public static partial class AcBinarySerializer
// Write [marker][cacheIdx VarUInt][charLen + utf8Len header][bytes]
_buffer[savedPos] = tierMarker;
_position = savedPos + 1;
WriteVarUIntUnsafe((uint)cacheMapIndex);
if (actualHeader == 3)
@ -942,6 +943,18 @@ public static partial class AcBinarySerializer
// The hot-path string writes go through WriteStringWithDispatch (M3R7 + H2Q6 marker dispatch).
// ─────────────────────────────────────────────────────────────────
/// <summary>
/// Throw helper for the overflow guard in <see cref="WriteStringWithDispatch"/> and
/// <see cref="WriteStringInternFirstWithDispatch"/>. Marked <c>NoInlining</c> so the hot path
/// stays compact — the JIT/AOT keeps the throw-site out of the inlined caller body.
/// Marked <c>DoesNotReturn</c> so flow analysis knows the guarded path terminates here.
/// </summary>
/// <param name="charLength">The offending <c>string.Length</c> value, reported in the exception message.</param>
/// <exception cref="InvalidOperationException">Always thrown.</exception>
[System.Diagnostics.CodeAnalysis.DoesNotReturn]
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowStringTooLong(int charLength) =>
    throw new InvalidOperationException(
        $"String too long for binary serialization: {charLength} chars exceeds {BinaryTypeCode.MaxStringCharLength}. " +
        $"This limit is dictated by the writer's worst-case 'charLength * 4' UTF-8 byte allocation; " +
        $"larger inputs would silently overflow int arithmetic.");
#endregion
#region Bulk Array Writes inline

View File

@ -117,11 +117,12 @@ internal static class BinaryTypeCode
public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]
public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]
// RESERVED (27 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md V4N3 marker address space plan:
// RESERVED (29 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md H2Q6 marker address space plan:
// 106..121 (16 values): ACCORE-BIN-T-L9Y3 — FixArray short-list count in marker (count 0-15)
// 122..126 (5 values): ACCORE-BIN-T-S5L8 — sentinel-length encoding tiers
// 127..130 (4 values): ACCORE-BIN-T-S2X9 — markerless schema lane opt-in
// 131..134 (4 values): general reserve
// 131 (1 value): ACCORE-BIN-T-F3W6 — dedicated FastWire string marker (split mode-shared StringSmall)
// 132..134 (3 values): general reserve
//
// Readers MUST throw "unknown marker" on any value in 106..134 until the corresponding feature
// activates within the v3 wire format envelope (no further wire-format break needed).
@ -166,6 +167,16 @@ internal static class BinaryTypeCode
public const byte Int32Tiny = 192; // -16 to 47 stored in single byte (value = code - 192 - 16)
public const byte Int32TinyMax = 255; // Upper bound for tiny int (192 + 64 - 1 = 255)
/// <summary>
/// Largest <c>string.Length</c> value safe for binary serialization — <c>charLength * 4</c> (UTF-8
/// worst-case byte count) MUST fit in <c>int</c>. Above this, the writer's <c>maxBytes = charLength * 4</c>
/// computation overflows: starting at <c>0x20000000</c> chars the product wraps to a negative value
/// (<c>0x20000000 * 4 == int.MinValue</c>), and at exactly <c>0x40000000</c> chars it wraps to 0
/// (silent zero-overflow → wire emits empty string with the original charLength claim, silent data loss).
/// The single <c>(uint)charLength &gt; MaxStringCharLength</c> guard catches the overflow band cheaply
/// (one unsigned compare on the writer hot path, predict-friendly — always false on realistic input).
/// </summary>
public const int MaxStringCharLength = 0x1FFFFFFF; // 536_870_911 — largest charLength where charLength * 4 fits in int
/// <summary>
/// Check if type code represents a reference (string or object).
/// </summary>

File diff suppressed because one or more lines are too long