Defensive string guards, cleanup, and SGen/RT tests
- Add overflow/corruption guards to string (de)serialization (writer/reader now throw on invalid lengths)
- Remove dead string serialization methods per BINARY_TODO.md audit
- Update BINARY_TODO.md with closure/resolution for H2Q6, O7G2, V4N5, and related entries
- Add MaxStringCharLength constant and update marker reservations in BinaryTypeCode
- Simplify string cache ASCII verification in deserializer
- Add SGen/Runtime round-trip compatibility tests for large/deep data
- Minor code modernization and style improvements
This commit is contained in:
parent
fa48596dbf
commit
17ef0904d9
|
|
@ -0,0 +1,148 @@
|
|||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using AyCode.Core.Serializers;
|
||||
using AyCode.Core.Serializers.Binaries;
|
||||
using AyCode.Core.Tests.TestModels;
|
||||
|
||||
namespace AyCode.Core.Tests.Serialization;
|
||||
|
||||
/// <summary>
/// Cross-path compatibility tests for the binary serializer: a payload written with
/// <c>UseGeneratedCode = true</c> must be readable with <c>UseGeneratedCode = false</c> and vice versa,
/// for every option combination returned by <see cref="GetOptionFactories"/>.
/// </summary>
[TestClass]
public class AcBinarySerializerSGenRuntimeCompatibilityTests
{
    // System.Text.Json is used only as an independent "second opinion" on object-graph equality.
    // IgnoreCycles keeps STJ from throwing if the test graph contains reference cycles.
    private static readonly JsonSerializerOptions StjOptions = new()
    {
        ReferenceHandler = ReferenceHandler.IgnoreCycles
    };

    /// <summary>Serializes with the generated-code path and deserializes with the other path.</summary>
    [TestMethod]
    public void SerializeWithSGen_DeserializeWithRuntime_LargeAndDeepData_MultipleOptions_RoundTrip()
    {
        AssertCrossPathRoundTrip(serializeWithGeneratedCode: true);
    }

    /// <summary>Serializes with the non-generated path and deserializes with the generated-code path.</summary>
    [TestMethod]
    public void SerializeWithRuntime_DeserializeWithSGen_LargeAndDeepData_MultipleOptions_RoundTrip()
    {
        AssertCrossPathRoundTrip(serializeWithGeneratedCode: false);
    }

    /// <summary>
    /// Runs the round trip for every (data set, option factory) pair. The serializer uses
    /// <paramref name="serializeWithGeneratedCode"/> for <c>UseGeneratedCode</c>; the deserializer uses the opposite value.
    /// </summary>
    /// <param name="serializeWithGeneratedCode">Value assigned to <c>UseGeneratedCode</c> on the serialize-side options.</param>
    private static void AssertCrossPathRoundTrip(bool serializeWithGeneratedCode)
    {
        var roundTripCount = 0;

        foreach (var dataSet in GetTargetDataSets())
        {
            // Depends only on the data set, so compute it once instead of once per option factory.
            var expectedJson = JsonSerializer.Serialize(dataSet.Order, StjOptions);

            foreach (var optionFactory in GetOptionFactories())
            {
                // NOTE(review): this assumes each factory call yields an independent options object.
                // If FastMode/Default return a shared instance, both variables alias the same object and
                // the second assignment overwrites the first — confirm against AcBinarySerializerOptions.
                var serializeOptions = optionFactory();
                serializeOptions.UseGeneratedCode = serializeWithGeneratedCode;

                var deserializeOptions = optionFactory();
                deserializeOptions.UseGeneratedCode = !serializeWithGeneratedCode;

                var bytes = AcBinarySerializer.Serialize(dataSet.Order, serializeOptions);
                var roundTrip = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, deserializeOptions);
                var actualJson = JsonSerializer.Serialize(roundTrip, StjOptions);

                var context = $"Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}";

                Assert.AreEqual(expectedJson, actualJson, $"STJ mismatch. {context}");
                AssertOrderEquivalent(dataSet.Order, roundTrip, context);

                roundTripCount++;
            }
        }

        // Without this guard the test passes vacuously when the name filter matches no data set.
        Assert.IsTrue(roundTripCount > 0, "No data set named 'Large*' or 'Deep*' was found; no round trip was exercised.");
    }

    /// <summary>
    /// Selects the data sets whose name starts with "Large" or "Deep" from <c>BenchmarkTestDataProvider</c>.
    /// </summary>
    private static IEnumerable<TestDataSet> GetTargetDataSets()
    {
        // Ordinal comparison: the names are identifiers, not linguistic text (CA1310).
        return BenchmarkTestDataProvider
            .CreateTestDataSets()
            .Where(x => x.Name.StartsWith("Large", StringComparison.Ordinal)
                        || x.Name.StartsWith("Deep", StringComparison.Ordinal));
    }

    /// <summary>
    /// Yields factories for the option combinations under test, in order:
    /// FastMode + Compact wire, FastMode + Fast wire, Default + Compact wire.
    /// </summary>
    private static IEnumerable<Func<AcBinarySerializerOptions>> GetOptionFactories()
    {
        yield return static () =>
        {
            var options = AcBinarySerializerOptions.FastMode;
            options.WireMode = WireMode.Compact;
            return options;
        };

        yield return static () =>
        {
            var options = AcBinarySerializerOptions.FastMode;
            options.WireMode = WireMode.Fast;
            return options;
        };

        yield return static () =>
        {
            var options = AcBinarySerializerOptions.Default;
            options.WireMode = WireMode.Compact;
            return options;
        };
    }

    /// <summary>
    /// Field-by-field comparison of two orders down the Items → Pallets → Measurements → Points hierarchy.
    /// Only the members asserted below are compared; failure messages carry the path of the offending element.
    /// </summary>
    /// <param name="expected">The original order that was serialized.</param>
    /// <param name="actual">The deserialized order; the assertion fails if it is <c>null</c>.</param>
    /// <param name="context">Data set / option description prepended to every failure message.</param>
    private static void AssertOrderEquivalent(TestOrder expected, TestOrder? actual, string context)
    {
        Assert.IsNotNull(actual, context);
        Assert.AreEqual(expected.Id, actual.Id, context);
        Assert.AreEqual(expected.OrderNumber, actual.OrderNumber, context);
        Assert.AreEqual(expected.Status, actual.Status, context);
        Assert.AreEqual(expected.Items.Count, actual.Items.Count, context);

        for (var itemIndex = 0; itemIndex < expected.Items.Count; itemIndex++)
        {
            var expectedItem = expected.Items[itemIndex];
            var actualItem = actual.Items[itemIndex];
            var itemContext = $"{context}, Items[{itemIndex}]";

            Assert.AreEqual(expectedItem.Id, actualItem.Id, itemContext);
            Assert.AreEqual(expectedItem.ProductName, actualItem.ProductName, itemContext);
            Assert.AreEqual(expectedItem.Status, actualItem.Status, itemContext);
            Assert.AreEqual(expectedItem.Pallets.Count, actualItem.Pallets.Count, itemContext);

            for (var palletIndex = 0; palletIndex < expectedItem.Pallets.Count; palletIndex++)
            {
                var expectedPallet = expectedItem.Pallets[palletIndex];
                var actualPallet = actualItem.Pallets[palletIndex];
                var palletContext = $"{itemContext}.Pallets[{palletIndex}]";

                Assert.AreEqual(expectedPallet.Id, actualPallet.Id, palletContext);
                Assert.AreEqual(expectedPallet.PalletCode, actualPallet.PalletCode, palletContext);
                Assert.AreEqual(expectedPallet.Measurements.Count, actualPallet.Measurements.Count, palletContext);

                for (var measurementIndex = 0; measurementIndex < expectedPallet.Measurements.Count; measurementIndex++)
                {
                    var expectedMeasurement = expectedPallet.Measurements[measurementIndex];
                    var actualMeasurement = actualPallet.Measurements[measurementIndex];
                    var measurementContext = $"{palletContext}.Measurements[{measurementIndex}]";

                    Assert.AreEqual(expectedMeasurement.Id, actualMeasurement.Id, measurementContext);
                    Assert.AreEqual(expectedMeasurement.Name, actualMeasurement.Name, measurementContext);
                    Assert.AreEqual(expectedMeasurement.Points.Count, actualMeasurement.Points.Count, measurementContext);

                    for (var pointIndex = 0; pointIndex < expectedMeasurement.Points.Count; pointIndex++)
                    {
                        var expectedPoint = expectedMeasurement.Points[pointIndex];
                        var actualPoint = actualMeasurement.Points[pointIndex];
                        var pointContext = $"{measurementContext}.Points[{pointIndex}]";

                        Assert.AreEqual(expectedPoint.Id, actualPoint.Id, pointContext);
                        Assert.AreEqual(expectedPoint.Label, actualPoint.Label, pointContext);
                    }
                }
            }
        }
    }
}
|
||||
|
|
@ -1,11 +1,6 @@
|
|||
using System;
|
||||
using System.Buffers;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using System.Text;
|
||||
|
||||
namespace AyCode.Core.Serializers.Binaries;
|
||||
|
|
@ -386,7 +381,7 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
if (length == 0)
|
||||
{
|
||||
return Array.Empty<byte>();
|
||||
return [];
|
||||
}
|
||||
|
||||
EnsureAvailable(length);
|
||||
|
|
@ -575,7 +570,7 @@ public static partial class AcBinaryDeserializer
|
|||
|
||||
if (_stringCache!.TryGetValue(hash, out var cached))
|
||||
{
|
||||
if (cached.Length == length && VerifyAsciiUtf8Match(cached, slice))
|
||||
if (cached.Length == length && Ascii.Equals(slice, cached))
|
||||
{
|
||||
_position += length;
|
||||
return cached;
|
||||
|
|
@ -588,12 +583,6 @@ public static partial class AcBinaryDeserializer
|
|||
return value;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static bool VerifyAsciiUtf8Match(string cached, ReadOnlySpan<byte> utf8Bytes)
|
||||
{
|
||||
return Ascii.Equals(utf8Bytes, cached);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Full-content hash for string caching.
|
||||
/// CRITICAL: DO NOT SIMPLIFY — prevents hash collisions for similar property names.
|
||||
|
|
|
|||
|
|
@ -1194,9 +1194,24 @@ public static partial class AcBinaryDeserializer
|
|||
var packed = context.ReadUInt64Unsafe();
|
||||
var charLength = (int)(uint)packed;
|
||||
var byteLength = (int)(uint)(packed >> 32);
|
||||
// Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values
|
||||
// (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int).
|
||||
// Predict-friendly: always false on a valid wire.
|
||||
if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength);
|
||||
return context.ReadStringUtf8WithCharLen(charLength, byteLength);
|
||||
}
|
||||
|
||||
/// <summary>
/// Raises the corrupted-wire error on behalf of the length guard in <see cref="ReadStringBig{TInput}"/>.
/// Kept out-of-line (<c>NoInlining</c>) so the exception construction is not folded into the reader's hot path.
/// </summary>
/// <param name="charLength">Character length decoded from the header, reported in the message.</param>
/// <param name="byteLength">Byte length decoded from the header, reported in the message.</param>
/// <exception cref="AcBinaryDeserializationException">Always thrown.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowCorruptedBigWire(int charLength, int byteLength)
{
    var message =
        $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " +
        "This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.";

    // NOTE(review): the second constructor argument is passed as -1; its meaning is not visible here
    // (presumably "position unknown") — confirm against AcBinaryDeserializationException.
    throw new AcBinaryDeserializationException(message, -1);
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads a long ASCII string payload (after the <c>StringAscii</c> marker has been consumed).
|
||||
/// Wire format: <c>[VarUInt byteCount][ASCII bytes]</c>. Byte→char widen, no UTF-8 decode.
|
||||
|
|
|
|||
|
|
@ -1,15 +1,8 @@
|
|||
using System;
|
||||
using System.Buffers;
|
||||
using System.Buffers.Binary;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using static AyCode.Core.Helpers.JsonUtilities;
|
||||
|
||||
namespace AyCode.Core.Serializers.Binaries;
|
||||
|
|
@ -63,8 +56,6 @@ public static partial class AcBinarySerializer
|
|||
: SerializationContextBase<BinarySerializeTypeMetadata, AcBinarySerializerOptions>, IDisposable
|
||||
where TOutput : struct, IBinaryOutputBase
|
||||
{
|
||||
private static readonly Encoding Utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
|
||||
|
||||
private const int PropertyIndexBufferMaxCache = 512;
|
||||
private const int PropertyStateBufferMaxCache = 512;
|
||||
|
||||
|
|
@ -746,15 +737,19 @@ public static partial class AcBinarySerializer
|
|||
/// </remarks>
|
||||
public void WriteStringWithDispatch(string value)
|
||||
{
|
||||
var charLength = value.Length;
|
||||
// Single overflow guard: catches charLength > MaxStringCharLength where charLength*4 would wrap.
|
||||
// Predict-friendly (always false on realistic input). NoInlining throw helper keeps the hot path tight.
|
||||
if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
|
||||
|
||||
if (FastWire)
|
||||
{
|
||||
// FastWire: [StringSmall marker][VarUInt charCount][UTF-16 raw bytes]
|
||||
// Marker value 91 is mode-shared (Compact StringSmall vs FastWire string marker);
|
||||
// reader dispatches by deserializer mode, NOT by re-interpreting the marker.
|
||||
WriteByte(BinaryTypeCode.StringSmall);
|
||||
var charLenF = value.Length;
|
||||
var byteLenF = charLenF * 2;
|
||||
WriteVarUInt((uint)charLenF);
|
||||
var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow
|
||||
WriteVarUInt((uint)charLength);
|
||||
EnsureCapacity(byteLenF);
|
||||
MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF));
|
||||
_position += byteLenF;
|
||||
|
|
@ -777,7 +772,7 @@ public static partial class AcBinarySerializer
|
|||
//
|
||||
// ASCII override (bytesWritten == charLength) emits FixStrAscii / StringAscii with their own
|
||||
// compact headers (1 byte / 1+VarUInt) — body shifted left from the encode position.
|
||||
var charLength = value.Length;
|
||||
// (charLength already validated at method entry — charLength * 4 cannot overflow here.)
|
||||
var maxBytes = charLength * 4;
|
||||
|
||||
int reserveHeader;
|
||||
|
|
@ -796,8 +791,8 @@ public static partial class AcBinarySerializer
|
|||
// ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header
|
||||
if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength)
|
||||
{
|
||||
var shift = reserveHeader - 1;
|
||||
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
|
||||
|
||||
_buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten);
|
||||
_position = savedPos + 1 + bytesWritten;
|
||||
}
|
||||
|
|
@ -806,10 +801,12 @@ public static partial class AcBinarySerializer
|
|||
var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
|
||||
var asciiHeader = 1 + actualVarUIntSize;
|
||||
var shift = reserveHeader - asciiHeader;
|
||||
if (shift > 0)
|
||||
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
|
||||
|
||||
if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
|
||||
|
||||
_buffer[savedPos] = BinaryTypeCode.StringAscii;
|
||||
_position = savedPos + 1;
|
||||
|
||||
WriteVarUIntUnsafe((uint)bytesWritten);
|
||||
_position += bytesWritten;
|
||||
}
|
||||
|
|
@ -836,8 +833,7 @@ public static partial class AcBinarySerializer
|
|||
}
|
||||
|
||||
var shift = reserveHeader - actualHeader;
|
||||
if (shift > 0)
|
||||
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
|
||||
if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten));
|
||||
|
||||
_buffer[savedPos] = tierMarker;
|
||||
switch (actualHeader)
|
||||
|
|
@ -892,6 +888,10 @@ public static partial class AcBinarySerializer
|
|||
// emits Small tier (3 byte) when bytesWritten ≤ 255, instead of Medium (5 byte). Big tier
|
||||
// never engages — MaxStringInternLength byte-typed (max 255 char × 4 byte = 1020 byte fits in Medium).
|
||||
var charLength = value.Length;
|
||||
// Overflow guard (defensive — interning length is byte-typed so this should never trigger,
|
||||
// but stays consistent with WriteStringWithDispatch and protects against future refactors).
|
||||
if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength);
|
||||
|
||||
var maxBytes = charLength * 4;
|
||||
var cacheIdxSize = VarUIntSize((uint)cacheMapIndex);
|
||||
|
||||
|
|
@ -915,6 +915,7 @@ public static partial class AcBinarySerializer
|
|||
// Write [marker][cacheIdx VarUInt][charLen + utf8Len header][bytes]
|
||||
_buffer[savedPos] = tierMarker;
|
||||
_position = savedPos + 1;
|
||||
|
||||
WriteVarUIntUnsafe((uint)cacheMapIndex);
|
||||
|
||||
if (actualHeader == 3)
|
||||
|
|
@ -942,6 +943,18 @@ public static partial class AcBinarySerializer
|
|||
// The hot-path string writes go through WriteStringWithDispatch (M3R7 + H2Q6 marker dispatch).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Raises the "string too long" error for the length guards in <see cref="WriteStringWithDispatch"/> and
/// <see cref="WriteStringInternFirstWithDispatch"/>. Kept out-of-line (<c>NoInlining</c>) so the
/// exception construction is not folded into the writers' hot path.
/// </summary>
/// <param name="charLength">The offending string length, reported in the message.</param>
/// <exception cref="InvalidOperationException">Always thrown.</exception>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ThrowStringTooLong(int charLength)
{
    var message =
        $"String too long for binary serialization: {charLength} chars exceeds {BinaryTypeCode.MaxStringCharLength}. " +
        "This limit is dictated by the writer's worst-case 'charLength * 4' UTF-8 byte allocation; " +
        "larger inputs would silently overflow int arithmetic.";

    throw new InvalidOperationException(message);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Bulk Array Writes — inline
|
||||
|
|
|
|||
|
|
@ -117,11 +117,12 @@ internal static class BinaryTypeCode
|
|||
public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes]
|
||||
public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes]
|
||||
|
||||
// RESERVED (27 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md V4N3 marker address space plan:
|
||||
// RESERVED (29 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md H2Q6 marker address space plan:
|
||||
// 106..121 (16 values): ACCORE-BIN-T-L9Y3 — FixArray short-list count in marker (count 0-15)
|
||||
// 122..126 (5 values): ACCORE-BIN-T-S5L8 — sentinel-length encoding tiers
|
||||
// 127..130 (4 values): ACCORE-BIN-T-S2X9 — markerless schema lane opt-in
|
||||
// 131..134 (4 values): general reserve
|
||||
// 131 (1 value): ACCORE-BIN-T-F3W6 — dedicated FastWire string marker (split mode-shared StringSmall)
|
||||
// 132..134 (3 values): general reserve
|
||||
//
|
||||
// Readers MUST throw "unknown marker" on any value in 106..134 until the corresponding feature
|
||||
// activates within the v3 wire format envelope (no further wire-format break needed).
|
||||
|
|
@ -166,6 +167,16 @@ internal static class BinaryTypeCode
|
|||
public const byte Int32Tiny = 192; // -16 to 47 stored in single byte (value = code - 192 - 16)
|
||||
public const byte Int32TinyMax = 255; // Upper bound for tiny int (192 + 64 - 1 = 255)
|
||||
|
||||
/// <summary>
|
||||
/// Largest <c>string.Length</c> value safe for binary serialization — <c>charLength * 4</c> (UTF-8
|
||||
/// worst-case byte count) MUST fit in <c>int</c>. Above this, the writer's <c>maxBytes = charLength * 4</c>
|
||||
/// computation overflows: at exactly <c>0x40000000</c> chars it wraps to 0 (silent zero-overflow → wire
|
||||
/// emits empty string with the original charLength claim, silent data loss). The single
|
||||
/// <c>(uint)charLength > MaxStringCharLength</c> guard catches the overflow band cheaply
|
||||
/// (one unsigned compare on the writer hot path, predict-friendly — always false on realistic input).
|
||||
/// </summary>
|
||||
public const int MaxStringCharLength = 0x1FFFFFFF; // 536_870_911 — largest charLength where charLength * 4 fits in int
|
||||
|
||||
/// <summary>
|
||||
/// Check if type code represents a reference (string or object).
|
||||
/// </summary>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue