diff --git a/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenRuntimeCompatibilityTests.cs b/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenRuntimeCompatibilityTests.cs new file mode 100644 index 0000000..685bd03 --- /dev/null +++ b/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenRuntimeCompatibilityTests.cs @@ -0,0 +1,154 @@ +using System.Text.Json; +using System.Text.Json.Serialization; +using AyCode.Core.Serializers; +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.TestModels; + +namespace AyCode.Core.Tests.Serialization; + +/// <summary> +/// Cross-path wire-compatibility tests: bytes written with <c>UseGeneratedCode = true</c> must deserialize with <c>UseGeneratedCode = false</c> (and vice versa) into an equivalent order graph. +/// </summary> +[TestClass] +public class AcBinarySerializerSGenRuntimeCompatibilityTests +{ + private static readonly JsonSerializerOptions StjOptions = new() + { + ReferenceHandler = ReferenceHandler.IgnoreCycles + }; + + [TestMethod] + public void SerializeWithSGen_DeserializeWithRuntime_LargeAndDeepData_MultipleOptions_RoundTrip() + { + foreach (var dataSet in GetTargetDataSets()) + { + // Loop-invariant: the STJ baseline depends only on the data set, so compute it once per data set. + var expectedJson = JsonSerializer.Serialize(dataSet.Order, StjOptions); + + foreach (var optionFactory in GetOptionFactories()) + { + var serializeOptions = optionFactory(); + serializeOptions.UseGeneratedCode = true; + + var deserializeOptions = optionFactory(); + deserializeOptions.UseGeneratedCode = false; + + var bytes = AcBinarySerializer.Serialize(dataSet.Order, serializeOptions); + var roundTrip = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, deserializeOptions); + var actualJson = JsonSerializer.Serialize(roundTrip, StjOptions); + + Assert.AreEqual(expectedJson, actualJson, $"STJ mismatch. 
Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}"); + + AssertOrderEquivalent(dataSet.Order, roundTrip, $"Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}"); + } + } + } + + [TestMethod] + public void SerializeWithRuntime_DeserializeWithSGen_LargeAndDeepData_MultipleOptions_RoundTrip() + { + foreach (var dataSet in GetTargetDataSets()) + { + // Loop-invariant: the STJ baseline depends only on the data set, so compute it once per data set. + var expectedJson = JsonSerializer.Serialize(dataSet.Order, StjOptions); + + foreach (var optionFactory in GetOptionFactories()) + { + var serializeOptions = optionFactory(); + serializeOptions.UseGeneratedCode = false; + + var deserializeOptions = optionFactory(); + deserializeOptions.UseGeneratedCode = true; + + var bytes = AcBinarySerializer.Serialize(dataSet.Order, serializeOptions); + var roundTrip = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, deserializeOptions); + var actualJson = JsonSerializer.Serialize(roundTrip, StjOptions); + + Assert.AreEqual(expectedJson, actualJson, $"STJ mismatch. 
Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}"); + + AssertOrderEquivalent(dataSet.Order, roundTrip, $"Dataset={dataSet.Name}, WireMode={serializeOptions.WireMode}, BaseOptions={serializeOptions.ReferenceHandling}/{serializeOptions.UseStringInterning}"); + } + } + } + + private static IEnumerable GetTargetDataSets() + { + return BenchmarkTestDataProvider + .CreateTestDataSets() + .Where(x => x.Name.StartsWith("Large", StringComparison.Ordinal) || x.Name.StartsWith("Deep", StringComparison.Ordinal)); + } + + // NOTE(review): assumes AcBinarySerializerOptions.FastMode / .Default return a new instance per access; if they are shared instances, the mutations below (and UseGeneratedCode in the tests) leak between the serialize and deserialize sides — confirm. + private static IEnumerable<Func<AcBinarySerializerOptions>> GetOptionFactories() + { + yield return static () => + { + var options = AcBinarySerializerOptions.FastMode; + options.WireMode = WireMode.Compact; + return options; + }; + + yield return static () => + { + var options = AcBinarySerializerOptions.FastMode; + options.WireMode = WireMode.Fast; + return options; + }; + + yield return static () => + { + var options = AcBinarySerializerOptions.Default; + options.WireMode = WireMode.Compact; + return options; + }; + } + + private static void AssertOrderEquivalent(TestOrder expected, TestOrder? 
actual, string context) + { + Assert.IsNotNull(actual, context); + Assert.AreEqual(expected.Id, actual.Id, context); + Assert.AreEqual(expected.OrderNumber, actual.OrderNumber, context); + Assert.AreEqual(expected.Status, actual.Status, context); + Assert.AreEqual(expected.Items.Count, actual.Items.Count, context); + + for (var itemIndex = 0; itemIndex < expected.Items.Count; itemIndex++) + { + var expectedItem = expected.Items[itemIndex]; + var actualItem = actual.Items[itemIndex]; + + Assert.AreEqual(expectedItem.Id, actualItem.Id, context); + Assert.AreEqual(expectedItem.ProductName, actualItem.ProductName, context); + Assert.AreEqual(expectedItem.Status, actualItem.Status, context); + Assert.AreEqual(expectedItem.Pallets.Count, actualItem.Pallets.Count, context); + + for (var palletIndex = 0; palletIndex < expectedItem.Pallets.Count; palletIndex++) + { + var expectedPallet = expectedItem.Pallets[palletIndex]; + var actualPallet = actualItem.Pallets[palletIndex]; + + Assert.AreEqual(expectedPallet.Id, actualPallet.Id, context); + Assert.AreEqual(expectedPallet.PalletCode, actualPallet.PalletCode, context); + Assert.AreEqual(expectedPallet.Measurements.Count, actualPallet.Measurements.Count, context); + + for (var measurementIndex = 0; measurementIndex < expectedPallet.Measurements.Count; measurementIndex++) + { + var expectedMeasurement = expectedPallet.Measurements[measurementIndex]; + var actualMeasurement = actualPallet.Measurements[measurementIndex]; + + Assert.AreEqual(expectedMeasurement.Id, actualMeasurement.Id, context); + Assert.AreEqual(expectedMeasurement.Name, actualMeasurement.Name, context); + Assert.AreEqual(expectedMeasurement.Points.Count, actualMeasurement.Points.Count, context); + + for (var pointIndex = 0; pointIndex < expectedMeasurement.Points.Count; pointIndex++) + { + var expectedPoint = expectedMeasurement.Points[pointIndex]; + var actualPoint = actualMeasurement.Points[pointIndex]; + + Assert.AreEqual(expectedPoint.Id, actualPoint.Id, 
context); + Assert.AreEqual(expectedPoint.Label, actualPoint.Label, context); + } + } + } + } + } +} diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index fab35ec..9921025 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -1,11 +1,6 @@ -using System; -using System.Buffers; -using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; using System.Text; namespace AyCode.Core.Serializers.Binaries; @@ -386,7 +381,7 @@ public static partial class AcBinaryDeserializer { if (length == 0) { - return Array.Empty(); + return []; } EnsureAvailable(length); @@ -575,7 +570,7 @@ public static partial class AcBinaryDeserializer if (_stringCache!.TryGetValue(hash, out var cached)) { - if (cached.Length == length && VerifyAsciiUtf8Match(cached, slice)) + if (cached.Length == length && Ascii.Equals(slice, cached)) { _position += length; return cached; @@ -588,12 +583,6 @@ public static partial class AcBinaryDeserializer return value; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool VerifyAsciiUtf8Match(string cached, ReadOnlySpan utf8Bytes) - { - return Ascii.Equals(utf8Bytes, cached); - } - /// /// Full-content hash for string caching. /// CRITICAL: DO NOT SIMPLIFY � prevents hash collisions for similar property names. 
diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index ae76e81..5490bff 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -1194,9 +1194,24 @@ public static partial class AcBinaryDeserializer var packed = context.ReadUInt64Unsafe(); var charLength = (int)(uint)packed; var byteLength = (int)(uint)(packed >> 32); + // Single bitwise-OR + sign-test catches negative casts from corrupted-wire uint values + // (when the wire-side uint > Int32.MaxValue, the (int)(uint) cast yields a negative int). + // Predict-friendly: always false on a valid wire. + if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); return context.ReadStringUtf8WithCharLen(charLength, byteLength); } + /// <summary> + /// Throw helper for the corrupted-wire guard in <see cref="ReadStringBig"/>. NoInlining + /// keeps the hot-path reader compact — the JIT/AOT lifts the throw-site out of the inlined caller body. + /// </summary> + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowCorruptedBigWire(int charLength, int byteLength) => + throw new AcBinaryDeserializationException( + $"Wire format corruption: StringBig header has out-of-range length values (charLength={charLength}, byteLength={byteLength}). " + + $"This indicates a corrupted or maliciously-crafted payload — uint wire values larger than Int32.MaxValue produce negative ints when cast.", + -1); + /// <summary> /// Reads a long ASCII string payload (after the StringAscii marker has been consumed). /// Wire format: [VarUInt byteCount][ASCII bytes]. Byte→char widen, no UTF-8 decode. 
diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index c6c868f..ab5cc5a 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -1,15 +1,8 @@ -using System; using System.Buffers; -using System.Buffers.Binary; using System.Collections.Concurrent; -using System.Collections.Generic; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Text; -using System.Threading; using static AyCode.Core.Helpers.JsonUtilities; namespace AyCode.Core.Serializers.Binaries; @@ -63,8 +56,6 @@ public static partial class AcBinarySerializer : SerializationContextBase, IDisposable where TOutput : struct, IBinaryOutputBase { - private static readonly Encoding Utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - private const int PropertyIndexBufferMaxCache = 512; private const int PropertyStateBufferMaxCache = 512; @@ -746,15 +737,19 @@ public static partial class AcBinarySerializer /// public void WriteStringWithDispatch(string value) { + var charLength = value.Length; + // Single overflow guard: catches charLength > MaxStringCharLength where charLength*4 would wrap. + // Predict-friendly (always false on realistic input). NoInlining throw helper keeps the hot path tight. + if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); + if (FastWire) { // FastWire: [StringSmall marker][VarUInt charCount][UTF-16 raw bytes] // Marker value 91 is mode-shared (Compact StringSmall vs FastWire string marker); // reader dispatches by deserializer mode, NOT by re-interpreting the marker. 
WriteByte(BinaryTypeCode.StringSmall); - var charLenF = value.Length; - var byteLenF = charLenF * 2; - WriteVarUInt((uint)charLenF); + var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow + WriteVarUInt((uint)charLength); EnsureCapacity(byteLenF); MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF)); _position += byteLenF; @@ -777,7 +772,7 @@ public static partial class AcBinarySerializer // // ASCII override (bytesWritten == charLength) emits FixStrAscii / StringAscii with their own // compact headers (1 byte / 1+VarUInt) — body shifted left from the encode position. - var charLength = value.Length; + // (charLength already validated at method entry — charLength * 4 cannot overflow here.) var maxBytes = charLength * 4; int reserveHeader; @@ -796,8 +791,8 @@ public static partial class AcBinarySerializer // ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength) { - var shift = reserveHeader - 1; _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten)); + _buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten); _position = savedPos + 1 + bytesWritten; } @@ -806,10 +801,12 @@ public static partial class AcBinarySerializer var actualVarUIntSize = VarUIntSize((uint)bytesWritten); var asciiHeader = 1 + actualVarUIntSize; var shift = reserveHeader - asciiHeader; - if (shift > 0) - _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten)); + + if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten)); + _buffer[savedPos] = BinaryTypeCode.StringAscii; _position = savedPos + 1; + WriteVarUIntUnsafe((uint)bytesWritten); _position += bytesWritten; } @@ -836,8 +833,7 @@ public static partial class AcBinarySerializer } var shift = reserveHeader - actualHeader; - if (shift > 0) - 
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten)); + if (shift > 0) _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(encodeStart - shift, bytesWritten)); _buffer[savedPos] = tierMarker; switch (actualHeader) @@ -892,6 +888,10 @@ public static partial class AcBinarySerializer // emits Small tier (3 byte) when bytesWritten ≤ 255, instead of Medium (5 byte). Big tier // never engages — MaxStringInternLength byte-typed (max 255 char × 4 byte = 1020 byte fits in Medium). var charLength = value.Length; + // Overflow guard (defensive — interning length is byte-typed so this should never trigger, + // but stays consistent with WriteStringWithDispatch and protects against future refactors). + if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); + var maxBytes = charLength * 4; var cacheIdxSize = VarUIntSize((uint)cacheMapIndex); @@ -915,6 +915,7 @@ public static partial class AcBinarySerializer // Write [marker][cacheIdx VarUInt][charLen + utf8Len header][bytes] _buffer[savedPos] = tierMarker; _position = savedPos + 1; + WriteVarUIntUnsafe((uint)cacheMapIndex); if (actualHeader == 3) @@ -942,6 +943,18 @@ public static partial class AcBinarySerializer // The hot-path string writes go through WriteStringWithDispatch (M3R7 + H2Q6 marker dispatch). // ───────────────────────────────────────────────────────────────── + /// <summary> + /// Throw helper for the overflow guard in <see cref="WriteStringWithDispatch"/> and + /// <see cref="WriteStringInternFirstWithDispatch"/>. Marked NoInlining so the hot path + /// stays compact — the JIT/AOT keeps the throw-site out of the inlined caller body. + /// </summary> + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowStringTooLong(int charLength) => + throw new InvalidOperationException( + $"String too long for binary serialization: {charLength} chars exceeds {BinaryTypeCode.MaxStringCharLength}. 
" + + $"This limit is dictated by the writer's worst-case 'charLength * 4' UTF-8 byte allocation; " + + $"larger inputs would silently overflow int arithmetic."); + #endregion #region Bulk Array Writes — inline diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs index d63d908..acfe249 100644 --- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs +++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs @@ -117,11 +117,12 @@ internal static class BinaryTypeCode public const byte StringInternFirstSmall = SlotCount + 40; // 104 — Interning tier 1: [marker:1][cacheIdx:VarUInt][charLen:8][utf8Len:8][bytes] public const byte StringInternFirstMedium = SlotCount + 41; // 105 — Interning tier 2: [marker:1][cacheIdx:VarUInt][charLen:16][utf8Len:16][bytes] - // RESERVED (27 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md V4N3 marker address space plan: + // RESERVED (29 values: 106..134) — strategic future-feature reservation per BINARY_TODO.md H2Q6 marker address space plan: // 106..121 (16 values): ACCORE-BIN-T-L9Y3 — FixArray short-list count in marker (count 0-15) // 122..126 (5 values): ACCORE-BIN-T-S5L8 — sentinel-length encoding tiers // 127..130 (4 values): ACCORE-BIN-T-S2X9 — markerless schema lane opt-in - // 131..134 (4 values): general reserve + // 131 (1 value): ACCORE-BIN-T-F3W6 — dedicated FastWire string marker (split mode-shared StringSmall) + // 132..134 (3 values): general reserve // // Readers MUST throw "unknown marker" on any value in 106..134 until the corresponding feature // activates within the v3 wire format envelope (no further wire-format break needed). 
@@ -166,6 +167,16 @@ internal static class BinaryTypeCode public const byte Int32Tiny = 192; // -16 to 47 stored in single byte (value = code - 192 - 16) public const byte Int32TinyMax = 255; // Upper bound for tiny int (192 + 64 - 1 = 255) + /// <summary> + /// Largest string.Length value safe for binary serialization — charLength * 4 (UTF-8 + /// worst-case byte count) MUST fit in int. Above this, the writer's maxBytes = charLength * 4 + /// computation overflows: at exactly 0x40000000 chars it wraps to 0 (silent zero-overflow → wire + /// emits empty string with the original charLength claim, silent data loss). The single + /// (uint)charLength > MaxStringCharLength guard catches the overflow band cheaply + /// (one unsigned compare on the writer hot path, predict-friendly — always false on realistic input). NOTE(review): .NET appears to cap string.Length below 0x40000000 (runtime limit 0x3FFFFFDF), so the wrap-to-zero case looks unreachable for a real string and the negative-product band would be the reachable overflow — confirm. + /// </summary> + public const int MaxStringCharLength = 0x1FFFFFFF; // 536_870_911 — largest charLength where charLength * 4 fits in int + /// <summary> /// Check if type code represents a reference (string or object). 
/// diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index 6c0a621..be08689 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -833,7 +833,7 @@ while (s < src.Length) - Phase 1+2 (AVX-512BW + Vector128 in `CountUtf8Chars` + `EncodeUtf8SinglePass` Phase 1) **landed 2026-05-05** — covered by existing round-trip tests, no regression on non-AVX-512 hosts (validated on AVX2-host bench) ## ACCORE-BIN-T-H2Q6: Fixed-width dual-length string header (Small/Medium/Big) for 1-pass decode -**Priority:** P1 · **Type:** Wire-format + Performance · **Related:** `DecodeUtf8SinglePass`, `CountUtf8Chars`, `WriteStringWithDispatch`, `ReadStringUtf8` +**Priority:** P1 · **Type:** Wire-format + Performance · **Status:** Closed (2026-05-06) · **Related:** `DecodeUtf8SinglePass`, `CountUtf8Chars`, `WriteStringWithDispatch`, `ReadStringUtf8` Current Compact string decode uses two-pass flow for non-ASCII payloads (`CountUtf8Chars` + `DecodeUtf8SinglePass`). Planned direction: remove VarUInt-based string-length path for the new string wire variant, and carry both lengths in a fixed-width header so deserialize can allocate target `string` immediately and decode in a single pass. @@ -905,7 +905,8 @@ The marker reorg frees **34 marker values** (32 `FixStr` non-ASCII + `String` + | `FixArrayBase..FixArrayMax` | 16 | `ACCORE-BIN-T-L9Y3` (FixArray short-list count in marker) | reserved, future | | Sentinel-length string tier markers | ~5 | `ACCORE-BIN-T-S5L8` (sentinel-length encoding) | reserved, future | | Markerless schema lane | ~4 | `ACCORE-BIN-T-S2X9` (markerless schema lane opt-in) | reserved, future | -| General reserve | 4-8 | unallocated | tartalék | +| `StringFastWire` | 1 | `ACCORE-BIN-T-F3W6` (dedicated FastWire string marker) | reserved, future | +| General reserve | 3 | unallocated | tartalék | **Wire-format version bump**: v2 → v3 at H2Q6 landing. 
The reserved-but-unimplemented marker values are documented but not yet decoded — readers throw `unknown marker` if wire contains them. Future activation of `FixArray` / sentinel-length / markerless schema lane within the **same v3 wire format** is non-breaking for already-deployed v3 consumers (they reject unknown markers cleanly; producers opt in to emit them). @@ -918,6 +919,32 @@ The marker reorg frees **34 marker values** (32 `FixStr` non-ASCII + `String` + - Existing round-trip tests pass, plus new boundary tests for tier transitions (utf8Len = 254/255/256/65534/65535/65536) and interning tier transitions - Benchmark report includes before/after for Compact mode on non-ASCII dataset (Ser/Deser/RT + Size) vs the `2026-05-06_13-10-30.LLM` baseline +### Resolution + +Landed 2026-05-06. End-to-end implementation: marker reorg + writer tier-dispatch + reader tier-readers + SGen template + skip path + interning path. Five new markers (`StringSmall`/`Medium`/`Big`/`InternFirstSmall`/`InternFirstMedium`) replacing the old `String`/`StringInternFirst`/`FixStrBase..Max` (32 + 1 + 1 = 34 marker values freed, 5 used; 29 reserved for future features per the address-space plan). Wire format version bumped v2 → v3. + +Follow-up A-direction header pack-write/read optimization landed in the same window: `Unsafe.WriteUnaligned<ushort>` (Small) / `<uint>` (Medium) / `<ulong>` (Big) replace 2× byte / 2× ushort / 2× uint stores; reader uses single `uint`/`ulong` loads with bit-extract. Direct `ref byte` writes (no Span-shape overhead). + +**Tests:** 222 pass / 13 pre-existing GuidIId failures (unchanged). 55/55 Utf8TranscoderTests pass. 
+ +**Benchmark vs `2026-05-06_13-10-30.LLM` baseline (`2026-05-07_08-55-49.LLM`):** +- Compact-vs-MemPack Deser ratio improvement on baseline gap: **-14 to -28 percentage points** across cells +- Deser: **4/5 cells now faster than MemPack** (Small -6%, Medium -3%, Large -9%, Deep -7%); Repeated cell remaining +5% gap (V4N2 Phase 3 SIMD multi-byte transcoder targets this) +- Wire size: **5/5 cells smaller than MemPack** (-8% to -11%) +- Ser: 1/5 win (Large -9%), 1/5 tie (Medium 0%), 3/5 minor lag (+2-7% Small/Repeated/Deep) — host-noise band + +**Critical algorithmic correctness lesson** (from V4N3 follow-up `GetUtf8ByteCount`): the initial 4-popcount formula assumed `lowSur == highSur` per chunk. Fix: 5-popcount closed-form. Caught by surrogate-pair-split-across-chunk regression tests. Documented in Utf8Transcoder. + +**Marker address space (post-H2Q6, v3 wire):** +- 91 → StringSmall (was String) +- 94 → StringMedium (was StringInternFirst) +- 103 → StringBig +- 104 → StringInternFirstSmall +- 105 → StringInternFirstMedium +- 106..134 reserved (29 values: 16 for `L9Y3` FixArray, 5 for `S5L8` sentinel-length, 4 for `S2X9` markerless schema lane, 1 for `F3W6` FastWire dedicated marker, 3 reserve) + +**Related follow-up TODO entries:** `O7G2` (overflow guard — Closed 2026-05-06), `S6F2` (shift-free Small fast path — Open), `W2C8` (maximizing the H2Q6 win on the WASM string-cache path — Open). 
+ ## ACCORE-BIN-T-S5L8: Sentinel-length encoding for strings (wire-size optimization, both modes) **Priority:** P3 · **Type:** Wire-format optimization · **Related:** `AcBinarySerializer.WriteString`, `AcBinaryDeserializer.ReadValue` string dispatch @@ -1238,7 +1265,7 @@ Hypothesis: NativeAOT (the benchmark target environment) does not match Tier 1 J - Disasm + bench correlation step before any code change (no speculative refactoring) ## ACCORE-BIN-T-V4N5: Dead-code review — `WriteFixStrDirect` + `WriteStringUtf8Internal` -**Priority:** P3 · **Type:** Refactor / hygiene · **Related:** `BinarySerializationContext.WriteFixStrDirect` (line 832), `WriteStringUtf8Internal` (line 875) +**Priority:** P3 · **Type:** Refactor / hygiene · **Status:** Closed (2026-05-06) · **Related:** `BinarySerializationContext.cs` V4N3 audit surfaced two methods with no callers in the entire workspace: @@ -1271,6 +1298,18 @@ The pair forms a closed dead loop (`WriteFixStrDirect` → `WriteStringUtf8Inter - Pre-NuGet release housekeeping pass - Or: any future refactor that touches `BinarySerializationContext` string-write methods (then decide rather than leave the dead pair behind) +### Resolution + +Disposition: **Delete (Option 1)**. Landed 2026-05-06 together with the H2Q6 marker reorg commit. Five dead methods removed in a single cleanup pass: + +- `WriteFixStrDirect(string)` — uncalled public method +- `WriteStringUtf8Internal(string)` — uncalled private method (only called from `WriteFixStrDirect`) +- `WriteFixStr(string)` — uncalled public method (audit surfaced; was originally listed as live) +- `WriteFixStrBytes(ReadOnlySpan)` — uncalled public method (audit surfaced) +- `WritePreencodedPropertyName(ReadOnlySpan)` — uncalled public method (audit surfaced) + +All five had zero call sites across core, SourceGenerator template, tests, and reflection. 
The hot-path string write continues through `WriteStringWithDispatch` (M3R7 + H2Q6 marker dispatch) and `WriteStringInternFirstWithDispatch` (interning tier dispatch). Public surface reduced; binary test suite unchanged (222 pass / 13 pre-existing GuidIId failures). + ## ACCORE-BIN-T-L9Y3: FixArray marker tier — short-list count encoded in marker **Priority:** P3 · **Type:** Wire-format optimization · **Status:** Open · **Related:** `Array` (66) marker, `VarUInt itemCount`, `ACCORE-BIN-T-H2Q6` marker reservation @@ -1334,3 +1373,204 @@ Activation steps when implementing: - **`FixDict` analog** — same pattern for `Dictionary` marker (67) with `kvCount` 0-15. Worth considering only if a benchmark workload demonstrates dictionary-heavy structures; the current bench data (Order DTOs) does not. **Defer until evidence.** - **`FixArray 0-31`** — wider count range (32 markers). Marginal additional saving (16-31 elem list-ek ritkák); would consume nearly all freed marker space, leaving no slack for `S5L8`/`S2X9`. **Reject unless evidence warrants.** +## ACCORE-BIN-T-O7G2: Overflow guard on `charLength * 4` writer arithmetic + corrupted-wire `ReadStringBig` +**Priority:** P3 · **Type:** Defensive / safety · **Status:** Closed (2026-05-06) · **Related:** `WriteStringWithDispatch`, `WriteStringInternFirstWithDispatch`, `ReadStringBig`, `BinaryTypeCode.MaxStringCharLength` + +Defensive guards covering two latent failure modes in the H2Q6 string serialization paths: + +**Writer overflow (silent zero corruption)** — `charLength * 4` overflows `int` when `charLength > 0x1FFFFFFF` (~537M). 
At exactly `0x40000000` chars the multiplication wraps to **0**, causing: +- `EnsureCapacity(reserveHeader + 0)` to silently succeed (no buffer growth) +- `EncodeUtf8SinglePass(value, emptySpan)` to write 0 bytes, returning `bytesWritten = 0` +- The H2Q6 tier choice picks Small (`bytesWritten ≤ 255`), writing `[StringSmall][0][0]` to the wire +- **The string content is lost silently — no exception, wire claims an empty string** + +Other overflow values (e.g. `charLength = 600M` → `maxBytes` becomes negative) eventually surface as `ArgumentOutOfRangeException` from `Span.AsSpan(start, length)`, but the message ("length cannot be negative") is misleading and arrives after the buffer has already been partially mutated. + +**Reader corrupted wire (negative cast from oversized uint)** — in `ReadStringBig`, the wire-side `charLen:32` and `utf8Len:32` are read as `uint`, then cast to `int`. Corrupted or maliciously-crafted payloads with values > `Int32.MaxValue` produce negative ints, leading to `string.Create(negative, ...)` exceptions or position-state desync — at best a misleading message, at worst a partial decode with wire-position shifted incorrectly. + +### Resolution + +Landed 2026-05-06 (this commit window). + +**Writer side** — `WriteStringWithDispatch` and `WriteStringInternFirstWithDispatch` each gain one method-entry guard: + +```csharp +var charLength = value.Length; +if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); +``` + +A single unsigned compare catches the overflow band; predict-friendly (always false on realistic input). The throw helper is `[MethodImpl(MethodImplOptions.NoInlining)]` so the JIT/AOT keeps the throw site out of the inlined hot path. The same `charLength` value is reused across the FastWire and Compact branches — no duplicate guard. 
+ +**Reader side** — `ReadStringBig` gains a single bitwise-OR + sign-test: + +```csharp +var packed = context.ReadUInt64Unsafe(); +var charLength = (int)(uint)packed; +var byteLength = (int)(uint)(packed >> 32); +if ((charLength | byteLength) < 0) ThrowCorruptedBigWire(charLength, byteLength); +``` + +The OR + sign-test catches negative casts (any wire-side uint > `Int32.MaxValue` produces a negative int after cast; OR of two positives is positive, sign-test cheap). One instruction effective; predict-friendly. + +**New constant**: `BinaryTypeCode.MaxStringCharLength = 0x1FFFFFFF` (536_870_911 — largest charLength where `charLength * 4` fits in int). + +**Hot-path cost**: ~0% on realistic input — single unsigned compare on the writer, single OR + sign-test on the reader Big tier (Small/Medium readers untouched since their wire values are bounded by `byte` / `ushort` types and cannot overflow). Throw helpers `NoInlining` keep the inlined caller body compact. Tests 222 pass / 13 pre-existing failures unchanged. + +### Why P3 + +- No correctness impact for realistic inputs (the overflow band is far outside any real DTO scenario) +- Defensive value: prevents silent data loss in the `charLength = 1.07G` zero-overflow edge case + provides clear error messages on out-of-range inputs +- Security value: corrupted/malicious wire payloads on the reader Big tier path are now caught early instead of producing inconsistent position state +- NuGet release professional-quality signal — explicit, defensive guards over silent-corruption paths + +## ACCORE-BIN-T-S6F2: Shift-mentes Small fast path in `WriteStringWithDispatch` +**Priority:** P3 · **Type:** Performance · **Related:** `WriteStringWithDispatch`, `BinaryTypeCode.StringSmall` + +The H2Q6 writer's post-encode tier choice runs a 3-way switch (`bytesWritten ≤ 255 → StringSmall`, `≤ 65535 → StringMedium`, `else StringBig`) and a header-write switch (3 / 5 / 9 byte) for every non-ASCII string. 
On the Repeated benchmark cell (Magyar content, ~10-15 char strings dominant) **99%+ of writes resolve to StringSmall** — the 3-way switch decision is statistically determinate from `charLength ≤ 63` alone (worst-case `charLength * 4 ≤ 252 ≤ 255` ⇒ Small tier guaranteed). + +A specialized fast path for `charLength ≤ 63` could eliminate: +- The `int actualHeader; byte tierMarker;` runtime-resolved variables +- The 3-way `bytesWritten` switch +- The 3-way `actualHeader` header-write switch +- The `shift = reserveHeader - actualHeader` compute (always 0 in this branch) + +Sketch: +```csharp +if (charLength <= 63) +{ + EnsureCapacity(3 + charLength * 4); + var savedPos = _position; + var encodeStart = savedPos + 3; + var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, charLength * 4)); + if (bytesWritten == charLength) { /* ASCII override — FixStrAscii inline */ } + else + { + // StringSmall — 0 shift, inline header write (constant-folded) + _buffer[savedPos] = BinaryTypeCode.StringSmall; + Unsafe.WriteUnaligned(ref _buffer[savedPos + 1], + (ushort)(charLength | (bytesWritten << 8))); + _position = savedPos + 3 + bytesWritten; + } + return; +} +// charLength > 63 → fall through to existing post-encode tier dispatch +``` + +### Why P3 + +- Repeated cell hot path benefit (~99% of writes on Magyar content are charLength ≤ 63) +- Estimated +1-3% Ser improvement on Repeated/Medium cells (where short non-ASCII strings dominate) +- Constant-folded tier choice + inline header write — no branch overhead vs. the generic post-encode path +- Trade-off: ~30 lines of duplicated specialized code; the generic post-encode path remains for charLength > 63 long-string scenarios + +### Acceptance + +- `WriteStringWithDispatch` Small fast path emits identical wire bytes as the generic path for `charLength ≤ 63` (round-trip parity) +- Benchmark on Repeated/Medium cells shows ≥ 1% Ser improvement vs. 
post-A-direction baseline (`2026-05-07_09-39-09.LLM` or later) +- No regression on Large/Deep cells (long-string path untouched) +- Round-trip tests pass on the boundary `charLength = 63` and `charLength = 64` cases + +### Trigger + +- After A-direction (header pack-write) bench result is conclusive +- Pre-NuGet release if the Repeated cell Compact-vs-MemPack Ser ratio still has measurable headroom + +## ACCORE-BIN-T-W2C8: WASM string-cache H2Q6 maximalizálás (`ReadStringUtf8Cached` MISS path) +**Priority:** P2 (WASM target) / P3 (otherwise) · **Type:** Performance · **Related:** `BinaryDeserializationContext.Read.cs::ReadStringUtf8Cached`, `ReadStringUtf8WithCharLen`, `Utf8Transcoder.DecodeUtf8SinglePass` + +H2Q6's primary win is **1-pass decode** on the reader side: tier markers carry both `charLen` and `utf8Len`, so the reader allocates the target string with the known char count and decodes in a single pass via `string.Create(charLength, ..., DecodeUtf8SinglePass)`. This eliminates the `CountUtf8Chars` Pass 1 — the headline V4N3/H2Q6 win. + +**The WASM string-cache path bypasses this win.** When `_useStringCaching` is true (Blazor WASM target), `ReadStringUtf8WithCharLen` dispatches to `ReadStringUtf8Cached(byteLength)` for short strings. On cache HIT, the cached instance is returned (zero decode — already optimal). **On cache MISS, the current `ReadStringUtf8Cached` falls back to `Utf8NoBom.GetString(slice)` — the BCL two-pass UTF-8 decoder.** The H2Q6 1-pass decode benefit is lost on every cache MISS. 
+ +Per-cell impact estimate on a WASM workload with hot-path strings (typical Blazor SignalR DTO traffic): +- Cache HIT rate ~30-50% on repeated property names + tags + categories +- Cache MISS rate ~50-70% on first occurrences + unique values +- MISS path = `Utf8NoBom.GetString` BCL call (virtual dispatch + EncoderFallback overhead) instead of `string.Create(charLength, ..., DecodeUtf8SinglePass)` + +### Implementation outline + +`ReadStringUtf8Cached` accepts both `charLength` and `byteLength` (or just compute charLength from the cache check / decode result). Cache HIT: `cached.Length == charLength` invariant check (UTF-16 char count, not UTF-8 byte count) + ASCII verification. Cache MISS: replace `Utf8NoBom.GetString(slice)` with `string.Create(charLength, (Buffer, Pos, Len), static (chars, state) => DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars))`. + +Cross-check: the existing `ComputeStringHashFull(slice)` and `VerifyAsciiUtf8Match(cached, slice)` operate on the raw UTF-8 bytes — these stay unchanged. Only the MISS-side string materialization needs the H2Q6-aware refactor. + +### Why P2 (WASM-target) / P3 (otherwise) + +- The non-WASM benchmark host (x64) doesn't enable `_useStringCaching` by default, so this optimization is invisible on the current bench +- On Blazor WASM, all interning + repeated-string-cached deserialization currently pays the BCL decode tax on cache MISS +- Estimated +5-15% Deser improvement on WASM workloads with significant cache MISS rate +- Direct extension of the H2Q6 win to the WASM execution profile + +### Acceptance + +- `ReadStringUtf8Cached` cache MISS path uses `string.Create(charLength, ..., DecodeUtf8SinglePass)` — no BCL `Utf8NoBom.GetString` on MISS +- Round-trip tests pass on cached + uncached short-string scenarios across all UTF-8 content classes (ASCII / Hungarian / CJK / emoji) +- WASM-target benchmark (Blazor profile) shows ≥ 5% Deser improvement vs. 
pre-W2C8 state on a representative hot-string-heavy DTO workload +- Cache HIT path performance unchanged (already optimal — no decode) +- Cache eviction / capacity behavior unchanged + +### Trigger + +- Pre-NuGet release if Blazor WASM is a primary supported scenario in the release narrative +- Or: when a WASM-focused benchmark workload becomes the active perf measurement target + +## ACCORE-BIN-T-F3W6: Dedicated FastWire string marker (split mode-shared `StringSmall`) +**Priority:** P3 · **Type:** Performance · **Related:** `WriteStringWithDispatch` FastWire branch, `ReadStringSmall` FastWire branch, `BinaryTypeCode.StringSmall`, H2Q6 marker reservation + +The H2Q6 marker layout currently shares `StringSmall` (=91) between Compact and FastWire modes: +- **Compact** emits `[91][charLen:8][utf8Len:8][UTF-8 bytes]` +- **FastWire** emits `[91][VarUInt charCount][UTF-16 raw bytes]` + +The reader dispatches on `context.FastWire` inside `ReadStringSmall`. Correct (the deserializer's mode is fixed per operation), but the mode-shared marker forces runtime branching at hot points: + +- **Writer**: `if (FastWire)` at the top of `WriteStringWithDispatch` runs on every string write — runtime check on a path-dominant (Compact) call site +- **Reader**: `if (context.FastWire)` inside `ReadStringSmall` runs on every short non-ASCII string deserialization — Compact-side waste +- **SGen template**: every regenerated reader contains the FastWire-aware `case StringSmall:` block (more code per type, larger AOT binary) +- **JIT/AOT inlining**: the larger `WriteStringWithDispatch` / `ReadStringSmall` method bodies may exceed inline budgets at hot call sites — particularly under NativeAOT + +A dedicated `StringFastWire` marker (one value from the H2Q6-freed 106-134 range — proposed allocation: **131**) splits the path: + +- **Compact** stays on `StringSmall` (=91) → `ReadStringSmall` becomes Compact-only (no `if (FastWire)` branch, smaller method body) +- **FastWire** uses new 
`StringFastWire` → dedicated `ReadStringFastWire` reader, FastWire-only logic +- Writer's FastWire branch emits `StringFastWire` instead of `StringSmall` + +### Wire format compatibility + +The marker swap is internally consistent within the v3 envelope — producers that opt in to the dedicated FastWire marker emit it; readers are expanded to handle both `StringSmall` and `StringFastWire` (transitional). Once all producers emit the dedicated marker, the old mode-shared dispatch in `ReadStringSmall` can be removed. + +### Why P3 — "every small % counts" + +- Estimated **+0.5-1% Ser** (writer branch elimination on Compact path) +- Estimated **+0.5-1% Deser** (reader smaller method body, better JIT/AOT inline-eligibility on Compact path; FastWire reader gets a tight dedicated path too) +- Compounds with other micro-opts across the hot path — small percentages add up +- Marker-space cost: **1 reserved value** consumed (general-reserve count drops from 4 to 3 in the H2Q6 reservation table) +- Risk: low — mechanical split; round-trip tested against both wire-format variants + +### Implementation outline + +1. `BinaryTypeCode.StringFastWire = 131` constant + helper updates (`IsString` range check + dispatch) +2. `WriteStringWithDispatch` FastWire branch emits `StringFastWire` (was `StringSmall`) +3. New `ReadStringFastWire` static reader — `[VarUInt charCount][UTF-16 bytes]` decode, no Compact-mode branching +4. `ReadStringSmall` simplified — Compact-only, drops `if (context.FastWire)` branch +5. `TypeReaderTable[StringFastWire]` registration +6. `SkipValue` `case StringFastWire:` — same skip layout as `StringSmall` FastWire branch (charCount VarUInt + 2 × charCount bytes) +7. SGen template `EmitReadString` — new `case StringFastWire:` block (FastWire-only branch); `case StringSmall:` simplified to Compact-only +8. 
Round-trip tests: separate FastWire and Compact wire format coverage + +### Acceptance + +- Round-trip parity on both Compact and FastWire wire formats (existing tests pass) +- Benchmark on FastWire mode shows ≥ 0.5% improvement vs. mode-shared baseline +- Compact mode shows no regression (likely marginal gain from simpler `ReadStringSmall`) +- AOT-published binary shows reduced generated reader size per `[AcBinarySerializable]` type (one less case-block + branch) +- Marker-space documented: `BinaryTypeCode.cs` reservation comment + H2Q6 entry's reservation table updated to reflect the F3W6 allocation + +### Trigger + +- Pre-NuGet release if every measurable percentage point on the Compact hot path matters for the "fastest" narrative +- Or: when the Compact/FastWire branch profile shows up in a NativeAOT inlining audit (`ACCORE-BIN-T-V4N4`) + +### Roll-back fallback + +If a future marker-space crunch arises (additional H2Q6 tiers, new compression markers, etc.), `F3W6` can be reverted by switching the writer back to emitting `StringSmall` on FastWire and re-introducing the mode-shared dispatch in `ReadStringSmall`. The original design is correctness-equivalent — the dedicated marker is purely an optimization. **If we run short on markers, we take it out.** + 