From d4e4c4480a9c4872ef5dce86325fe7927a5b9a0a Mon Sep 17 00:00:00 2001 From: Loretta Date: Sun, 24 May 2026 07:39:21 +0200 Subject: [PATCH] SGen null-handling parity, micro-opt CV, doc & bench fixes - Fix SGen collection/dictionary null-handling: always emit PropertySkip for nulls, preventing NREs regardless of nullable annotation. - Add micro-opt CV threshold (1.5%) to benchmark output for finer-grained result flagging; update reporting and context. - Benchmark loop: add inter-sample settle delay, trimmed median, and branchless progress for more reliable measurements. - Add regression tests for SGen null-handling (complex, collection, dictionary; null/non-null; SGen/reflection; FastMode/Default). - Update docs: clarify SGen null-check contract, add AQN binder security plan, and cross-reference related issues. - Misc: code cleanups, improved comments, and minor doc clarifications. --- .claude/settings.local.json | 17 +- AyCode.Benchmark/BdnSummaryAdapter.cs | 3 +- .../Reporting/BenchmarkReportWriter.cs | 33 +- .../Reporting/ReportingContext.cs | 3 +- .../BenchmarkLoop.cs | 62 ++- .../Configuration.cs | 16 + .../AcBinarySourceGenerator.GenWriter.cs | 11 +- ...ySerializerSGenNullComplexPropertyTests.cs | 136 +++++ .../SGenNullComplexPropertyModels.cs | 31 ++ ...rySerializer.BinarySerializationContext.cs | 7 +- .../docs/BINARY/BINARY_AQN_IMPLEMENTATION.md | 495 ++++++++++++++++++ AyCode.Core/docs/BINARY/BINARY_SGEN.md | 13 +- AyCode.Core/docs/BINARY/BINARY_STRICT_SGEN.md | 1 + AyCode.Core/docs/BINARY/BINARY_TODO.md | 22 + .../SIGNALR_BINARY_PROTOCOL_ISSUES.md | 6 +- .../SIGNALR_BINARY_PROTOCOL_TODO.md | 4 +- 16 files changed, 818 insertions(+), 42 deletions(-) create mode 100644 AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md diff --git a/.claude/settings.local.json b/.claude/settings.local.json index d12a13f..0fd2bd7 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -122,7 +122,22 @@ "Bash(echo \"EXIT=$?\")", "Bash(awk -F: '$1>8289')", "Bash(DOTNET_TieredCompilation=0 dotnet run --project AyCode.Core.Serializers.Console/AyCode.Core.Serializers.Console.csproj -c Release -- FastestByte)", - "Bash(DOTNET_TieredCompilation=0 dotnet run --project AyCode.Core.Serializers.Console/AyCode.Core.Serializers.Console.csproj -c Release -- FastestByte AsciiShort)" + "Bash(DOTNET_TieredCompilation=0 dotnet run --project AyCode.Core.Serializers.Console/AyCode.Core.Serializers.Console.csproj -c Release -- FastestByte AsciiShort)", + "Bash(DOTNET_TieredCompilation=0 DOTNET_JitDisasm='*BinarySerializationContext*WriteByte*' dotnet run -c Release --project AyCode.Benchmark/AyCode.Benchmark.csproj -- --jitasm)", + "Bash(echo \"exit=$?\")", + "Bash(DOTNET_TieredCompilation=0 DOTNET_JitDisasm='*TestOrder_All_False_GeneratedWriter*' dotnet run -c Release --project AyCode.Benchmark/AyCode.Benchmark.csproj -- --jitasm > /tmp/jitasm_gen.txt 2>&1; echo \"exit=$?\"; wc -l /tmp/jitasm_gen.txt; grep -c -E '^G_M|; Assembly' /tmp/jitasm_gen.txt)", + "Bash(DOTNET_TieredCompilation=0 DOTNET_JitDisasm='*WriteByte*' AyCode.Benchmark/bin/FruitBank/Release/net10.0/AyCode.Benchmark.exe --jitasm)", + "WebFetch(domain:blog.ndepend.com)", + "WebFetch(domain:devblogs.microsoft.com)", + "PowerShell(\"deleted\")", + "Bash(awk 'NR<=36812 && /^; Assembly listing for method/ {match_line=NR; match_text=$0} END {print \"Line \" match_line \":\"; print match_text}' jitasm_tier1.txt)", + "Bash(awk 'NR<=36812 && /^; Total bytes of code/ {prev=$0; prev_line=NR} NR>=36812 && /^; Total bytes of code/ {if \\(!found\\) {print \"Line \" NR \": \" $0; found=1}}' jitasm_tier1.txt)", + "Bash(sed -n '36760,36815p' jitasm_tier1.txt)", + "Bash(awk '/^; Assembly listing for method/ {current=$0} /call.*BufferAt/ {print NR\": \"current}' jitasm_tier1.txt)", + "Bash(awk '/^; Assembly listing for method AyCode.Core.Tests.TestModels.TestOrder_All_False_GeneratedWriter:WriteProperties/{found=1; next} found && /^; Assembly listing for method/{exit} found && /call/{print}' jitasm_tier1.txt)", + "Bash(awk '/^; Assembly listing for method AyCode.Core.Tests.TestModels.TestOrder_All_False_GeneratedWriter:WriteProperties/{found=1} found && /^; Total bytes of code/{print; exit}' jitasm_tier1.txt)", + "Bash(sed -n '50108,58000p' jitasm_tier1.txt)", + "Bash(awk '/^; Assembly listing for method AyCode.Core.Tests.TestModels.TestOrder_All_False_GeneratedWriter:WriteProperties/{found=1; next} found && /^; Assembly listing for method/{exit} found && /mov[[:space:]]+byte[[:space:]]+ptr/{print}' jitasm_tier1.txt)" ] } } diff --git a/AyCode.Benchmark/BdnSummaryAdapter.cs b/AyCode.Benchmark/BdnSummaryAdapter.cs index 9b21f28..34c003f 100644 --- a/AyCode.Benchmark/BdnSummaryAdapter.cs +++ b/AyCode.Benchmark/BdnSummaryAdapter.cs @@ -75,7 +75,8 @@ public static class BdnSummaryAdapter WarmupIterations: 0, BenchmarkSamples: 0, TargetSampleMs: 0, - UnstableCVThreshold: 0.03); + UnstableCVThreshold: 0.03, + MicroOptCVThreshold: 0.015); } /// diff --git a/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs b/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs index ae17afd..5ac598d 100644 --- a/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs +++ b/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs @@ -102,7 +102,7 @@ public static class BenchmarkReportWriter /// All time inputs are total-batch milliseconds; is the per-row iter /// count (post-adaptive-calibration). /// - public static string FormatMicrosWithRange(double medianMs, double minMs, double maxMs, double stdDevMs, int iterations, CultureInfo inv, double unstableCvThreshold) + public static string FormatMicrosWithRange(double medianMs, double minMs, double maxMs, double stdDevMs, int iterations, CultureInfo inv, double unstableCvThreshold, double microOptCvThreshold = 0.0) { var med = ToPerOpMicros(medianMs, iterations); // No range data (single-sample fast path) — surface as bare median, identical to the prior format. @@ -113,16 +113,20 @@ public static class BenchmarkReportWriter var max = ToPerOpMicros(maxMs, iterations); var range = $"{med.ToString("F2", inv)} ({min.ToString("F2", inv)}..{max.ToString("F2", inv)})"; - // CV (coefficient of variation = stddev / mean) — flag rows above the unstable threshold so a - // small inter-engine delta on a high-CV row is easy to discount as noise. + // CV (coefficient of variation = stddev / mean) — two-band flagging: + // ⚠️ X.X% : above the unstable threshold (e.g. 3%) — sub-threshold inter-engine + // deltas on this row are essentially noise; entirely dismissable. + // ⚠️micro X.X% : above the micro-opt threshold (e.g. 1.5%) but below unstable — not + // noise but sub-2% deltas are at the edge of reliability; cross-check + // with re-run or BDN before declaring a regression / improvement. + // microOptCvThreshold = 0 disables the soft-flag band (backward-compat for callers that + // only want the original unstable-only behaviour). if (medianMs > 0 && stdDevMs > 0) { var cv = stdDevMs / medianMs; - if (cv > unstableCvThreshold) - { - var cvPct = (cv * 100).ToString("F1", inv); - return $"{range} ⚠️{cvPct}%"; - } + var cvPct = (cv * 100).ToString("F1", inv); + if (cv > unstableCvThreshold) return $"{range} ⚠️{cvPct}%"; + if (microOptCvThreshold > 0 && cv > microOptCvThreshold) return $"{range} ⚠️micro {cvPct}%"; } return range; @@ -606,7 +610,12 @@ public static class BenchmarkReportWriter var runStatsHeader = ctx.SourceTag == "Bdn" ? "Iterations: BDN-managed | Warmup: BDN-managed | Samples: BDN-managed" : $"Iterations: per-cell adaptive (target ~{ctx.TargetSampleMs} ms/sample) | Warmup: {ctx.WarmupIterations} per phase (Ser/Des isolated) | Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded"; - sb.AppendLine($"Charset: {ctx.CharsetName} | {runStatsHeader} | .NET: {Environment.Version} | UnstableCV threshold: {ctx.UnstableCVThreshold * 100:F0}%"); + // F1 formatter without an explicit IFormatProvider would pick the current culture (e.g. "3,0%" + // in Hungarian locale) — break parsability of the .LLM. Force InvariantCulture so the header + // matches the row values which already go through CultureInfo.InvariantCulture in FormatMicrosWithRange. + var unstablePct = (ctx.UnstableCVThreshold * 100).ToString("F1", CultureInfo.InvariantCulture); + var microPct = (ctx.MicroOptCVThreshold * 100).ToString("F1", CultureInfo.InvariantCulture); + sb.AppendLine($"Charset: {ctx.CharsetName} | {runStatsHeader} | .NET: {Environment.Version} | UnstableCV: {unstablePct}% | MicroOptCV: {microPct}%"); sb.AppendLine("Baseline: MemoryPack (Byte[]) (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup"); // Options summary. Bracketed [OrderType] surfaces the TestOrder variant each preset serialised — @@ -643,11 +652,11 @@ public static class BenchmarkReportWriter foreach (var r in testResults) { - var ser = r.SerializeTimeMs > 0 ? FormatMicrosWithRange(r.SerializeTimeMs, r.SerializeTimeMinMs, r.SerializeTimeMaxMs, r.SerializeTimeStdDevMs, r.SerializeIterations, inv, ctx.UnstableCVThreshold) : "-"; - var des = r.DeserializeTimeMs > 0 ? FormatMicrosWithRange(r.DeserializeTimeMs, r.DeserializeTimeMinMs, r.DeserializeTimeMaxMs, r.DeserializeTimeStdDevMs, r.DeserializeIterations, inv, ctx.UnstableCVThreshold) : "-"; + var ser = r.SerializeTimeMs > 0 ? FormatMicrosWithRange(r.SerializeTimeMs, r.SerializeTimeMinMs, r.SerializeTimeMaxMs, r.SerializeTimeStdDevMs, r.SerializeIterations, inv, ctx.UnstableCVThreshold, ctx.MicroOptCVThreshold) : "-"; + var des = r.DeserializeTimeMs > 0 ? FormatMicrosWithRange(r.DeserializeTimeMs, r.DeserializeTimeMinMs, r.DeserializeTimeMaxMs, r.DeserializeTimeStdDevMs, r.DeserializeIterations, inv, ctx.UnstableCVThreshold, ctx.MicroOptCVThreshold) : "-"; var rt = r.RoundTripTimeMs > 0 ? (r.IsRoundTripOnly - ? FormatMicrosWithRange(r.RoundTripTimeMs, r.RoundTripTimeMinMs, r.RoundTripTimeMaxMs, r.RoundTripTimeStdDevMs, r.RoundTripIterations, inv, ctx.UnstableCVThreshold) + ? FormatMicrosWithRange(r.RoundTripTimeMs, r.RoundTripTimeMinMs, r.RoundTripTimeMaxMs, r.RoundTripTimeStdDevMs, r.RoundTripIterations, inv, ctx.UnstableCVThreshold, ctx.MicroOptCVThreshold) : RtPerOp(r).ToString("F2", inv)) : "-"; diff --git a/AyCode.Benchmark/Reporting/ReportingContext.cs b/AyCode.Benchmark/Reporting/ReportingContext.cs index 666130a..245c356 100644 --- a/AyCode.Benchmark/Reporting/ReportingContext.cs +++ b/AyCode.Benchmark/Reporting/ReportingContext.cs @@ -22,7 +22,8 @@ public sealed record ReportingContext( int WarmupIterations, int BenchmarkSamples, int TargetSampleMs, - double UnstableCVThreshold) + double UnstableCVThreshold, + double MicroOptCVThreshold) { /// /// Walks up from the assembly's BaseDirectory to find the repo root (marker: AyCode.Core.sln). diff --git a/AyCode.Core.Serializers.Console/BenchmarkLoop.cs b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs index 0ae7329..b6f37bf 100644 --- a/AyCode.Core.Serializers.Console/BenchmarkLoop.cs +++ b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs @@ -137,7 +137,8 @@ internal static class BenchmarkLoop WarmupIterations: Configuration.WarmupIterations, BenchmarkSamples: Configuration.BenchmarkSamples, TargetSampleMs: Configuration.TargetSampleMs, - UnstableCVThreshold: Configuration.UnstableCVThreshold); + UnstableCVThreshold: Configuration.UnstableCVThreshold, + MicroOptCVThreshold: Configuration.MicroOptCVThreshold); // Print grouped results BenchmarkReportWriter.PrintGroupedResults(allResults, testDataSets); @@ -644,6 +645,13 @@ internal static class BenchmarkLoop GC.WaitForPendingFinalizers(); GC.Collect(); + // Inter-sample thermal-settle: CPU boost-clock can drop mid-batch under sustained load + // (e.g. 10×250ms = 2.5 sec burst). InterSampleSettleMs lets the boost-clock state + // settle so later samples don't read systematically slower than early ones. Skip before + // the first sample (no prior heat to settle from). Set to 0 in Configuration to disable. + if (s > 0 && Configuration.InterSampleSettleMs > 0) + Thread.Sleep(Configuration.InterSampleSettleMs); + var sw = Stopwatch.StartNew(); RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1); sw.Stop(); @@ -672,8 +680,18 @@ internal static class BenchmarkLoop var stdDevMs = Math.Sqrt(Math.Max(0.0, variance)); Array.Sort(times); - // Median: middle value for odd sample counts, average of two middles for even counts. - var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0; + + // Trimmed median: when samples >= 4, drop the single min and single max (sorted-array + // first and last) and compute median on the remaining (samples - 2) entries. Removes the + // worst per-sample contamination (a thermal spike, OS preempt, or a GC pause that escaped + // the per-sample GC.Collect settle) without throwing away too much signal. The min/max / + // stdDev outputs still reflect the FULL sample population — the trim affects only the + // headline median figure, so the visible range still shows the actual measurement extremes. + var trimStart = samples >= 4 ? 1 : 0; + var trimCount = samples >= 4 ? samples - 2 : samples; + var medianMs = trimCount % 2 == 1 + ? times[trimStart + trimCount / 2] + : (times[trimStart + trimCount / 2 - 1] + times[trimStart + trimCount / 2]) / 2.0; EndProgress(progressLabel, medianMs); return (medianMs, minMs, maxMs, stdDevMs); @@ -765,27 +783,33 @@ internal static class BenchmarkLoop return; } - // ~10 progress emits per sample run. Avoid emitting on every iter (Console.Write is - // expensive enough to skew sub-µs benchmarks if overdone). + // Batch-based progress emit — ~10 progress prints per sample. The inner loop is branchless + // (no per-iter modulo / progress check), so the per-iter overhead is bare `action()` cost. + // The outer loop drives the batches; progress emit happens once per batch on the boundary. + // This keeps sub-µs ops cleanly measurable — the prior `if ((i + 1) % step == 0)` check + // added a 1-2 cycle per-iter branch that distorted hot loops near the Stopwatch resolution. var step = Math.Max(1, iterations / 10); - for (var i = 0; i < iterations; i++) + var done = 0; + while (done < iterations) { - action(); - if ((i + 1) % step == 0 || i == iterations - 1) - { - var pct = (int)((i + 1) * 100L / iterations); - var line = samples > 1 - ? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({i + 1}/{iterations})" - : $" > {label} {pct,3}% ({i + 1}/{iterations})"; + var batch = Math.Min(step, iterations - done); - System.Console.Write('\r'); - System.Console.Write(line); + // Inner tight loop: no progress check, no modulo. Just the measured action() calls. + for (var i = 0; i < batch; i++) action(); + done += batch; - if (line.Length < _progressLastLineLen) - System.Console.Write(new string(' ', _progressLastLineLen - line.Length)); + var pct = (int)(done * 100L / iterations); + var line = samples > 1 + ? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({done}/{iterations})" + : $" > {label} {pct,3}% ({done}/{iterations})"; - _progressLastLineLen = line.Length; - } + System.Console.Write('\r'); + System.Console.Write(line); + + if (line.Length < _progressLastLineLen) + System.Console.Write(new string(' ', _progressLastLineLen - line.Length)); + + _progressLastLineLen = line.Length; } } diff --git a/AyCode.Core.Serializers.Console/Configuration.cs b/AyCode.Core.Serializers.Console/Configuration.cs index 0cf0401..7b7f6f6 100644 --- a/AyCode.Core.Serializers.Console/Configuration.cs +++ b/AyCode.Core.Serializers.Console/Configuration.cs @@ -60,6 +60,22 @@ internal static class Configuration // sub-3% inter-engine deltas. internal const double UnstableCVThreshold = 0.03; + // Lower-bound CV threshold for micro-optimization measurement reliability. Rows with CV in + // the (MicroOptCVThreshold, UnstableCVThreshold] range get a softer "⚠️micro" flag — they are + // not unstable enough to be entirely dismissable, but sub-2% inter-engine deltas observed on + // such a row are at the edge of the noise floor and should be cross-checked (re-run, BDN). + // Use case: micro-opt sprints where a ~1-2% signal lives below the unstable threshold but the + // row's own CV is still high enough to make that signal suspect. + internal const double MicroOptCVThreshold = 0.015; + + // Inter-sample cool-down delay (ms) inserted between recorded samples in the timed loop. + // Mitigates CPU thermal-throttling drift across a sustained burst (e.g. 10×250ms = 2.5 sec): + // without it, boost-clock can drop mid-batch on thermally-constrained hosts (laptops esp.), + // and the later samples in the batch read systematically slower than the early ones. 50ms is + // enough for boost-clock state to settle but cheap in total (~500ms / cell) — quick-bench + // workflow is not meaningfully slower. + internal const int InterSampleSettleMs = 50; + // JIT-tier-promotion drain delay between warmup and measurement. // - JIT mode (RuntimeFeature.IsDynamicCodeCompiled == true): tiered JIT promotes hot methods // in a background thread; we wait briefly for the queue to drain so the first measurement diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenWriter.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenWriter.cs index 6f4d859..16ac5ab 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenWriter.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenWriter.cs @@ -322,13 +322,18 @@ public partial class AcBinarySourceGenerator // Direct collection write for List/T[] with Complex element types that have generated writers if (p.ElementHasGeneratedWriter && p.CollectionKind != null) EmitDirectCollectionWrite(sb, p, a, i); - else if (p.IsNullable) + else { + // Reference type collections (with non-SGen elements, falling onto the + // WriteValueGenerated bridge) can always be null at runtime regardless of nullable + // annotation — runtime can violate the nullable-disabled contract via EF lazy-load, + // projection gaps, missing initializers, etc. Mirrors EmitDirectCollectionWrite + // (line ~877) and EmitDirectObjectWrite (line ~828) defensive null-check. + // Reader-side compat: every markered property is wrapped in `if (tc != PropertySkip)` + // by EmitReadProperty (GenReader.cs line ~137). sb.AppendLine($"{i}if ({a} == null) context.WriteByte(BinaryTypeCode.PropertySkip);"); sb.AppendLine($"{i}else AcBinarySerializer.WriteValueGenerated({a}, typeof({p.TypeNameForTypeof}), context);"); } - else - sb.AppendLine($"{i}AcBinarySerializer.WriteValueGenerated({a}, typeof({p.TypeNameForTypeof}), context);"); break; case PropertyTypeKind.Dictionary: EmitDirectDictionaryWrite(sb, p, a, i); diff --git a/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenNullComplexPropertyTests.cs b/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenNullComplexPropertyTests.cs index c0f5da9..2cdfc31 100644 --- a/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenNullComplexPropertyTests.cs +++ b/AyCode.Core.Tests/Serialization/AcBinarySerializerSGenNullComplexPropertyTests.cs @@ -67,4 +67,140 @@ public class AcBinarySerializerSGenNullComplexPropertyTests Assert.AreEqual(model.Customer.Id, roundTrip.Customer.Id); Assert.AreEqual(model.Customer.Name, roundTrip.Customer.Name); } + + [TestMethod] + [DataRow(true, true)] + [DataRow(true, false)] + [DataRow(false, false)] + [DataRow(false, true)] + public void Serialize_SGenCollectionPropertyNull_DoesNotThrow_AndRoundTripsAsNull(bool useSgen, bool fastMode) + { + var model = new SGenNullCollectionParent + { + Id = 11, + Items = null!, + Note = "regression-collection" + }; + + var options = fastMode ? AcBinarySerializerOptions.FastMode: AcBinarySerializerOptions.Default; + options.UseGeneratedCode = useSgen; + + var bytes = AcBinarySerializer.Serialize(model, options); + var roundTrip = AcBinaryDeserializer.Deserialize(bytes, options); + + Assert.IsNotNull(roundTrip); + Assert.AreEqual(model.Id, roundTrip.Id); + Assert.AreEqual(model.Note, roundTrip.Note); + Assert.IsNull(roundTrip.Items, + "collection property (with non-SGen element type) must round-trip as null when source was null " + + "(regression for SGen Collection fallback WriteValueGenerated else-branch null-check)"); + + Assert.IsTrue(System.Array.IndexOf(bytes, (byte)BinaryTypeCode.PropertySkip) >= 0, + "writer must emit PropertySkip marker on the null Items slot " + + "(confirms the fix took the PropertySkip path, not an unrelated null-safe code path)"); + } + + [TestMethod] + [DataRow(true, true)] + [DataRow(true, false)] + [DataRow(false, false)] + [DataRow(false, true)] + public void Serialize_SGenCollectionPropertyNonNull_RoundTripsCorrectly(bool useSgen, bool fastMode) + { + var model = new SGenNullCollectionParent + { + Id = 17, + Items = new List + { + new() { Id = 1, Name = "first" }, + new() { Id = 2, Name = "second" } + }, + Note = "positive-collection" + }; + + var options = fastMode ? AcBinarySerializerOptions.FastMode: AcBinarySerializerOptions.Default; + options.UseGeneratedCode = useSgen; + + var bytes = AcBinarySerializer.Serialize(model, options); + var roundTrip = AcBinaryDeserializer.Deserialize(bytes, options); + + Assert.IsNotNull(roundTrip); + Assert.AreEqual(model.Id, roundTrip.Id); + Assert.AreEqual(model.Note, roundTrip.Note); + Assert.IsNotNull(roundTrip.Items, + "non-null collection property must round-trip (null-check fix must not break the non-null path)"); + Assert.AreEqual(model.Items.Count, roundTrip.Items.Count); + Assert.AreEqual(model.Items[0].Id, roundTrip.Items[0].Id); + Assert.AreEqual(model.Items[0].Name, roundTrip.Items[0].Name); + Assert.AreEqual(model.Items[1].Id, roundTrip.Items[1].Id); + Assert.AreEqual(model.Items[1].Name, roundTrip.Items[1].Name); + } + + [TestMethod] + [DataRow(true, true)] + [DataRow(true, false)] + [DataRow(false, false)] + [DataRow(false, true)] + public void Serialize_SGenDictionaryPropertyNull_DoesNotThrow_AndRoundTripsAsNull(bool useSgen, bool fastMode) + { + var model = new SGenNullDictionaryParent + { + Id = 23, + Mapping = null!, + Note = "regression-dictionary" + }; + + var options = fastMode ? AcBinarySerializerOptions.FastMode: AcBinarySerializerOptions.Default; + options.UseGeneratedCode = useSgen; + + var bytes = AcBinarySerializer.Serialize(model, options); + var roundTrip = AcBinaryDeserializer.Deserialize(bytes, options); + + Assert.IsNotNull(roundTrip); + Assert.AreEqual(model.Id, roundTrip.Id); + Assert.AreEqual(model.Note, roundTrip.Note); + Assert.IsNull(roundTrip.Mapping, + "dictionary property must round-trip as null when source was null " + + "(pins EmitDirectDictionaryWrite line ~1037 null-check against future regression)"); + + Assert.IsTrue(System.Array.IndexOf(bytes, (byte)BinaryTypeCode.PropertySkip) >= 0, + "writer must emit PropertySkip marker on the null Mapping slot " + + "(confirms the PropertySkip path, not an unrelated null-safe code path)"); + } + + [TestMethod] + [DataRow(true, true)] + [DataRow(true, false)] + [DataRow(false, false)] + [DataRow(false, true)] + public void Serialize_SGenDictionaryPropertyNonNull_RoundTripsCorrectly(bool useSgen, bool fastMode) + { + var model = new SGenNullDictionaryParent + { + Id = 29, + Mapping = new Dictionary + { + ["alpha"] = new() { Id = 1, Name = "first" }, + ["beta"] = new() { Id = 2, Name = "second" } + }, + Note = "positive-dictionary" + }; + + var options = fastMode ? AcBinarySerializerOptions.FastMode: AcBinarySerializerOptions.Default; + options.UseGeneratedCode = useSgen; + + var bytes = AcBinarySerializer.Serialize(model, options); + var roundTrip = AcBinaryDeserializer.Deserialize(bytes, options); + + Assert.IsNotNull(roundTrip); + Assert.AreEqual(model.Id, roundTrip.Id); + Assert.AreEqual(model.Note, roundTrip.Note); + Assert.IsNotNull(roundTrip.Mapping, + "non-null dictionary property must round-trip (null-check pin must not break the non-null path)"); + Assert.AreEqual(model.Mapping.Count, roundTrip.Mapping.Count); + Assert.AreEqual(model.Mapping["alpha"].Id, roundTrip.Mapping["alpha"].Id); + Assert.AreEqual(model.Mapping["alpha"].Name, roundTrip.Mapping["alpha"].Name); + Assert.AreEqual(model.Mapping["beta"].Id, roundTrip.Mapping["beta"].Id); + Assert.AreEqual(model.Mapping["beta"].Name, roundTrip.Mapping["beta"].Name); + } } diff --git a/AyCode.Core.Tests/TestModels/SGenNullComplexPropertyModels.cs b/AyCode.Core.Tests/TestModels/SGenNullComplexPropertyModels.cs index a904e58..cb26378 100644 --- a/AyCode.Core.Tests/TestModels/SGenNullComplexPropertyModels.cs +++ b/AyCode.Core.Tests/TestModels/SGenNullComplexPropertyModels.cs @@ -25,3 +25,34 @@ public class SGenNullComplexParent public NonGeneratedComplexCustomer Customer { get; set; } = null!; public string? Note { get; set; } } + +/// +/// Regression model for SGen Collection-property null handling — the fallback WriteValueGenerated +/// branch in GenWriter.cs PropertyTypeKind.Collection case (~line 321), when the element type has +/// no generated writer (cross-assembly / unattributed). The Items property is non-nullable in +/// signature, but runtime data can still contain null. Serializer must emit PropertySkip instead +/// of forwarding null into the WriteValueGenerated bridge. +/// +[AcBinarySerializable] +public class SGenNullCollectionParent +{ + public int Id { get; set; } + public List Items { get; set; } = null!; + public string? Note { get; set; } +} + +/// +/// Regression model for SGen Dictionary-property null handling — the EmitDirectDictionaryWrite +/// branch in GenWriter.cs (~line 1031). The branch was already null-safe at the time of the +/// N4P8 audit (explicit `if (a == null) PropertySkip` at line ~1037), but no regression test +/// existed to pin the behaviour. The Mapping property is non-nullable in signature, but runtime +/// data can still contain null — same pattern as / +/// . +/// +[AcBinarySerializable] +public class SGenNullDictionaryParent +{ + public int Id { get; set; } + public Dictionary Mapping { get; set; } = null!; + public string? Note { get; set; } +} diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 9c408bc..542487a 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -656,9 +656,11 @@ public static partial class AcBinarySerializer if (value < 0x80) { if (_position >= _bufferEnd) GrowOne(); + BufferAt(_position++) = (byte)value; return; } + EnsureCapacity(10); WriteVarULongMultiByteUnsafe(value); } @@ -672,6 +674,7 @@ public static partial class AcBinarySerializer BufferAt(_position++) = (byte)value; return; } + WriteVarULongMultiByteUnsafe(value); } @@ -683,6 +686,7 @@ public static partial class AcBinarySerializer BufferAt(_position++) = (byte)(value | 0x80); value >>= 7; } + BufferAt(_position++) = (byte)value; } @@ -719,6 +723,7 @@ public static partial class AcBinarySerializer { Span bits = stackalloc int[4]; decimal.TryGetBits(value, bits, out _); + MemoryMarshal.AsBytes(bits).CopyTo(_buffer.AsSpan(_position, 16)); _position += 16; } @@ -883,7 +888,7 @@ public static partial class AcBinarySerializer #endif // Overflow guard (O7G2) — predict-friendly (always false on realistic input). NoInlining throw helper. - if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); + //if ((uint)charLength > BinaryTypeCode.MaxStringCharLength) ThrowStringTooLong(charLength); // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit. var maxBytes = charLength * 3; diff --git a/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md b/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md new file mode 100644 index 0000000..b99e76d --- /dev/null +++ b/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md @@ -0,0 +1,495 @@ +# Polymorph & SignalR Type-binding Security — Implementation Plan + +Working notes summarizing the design discussion for replacing wire-supplied +`Type.GetType(AQN)` calls with a registry-gated `IAcTypeBinder` mechanism. Covers +the AcBinary polymorph path (`ObjectWithTypeName` marker) and the +`AyCodeBinaryHubProtocol` `HasType` header. Single binder, two consumers. + +> Related: `BINARY_ISSUES.md` (no entry yet — to be opened on landing), +> `../../../AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-sbp-i-r8k3` +> (the originating Critical-severity issue). + +## Motivation + +The wire format currently emits `Type.AssemblyQualifiedName` for polymorph +property values (AcBinary `ObjectWithTypeName` marker) and for the SignalR +data-arg type header (`AyCodeBinaryHubProtocol.WriteHeader` HasType flag). The +deserializer calls `Type.GetType(typeName)` to resolve — wire-supplied input +controls type instantiation, the textbook deserialization-gadget pattern that +caused `BinaryFormatter` deprecation. + +### Threat-model realism + +| Deployment | Realistic risk | +|---|---| +| Authenticated SignalR + TLS + first-party DTOs | Low — attacker must already control wire bytes (= compromised endpoint or TLS-MITM), at which point AQN-injection is the *N*-th problem, not the first. | +| Open hub or unauthenticated endpoint | Higher — any sender can choose AQN, classic gadget attack. | +| NuGet-public-API consumer (unknown deployments) | Unknown — defense-in-depth required as a baseline contract. | + +The current production deployment (authenticated, TLS, first-party graph) sits +firmly in the *low* category. The fix matters anyway because: + +1. **NuGet-public-API context**: `AcBinarySerializer` is planned as a public + NuGet package. Consumer deployments are unknown — security defaults must + handle the worst case. +2. **Industry expectation**: no-controlled-AQN-deserialization is the standard + contract for any serializer post-`BinaryFormatter`. Auditors will flag it + regardless of the current deployment context. +3. **Cost is negligible**: a single dictionary lookup at deserialize-time on a + rare path (polymorph or SignalR HasType). Zero hot-path impact. + +### What this prevents + +- `Type.GetType("System.Diagnostics.Process, ...")` → injection of dangerous + BCL types into the deserialization graph +- `Type.GetType("Attacker.UploadedAssembly.Gadget, ...")` → loading + attacker-deposited assemblies via static-ctor side effects + +### What this does NOT prevent (out of scope — by design) + +- Wire-data value tampering (transport-layer concern: TLS). +- **Structural type-confusion within the allowlist** — closes itself: the SGen + and runtime deserializers do strongly-typed property assignment, so a + wire-supplied subtype that does not satisfy the target's assignability rules + fails with `InvalidCastException` at assign time, regardless of binder + membership. **Semantic** type-confusion (correct base, attacker-preferred + but legitimate subtype with side-effecting setters) is the application's + validation concern, not the serializer's. +- Authenticated insider exploitation of the serialize-graph (auth-layer concern). + +### Auto-feed scope is process-wide + +The implicit `BinaryDeserializeTypeMetadata` ctor-feed produces a **single +process-wide trust set**: the polymorph allowlist becomes the union of every +type that has ever been deserialized in this process, not a per-polymorph-site +filter. For most apps this is the desired behavior (less ceremony, identical +to "what was ever expected on the wire"). + +If a narrower polymorph surface is required (e.g. a specific `object`-typed +property must only ever accept `Dog` / `Cat` and never any other allowed +type), the application disables the implicit feed and seeds the binder only +through explicit `Allow()`. Per-property polymorph-subset enforcement +remains an application-level concern — the binder is a flat allowlist, not a +per-site union-type discriminator. + +## Security tiers — runtime gate to compile-out + +The binder gate is the baseline defense. Two stronger tiers layer on top using +the **same SGen feature-flag mechanism** that already gates the perf features +(interning / ref-handling / metadata) — no new infrastructure. + +| Tier | Mechanism | Where the AQN→Type lookup lives | +|---|---|---| +| **Default** | Binder allowlist gate (auto-feed via deserialize-metadata + explicit `Allow<>()`) | Runtime, in `IAcTypeBinder.TryResolve` | +| **Strict** | Auto-feed disabled, only explicit `Allow<>()` (narrower polymorph surface) | Runtime, in `IAcTypeBinder.TryResolve` (fewer entries) | +| **Paranoid** | `[AcBinarySerializable(enablePolymorphDetectFeature: false)]` on the class — SGen does not emit the polymorph dispatch code at all, AND `ACBIN002` raises a compile error if any `object`-typed property is declared on the type | **Does not exist** — no AQN→Type machinery in the binary | + +The Paranoid tier is built on the **same compile-out mechanism** that powers +the perf-feature flags (`enableIdTrackingFeature`, `enableInternStringFeature`, +`enableRefHandlingFeature` — each strip their respective code paths when set +to `false`). "Pay only for what you use" doubles as "shrink the attack +surface": one flag system, two payoffs. + +Use the Paranoid tier when a class **provably** has no polymorph property and +the developer wants that guarantee at compile time, not just runtime config — +the deserialization code that could ever resolve a wire-supplied AQN simply +does not exist in the assembly for that type. + +### Differentiation vs other binary serializers + +Structural compile-out of polymorph dispatch **is also available** in +MemoryPack (`[MemoryPackUnion]`) and MessagePack-CSharp (`[Union]`) — the +Paranoid tier is not a unique AcBinary capability. What differs is the **shape +of the runtime polymorph support** that the other two tiers (Default / Strict) +provide: + +- **MemoryPack / MessagePack** — rigid compile-time enumeration. The base + class lists every concrete subtype upfront via `[MemoryPackUnion(0, typeof(Cat))]`, + `[MemoryPackUnion(1, typeof(Dog))]`, etc. Adding a new subtype = modifying + the base class's attribute and rebuilding every dependent project. Open + `object`-typed properties are not supported through this mechanism. +- **AcBinary** — runtime allowlist via the binder. `object`-typed properties + are supported, new subtypes can be added by registering them with the + binder (no base-class modification), implicit feed via deserialize-metadata + auto-trusts whatever the application actually deserializes. The cost is + that the security gate is runtime (`binder.TryResolve`), not structurally + absent (as it would be in MemoryPack's tag-on-wire format where no AQN→Type + machinery exists in the first place). + +The future tag-on-wire format (post-W9F1) brings AcBinary's runtime tier +**structurally up to MemoryPack's level** while keeping the open-`object` / +runtime-config / implicit-feed flexibility — that combination is the +differentiator, not compile-out elimination per se. + +## Design — single `IAcTypeBinder`, two layers + +The NuGet `AcBinary` package exposes a small interface. Both the AcBinary +polymorph reader and the `AyCodeBinaryHubProtocol` HasType reader use the same +binder instance — single source of truth. + +### Public NuGet contract + +```csharp +namespace AyCode.Core.Serializers.Binaries; + +public interface IAcTypeBinder +{ + /// Register a type into the trusted set. + void Register(Type type); + + /// + /// Resolve a wire-supplied AQN against the trusted set. Single lookup, no Type.GetType. + /// null or empty AQN returns false (the wire path that emits the marker should never + /// produce an empty string, but the API guards against malformed wire defensively). + /// + bool TryResolve(string? assemblyQualifiedName, out Type type); + + /// Fluent allowlist for types not naturally fed via deserialize-metadata creation. + IAcTypeBinder Allow(); + IAcTypeBinder Allow(Type type); +} + +public sealed class AcTypeBinder : IAcTypeBinder +{ + private readonly ConcurrentDictionary _trusted = new(StringComparer.Ordinal); + + public void Register(Type type) + => _trusted.TryAdd(NormalizeAqn(type.AssemblyQualifiedName!), type); + + public bool TryResolve(string? aqn, out Type type) + { + if (string.IsNullOrEmpty(aqn)) { type = null!; return false; } + return _trusted.TryGetValue(NormalizeAqn(aqn), out type!); + } + + public IAcTypeBinder Allow() { Register(typeof(T)); return this; } + public IAcTypeBinder Allow(Type type) { Register(type); return this; } +} +``` + +### AQN normalization — cross-version rolling deploys + +The binder stores and looks up types under a **normalized AQN** form that strips +`Version=…`, `Culture=…`, `PublicKeyToken=…` segments — both at the outer +assembly reference AND at every embedded assembly reference inside closed +generic type arguments. Result keeps just `"Namespace.TypeName, AssemblyName"` +(recursive for generics). + +```csharp +// Before normalize: +// System.Collections.Generic.List`1[[FruitBank.Common.Dtos.OrderDto, +// FruitBank.Common, Version=1.2.3.0, Culture=neutral, PublicKeyToken=null]], +// System.Private.CoreLib, Version=9.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e +// +// After normalize: +// System.Collections.Generic.List`1[[FruitBank.Common.Dtos.OrderDto, +// FruitBank.Common]], System.Private.CoreLib +``` + +**Why:** without normalization, a NuGet-version-bump or framework upgrade +shifts the AQN string → registry lookup miss → polymorph throw on previously +working wires. Strict version-pinned AQN matching is too brittle for rolling +deploys. The normalize form trades cross-version tolerance for the (extremely +narrow) risk that two assemblies with the same simple-name but different +strong-name keys would collide — practically never in first-party DTO graphs. + +Impl note: a single `Regex.Replace(aqn, ", Version=…, Culture=…, PublicKeyToken=…", "")` +with the precompiled regex against the documented AQN grammar handles both the +outer and the nested-generic cases (the comma-separated segments share the +same form regardless of nesting). + +public class AcBinarySerializerOptions +{ + /// + /// Type-binder for resolving wire-supplied AQN strings (polymorph paths). + /// null (default): polymorph wire-content throws. + /// + public IAcTypeBinder? TypeBinder { get; set; } +} +``` + +### Feed sources + +Three additive sources populate the binder, all merging into the same +`ConcurrentDictionary`: + +| Source | Trigger | What it adds | +|---|---|---| +| **Implicit: deserialize-metadata ctor** | First-time `BinaryDeserializeTypeMetadata` build for a type (cache miss) | The type being prepared for deserialization — has gone through developer-intentional deserialize-graph-walking, so it is trusted | +| **Explicit: `binder.Allow()`** | App startup | Types that won't naturally appear via deserialize-metadata (rare polymorph-only branches, 3rd-party non-attributable types) | +| **Framework helper: `SignalRTagBindings.ExtractTypes`** | App startup | Types declared via `[SignalRTagBinding(typeof(T))]` on SignalR tag constants — feeds into the binder via `Allow` | + +#### Closed-generic types on the wire + +An `object`-typed property may carry a closed generic at runtime +(`List`, `Dictionary`, `OrderDto[]`) — the wire +emits the **closed-generic AQN** (the runtime type's AQN, including +type-argument AQNs). Handling: + +- **Implicit feed covers the typical case**: if the app also deserializes + `List` through a non-polymorph (binder-typed) call site, the + closed generic's `BinaryDeserializeTypeMetadata` ctor fires → auto-registers. + Most real graphs hit this path. +- **Explicit `Allow>()`** for closed generics that **only** appear on + the polymorph path (no non-polymorph deserialize-call ever instantiates + them) — register the exact closed form. +- **Future (v1.x): `AllowOpenGeneric(typeof(List<>))`** — one open-generic + declaration covers every closed form. Implementation: decompose wire-AQN + into open-generic + type-argument-AQNs, validate each separately against + the binder, then `Type.MakeGenericType` on validated components. Safer than + raw `Type.GetType(aqn)` because every composable piece is registry-checked + before instantiation. Tracked as a follow-up — not part of the initial fix. + +#### Why deserialize-only feed (NOT serialize) + +The `BinarySerializeTypeMetadata` ctor does **NOT** feed the binder. Reason: +the serializer side never calls `Type.GetType` — the runtime type is known +locally (`obj.GetType()`). There is no security-relevant lookup to gate. + +Asymmetric trust per process role is the correct model: + +- A **server** that only sends `OrderDto` outward (never deserializes one) does + not need `OrderDto` in its trusted-set. If an attacker injects `OrderDto` + AQN on a client→server wire to the same server, deserialize would also need + to build deserialize-metadata for it — which would either succeed (legitimate + graph) or fail (the server never expected to deserialize that type). +- The "what I deserialize" history is the correct trust signal, not "what I + serialize". + +### Polymorph resolve flow + +```csharp +// AcBinaryDeserializer ObjectWithTypeName handler: +internal static Type ResolvePolymorphType(string aqn, AcBinarySerializerOptions options) +{ + if (options.TypeBinder?.TryResolve(aqn, out var t) == true) + return t; + + throw new AcBinaryDeserializationException( + $"Polymorph type '{aqn}' is not in the trusted binder. Either the type must " + + $"pass through deserialize-metadata creation (e.g. be part of a regular deserialize graph) " + + $"or be explicitly added via TypeBinder.Allow().", + position: -1); +} +``` + +**No `Type.GetType(aqn)` call**: AQN is looked up directly against the +pre-registered dictionary. Side-effects of `Type.GetType` (assembly load, +static-ctor execution) are eliminated from the happy path. + +### Two call-sites — same binder + +| Call-site | Current | New | +|---|---|---| +| `AcBinaryDeserializer` polymorph (`ObjectWithTypeName` marker handler) | `Type.GetType(typeName)` | `options.TypeBinder.TryResolve(typeName)` | +| `AyCodeBinaryHubProtocol.ReadHeader:114` | `Type.GetType(typeName)` | `Options.SerializerOptions.TypeBinder.TryResolve(typeName)` | + +Both layers share the same `IAcTypeBinder` instance via DI: + +```csharp +services.AddSingleton(binder); +services.Configure((opts, sp) => + opts.SerializerOptions.TypeBinder = sp.GetRequiredService()); +``` + +## Framework helper — `SignalRTagBindings` + +> **Note** — the `[SignalRTagBinding(typeof(T))]` attribute pulls double duty: +> it is the binder-feed source today (AQN-on-wire path), AND it is the +> foundation of the future **tag-on-wire** evolution (post-W9F1). The same +> attribute will be the canonical tag→type registry once the wire-format +> switches from AQN string to compact `int` tags. The interim measure +> (`ExtractTypes` feeding the binder) is therefore not throwaway — it is the +> first step of the long-term solution, with the same source-of-truth surface. + + +The `AyCodeBinaryHubProtocol` framework defines an attribute on SignalR tag +constants. A single helper method extracts the referenced types into a list +that the app feeds to the binder. **No separate tag-registry, no separate +lookup mechanism** — the tag-binding attribute is just a convenience source +for binder seeding. + +```csharp +namespace AyCode.Services.SignalRs; + +[AttributeUsage(AttributeTargets.Field)] +public sealed class SignalRTagBindingAttribute : Attribute +{ + public Type ExpectedType { get; } + public SignalRTagBindingAttribute(Type expectedType) => ExpectedType = expectedType; +} + +public static class SignalRTagBindings +{ + /// + /// Reflects out all [SignalRTagBinding(typeof(T))] references from the given tag-container + /// classes (public static int const fields). Returned list seeds an IAcTypeBinder at startup. + /// + public static List ExtractTypes(params Type[] tagContainers) + { + var result = new List(); + foreach (var c in tagContainers) + foreach (var f in c.GetFields(BindingFlags.Public | BindingFlags.Static)) + { + if (f.FieldType != typeof(int) || !f.IsLiteral) continue; + if (f.GetCustomAttribute() is { } b) + result.Add(b.ExpectedType); + } + return result; + } +} +``` + +### Consumer-side attribute usage + +```csharp +public static class SignalRTags +{ + [SignalRTagBinding(typeof(List))] + public const int GetAllOrderDtos = 111; + + [SignalRTagBinding(typeof(List))] + public const int GetPendingOrderDtosForMeasuring = 116; + + // No attribute → no binder feed via this tag. If the type still flows + // through regular deserialize-graph, it's auto-trusted via metadata ctor. +} +``` + +## Configuration + +### Program.cs (DI integration) + +```csharp +// 1. Build one binder, feed multiple sources: +var binder = new AcTypeBinder(); + +// Source A: SignalR tag-bindings extracted from the app's tag-container class(es) +foreach (var t in SignalRTagBindings.ExtractTypes(typeof(SignalRTags))) + binder.Allow(t); + +// Source B: 3rd-party explicit (types that won't naturally appear via +// deserialize-metadata-graph, but need to be polymorph-allowable) +binder.Allow(); + +// 2. Register as singleton — same instance reachable from anywhere via DI: +services.AddSingleton(binder); + +// 3. Wire it into AcBinaryHubProtocol options so the protocol's deserialize path uses it: +services.Configure((opts, sp) => + opts.SerializerOptions.TypeBinder = sp.GetRequiredService()); +``` + +### Standalone (no DI, pure NuGet AcBinary use) + +```csharp +var options = new AcBinarySerializerOptions +{ + TypeBinder = new AcTypeBinder() + .Allow() + .Allow() +}; +var bytes = AcBinarySerializer.Serialize(myDto, options); +var roundTrip = AcBinaryDeserializer.Deserialize(bytes, options); +``` + +For non-polymorph use (no `object`-typed properties, no `ObjectWithTypeName` +marker on the wire), the `TypeBinder` can stay `null` — the path is never +entered, no security gate fires, no behavior change. + +## Performance impact + +| Path | Cost | +|---|---| +| Hot serialize/deserialize (no polymorph) | **Zero** — binder never consulted | +| Polymorph deserialize (rare) | One `ConcurrentDictionary.TryGetValue` + one AQN-normalize regex `Replace` per polymorph wire-AQN occurrence — linear in polymorph-density on the wire, not fixed overhead. Typical graphs have zero or near-zero density; AQN-rich graphs scale proportionally. | +| SignalR HasType wire (per message at most once) | One same-dictionary lookup | +| Binder seeding (startup, once) | Reflection-walk of tag-container classes (~µs) | + +The polymorph path is by definition rare — most application graphs do not +trigger `ObjectWithTypeName` markers. The cost is invisible against the +deserialize work surrounding it. + +## Migration + +### Backward compat + +Default `options.TypeBinder = null` → polymorph wire-content **throws**. +Existing non-polymorph consumers are unaffected (no `ObjectWithTypeName` +markers, no `HasType` flag → no binder lookup → no behavior change). + +Polymorph consumers must configure a binder before deserializing polymorph +wire — explicit opt-in to enable the path. + +### Legacy `Type.GetType(aqn)` fallback (optional, deprecated) + +For migration windows where the binder cannot yet be fully populated, an +opt-in flag re-enables the legacy unsafe path: + +```csharp +public class AcBinarySerializerOptions +{ + /// + /// LEGACY: falls back to Type.GetType(aqn) when TypeBinder lookup misses. + /// UNSAFE — enables RCE-gadget attack surface. Migration shim only. + /// + [Obsolete("Configure TypeBinder explicitly. AllowUnsafeTypeResolve will be removed in v2.0.")] + public bool AllowUnsafeTypeResolve { get; set; } // default: false +} +``` + +Removed in v2.0. No silent fallback at any point — explicit opt-in only. + +## Files affected + +| File | Change | +|---|---| +| `AyCode.Core/Serializers/Binaries/IAcTypeBinder.cs` (new) | Public NuGet interface (~10 lines) | +| `AyCode.Core/Serializers/Binaries/AcTypeBinder.cs` (new) | Default impl (~30 lines) | +| `AyCode.Core/Serializers/Binaries/AcBinarySerializerOptions.cs` | `TypeBinder` property (+3 lines) | +| `AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializeTypeMetadata.cs` ctor | `options.TypeBinder?.Register(targetType);` (+1 line) | +| `AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs` polymorph-path | `Type.GetType` → `options.TypeBinder.TryResolve` (~5 lines) | +| `AyCode.Services/SignalRs/SignalRTagBindingAttribute.cs` (new) | Framework-side attribute (~10 lines) | +| `AyCode.Services/SignalRs/SignalRTagBindings.cs` (new) | `ExtractTypes` helper (~20 lines) | +| `AyCode.Services/SignalRs/AyCodeBinaryHubProtocol.cs` line 110-117 | `Type.GetType` → `Options.SerializerOptions.TypeBinder.TryResolve` (~5 lines) | +| Consumer `SignalRTags.cs` | Add `[SignalRTagBinding(typeof(...))]` per tag (per tag) | +| Consumer `Program.cs` | Build & register binder (~10 lines) | + +Estimated effort: half a day for the core change, plus per-consumer +attribute-decoration time. Low blast radius — all changes in localized files. + +## Cross-references + +- `../../../AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md` — originating `ACCORE-SBP-I-R8K3` issue +- `BINARY_TODO.md#accore-bin-t-w9f1` — compile-time metadata generation (independent perf task; once it lands, SGen ModuleInit can auto-feed the binder via the same `BinaryDeserializeTypeMetadata` ctor path, eliminating most need for explicit `Allow()` calls) +- `BINARY_OPTIONS.md` — for the `TypeBinder` property docs once added + +## Open questions / future work + +- **Tag-based wire format (post-W9F1)**: replace AQN-strings on the wire with + registry-stable type-indices (VarInt). Reduces wire size 50-150× and + eliminates AQN-string parsing entirely. Builds on the **same** registry the + `[SignalRTagBinding]` attribute feeds today — the evolution becomes "switch + resolve from name to index" rather than new infrastructure. Wire-format + breaking change — schedule with another wire-format evolution. +- **Open-generic allowlist (`AllowOpenGeneric(typeof(List<>))`)**: covers all + closed-form `List` AQN occurrences with a single declaration, decomposes + the wire-AQN into open-generic + type-argument-AQNs (each registry-checked + separately), then `Type.MakeGenericType` on validated components. Safer + than raw `Type.GetType(aqn)` because every piece is allowlisted. +- **Diagnostic logging**: optional `ILogger`-backed audit of binder + resolve-failures — production-deployments may want to track attempted + AQN-injections. +- **`AllowUnsafeTypeResolve` v2.0 removal timing**: schedule with the wire-format + evolution that retires AQN-on-wire. + +### Rejected directions + +- **Default deny-list bundled in NuGet** (BCL-gadget pre-block): redundant + alongside an exhaustive binder allowlist. The polymorph path is the only + AQN→object route, and the binder's contents are fully developer-controlled — + a deny-list either duplicates the allowlist's negation (useless) or attempts + to enumerate dangerous types (inherently incomplete, false-security signal). + The allowlist itself is the real defense; a deny-list would only matter on + a misconfigured (permissive) allowlist, which would be a deployment bug to + fix directly. diff --git a/AyCode.Core/docs/BINARY/BINARY_SGEN.md b/AyCode.Core/docs/BINARY/BINARY_SGEN.md index 8b3bf27..5ceab14 100644 --- a/AyCode.Core/docs/BINARY/BINARY_SGEN.md +++ b/AyCode.Core/docs/BINARY/BINARY_SGEN.md @@ -87,7 +87,18 @@ Located in `AcBinarySerializer.cs` region "Generated Writer Bridge Methods". Cal | `WriteStringGenerated(value, ctx)` | SGen writes string property | null → PropertySkip, empty → StringEmpty, else → `WriteString` (with interning) | | `ScanValueGenerated(value, type, ctx, depth)` | SGen scan encounters non-SGen child | → runtime `ScanValue` for reference/string tracking | -> **✱ Caller contract — null-check required.** The `WriteObjectGenerated` bridges assume non-null `value` and dereference via `Unsafe.As` inside the target's `WriteProperties`. All four SGen emit branches (`EmitDirectObjectWrite`, `EmitDirectCollectionWrite`, `IsObjectDeclaredType`, and the fallback `WriteObjectGenerated` path) MUST emit `if ({a} == null) WriteByte(BinaryTypeCode.PropertySkip);` before calling — runtime null is possible regardless of the nullable annotation (EF lazy-load, projection gaps, detached navigation properties, cross-assembly types without compile-time nullable context). The reader-side companion `EmitReadProperty` already wraps every markered property in `if (tc != PropertySkip) { … }`, so the marker is uniformly handled. The parity bug that motivated this contract is documented in [`BINARY_TODO.md#accore-bin-t-n4p8`](BINARY_TODO.md#accore-bin-t-n4p8). `WriteStringGenerated`, by contrast, **internalizes** the null-check (one of the bridge's responsibilities) — caller emit is shorter on the string ag. +> **✱ Caller contract — null-check required.** The `WriteObjectGenerated` and `WriteValueGenerated` bridges assume non-null `value` and dereference via `Unsafe.As` (or compiled-lambda property-walk) inside the target's `WriteProperties` / `WriteValueNonPrimitive`. All SGen reference-property emit branches in `GenWriter.cs` MUST emit `if ({a} == null) WriteByte(BinaryTypeCode.PropertySkip);` before calling, across every `PropertyTypeKind`: +> +> | Branch | Location | Null-check pattern | +> |---|---|---| +> | `PropertyTypeKind.Complex` → `EmitDirectObjectWrite` | line ~828 | `if ({a} == null) PropertySkip; else …` (always) | +> | `PropertyTypeKind.Complex` → `IsObjectDeclaredType` (System.Object property) | line ~295 | `if ({a} == null) PropertySkip; else …` (always) | +> | `PropertyTypeKind.Complex` → fallback `WriteObjectGenerated` | line ~309 | `if ({a} == null) PropertySkip; else WriteObjectGenerated(…)` (always — fixed in [`N4P8`](BINARY_TODO.md#accore-bin-t-n4p8)) | +> | `PropertyTypeKind.Collection` → `EmitDirectCollectionWrite` | line ~877 | `if ({a} == null) PropertySkip; else …` (always) | +> | `PropertyTypeKind.Collection` → fallback `WriteValueGenerated` | line ~321 | `if ({a} == null) PropertySkip; else WriteValueGenerated(…)` (always — fixed in [`N4P8` follow-up](BINARY_TODO.md#accore-bin-t-n4p8)) | +> | `PropertyTypeKind.Dictionary` → `EmitDirectDictionaryWrite` | line ~1037 | `if ({a} == null) PropertySkip; else …` (always) | +> +> Runtime null is possible regardless of the nullable annotation (EF lazy-load, projection gaps, detached navigation properties, missing collection initializers, cross-assembly types without compile-time nullable context). The reader-side companion `EmitReadProperty` (GenReader.cs line ~137) already wraps every markered property in `if (tc != PropertySkip) { … }`, so the `PropertySkip` marker is uniformly handled across all `PropertyTypeKind`s. `WriteStringGenerated`, by contrast, **internalizes** the null-check (one of the bridge's responsibilities) — caller emit is shorter on the string ag. ## Wrapper Slot System diff --git a/AyCode.Core/docs/BINARY/BINARY_STRICT_SGEN.md b/AyCode.Core/docs/BINARY/BINARY_STRICT_SGEN.md index 7c8680a..68c5785 100644 --- a/AyCode.Core/docs/BINARY/BINARY_STRICT_SGEN.md +++ b/AyCode.Core/docs/BINARY/BINARY_STRICT_SGEN.md @@ -109,3 +109,4 @@ The analyzer in Phase 4 enforces the "fully decorated graph" property on Strict - `BINARY_ISSUES.md#accore-bin-i-n6q3` — cold-start cost chain (Phase 3 addresses this directly) - `BINARY_TODO.md#accore-bin-t-w9f1` — compile-time metadata generation (a precondition for Phase 3 — eliminates `Expression.Compile` cold-start) - `BINARY_TODO.md#accore-bin-t-t5j8` — JIT Tier-1 warmup (residual cold-start after Phase 3) +- `BINARY_TODO.md#accore-bin-t-n4p8` — Hybrid-side reference-property null-check parity fix (2026-05-23). Phase 2 collapsing structurally eliminates this regression class: the fallback `WriteObjectGenerated` ag (which the bug lived on) does not emit in Strict mode — every Complex property routes through `EmitDirectObjectWrite` because all child types are guaranteed-decorated. diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index ef7a102..591aebf 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -38,6 +38,26 @@ All four reference-property emit branches now share the same defensive null-chec - ✅ Regression tests added in `AcBinarySerializerSGenNullComplexPropertyTests.cs`: 8 test cases (2 methods × 4 DataRows = SGen/Reflection × FastMode/Default), covering both null-property and non-null-property paths with marker-byte verification (asserts `BinaryTypeCode.PropertySkip` presence in the wire-output for the null slot — confirms the fix took the PropertySkip path, not an unrelated null-safe code path). - 🟢 Discovery vector: the companion CHUNK_ABORT fault-isolation feature ([`ACCORE-SBP-T-Q3M7`](../../../AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_TODO.md#accore-sbp-t-q3m7)) was developed in response to the production NRE, which in turn exposed this unprotected emit branch. With this fix, CHUNK_ABORT becomes a genuine safety-net for unrelated failures rather than a workaround for a self-inflicted serializer bug. +### Follow-up (2026-05-23): Collection-ag parity fix + +After the Complex-branch fix landed, a second production CHUNK_ABORT surfaced from the same call chain (`OrderItemPallet → OrderItemDto → OrderDto → ...`), but the stack-trace now pointed to a *different* property — `OrderDto.OrderNotes` (a `List` with cross-assembly element type, no `[AcBinarySerializable]`) — and the NRE happened inside a `lambda_method` (`Expression.Compile`-d runtime accessor) reached via `AcBinarySerializer.WriteValueGenerated`. Re-audit of `GenWriter.cs` revealed **the exact same parity bug, but on the `PropertyTypeKind.Collection` branch** (~line 321): the `else if (p.IsNullable) … else …` shape, where the `else` (non-nullable signature) emitted `WriteValueGenerated({a}, …)` without a null guard. Production `OrderNotes` was non-nullable in signature, null at runtime (NopCommerce convention: no initializer). + +**Audit completed** across all four `PropertyTypeKind` emit branches in `GenWriter.cs`: + +- ✅ `PropertyTypeKind.Complex` (line ~282) — fixed in the original N4P8 above. +- ✅ `PropertyTypeKind.Collection` (line ~321) — fixed now: `else if (p.IsNullable) … else …` collapsed into one always-null-check branch; element-side fallback (`WriteValueGenerated`) now sees only non-null collections. +- ✅ `PropertyTypeKind.Dictionary` (line ~333) — unconditionally routes through `EmitDirectDictionaryWrite` (line ~1031), which already null-checks at line ~1037 with the same explicit comment. +- ✅ `default` (`EmitSkip`) — value-type primitives, no null possible. + +The companion `WriteValueGenerated` bridge in `AcBinarySerializer.cs` shares the non-null contract with `WriteObjectGenerated`; both assume the caller emitted a null-check. The four-branch parity is now complete and verified by code inspection. + +**Regression tests extended** in `AcBinarySerializerSGenNullComplexPropertyTests.cs` with four additional methods covering both new PropertyTypeKinds: + +- `Serialize_SGenCollectionPropertyNull_DoesNotThrow_AndRoundTripsAsNull` + `…NonNull_RoundTripsCorrectly` — Collection-ag regression (the actual bug-fix coverage). +- `Serialize_SGenDictionaryPropertyNull_DoesNotThrow_AndRoundTripsAsNull` + `…NonNull_RoundTripsCorrectly` — Dictionary-ag pin (already null-safe at line ~1037, but previously had no regression test to lock that behaviour in against future refactors). + +Each method uses the same 4-row `(useSgen, fastMode)` parameterization plus a marker-byte verification (`Assert.IsTrue(Array.IndexOf(bytes, BinaryTypeCode.PropertySkip) >= 0, …)`) on the null tests, confirming the writer took the PropertySkip path rather than an unrelated null-safe branch. **Total coverage: 24 test cases** (6 methods × 4 rows = Complex/Collection/Dictionary × null/non-null × SGen/Reflection × FastMode/Default). New models `SGenNullCollectionParent` and `SGenNullDictionaryParent` in `SGenNullComplexPropertyModels.cs` mirror the production shapes (non-nullable `List` / `Dictionary`). + ## ACCORE-BIN-T-P6M4: Universal hotpath optimization guardrails + follow-up backlog **Priority:** P1 · **Type:** Performance @@ -500,6 +520,8 @@ Today's `BufferWriterBinaryOutput` standalone-mode partly fills this gap — exp ## ACCORE-BIN-T-U6Y8: Attribute-driven polymorphism via `[AcBinaryUnion]` + SGen (opt-in, AOT-friendly) **Priority:** P1 (if AOT target required) / P2 (non-AOT only) · **Type:** Feature +**Related security work:** the runtime polymorphism path (AQN-based, the default this TODO keeps) is gated by `IAcTypeBinder` per the consolidated plan in [`BINARY_AQN_IMPLEMENTATION.md`](BINARY_AQN_IMPLEMENTATION.md). The `[AcBinaryUnion]` SGen path proposed here bypasses `Type.GetType` entirely (closed `switch (tag)` dispatch in code-gen), so it is AOT-friendly **and** inherently security-clean (no wire-supplied type-name resolution at all). The two TODOs are complementary, not overlapping — U6Y8 is the fast/AOT alternative; the binder secures the legacy AQN path. + **Design philosophy alignment:** AcBinary's market positioning is "JSON-style flexibility with MessagePack-class speed" — attributes are **opt-in optimization**, never required. The runtime polymorphism path (AQN-based, today's default) **stays the default** and continues to work for arbitrary unattributed types. This TODO adds a fast/AOT path **alongside** it, never replaces it. AcBinary today handles polymorphism at runtime: the wire writes `ObjectWithTypeName(72)` + AQN string, and the deserializer calls `Type.GetType(aqn)` to resolve. This is **flexible** (no upfront declaration), but has three significant drawbacks for some consumers: diff --git a/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md index 13f068e..6fc5c4a 100644 --- a/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md +++ b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_ISSUES.md @@ -81,4 +81,8 @@ Originally the header type-name was needed because Hub method dispatch uses `IIn ### Related TODO -(To be opened once a fix direction is chosen — likely whitelist-based first, tag-based as a follow-up wire-format evolution.) +- [`SIGNALR_BINARY_PROTOCOL_TODO.md#accore-sbp-t-s2h6`](SIGNALR_BINARY_PROTOCOL_TODO.md#accore-sbp-t-s2h6-replace-typegettypewire-supplied-name-with-whitelist-based-type-resolution) — implementation tracking (P0) + +### Planned solution + +See [`../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md`](../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md) — design plan that consolidates the discussion: single `IAcTypeBinder` shared between AcBinary polymorph and `AyCodeBinaryHubProtocol` HasType paths, implicit trust via `BinaryDeserializeTypeMetadata` ctor-feed, explicit `Allow()` allowlist for non-SGen 3rd-party polymorph subtypes, framework helper `SignalRTagBindings.ExtractTypes` for bulk-seeding from `[SignalRTagBinding]` attributes. diff --git a/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_TODO.md b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_TODO.md index 6cb53de..e2bd91c 100644 --- a/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_TODO.md +++ b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_TODO.md @@ -235,9 +235,9 @@ In `AsyncSegment` mode the total message size is unknown until `CHUNK_END`, so a - Stack-with-encryption order decision documented (industry standard: **encrypt-then-MAC**, but evaluate trade-offs). ## ACCORE-SBP-T-S2H6: Replace `Type.GetType(wire-supplied-name)` with whitelist-based type resolution -**Priority:** P0 — security blocker · **Type:** Security fix · **Related Issue:** [`SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-sbp-i-r8k3`](SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-sbp-i-r8k3-typegettypewire-supplied-name-enables-deserialization-gadget-attack) (full threat model, attack surface, and mitigation analysis there — **do not duplicate here**) +**Priority:** P0 — security blocker · **Type:** Security fix · **Related Issue:** [`SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-sbp-i-r8k3`](SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-sbp-i-r8k3-typegettypewire-supplied-name-enables-deserialization-gadget-attack) (full threat model, attack surface, and mitigation analysis there — **do not duplicate here**) · **Planned solution:** [`../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md`](../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_AQN_IMPLEMENTATION.md) (consolidated design plan — single `IAcTypeBinder` shared with the AcBinary polymorph path, implicit trust via metadata ctor-feed, framework helper for `[SignalRTagBinding]` bulk-seed) -Replace the unsafe `Type.GetType(typeName)` lookup in `AyCodeBinaryHubProtocol`'s argument-binder header parsing with a registry-validated lookup. The deserialization-gadget attack surface this fix closes, the rationale for why the type-resolution mechanism is needed at all, and the four mitigation directions are covered in the issue — **this TODO tracks the chosen implementation path only**. +Replace the unsafe `Type.GetType(typeName)` lookup in `AyCodeBinaryHubProtocol`'s argument-binder header parsing with a registry-validated lookup. The deserialization-gadget attack surface this fix closes, the rationale for why the type-resolution mechanism is needed at all, and the four mitigation directions are covered in the issue — **this TODO tracks the chosen implementation path only**. The consolidated design plan in `BINARY_AQN_IMPLEMENTATION.md` selects the whitelist-registry direction and ties it together with the AcBinary polymorph path (single binder, two call-sites) — the implementation steps below align with that plan. ### Implementation steps