From 6f5c57af6ac6dc905d10e2b6f1d948e8b5b8b0ad Mon Sep 17 00:00:00 2001 From: Loretta Date: Thu, 30 Apr 2026 06:53:59 +0200 Subject: [PATCH] [LOADED_DOCS: 3 files, no new loads] Benchmark: multi-sample median timing & EH inlining docs Added BenchmarkSamples for multi-sample median timing in benchmarks, reducing variance and improving result stability. Updated output to show sample count. Refactored RunTimed to support multiple samples. Expanded documentation on JIT inlining barriers: clarified that EH regions (try/catch/finally/using) in hot-path and generated methods block inlining on .NET 9, and provided guidance for future generator features and stackalloc usage. Added audit requirements for EH and stackalloc in hot paths. --- AyCode.Core.Serializers.Console/Program.cs | 40 +++++++++++++++---- .../docs/BINARY/BINARY_IMPLEMENTATION.md | 8 ++++ AyCode.Core/docs/BINARY/BINARY_SGEN.md | 17 ++++++++ AyCode.Core/docs/BINARY/BINARY_TODO.md | 5 +++ 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs index 5c80008..40da481 100644 --- a/AyCode.Core.Serializers.Console/Program.cs +++ b/AyCode.Core.Serializers.Console/Program.cs @@ -50,9 +50,11 @@ public static class Program #if DEBUG private static int WarmupIterations = 0; private static int TestIterations = 1; + private static int BenchmarkSamples = 1; // Debug: single sample, fast iteration #else private static int WarmupIterations = 5000; private static int TestIterations = 1000; + private static int BenchmarkSamples = 5; // Release: 5-sample median for stability (~±5% variance vs. 
~±15% single-sample) //private static int WarmupIterations = 5000; //private static int TestIterations = 2000; @@ -69,6 +71,7 @@ public static class Program { WarmupIterations = 5; TestIterations = 100; + BenchmarkSamples = 3; mode = "all"; } @@ -85,7 +88,7 @@ public static class Program var allResults = new List(); var testDataSets = BenchmarkTestDataProvider.CreateTestDataSets(); - System.Console.WriteLine($"Mode: {mode} | Iterations: {TestIterations} | Warmup: {WarmupIterations}"); + System.Console.WriteLine($"Mode: {mode} | Iterations: {TestIterations} | Warmup: {WarmupIterations} | Samples: {BenchmarkSamples} (median)"); System.Console.WriteLine($"Build: {BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"}"); System.Console.WriteLine(); @@ -253,15 +256,37 @@ public static class Program }; } + /// <summary> + /// Runs the action <paramref name="iterations"/> times for <see cref="BenchmarkSamples"/> independent samples, + /// returning the median elapsed time. Multi-sample design reduces single-run variance from ~±15% to ~±5% + /// by smoothing transient effects (background activity, thermal/turbo state, JIT tier-promotion timing). + /// When <see cref="BenchmarkSamples"/> &lt;= 1, falls back to single-sample timing (Debug / quick mode). + /// </summary> private static double RunTimed(Action action, int iterations) { - var sw = Stopwatch.StartNew(); - for (var i = 0; i < iterations; i++) + var samples = BenchmarkSamples; + if (samples <= 1) { - action(); + // Single-sample fast path (Debug or trivial run) — no allocation, no sort. 
+ var sw = Stopwatch.StartNew(); + for (var i = 0; i < iterations; i++) action(); + sw.Stop(); + return sw.Elapsed.TotalMilliseconds; } - sw.Stop(); - return sw.Elapsed.TotalMilliseconds; + + var times = new double[samples]; + for (int s = 0; s < samples; s++) + { + var sw = Stopwatch.StartNew(); + for (var i = 0; i < iterations; i++) action(); + sw.Stop(); + times[s] = sw.Elapsed.TotalMilliseconds; + } + Array.Sort(times); + // Median: middle value for odd sample counts, average of two middles for even counts. + return samples % 2 == 1 + ? times[samples / 2] + : (times[samples / 2 - 1] + times[samples / 2]) / 2.0; } #endregion @@ -747,6 +772,7 @@ public static class Program sb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║"); sb.AppendLine($"║ Build: {BuildConfiguration}".PadRight(100) + "║"); sb.AppendLine($"║ Iterations: {TestIterations}".PadRight(100) + "║"); + sb.AppendLine($"║ Samples: {BenchmarkSamples} (median)".PadRight(100) + "║"); sb.AppendLine($"║ Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"}".PadRight(100) + "║"); sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝"); sb.AppendLine(); @@ -874,7 +900,7 @@ public static class Program var sb = new StringBuilder(); var testTypeName = testDataSets.FirstOrDefault()?.TypeName ?? 
"unknown"; sb.AppendLine($"# AcBinary Benchmark {BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); - sb.AppendLine($"Iterations: {TestIterations} | Warmup: {WarmupIterations} | .NET: {Environment.Version} | TestType: {testTypeName}"); + sb.AppendLine($"Iterations: {TestIterations} | Warmup: {WarmupIterations} | Samples: {BenchmarkSamples} (median) | .NET: {Environment.Version} | TestType: {testTypeName}"); // Options summary var optionsMap = results diff --git a/AyCode.Core/docs/BINARY/BINARY_IMPLEMENTATION.md b/AyCode.Core/docs/BINARY/BINARY_IMPLEMENTATION.md index 8ee2ef3..eecd6c7 100644 --- a/AyCode.Core/docs/BINARY/BINARY_IMPLEMENTATION.md +++ b/AyCode.Core/docs/BINARY/BINARY_IMPLEMENTATION.md @@ -115,6 +115,14 @@ Two-phase: - **Cold:** multi-byte logic in separate `NoInlining` method (e.g. `WriteVarUIntMultiByteUnsafe`) - Keeps caller IL small, cache-friendly +**Inlining barriers — `[MethodImpl(AggressiveInlining)]` is silently ignored when:** + +- **`try` / `catch` / `finally` / `using`** — any EH region in the method blocks inlining; this is a hard JIT rule (`inline.cpp` in CoreCLR). `using` statements desugar to `try/finally` and have the same effect. Move resource cleanup (`Pool.Return`, `ArrayPool.Return`, `Dispose`) into a separate cold method or keep the cleanup outside the hot caller. The Pool.Get → try/finally → Pool.Return pattern (Rule #5) is fine because it sits at the entry point of `Serialize`, not on a per-property hot path. +- **`stackalloc` with non-constant or large size** — only a small fixed-size `stackalloc` (≤ 32 bytes, outside loops) is inlinable; anything larger, variable-sized, or loop-nested blocks inlining by itself, and the moment any other barrier (try/finally, complex control flow) is added the method becomes non-inlinable regardless of size. When mixing `stackalloc` with `try/finally` (e.g. ArrayPool fallback + scratch buffer), expect the helper to always be a separate call frame — design accordingly (avoid inline-only assumptions in the caller). 
+- **Method size / IL token count** — the JIT has IL-size and basic-block thresholds even with `AggressiveInlining`. For large generated methods (SGen `WriteProperties` for property-heavy types) the attribute is a hint, not a guarantee; see `BINARY_TODO.md#accore-bin-t-t5j8` for `AggressiveOptimization` as a complementary tool. + +When adding a new helper to the hot path: read the method first for any of the above before placing `[MethodImpl(AggressiveInlining)]` on it. The attribute lies if the body has an EH region — silently. + ### 4. SGen Root Fast Path **Rule:** Root-level SGen types MUST skip `WriteValue`/`TryWritePrimitive`/`WriteValueNonPrimitive` dispatch chain. diff --git a/AyCode.Core/docs/BINARY/BINARY_SGEN.md b/AyCode.Core/docs/BINARY/BINARY_SGEN.md index 714fd6b..e84cac1 100644 --- a/AyCode.Core/docs/BINARY/BINARY_SGEN.md +++ b/AyCode.Core/docs/BINARY/BINARY_SGEN.md @@ -134,6 +134,23 @@ void ScanObject(object value, BinarySerializationContext ctx, } ``` +## SGen Output Constraints + +Design rules for anyone modifying `AcBinarySourceGenerator.cs`. Violating these silently regresses the SGen hot path. + +**No EH regions in generated hot methods.** Generated `WriteProperties` / `ScanObject` / `ScanForDuplicates` / `ReadObject` / `ReadProperties` MUST NOT emit `try`, `catch`, `finally`, or `using` blocks. On **.NET 9 (project's minimum target)**, the CoreCLR JIT refuses to inline any method containing an EH region — `AggressiveInlining` is silently ignored, and the SGen Root Fast Path collapses back to a regular call frame at every property write. .NET 10 partially lifts this for same-module try-finally ([`dotnet/runtime#112998`](https://github.com/dotnet/runtime/pull/112998), merged 2025-03-20); however, this is **not yet our minimum runtime**, and even on .NET 10+ `catch` / cross-module / P/Invoke-stub cases remain blocked. 
Treat this as a hard rule for the SGen output regardless of runtime — the generated method must work on .NET 9 today AND keep its inline-friendliness on .NET 10+ tomorrow. + +**Future-feature trap.** When adding generator features that *seem* to need EH: + +- `[CustomSerializer]` / `[CustomDeserializer]` attribute hooks → wrap user code in a cold helper called from the generated method, not in a `try` block inline. +- `OnSerializing` / `OnDeserialized` lifecycle callbacks → cold helper. +- Validation attributes (`[Validate]`, `[Required]`) → cold helper that throws; the generated method calls it without try/catch. +- Rented-buffer cleanup (`using var pooled = ...`) → keep the `using` in the entry frame (`Serialize`), never in the generated `WriteProperties`. + +**Straight-line rule.** Generated hot methods are: `Unsafe.As` cast → null/depth check → property write or bridge call → repeat. No exception handling, no resource ownership, no early-return cleanup. Resource ownership lives at `Serialize` / `Deserialize` entry, not per-property. + +See `BINARY_IMPLEMENTATION.md` Rule #3 (Inlining barriers) for the JIT-level rationale and `BINARY_TODO.md#accore-bin-t-t5j8` for the audit task that enforces this on the existing generator output. + ## Object Marker Bridge — Metadata Caching `WriteObjectFullMarkerIId` / `WriteObjectFullMarkerAll` in `PropertyWriters.cs`: when `UseMetadata=true`, `GetWrapper` result and `wrapper.Metadata` are cached in a local variable at the method entry. This avoids redundant `GetWrapper` + `value.GetType()` calls in the ref-handling and non-ref branches. 
diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index 8d63b79..3525f47 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -57,6 +57,11 @@ After ACCORE-BIN-T-W9F1 lands, JIT of generated `WriteProperties` / `ScanObject` - **Background prewarm from `ModuleInit`**: `Task.Run(() => RuntimeHelpers.PrepareMethod(handle))` for each registered writer/reader method. Parallelizes JIT with app startup. Keep it opt-in (option flag) to avoid surprising consumers with extra startup threads. - **ReadyToRun (R2R)** in consuming projects' publish config — pre-compiles IL to native at publish time. External to SGen, complementary. Document as a recommended publish setting. - **Code chunking** (split generated methods exceeding a property threshold into sub-methods, e.g. `WriteProperties_Part1` / `_Part2`) — **measure first**. Only beneficial for unusually large types (20+ properties / nested collections). Call overhead can offset gains; JIT inliner may already handle reasonably-sized methods well. +- **`try` / `finally` audit on hot path** — On **.NET 9 (project's minimum target)**, JIT silently refuses to inline any method containing an EH region (`AggressiveInlining` is ignored). [.NET 10 partially lifts this for same-module try-finally — see [`dotnet/runtime#112998`](https://github.com/dotnet/runtime/pull/112998), merged 2025-03-20 — but `catch`, cross-module, and P/Invoke-stub cases stay blocked. Until project's minimum runtime moves to .NET 10, treat EH as an absolute inlining barrier; even after the upgrade, several sub-cases keep the rule.] Audit scope: + - **Hand-written bridges**: `WriteValueGenerated` / `WriteObjectGenerated` / `WriteStringGenerated` / `ScanValueGenerated` and any helper called from generated `WriteProperties` for accidental `try/finally` / `using` blocks. 
+ - **SGen output template** (`AcBinarySourceGenerator.cs`): generated `WriteProperties` / `ScanObject` / `ScanForDuplicates` / `ReadObject` / `ReadProperties` MUST stay straight-line. Future feature additions (`[CustomSerializer]` / `[CustomDeserializer]` hooks, `OnSerializing` / `OnDeserialized` callbacks, validation attributes, rented-buffer `using` blocks) are tempting candidates for `try/catch/finally` — emit them in separate cold helpers, never inline into the generated hot method. A single accidental `try` block in `WriteProperties` makes the whole generated method non-inlinable, killing the SGen Root Fast Path benefit. + - Resource cleanup (Pool/ArrayPool/Dispose) belongs in `Serialize` entry-frame only, not in per-property helpers or generated hot methods. See `BINARY_IMPLEMENTATION.md` Rule #3 (Inlining barriers) and `BINARY_SGEN.md` (SGen Output Constraints). +- **`stackalloc` size discipline on hot path** — Historically, any method containing `localloc` (any C# `stackalloc`) blocked inlining. Modern .NET (including **.NET 9**, the project's minimum target) allows inlining only for **fixed-size `stackalloc` ≤ 32 bytes outside loops** (see [`dotnet/runtime#7113`](https://github.com/dotnet/runtime/issues/7113)) — anything larger, variable-sized, or loop-nested still blocks inlining. Our typical scratch-buffer patterns (UTF-8 encoding scratch, ArrayPool fallbacks) sit far above 32 bytes (256+), so any helper containing such a `stackalloc` is non-inlinable. Combined with `try/finally` for `ArrayPool.Return` cleanup, the method is **doubly non-inlinable** on .NET 9. Plan accordingly: keep `stackalloc`-using helpers as deliberate cold call-frames, not as `AggressiveInlining` candidates. - **Native AOT** — out of scope for this TODO; separate architectural decision with deployment-model implications. **Acceptance:**