diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 740a5cb..f9527aa 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -74,7 +74,8 @@ "Bash(where dotnet-trace *)", "Bash(dotnet tool *)", "Bash(dotnet-trace convert *)", - "Bash(find ~/.nuget/packages/memorypack* -name \"*.cs\" 2>/dev/null | head -5; find /mnt/c/Users/Fullepi/.nuget/packages/memorypack* -name \"MemoryPackSerializer*.cs\" 2>/dev/null | head -5)" + "Bash(find ~/.nuget/packages/memorypack* -name \"*.cs\" 2>/dev/null | head -5; find /mnt/c/Users/Fullepi/.nuget/packages/memorypack* -name \"MemoryPackSerializer*.cs\" 2>/dev/null | head -5)", + "PowerShell($path = \"H:\\\\Applications\\\\Aycode\\\\Source\\\\AyCode.Core\\\\AyCode.Core.Serializers.Console\\\\Program.cs\"; $c = [IO.File]::ReadAllText\\($path\\); $c = $c -replace 'MeasureAllocationTotal', 'BenchmarkLoop.MeasureAllocationTotal'; $c = $c -replace 'MeasureAllocation\\\\\\(', 'BenchmarkLoop.MeasureAllocation\\('; $c = $c -replace 'ForceGcCollect\\\\\\(', 'BenchmarkLoop.ForceGcCollect\\('; $c = $c -replace 'CalibrateIterations\\\\\\(', 'BenchmarkLoop.CalibrateIterations\\('; $c = $c -replace 'RunTimed\\\\\\(', 'BenchmarkLoop.RunTimed\\('; $c = $c -replace 'DeepEqualsViaJson\\\\\\(', 'BenchmarkLoop.DeepEqualsViaJson\\('; $c = $c -replace 'ValidateMemoryPackSetup\\\\\\(', 'BenchmarkLoop.ValidateMemoryPackSetup\\('; $c = $c -replace 'FilterByLayer\\\\\\(', 'BenchmarkLoop.FilterByLayer\\('; [IO.File]::WriteAllText\\($path, $c\\); Write-Output \"OK new length: $\\($c.Length\\)\")" ] } } diff --git a/AyCode.Core.Serializers.Console/BenchmarkLoop.cs b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs new file mode 100644 index 0000000..4e41e66 --- /dev/null +++ b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs @@ -0,0 +1,366 @@ +using AyCode.Core.Tests.TestModels; +using MemoryPack; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Text.Json; + +namespace AyCode.Core.Serializers.Console; + +/// +/// Benchmark execution helpers: timing (), per-cell adaptive iteration +/// calibration (), allocation measurement +/// ( + ), in-place +/// \r-progress reporting, full-GC phase-boundary helper (), +/// startup validation (), and per-cell round-trip equality +/// (). Pure benchmark-execution infrastructure — no display +/// formatting (that lives in Output) and no per-engine glue (which lives with the +/// individual ISerializerBenchmark implementations). +/// +internal static class BenchmarkLoop +{ + /// + /// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain + /// in between: the first pass moves managed garbage to the finalization queue, WaitForPendingFinalizers + /// runs the finalizers, the second pass reclaims any objects the finalizers released. After this returns the + /// heap is in a known-quiescent state — the next warmup/measurement phase starts on a clean slate, isolated + /// from the previous phase's residual allocations (write-buffer pools, intern cache, write-plan arrays, etc.). + /// Called between every Ser-phase / Des-phase boundary in RunBenchmarksForTestData. + /// + [MethodImpl(MethodImplOptions.NoInlining)] + internal static void ForceGcCollect() + { + GC.Collect(2, GCCollectionMode.Forced, blocking: true); + GC.WaitForPendingFinalizers(); + GC.Collect(2, GCCollectionMode.Forced, blocking: true); + } + + /// + /// Runs the action times for independent samples, + /// returning the median, min, and max elapsed time. Multi-sample design reduces single-run variance + /// from ~±15% to ~±5% by smoothing transient effects (background activity, thermal/turbo state). + /// When <= 1, falls back to single-sample timing (Debug / quick mode). + /// When is non-null, emits in-place \r progress updates so a + /// stuck benchmark (e.g. deadlocked NamedPipe row) is visibly stuck at a specific %% rather than + /// silently hanging. + /// + /// Stabilization (added 2026-05-07): + /// 1) Pilot sample is run BEFORE the recorded loop and discarded. The first measurement after + /// warmup tends to absorb residual JIT bookkeeping and GC bookkeeping; dropping it tightens + /// the min/max range without throwing away signal (the median is the SAME data as before). + /// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs BEFORE every recorded sample. + /// Without this, GC pressure from sample N occasionally triggered a Gen-2 pause inside + /// sample N+1, painting it as an outlier; collecting up-front gives every sample the + /// same starting heap shape. + /// 3) Returns (median, min, max) so the caller can surface the inter-sample range — visible + /// noise floor for the row, replacing the previous "median only" view. + /// + internal static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null) + { + var samples = Configuration.BenchmarkSamples; + if (samples <= 1) + { + // Single-sample fast path (Debug or trivial run) — no allocation, no sort, no stddev. + var sw = Stopwatch.StartNew(); + RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); + sw.Stop(); + var ms = sw.Elapsed.TotalMilliseconds; + EndProgress(progressLabel, ms); + return (ms, ms, ms, 0); + } + + // Pilot sample (discarded). Counts as sample index 0 of (samples + 1) for progress display + // so the user sees an extra "warmup-ish" tick before the recorded samples start. + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + var pilotSw = Stopwatch.StartNew(); + RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0); + pilotSw.Stop(); + // intentionally not stored + + var times = new double[samples]; + for (var s = 0; s < samples; s++) + { + // Per-sample GC settle. Forces every sample to start from the same heap state, so + // a Gen-2 pause caused by the previous sample doesn't bleed into the next sample's + // timing. Cost is paid OUTSIDE the Stopwatch window — no impact on the measurement. + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + var sw = Stopwatch.StartNew(); + RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1); + sw.Stop(); + times[s] = sw.Elapsed.TotalMilliseconds; + } + + // Capture min/max/sum/sumSq BEFORE sort to avoid order ambiguity (Array.Sort is in-place). + var minMs = double.MaxValue; + var maxMs = double.MinValue; + var sum = 0.0; + var sumSq = 0.0; + + for (var i = 0; i < times.Length; i++) + { + var t = times[i]; + sum += t; + sumSq += t * t; + if (t < minMs) minMs = t; + if (t > maxMs) maxMs = t; + } + // Population stddev (not sample-stddev — we treat the captured samples as the population for + // CV computation). variance = E[X²] - E[X]² with Math.Max(0, ...) guard against tiny negative + // values from FP rounding when samples are nearly identical. + var mean = sum / times.Length; + var variance = (sumSq / times.Length) - (mean * mean); + var stdDevMs = Math.Sqrt(Math.Max(0.0, variance)); + + Array.Sort(times); + // Median: middle value for odd sample counts, average of two middles for even counts. + var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0; + EndProgress(progressLabel, medianMs); + + return (medianMs, minMs, maxMs, stdDevMs); + } + + /// + /// Per-cell adaptive iteration calibration. Runs a 100-iter measurement after warmup and computes + /// how many iterations are needed to reach wall-clock per sample. + /// Returns iter rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and + /// ceiling-capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode + /// (Configuration.BenchmarkSamples <= 1) returns the global unchanged — + /// calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop and + /// does NOT count toward warmup; its sole purpose is to measure per-op cost. + /// + internal static int CalibrateIterations(Action action, int targetMs) + { + if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path + + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + const int calibIter = 100; + var sw = Stopwatch.StartNew(); + for (var i = 0; i < calibIter; i++) action(); + sw.Stop(); + var ms = sw.Elapsed.TotalMilliseconds; + + // Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help). + if (ms <= 0.0001) return 200_000; + + var iterPerMs = calibIter / ms; + var raw = (int)Math.Ceiling(targetMs * iterPerMs); + // Round UP to nearest 1000 — keeps numbers human-readable in the markdown output. + var rounded = ((raw + 999) / 1000) * 1000; + + return rounded switch + { + < 1000 => 1000, + > 200_000 => 200_000, + _ => rounded + }; + } + + /// + /// Measures per-call allocation in bytes after a clean GC. Single dedicated sample (no median) — keeps timing samples pure. + /// + internal static long MeasureAllocation(Action action, int iterations, string? progressLabel = null) + { + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + var sw = Stopwatch.StartNew(); + var before = GC.GetAllocatedBytesForCurrentThread(); + RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); + + var after = GC.GetAllocatedBytesForCurrentThread(); + sw.Stop(); + EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds); + return (after - before) / iterations; + } + + /// + /// Process-wide allocation measurement — needed for round-trip-only benchmarks (NamedPipe etc.) where + /// the work happens across multiple threads. would + /// only count the caller-thread allocations, missing the server-side new byte[len] buffers and + /// any drain-pump-thread allocations. covers the entire process. + /// Slightly noisier than the per-thread variant (background threads / GC bookkeeping leak in), but + /// over 1000 iterations the signal dominates. + /// + internal static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null) + { + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + var sw = Stopwatch.StartNew(); + var before = GC.GetTotalAllocatedBytes(precise: true); + RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); + + var after = GC.GetTotalAllocatedBytes(precise: true); + sw.Stop(); + EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds); + return (after - before) / iterations; + } + + // ============================================================================================ + // Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase + // and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the + // MeasureAllocation* helpers when the caller passes a non-null progressLabel. + // ============================================================================================ + + // Tracks the longest line written by the current progress session, so EndProgress can clear + // any leftover characters from a prior longer line (avoids "ghost" trailing chars after \r). + private static int _progressLastLineLen; + + /// + /// Runs times, emitting \r-overwriting + /// progress every ~10% (approx. 10 progress prints per sample). When + /// is null, runs without any progress output (zero overhead beyond a null check per iter). + /// + private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex) + { + if (label is null) + { + for (var i = 0; i < iterations; i++) action(); + return; + } + + // ~10 progress emits per sample run. Avoid emitting on every iter (Console.Write is + // expensive enough to skew sub-µs benchmarks if overdone). + var step = Math.Max(1, iterations / 10); + for (var i = 0; i < iterations; i++) + { + action(); + if ((i + 1) % step == 0 || i == iterations - 1) + { + var pct = (int)((i + 1) * 100L / iterations); + var line = samples > 1 + ? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({i + 1}/{iterations})" + : $" > {label} {pct,3}% ({i + 1}/{iterations})"; + + System.Console.Write('\r'); + System.Console.Write(line); + + if (line.Length < _progressLastLineLen) + System.Console.Write(new string(' ', _progressLastLineLen - line.Length)); + + _progressLastLineLen = line.Length; + } + } + } + + /// + /// Closes a progress line cleanly: clears any leftover chars and writes a final "done" line on + /// the same row, terminated by \n so subsequent WriteLine calls render below. + /// + private static void EndProgress(string? label, double elapsedMs) + { + if (label is null) return; + var done = $" > {label} done in {elapsedMs,7:F1} ms"; + + System.Console.Write('\r'); + System.Console.Write(done); + + if (done.Length < _progressLastLineLen) + System.Console.Write(new string(' ', _progressLastLineLen - done.Length)); + + System.Console.WriteLine(); + _progressLastLineLen = 0; + } + +#if !AYCODE_NATIVEAOT + private static readonly JsonSerializerOptions VerifyJsonOpts = new() + { + WriteIndented = false, + + DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull, + ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles + }; +#endif + + /// + /// Round-trip equality check: serialize both via System.Text.Json (canonical form) and compare strings. + /// Slower than property-by-property compare, but universal — works for any object graph without custom comparer. + /// + /// + /// AOT publish skip: System.Text.Json's reflection path uses runtime closed-generic instantiation + /// (JsonPropertyInfo<TestStatus> et al.) that the trimmer drops, causing + /// NotSupportedException: missing native code or metadata. The validation is JIT-only — the actual + /// benchmark Serialize/Deserialize loops don't touch this path. Under AOT we return true so all + /// VerifyRoundTrip() calls pass without running the cross-format validation. + /// + internal static bool DeepEqualsViaJson(object? a, object? b) + { +#if AYCODE_NATIVEAOT + // Skip cross-format validation under AOT — STJ reflection path is incompatible. The roundtrip + // itself still runs (caller-side Serialize+Deserialize), just the JSON-canonical compare is bypassed. + return true; +#else + if (a == null && b == null) return true; + if (a == null || b == null) return false; + + var jsonA = JsonSerializer.Serialize(a, VerifyJsonOpts); + var jsonB = JsonSerializer.Serialize(b, VerifyJsonOpts); + + return jsonA == jsonB; +#endif + } + + /// + /// Validates MemoryPack setup at startup. Aborts the benchmark if TestOrder is not [MemoryPackable]. + /// Without this attribute, MemoryPack falls back to runtime resolver (slower) — comparison would be INVALID. + /// + internal static void ValidateMemoryPackSetup() + { + var typesToCheck = new[] { typeof(TestOrder) }; + + foreach (var type in typesToCheck) + { + var hasAttr = type.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true).Any(); + if (!hasAttr) + { + System.Console.Error.WriteLine($"❌ FATAL: {type.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim."); + System.Console.Error.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it."); + + Environment.Exit(1); + } + } + } + + /// + /// Filters test data sets by layer keyword. Layered approach lets you run only what's needed for the iteration cadence. + /// P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep). Comprehensive and Edge layers will be expanded in P2. + /// + internal static List FilterByLayer(List all, string layer) + { + if (layer == "all") return all.ToList(); + + var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" }; + // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc. + var comprehensiveExtras = new string[] { /* P2 */ }; + // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc. + var edgeExtras = new string[] { /* P3 */ }; + + return layer switch + { + "core" => all.Where(t => StartsWithAny(t.Name, coreNames)).ToList(), + "comprehensive" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras)).ToList(), + "edge" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras) || StartsWithAny(t.Name, edgeExtras)).ToList(), + // Single-cell A/B mini-suite filters — match by case-insensitive prefix on Name. + // Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated` + // or interactive menu shortcut), avoiding the full ~110 sec suite when only one cell is in scope. + "small" => all.Where(t => t.Name.StartsWith("Small", StringComparison.OrdinalIgnoreCase)).ToList(), + "medium" => all.Where(t => t.Name.StartsWith("Medium", StringComparison.OrdinalIgnoreCase)).ToList(), + "large" => all.Where(t => t.Name.StartsWith("Large", StringComparison.OrdinalIgnoreCase)).ToList(), + "repeated" => all.Where(t => t.Name.StartsWith("Repeated", StringComparison.OrdinalIgnoreCase)).ToList(), + "deep" => all.Where(t => t.Name.StartsWith("Deep", StringComparison.OrdinalIgnoreCase)).ToList(), + _ => all.ToList() + }; + + static bool StartsWithAny(string name, string[] prefixes) => prefixes.Any(name.StartsWith); + } +} diff --git a/AyCode.Core.Serializers.Console/Output.cs b/AyCode.Core.Serializers.Console/Output.cs index bfa9280..e6247d5 100644 --- a/AyCode.Core.Serializers.Console/Output.cs +++ b/AyCode.Core.Serializers.Console/Output.cs @@ -253,6 +253,7 @@ internal static class Output var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0; var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0; var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0; + var serAllocPct = memPackResult.SerializeAllocBytesPerOp > 0 ? (acBinaryResult.SerializeAllocBytesPerOp / (double)memPackResult.SerializeAllocBytesPerOp - 1) * 100 : 0; var desAllocPct = memPackResult.DeserializeAllocBytesPerOp > 0 ? (acBinaryResult.DeserializeAllocBytesPerOp / (double)memPackResult.DeserializeAllocBytesPerOp - 1) * 100 : 0; var rtAllocPct = memPackResult.RoundTripAllocBytesPerOp > 0 ? (acBinaryResult.RoundTripAllocBytesPerOp / (double)memPackResult.RoundTripAllocBytesPerOp - 1) * 100 : 0; diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs index 80b31c0..8588af2 100644 --- a/AyCode.Core.Serializers.Console/Program.cs +++ b/AyCode.Core.Serializers.Console/Program.cs @@ -101,7 +101,7 @@ public static class Program // Setup validation — abort BEFORE any benchmark logic if MemoryPack baseline is invalid. // Done early so user is told immediately, not after warmup. - ValidateMemoryPackSetup(); + BenchmarkLoop.ValidateMemoryPackSetup(); // CLI mode (args provided): run once, parse args, exit. Backward-compatible behaviour. if (args.Length > 0) @@ -238,7 +238,7 @@ public static class Program { var allResults = new List(); var allTestDataSets = BenchmarkTestDataProvider.CreateTestDataSets(); - var testDataSets = FilterByLayer(allTestDataSets, layer); + var testDataSets = BenchmarkLoop.FilterByLayer(allTestDataSets, layer); System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {Configuration.GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + pilot discard"); System.Console.WriteLine($"Build: {Configuration.BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"} | Test Cells: {testDataSets.Count}/{allTestDataSets.Count}"); @@ -315,23 +315,7 @@ public static class Program #region Benchmark Execution - /// - /// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain - /// in between: the first pass moves managed garbage to the finalization queue, WaitForPendingFinalizers - /// runs the finalizers, the second pass reclaims any objects the finalizers released. After this returns the - /// heap is in a known-quiescent state — the next warmup/measurement phase starts on a clean slate, isolated - /// from the previous phase's residual allocations (write-buffer pools, intern cache, write-plan arrays, etc.). - /// Called between every Ser-phase / Des-phase boundary in . - /// - [MethodImpl(MethodImplOptions.NoInlining)] - private static void ForceGcCollect() - { - GC.Collect(2, GCCollectionMode.Forced, blocking: true); - GC.WaitForPendingFinalizers(); - GC.Collect(2, GCCollectionMode.Forced, blocking: true); - } - - private static List RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode) +private static List RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode) { var results = new List(); var serializers = CreateSerializers(testData, serializerMode); @@ -396,12 +380,12 @@ public static class Program // entire round-trip path, then record into the RT result columns. if (mode is "all" or "serialize" or "ser") { - ForceGcCollect(); + BenchmarkLoop.ForceGcCollect(); serializer.WarmupSerialize(Configuration.WarmupIterations); if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep); - var rtIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs); - var (rtMed, rtMin, rtMax, rtStd) = RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]"); + var rtIter = BenchmarkLoop.CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs); + var (rtMed, rtMin, rtMax, rtStd) = BenchmarkLoop.RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]"); result.RoundTripTimeMs = rtMed; result.RoundTripTimeMinMs = rtMin; result.RoundTripTimeMaxMs = rtMax; @@ -409,7 +393,7 @@ public static class Program result.RoundTripIterations = rtIter; // Process-wide allocation measurement: server-drain-thread allocations (server-side new byte[len]) // also show up — otherwise current-thread alloc would only count the client side and look ~halved. - result.RoundTripAllocBytesPerOp = MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]"); + result.RoundTripAllocBytesPerOp = BenchmarkLoop.MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]"); } // mode == "deserialize" alone is meaningless for a round-trip-only benchmark; skip silently. } @@ -418,19 +402,19 @@ public static class Program // ── Ser phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect. if (mode is "all" or "serialize" or "ser") { - ForceGcCollect(); + BenchmarkLoop.ForceGcCollect(); serializer.WarmupSerialize(Configuration.WarmupIterations); if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep); - var serIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs); - var (serMed, serMin, serMax, serStd) = RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]"); + var serIter = BenchmarkLoop.CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs); + var (serMed, serMin, serMax, serStd) = BenchmarkLoop.RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]"); result.SerializeTimeMs = serMed; result.SerializeTimeMinMs = serMin; result.SerializeTimeMaxMs = serMax; result.SerializeTimeStdDevMs = serStd; result.SerializeIterations = serIter; // Dedicated alloc-only sample (separate from timing samples; keeps timing pure) - result.SerializeAllocBytesPerOp = MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]"); + result.SerializeAllocBytesPerOp = BenchmarkLoop.MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]"); } // ── Des phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect. @@ -438,18 +422,18 @@ public static class Program // Des-phase's allocation measurement reflects ONLY Des-side allocations (deserialized object graph). if (mode is "all" or "deserialize" or "des") { - ForceGcCollect(); + BenchmarkLoop.ForceGcCollect(); serializer.WarmupDeserialize(Configuration.WarmupIterations); if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep); - var desIter = CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs); - var (desMed, desMin, desMax, desStd) = RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]"); + var desIter = BenchmarkLoop.CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs); + var (desMed, desMin, desMax, desStd) = BenchmarkLoop.RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]"); result.DeserializeTimeMs = desMed; result.DeserializeTimeMinMs = desMin; result.DeserializeTimeMaxMs = desMax; result.DeserializeTimeStdDevMs = desStd; result.DeserializeIterations = desIter; - result.DeserializeAllocBytesPerOp = MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]"); + result.DeserializeAllocBytesPerOp = BenchmarkLoop.MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]"); } // Compose RT from Ser+Des. Because Ser and Des may have DIFFERENT iter counts post-calibration, @@ -647,336 +631,6 @@ public static class Program }; } - /// - /// Runs the action times for independent samples, - /// returning the median, min, and max elapsed time. Multi-sample design reduces single-run variance - /// from ~±15% to ~±5% by smoothing transient effects (background activity, thermal/turbo state). - /// When <= 1, falls back to single-sample timing (Debug / quick mode). - /// When is non-null, emits in-place \r progress updates so a - /// stuck benchmark (e.g. deadlocked NamedPipe row) is visibly stuck at a specific %% rather than - /// silently hanging. - /// - /// Stabilization (added 2026-05-07): - /// 1) Pilot sample is run BEFORE the recorded loop and discarded. The first measurement after - /// warmup tends to absorb residual JIT bookkeeping and GC bookkeeping; dropping it tightens - /// the min/max range without throwing away signal (the median is the SAME data as before). - /// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs BEFORE every recorded sample. - /// Without this, GC pressure from sample N occasionally triggered a Gen-2 pause inside - /// sample N+1, painting it as an outlier; collecting up-front gives every sample the - /// same starting heap shape. - /// 3) Returns (median, min, max) so the caller can surface the inter-sample range — visible - /// noise floor for the row, replacing the previous "median only" view. - /// - private static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null) - { - var samples = Configuration.BenchmarkSamples; - if (samples <= 1) - { - // Single-sample fast path (Debug or trivial run) — no allocation, no sort, no stddev. - var sw = Stopwatch.StartNew(); - RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); - sw.Stop(); - var ms = sw.Elapsed.TotalMilliseconds; - EndProgress(progressLabel, ms); - return (ms, ms, ms, 0); - } - - // Pilot sample (discarded). Counts as sample index 0 of (samples + 1) for progress display - // so the user sees an extra "warmup-ish" tick before the recorded samples start. - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - var pilotSw = Stopwatch.StartNew(); - RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0); - pilotSw.Stop(); - // intentionally not stored - - var times = new double[samples]; - for (var s = 0; s < samples; s++) - { - // Per-sample GC settle. Forces every sample to start from the same heap state, so - // a Gen-2 pause caused by the previous sample doesn't bleed into the next sample's - // timing. Cost is paid OUTSIDE the Stopwatch window — no impact on the measurement. - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - var sw = Stopwatch.StartNew(); - RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1); - sw.Stop(); - times[s] = sw.Elapsed.TotalMilliseconds; - } - - // Capture min/max/sum/sumSq BEFORE sort to avoid order ambiguity (Array.Sort is in-place). - var minMs = double.MaxValue; - var maxMs = double.MinValue; - var sum = 0.0; - var sumSq = 0.0; - - for (var i = 0; i < times.Length; i++) - { - var t = times[i]; - sum += t; - sumSq += t * t; - if (t < minMs) minMs = t; - if (t > maxMs) maxMs = t; - } - // Population stddev (not sample-stddev — we treat the captured samples as the population for - // CV computation). variance = E[X²] - E[X]² with Math.Max(0, ...) guard against tiny negative - // values from FP rounding when samples are nearly identical. - var mean = sum / times.Length; - var variance = (sumSq / times.Length) - (mean * mean); - var stdDevMs = Math.Sqrt(Math.Max(0.0, variance)); - - Array.Sort(times); - // Median: middle value for odd sample counts, average of two middles for even counts. - var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0; - EndProgress(progressLabel, medianMs); - - return (medianMs, minMs, maxMs, stdDevMs); - } - - /// - /// Per-cell adaptive iteration calibration. Runs a 100-iter measurement after warmup and computes - /// how many iterations are needed to reach wall-clock per sample. - /// Returns iter rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and - /// ceiling-capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode - /// (Configuration.BenchmarkSamples <= 1) returns the global unchanged — - /// calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop and - /// does NOT count toward warmup; its sole purpose is to measure per-op cost. - /// - private static int CalibrateIterations(Action action, int targetMs) - { - if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path - - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - const int calibIter = 100; - var sw = Stopwatch.StartNew(); - for (var i = 0; i < calibIter; i++) action(); - sw.Stop(); - var ms = sw.Elapsed.TotalMilliseconds; - - // Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help). - if (ms <= 0.0001) return 200_000; - - var iterPerMs = calibIter / ms; - var raw = (int)Math.Ceiling(targetMs * iterPerMs); - // Round UP to nearest 1000 — keeps numbers human-readable in the markdown output. - var rounded = ((raw + 999) / 1000) * 1000; - - return rounded switch - { - < 1000 => 1000, - > 200_000 => 200_000, - _ => rounded - }; - } - - /// - /// Measures per-call allocation in bytes after a clean GC. Single dedicated sample (no median) — keeps timing samples pure. - /// - private static long MeasureAllocation(Action action, int iterations, string? progressLabel = null) - { - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - var sw = Stopwatch.StartNew(); - var before = GC.GetAllocatedBytesForCurrentThread(); - RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); - - var after = GC.GetAllocatedBytesForCurrentThread(); - sw.Stop(); - EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds); - return (after - before) / iterations; - } - - /// - /// Process-wide allocation measurement — needed for round-trip-only benchmarks (NamedPipe etc.) where - /// the work happens across multiple threads. would - /// only count the caller-thread allocations, missing the server-side new byte[len] buffers and - /// any drain-pump-thread allocations. covers the entire process. - /// Slightly noisier than the per-thread variant (background threads / GC bookkeeping leak in), but - /// over 1000 iterations the signal dominates. - /// - private static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null) - { - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - var sw = Stopwatch.StartNew(); - var before = GC.GetTotalAllocatedBytes(precise: true); - RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0); - - var after = GC.GetTotalAllocatedBytes(precise: true); - sw.Stop(); - EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds); - return (after - before) / iterations; - } - - // ============================================================================================ - // Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase - // and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the - // MeasureAllocation* helpers when the caller passes a non-null progressLabel. - // ============================================================================================ - - // Tracks the longest line written by the current progress session, so EndProgress can clear - // any leftover characters from a prior longer line (avoids "ghost" trailing chars after \r). - private static int _progressLastLineLen; - - /// - /// Runs times, emitting \r-overwriting - /// progress every ~10% (approx. 10 progress prints per sample). When - /// is null, runs without any progress output (zero overhead beyond a null check per iter). - /// - private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex) - { - if (label is null) - { - for (var i = 0; i < iterations; i++) action(); - return; - } - - // ~10 progress emits per sample run. Avoid emitting on every iter (Console.Write is - // expensive enough to skew sub-µs benchmarks if overdone). - var step = Math.Max(1, iterations / 10); - for (var i = 0; i < iterations; i++) - { - action(); - if ((i + 1) % step == 0 || i == iterations - 1) - { - var pct = (int)((i + 1) * 100L / iterations); - var line = samples > 1 - ? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({i + 1}/{iterations})" - : $" > {label} {pct,3}% ({i + 1}/{iterations})"; - - System.Console.Write('\r'); - System.Console.Write(line); - - if (line.Length < _progressLastLineLen) - System.Console.Write(new string(' ', _progressLastLineLen - line.Length)); - - _progressLastLineLen = line.Length; - } - } - } - - /// - /// Closes a progress line cleanly: clears any leftover chars and writes a final "done" line on - /// the same row, terminated by \n so subsequent WriteLine calls render below. - /// - private static void EndProgress(string? label, double elapsedMs) - { - if (label is null) return; - var done = $" > {label} done in {elapsedMs,7:F1} ms"; - - System.Console.Write('\r'); - System.Console.Write(done); - - if (done.Length < _progressLastLineLen) - System.Console.Write(new string(' ', _progressLastLineLen - done.Length)); - - System.Console.WriteLine(); - _progressLastLineLen = 0; - } - -#if !AYCODE_NATIVEAOT - private static readonly JsonSerializerOptions VerifyJsonOpts = new() - { - WriteIndented = false, - - DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull, - ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles - }; -#endif - - /// - /// Round-trip equality check: serialize both via System.Text.Json (canonical form) and compare strings. - /// Slower than property-by-property compare, but universal — works for any object graph without custom comparer. - /// - /// - /// AOT publish skip: System.Text.Json's reflection path uses runtime closed-generic instantiation - /// (JsonPropertyInfo<TestStatus> et al.) that the trimmer drops, causing - /// NotSupportedException: missing native code or metadata. The validation is JIT-only — the actual - /// benchmark Serialize/Deserialize loops don't touch this path. Under AOT we return true so all - /// VerifyRoundTrip() calls pass without running the cross-format validation. - /// - private static bool DeepEqualsViaJson(object? a, object? b) - { -#if AYCODE_NATIVEAOT - // Skip cross-format validation under AOT — STJ reflection path is incompatible. The roundtrip - // itself still runs (caller-side Serialize+Deserialize), just the JSON-canonical compare is bypassed. - return true; -#else - if (a == null && b == null) return true; - if (a == null || b == null) return false; - - var jsonA = JsonSerializer.Serialize(a, VerifyJsonOpts); - var jsonB = JsonSerializer.Serialize(b, VerifyJsonOpts); - - return jsonA == jsonB; -#endif - } - - /// - /// Validates MemoryPack setup at startup. Aborts the benchmark if TestOrder is not [MemoryPackable]. - /// Without this attribute, MemoryPack falls back to runtime resolver (slower) — comparison would be INVALID. - /// - private static void ValidateMemoryPackSetup() - { - var typesToCheck = new[] { typeof(TestOrder) }; - - foreach (var type in typesToCheck) - { - var hasAttr = type.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true).Any(); - if (!hasAttr) - { - System.Console.Error.WriteLine($"❌ FATAL: {type.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim."); - System.Console.Error.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it."); - - Environment.Exit(1); - } - } - } - - /// - /// Filters test data sets by layer keyword. Layered approach lets you run only what's needed for the iteration cadence. - /// P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep). Comprehensive and Edge layers will be expanded in P2. - /// - private static List FilterByLayer(List all, string layer) - { - if (layer == "all") return all.ToList(); - - var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" }; - // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc. - var comprehensiveExtras = new string[] { /* P2 */ }; - // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc. - var edgeExtras = new string[] { /* P3 */ }; - - return layer switch - { - "core" => all.Where(t => StartsWithAny(t.Name, coreNames)).ToList(), - "comprehensive" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras)).ToList(), - "edge" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras) || StartsWithAny(t.Name, edgeExtras)).ToList(), - // Single-cell A/B mini-suite filters — match by case-insensitive prefix on Name. - // Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated` - // or interactive menu shortcut), avoiding the full ~110 sec suite when only one cell is in scope. - "small" => all.Where(t => t.Name.StartsWith("Small", StringComparison.OrdinalIgnoreCase)).ToList(), - "medium" => all.Where(t => t.Name.StartsWith("Medium", StringComparison.OrdinalIgnoreCase)).ToList(), - "large" => all.Where(t => t.Name.StartsWith("Large", StringComparison.OrdinalIgnoreCase)).ToList(), - "repeated" => all.Where(t => t.Name.StartsWith("Repeated", StringComparison.OrdinalIgnoreCase)).ToList(), - "deep" => all.Where(t => t.Name.StartsWith("Deep", StringComparison.OrdinalIgnoreCase)).ToList(), - _ => all.ToList() - }; - - static bool StartsWithAny(string name, string[] prefixes) => prefixes.Any(name.StartsWith); - } - #endregion #region Serializer Implementations @@ -1071,7 +725,7 @@ public static class Program { var bytes = AcBinarySerializer.Serialize(_order, _options); var roundTripped = AcBinaryDeserializer.Deserialize(bytes, _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -1108,7 +762,7 @@ public static class Program { var bytes = MemoryPackSerializer.Serialize(_order, _options); var roundTripped = MemoryPackSerializer.Deserialize(bytes, _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -1157,7 +811,7 @@ public static class Program { var bytes = MessagePackSerializer.Serialize(_order, _options); var roundTripped = MessagePackSerializer.Deserialize(bytes, _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } #endif @@ -1219,7 +873,7 @@ public static class Program var abw = new ArrayBufferWriter(); AcBinarySerializer.Serialize(_order, abw, _options); var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -1420,7 +1074,7 @@ public static class Program { Serialize(); var result = _lastResult as TestOrder; - return result != null && DeepEqualsViaJson(_order, result); + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); } finally { @@ -1605,7 +1259,7 @@ public static class Program { Serialize(); var result = _lastResult as TestOrder; - return result != null && DeepEqualsViaJson(_order, result); + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); } finally { @@ -1653,7 +1307,7 @@ public static class Program /// /// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches /// 's API contract); the receive-side scratch buffer is also allocated per-iter - /// on the consumer-task (counted via GC.GetTotalAllocatedBytes in MeasureAllocationTotal). + /// on the consumer-task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal). /// private sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable { @@ -1747,7 +1401,7 @@ public static class Program try { var size = _pendingReadSize; - var bytes = new byte[size]; // per-iter alloc — counted by MeasureAllocationTotal + var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal var totalRead = 0; while (totalRead < size) { @@ -1814,7 +1468,7 @@ public static class Program { Serialize(); var result = _lastResult as TestOrder; - return result != null && DeepEqualsViaJson(_order, result); + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); } finally { @@ -1981,7 +1635,7 @@ public static class Program { Serialize(); var result = _lastResult as TestOrder; - return result != null && DeepEqualsViaJson(_order, result); + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); } finally { @@ -2049,7 +1703,7 @@ public static class Program var abw = new ArrayBufferWriter(); MemoryPackSerializer.Serialize(abw, _order, _options); var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -2107,7 +1761,7 @@ public static class Program AcBinarySerializer.Serialize(_order, _bufferWriter, _options); var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -2163,7 +1817,7 @@ public static class Program _bufferWriter.ResetWrittenCount(); MemoryPackSerializer.Serialize(_bufferWriter, _order, _options); var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } } @@ -2206,7 +1860,7 @@ public static class Program { var json = JsonSerializer.Serialize(_order, _options); var roundTripped = JsonSerializer.Deserialize(json, _options); - return DeepEqualsViaJson(_order, roundTripped); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); } }