diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 740a5cb..f9527aa 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -74,7 +74,8 @@
"Bash(where dotnet-trace *)",
"Bash(dotnet tool *)",
"Bash(dotnet-trace convert *)",
- "Bash(find ~/.nuget/packages/memorypack* -name \"*.cs\" 2>/dev/null | head -5; find /mnt/c/Users/Fullepi/.nuget/packages/memorypack* -name \"MemoryPackSerializer*.cs\" 2>/dev/null | head -5)"
+ "Bash(find ~/.nuget/packages/memorypack* -name \"*.cs\" 2>/dev/null | head -5; find /mnt/c/Users/Fullepi/.nuget/packages/memorypack* -name \"MemoryPackSerializer*.cs\" 2>/dev/null | head -5)",
+ "PowerShell($path = \"H:\\\\Applications\\\\Aycode\\\\Source\\\\AyCode.Core\\\\AyCode.Core.Serializers.Console\\\\Program.cs\"; $c = [IO.File]::ReadAllText\\($path\\); $c = $c -replace 'MeasureAllocationTotal', 'BenchmarkLoop.MeasureAllocationTotal'; $c = $c -replace 'MeasureAllocation\\\\\\(', 'BenchmarkLoop.MeasureAllocation\\('; $c = $c -replace 'ForceGcCollect\\\\\\(', 'BenchmarkLoop.ForceGcCollect\\('; $c = $c -replace 'CalibrateIterations\\\\\\(', 'BenchmarkLoop.CalibrateIterations\\('; $c = $c -replace 'RunTimed\\\\\\(', 'BenchmarkLoop.RunTimed\\('; $c = $c -replace 'DeepEqualsViaJson\\\\\\(', 'BenchmarkLoop.DeepEqualsViaJson\\('; $c = $c -replace 'ValidateMemoryPackSetup\\\\\\(', 'BenchmarkLoop.ValidateMemoryPackSetup\\('; $c = $c -replace 'FilterByLayer\\\\\\(', 'BenchmarkLoop.FilterByLayer\\('; [IO.File]::WriteAllText\\($path, $c\\); Write-Output \"OK new length: $\\($c.Length\\)\")"
]
}
}
diff --git a/AyCode.Core.Serializers.Console/BenchmarkLoop.cs b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs
new file mode 100644
index 0000000..4e41e66
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/BenchmarkLoop.cs
@@ -0,0 +1,366 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Text.Json;
+
+namespace AyCode.Core.Serializers.Console;
+
+///
+/// Benchmark execution helpers: timing (), per-cell adaptive iteration
+/// calibration (), allocation measurement
+/// ( + ), in-place
+/// \r-progress reporting, full-GC phase-boundary helper (),
+/// startup validation (), and per-cell round-trip equality
+/// (). Pure benchmark-execution infrastructure — no display
+/// formatting (that lives in Output) and no per-engine glue (which lives with the
+/// individual ISerializerBenchmark implementations).
+///
+internal static class BenchmarkLoop
+{
+ /// <summary>
+ /// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain
+ /// in between: the first pass collects garbage and queues finalizable objects, <c>WaitForPendingFinalizers</c>
+ /// runs the finalizers, the second pass reclaims any objects the finalizers released. The intent is that the
+ /// next warmup/measurement phase starts on a clean slate, isolated from the previous phase's residual
+ /// allocations (write-buffer pools, intern cache, write-plan arrays, etc.).
+ /// Called between every Ser-phase / Des-phase boundary in <c>RunBenchmarksForTestData</c>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ internal static void ForceGcCollect()
+ {
+ GC.Collect(2, GCCollectionMode.Forced, blocking: true);
+ GC.WaitForPendingFinalizers();
+ GC.Collect(2, GCCollectionMode.Forced, blocking: true);
+ }
+
+ /// <summary>
+ /// Runs <paramref name="action"/> <paramref name="iterations"/> times per sample, for
+ /// <c>Configuration.BenchmarkSamples</c> independent samples, and returns the median, min, max and
+ /// population standard deviation of the per-sample elapsed time in milliseconds. Multi-sample design
+ /// reduces single-run variance from ~±15% to ~±5% by smoothing transient effects (background activity,
+ /// thermal/turbo state). When the sample count is &lt;= 1, falls back to single-sample timing
+ /// (Debug / quick mode) and reports a standard deviation of 0. When <paramref name="progressLabel"/>
+ /// is non-null, emits in-place <c>\r</c> progress updates so a stuck benchmark (e.g. deadlocked
+ /// NamedPipe row) is visibly stuck at a specific % rather than silently hanging.
+ /// </summary>
+ /// <remarks>
+ /// Stabilization (added 2026-05-07):
+ /// 1) A pilot sample is run BEFORE the recorded loop and discarded. The first measurement after
+ ///    warmup tends to absorb residual JIT and GC bookkeeping; dropping it tightens the min/max
+ ///    range without throwing away signal.
+ /// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs BEFORE every recorded sample.
+ ///    Without this, GC pressure from sample N occasionally triggered a Gen-2 pause inside
+ ///    sample N+1, painting it as an outlier; collecting up-front gives every sample the
+ ///    same starting heap shape.
+ /// 3) Min, max and stddev are returned alongside the median so the caller can surface the
+ ///    inter-sample range — a visible noise floor for the row.
+ /// </remarks>
+ internal static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null)
+ {
+     var samples = Configuration.BenchmarkSamples;
+     if (samples <= 1)
+     {
+         // Single-sample fast path (Debug or trivial run) — no allocation, no sort, no stddev.
+         var sw = Stopwatch.StartNew();
+         RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
+         sw.Stop();
+         var ms = sw.Elapsed.TotalMilliseconds;
+         EndProgress(progressLabel, ms);
+         return (ms, ms, ms, 0);
+     }
+
+     // Pilot sample (discarded, deliberately untimed). Shown as sample 1 of (samples + 1) in the
+     // progress display so the user sees an extra "warmup-ish" tick before the recorded samples.
+     GC.Collect();
+     GC.WaitForPendingFinalizers();
+     GC.Collect();
+
+     RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0);
+
+     var times = new double[samples];
+     for (var s = 0; s < samples; s++)
+     {
+         // Per-sample GC settle. Forces every sample to start from the same heap state, so
+         // a Gen-2 pause caused by the previous sample doesn't bleed into the next sample's
+         // timing. Cost is paid OUTSIDE the Stopwatch window — no impact on the measurement.
+         GC.Collect();
+         GC.WaitForPendingFinalizers();
+         GC.Collect();
+
+         var sw = Stopwatch.StartNew();
+         RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1);
+         sw.Stop();
+         times[s] = sw.Elapsed.TotalMilliseconds;
+     }
+
+     // First pass: min / max / sum. Order-independent, so it does not matter that Array.Sort runs later.
+     var minMs = double.MaxValue;
+     var maxMs = double.MinValue;
+     var sum = 0.0;
+     foreach (var t in times)
+     {
+         sum += t;
+         if (t < minMs) minMs = t;
+         if (t > maxMs) maxMs = t;
+     }
+
+     // Second pass: population variance (the captured samples ARE the population for CV purposes) as the
+     // mean squared deviation. Unlike E[X²] - E[X]² this cannot cancel to a negative value under FP rounding.
+     var mean = sum / times.Length;
+     var sumSqDev = 0.0;
+     foreach (var t in times)
+     {
+         var d = t - mean;
+         sumSqDev += d * d;
+     }
+     var stdDevMs = Math.Sqrt(sumSqDev / times.Length);
+
+     Array.Sort(times);
+     // Median: middle value for odd sample counts, average of two middles for even counts.
+     var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0;
+     EndProgress(progressLabel, medianMs);
+
+     return (medianMs, minMs, maxMs, stdDevMs);
+ }
+
+ /// <summary>
+ /// Per-cell adaptive iteration calibration. Runs a 100-iteration measurement after warmup and computes
+ /// how many iterations are needed to reach <paramref name="targetMs"/> of wall-clock time per sample.
+ /// Returns the count rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and
+ /// capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode
+ /// (<c>Configuration.BenchmarkSamples &lt;= 1</c>) returns <c>Configuration.TestIterations</c> unchanged —
+ /// calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop and
+ /// does NOT count toward warmup; its sole purpose is to measure per-op cost.
+ /// </summary>
+ internal static int CalibrateIterations(Action action, int targetMs)
+ {
+     const int calibIter = 100;
+     const int minIter = 1000;
+     const int maxIter = 200_000;
+
+     if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path
+
+     GC.Collect();
+     GC.WaitForPendingFinalizers();
+     GC.Collect();
+
+     var sw = Stopwatch.StartNew();
+     for (var i = 0; i < calibIter; i++) action();
+     sw.Stop();
+     var ms = sw.Elapsed.TotalMilliseconds;
+
+     // Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help).
+     if (ms <= 0.0001) return maxIter;
+
+     // Clamp while still in double: for a near-resolution `ms` and a large target the product can
+     // exceed int.MaxValue, and an out-of-range double -> int cast would yield a garbage count.
+     var raw = Math.Ceiling(targetMs * (calibIter / ms));
+     if (raw >= maxIter) return maxIter;
+     if (!(raw > minIter)) return minIter; // also covers zero / negative targets
+
+     // Round UP to nearest 1000 — keeps numbers human-readable in the markdown output.
+     return (((int)raw + 999) / 1000) * 1000;
+ }
+
+ /// <summary>
+ /// Average bytes allocated on the calling thread per invocation of <paramref name="action"/>, taken
+ /// from one dedicated pass (no median) after a GC settle — keeps the timing samples pure.
+ /// </summary>
+ internal static long MeasureAllocation(Action action, int iterations, string? progressLabel = null)
+ {
+     GC.Collect();
+     GC.WaitForPendingFinalizers();
+     GC.Collect();
+
+     var watch = Stopwatch.StartNew(); // only feeds the "done in … ms" progress line
+     var bytesAtStart = GC.GetAllocatedBytesForCurrentThread();
+     RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
+     var allocated = GC.GetAllocatedBytesForCurrentThread() - bytesAtStart;
+     watch.Stop();
+     EndProgress(progressLabel, watch.Elapsed.TotalMilliseconds);
+     return allocated / iterations;
+ }
+
+ /// <summary>
+ /// Process-wide counterpart of <see cref="MeasureAllocation"/> for round-trip-only benchmarks
+ /// (NamedPipe etc.) whose work is spread over several threads. The per-thread counter
+ /// (<c>GC.GetAllocatedBytesForCurrentThread</c>) would only see caller-thread allocations and miss
+ /// the server-side <c>new byte[len]</c> buffers and drain-pump-thread allocations, whereas
+ /// <c>GC.GetTotalAllocatedBytes</c> covers the entire process. Slightly noisier (background threads
+ /// leak in), but the signal dominates once averaged over the iteration count.
+ /// </summary>
+ internal static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null)
+ {
+     GC.Collect();
+     GC.WaitForPendingFinalizers();
+     GC.Collect();
+
+     var watch = Stopwatch.StartNew(); // only feeds the "done in … ms" progress line
+     var bytesAtStart = GC.GetTotalAllocatedBytes(precise: true);
+     RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
+     var allocated = GC.GetTotalAllocatedBytes(precise: true) - bytesAtStart;
+     watch.Stop();
+     EndProgress(progressLabel, watch.Elapsed.TotalMilliseconds);
+
+     return allocated / iterations;
+ }
+
+ // ============================================================================================
+ // Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase
+ // and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the
+ // MeasureAllocation* helpers when the caller passes a non-null progressLabel.
+ // ============================================================================================
+
+ // Length of the most recently written progress line, so the next write (or EndProgress) can
+ // pad over leftover characters from a prior longer line (avoids "ghost" trailing chars after \r).
+ private static int _progressLastLineLen;
+
+ /// <summary>
+ /// Invokes <paramref name="action"/> <paramref name="iterations"/> times. With a non-null
+ /// <paramref name="label"/> it rewrites a <c>\r</c>-anchored progress line roughly every 10% of the
+ /// run (and after the last call); with a null label the loop contains nothing but the calls.
+ /// </summary>
+ private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex)
+ {
+     if (label is null)
+     {
+         for (var n = 0; n < iterations; n++) action();
+         return;
+     }
+
+     // Aim for ~10 emits per run; Console.Write on every iteration would be expensive enough to
+     // skew sub-µs benchmarks.
+     var emitEvery = Math.Max(1, iterations / 10);
+     var finalIndex = iterations - 1;
+     for (var n = 0; n < iterations; n++)
+     {
+         action();
+
+         var completed = n + 1;
+         if (completed % emitEvery != 0 && n != finalIndex) continue;
+
+         var pct = (int)(completed * 100L / iterations);
+         string line;
+         if (samples > 1)
+             line = $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({completed}/{iterations})";
+         else
+             line = $" > {label} {pct,3}% ({completed}/{iterations})";
+         // PadRight blanks out the tail of a previous, longer line; it is a no-op otherwise.
+         System.Console.Write('\r');
+         System.Console.Write(line.PadRight(_progressLastLineLen));
+         _progressLastLineLen = line.Length;
+     }
+ }
+
+ /// <summary>
+ /// Finishes the current progress row: overwrites it with a final "done" summary, blanks out any
+ /// leftover characters from a longer previous line, and ends the row with a newline so later
+ /// <c>WriteLine</c> calls render below it. Does nothing when <paramref name="label"/> is null.
+ /// </summary>
+ private static void EndProgress(string? label, double elapsedMs)
+ {
+     if (label is null) return;
+
+     var summary = $" > {label} done in {elapsedMs,7:F1} ms";
+
+     // PadRight returns the string unchanged unless the previous line was longer.
+     System.Console.Write('\r');
+     System.Console.WriteLine(summary.PadRight(_progressLastLineLen));
+
+     // The next progress session starts with nothing to erase.
+     _progressLastLineLen = 0;
+ }
+
+#if !AYCODE_NATIVEAOT
+ private static readonly JsonSerializerOptions VerifyJsonOpts = new()
+ {
+ WriteIndented = false,
+ // Used by DeepEqualsViaJson: compact output, null members skipped, reference cycles ignored.
+ DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
+ ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles
+ };
+#endif
+
+ /// <summary>
+ /// Structural equality via a canonical form: both objects are rendered with System.Text.Json and the
+ /// resulting strings are compared. Slower than a member-by-member compare, but needs no custom comparer.
+ /// </summary>
+ /// <remarks>
+ /// Native AOT builds (<c>AYCODE_NATIVEAOT</c>) skip the comparison and always report <c>true</c>:
+ /// System.Text.Json's reflection path relies on runtime closed-generic instantiation
+ /// (<c>JsonPropertyInfo&lt;TestStatus&gt;</c> et al.) that the trimmer drops, which surfaces as
+ /// <c>NotSupportedException: missing native code or metadata</c>. The benchmark's own
+ /// Serialize/Deserialize loops do not go through this method.
+ /// </remarks>
+ internal static bool DeepEqualsViaJson(object? a, object? b)
+ {
+#if AYCODE_NATIVEAOT
+     // Cross-format validation is bypassed under AOT — the STJ reflection path is incompatible.
+     // The caller-side Serialize+Deserialize round trip itself still runs.
+     return true;
+#else
+     // Two nulls are equal; a single null never equals a real object.
+     if (a is null || b is null) return a is null && b is null;
+
+     var left = JsonSerializer.Serialize(a, VerifyJsonOpts);
+     var right = JsonSerializer.Serialize(b, VerifyJsonOpts);
+
+     return string.Equals(left, right, StringComparison.Ordinal);
+#endif
+ }
+
+ /// <summary>
+ /// Startup guard for the MemoryPack baseline: terminates the process with exit code 1 when
+ /// <c>TestOrder</c> does not carry <c>[MemoryPackable]</c>, after writing an explanation (the
+ /// comparison would be invalid) and a remediation hint to stderr.
+ /// </summary>
+ internal static void ValidateMemoryPackSetup()
+ {
+     foreach (var candidate in new[] { typeof(TestOrder) })
+     {
+         var markers = candidate.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true);
+         if (markers.Length > 0) continue;
+
+         var stderr = System.Console.Error;
+         stderr.WriteLine($"❌ FATAL: {candidate.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim.");
+         stderr.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it.");
+
+         // Hard stop: no benchmark should run against an invalid baseline.
+         Environment.Exit(1);
+     }
+ }
+
+ /// <summary>
+ /// Filters test data sets by layer keyword so a run can be limited to what the current iteration
+ /// cadence needs. "core" / "comprehensive" / "edge" select cumulative prefix groups (case-sensitive,
+ /// ordinal); "small" / "medium" / "large" / "repeated" / "deep" select a single cell by
+ /// case-insensitive prefix; "all" and any unrecognised keyword return every data set. A new list is
+ /// always returned. P1: only "Core" data exists; Comprehensive and Edge will be expanded in P2/P3.
+ /// </summary>
+ internal static List FilterByLayer(List all, string layer)
+ {
+     var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" };
+     // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc.
+     var comprehensiveExtras = new string[] { /* P2 */ };
+     // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc.
+     var edgeExtras = new string[] { /* P3 */ };
+
+     return layer switch
+     {
+         "core" => all.Where(t => StartsWithAny(t.Name, coreNames)).ToList(),
+         "comprehensive" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras)).ToList(),
+         "edge" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras) || StartsWithAny(t.Name, edgeExtras)).ToList(),
+         // Single-cell A/B mini-suite filters (e.g. `dotnet run -- repeated`): the keyword itself is the
+         // case-insensitive Name prefix, which avoids the full suite when only one cell is in scope.
+         "small" or "medium" or "large" or "repeated" or "deep"
+             => all.Where(t => t.Name.StartsWith(layer, StringComparison.OrdinalIgnoreCase)).ToList(),
+         _ => all.ToList() // "all" and unrecognised keywords
+     };
+
+     // Ordinal on purpose: the prefixes are identifiers, so culture-sensitive matching (the default
+     // for string.StartsWith(string)) would make the selection depend on the machine's locale.
+     static bool StartsWithAny(string name, string[] prefixes)
+         => prefixes.Any(p => name.StartsWith(p, StringComparison.Ordinal));
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Output.cs b/AyCode.Core.Serializers.Console/Output.cs
index bfa9280..e6247d5 100644
--- a/AyCode.Core.Serializers.Console/Output.cs
+++ b/AyCode.Core.Serializers.Console/Output.cs
@@ -253,6 +253,7 @@ internal static class Output
var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0;
var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0;
var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0;
+
var serAllocPct = memPackResult.SerializeAllocBytesPerOp > 0 ? (acBinaryResult.SerializeAllocBytesPerOp / (double)memPackResult.SerializeAllocBytesPerOp - 1) * 100 : 0;
var desAllocPct = memPackResult.DeserializeAllocBytesPerOp > 0 ? (acBinaryResult.DeserializeAllocBytesPerOp / (double)memPackResult.DeserializeAllocBytesPerOp - 1) * 100 : 0;
var rtAllocPct = memPackResult.RoundTripAllocBytesPerOp > 0 ? (acBinaryResult.RoundTripAllocBytesPerOp / (double)memPackResult.RoundTripAllocBytesPerOp - 1) * 100 : 0;
diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs
index 80b31c0..8588af2 100644
--- a/AyCode.Core.Serializers.Console/Program.cs
+++ b/AyCode.Core.Serializers.Console/Program.cs
@@ -101,7 +101,7 @@ public static class Program
// Setup validation — abort BEFORE any benchmark logic if MemoryPack baseline is invalid.
// Done early so user is told immediately, not after warmup.
- ValidateMemoryPackSetup();
+ BenchmarkLoop.ValidateMemoryPackSetup();
// CLI mode (args provided): run once, parse args, exit. Backward-compatible behaviour.
if (args.Length > 0)
@@ -238,7 +238,7 @@ public static class Program
{
var allResults = new List();
var allTestDataSets = BenchmarkTestDataProvider.CreateTestDataSets();
- var testDataSets = FilterByLayer(allTestDataSets, layer);
+ var testDataSets = BenchmarkLoop.FilterByLayer(allTestDataSets, layer);
System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {Configuration.GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + pilot discard");
System.Console.WriteLine($"Build: {Configuration.BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"} | Test Cells: {testDataSets.Count}/{allTestDataSets.Count}");
@@ -315,23 +315,7 @@ public static class Program
#region Benchmark Execution
- ///
- /// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain
- /// in between: the first pass moves managed garbage to the finalization queue, WaitForPendingFinalizers
- /// runs the finalizers, the second pass reclaims any objects the finalizers released. After this returns the
- /// heap is in a known-quiescent state — the next warmup/measurement phase starts on a clean slate, isolated
- /// from the previous phase's residual allocations (write-buffer pools, intern cache, write-plan arrays, etc.).
- /// Called between every Ser-phase / Des-phase boundary in .
- ///
- [MethodImpl(MethodImplOptions.NoInlining)]
- private static void ForceGcCollect()
- {
- GC.Collect(2, GCCollectionMode.Forced, blocking: true);
- GC.WaitForPendingFinalizers();
- GC.Collect(2, GCCollectionMode.Forced, blocking: true);
- }
-
- private static List RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode)
+private static List RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode)
{
var results = new List();
var serializers = CreateSerializers(testData, serializerMode);
@@ -396,12 +380,12 @@ public static class Program
// entire round-trip path, then record into the RT result columns.
if (mode is "all" or "serialize" or "ser")
{
- ForceGcCollect();
+ BenchmarkLoop.ForceGcCollect();
serializer.WarmupSerialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
- var rtIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
- var (rtMed, rtMin, rtMax, rtStd) = RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]");
+ var rtIter = BenchmarkLoop.CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
+ var (rtMed, rtMin, rtMax, rtStd) = BenchmarkLoop.RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]");
result.RoundTripTimeMs = rtMed;
result.RoundTripTimeMinMs = rtMin;
result.RoundTripTimeMaxMs = rtMax;
@@ -409,7 +393,7 @@ public static class Program
result.RoundTripIterations = rtIter;
// Process-wide allocation measurement: server-drain-thread allocations (server-side new byte[len])
// also show up — otherwise current-thread alloc would only count the client side and look ~halved.
- result.RoundTripAllocBytesPerOp = MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]");
+ result.RoundTripAllocBytesPerOp = BenchmarkLoop.MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]");
}
// mode == "deserialize" alone is meaningless for a round-trip-only benchmark; skip silently.
}
@@ -418,19 +402,19 @@ public static class Program
// ── Ser phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect.
if (mode is "all" or "serialize" or "ser")
{
- ForceGcCollect();
+ BenchmarkLoop.ForceGcCollect();
serializer.WarmupSerialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
- var serIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
- var (serMed, serMin, serMax, serStd) = RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]");
+ var serIter = BenchmarkLoop.CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
+ var (serMed, serMin, serMax, serStd) = BenchmarkLoop.RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]");
result.SerializeTimeMs = serMed;
result.SerializeTimeMinMs = serMin;
result.SerializeTimeMaxMs = serMax;
result.SerializeTimeStdDevMs = serStd;
result.SerializeIterations = serIter;
// Dedicated alloc-only sample (separate from timing samples; keeps timing pure)
- result.SerializeAllocBytesPerOp = MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]");
+ result.SerializeAllocBytesPerOp = BenchmarkLoop.MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]");
}
// ── Des phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect.
@@ -438,18 +422,18 @@ public static class Program
// Des-phase's allocation measurement reflects ONLY Des-side allocations (deserialized object graph).
if (mode is "all" or "deserialize" or "des")
{
- ForceGcCollect();
+ BenchmarkLoop.ForceGcCollect();
serializer.WarmupDeserialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
- var desIter = CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs);
- var (desMed, desMin, desMax, desStd) = RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]");
+ var desIter = BenchmarkLoop.CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs);
+ var (desMed, desMin, desMax, desStd) = BenchmarkLoop.RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]");
result.DeserializeTimeMs = desMed;
result.DeserializeTimeMinMs = desMin;
result.DeserializeTimeMaxMs = desMax;
result.DeserializeTimeStdDevMs = desStd;
result.DeserializeIterations = desIter;
- result.DeserializeAllocBytesPerOp = MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]");
+ result.DeserializeAllocBytesPerOp = BenchmarkLoop.MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]");
}
// Compose RT from Ser+Des. Because Ser and Des may have DIFFERENT iter counts post-calibration,
@@ -647,336 +631,6 @@ public static class Program
};
}
- ///
- /// Runs the action times for independent samples,
- /// returning the median, min, and max elapsed time. Multi-sample design reduces single-run variance
- /// from ~±15% to ~±5% by smoothing transient effects (background activity, thermal/turbo state).
- /// When <= 1, falls back to single-sample timing (Debug / quick mode).
- /// When is non-null, emits in-place \r progress updates so a
- /// stuck benchmark (e.g. deadlocked NamedPipe row) is visibly stuck at a specific %% rather than
- /// silently hanging.
- ///
- /// Stabilization (added 2026-05-07):
- /// 1) Pilot sample is run BEFORE the recorded loop and discarded. The first measurement after
- /// warmup tends to absorb residual JIT bookkeeping and GC bookkeeping; dropping it tightens
- /// the min/max range without throwing away signal (the median is the SAME data as before).
- /// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs BEFORE every recorded sample.
- /// Without this, GC pressure from sample N occasionally triggered a Gen-2 pause inside
- /// sample N+1, painting it as an outlier; collecting up-front gives every sample the
- /// same starting heap shape.
- /// 3) Returns (median, min, max) so the caller can surface the inter-sample range — visible
- /// noise floor for the row, replacing the previous "median only" view.
- ///
- private static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null)
- {
- var samples = Configuration.BenchmarkSamples;
- if (samples <= 1)
- {
- // Single-sample fast path (Debug or trivial run) — no allocation, no sort, no stddev.
- var sw = Stopwatch.StartNew();
- RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
- sw.Stop();
- var ms = sw.Elapsed.TotalMilliseconds;
- EndProgress(progressLabel, ms);
- return (ms, ms, ms, 0);
- }
-
- // Pilot sample (discarded). Counts as sample index 0 of (samples + 1) for progress display
- // so the user sees an extra "warmup-ish" tick before the recorded samples start.
- GC.Collect();
- GC.WaitForPendingFinalizers();
- GC.Collect();
-
- var pilotSw = Stopwatch.StartNew();
- RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0);
- pilotSw.Stop();
- // intentionally not stored
-
- var times = new double[samples];
- for (var s = 0; s < samples; s++)
- {
- // Per-sample GC settle. Forces every sample to start from the same heap state, so
- // a Gen-2 pause caused by the previous sample doesn't bleed into the next sample's
- // timing. Cost is paid OUTSIDE the Stopwatch window — no impact on the measurement.
- GC.Collect();
- GC.WaitForPendingFinalizers();
- GC.Collect();
-
- var sw = Stopwatch.StartNew();
- RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1);
- sw.Stop();
- times[s] = sw.Elapsed.TotalMilliseconds;
- }
-
- // Capture min/max/sum/sumSq BEFORE sort to avoid order ambiguity (Array.Sort is in-place).
- var minMs = double.MaxValue;
- var maxMs = double.MinValue;
- var sum = 0.0;
- var sumSq = 0.0;
-
- for (var i = 0; i < times.Length; i++)
- {
- var t = times[i];
- sum += t;
- sumSq += t * t;
- if (t < minMs) minMs = t;
- if (t > maxMs) maxMs = t;
- }
- // Population stddev (not sample-stddev — we treat the captured samples as the population for
- // CV computation). variance = E[X²] - E[X]² with Math.Max(0, ...) guard against tiny negative
- // values from FP rounding when samples are nearly identical.
- var mean = sum / times.Length;
- var variance = (sumSq / times.Length) - (mean * mean);
- var stdDevMs = Math.Sqrt(Math.Max(0.0, variance));
-
- Array.Sort(times);
- // Median: middle value for odd sample counts, average of two middles for even counts.
- var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0;
- EndProgress(progressLabel, medianMs);
-
- return (medianMs, minMs, maxMs, stdDevMs);
- }
-
- ///
- /// Per-cell adaptive iteration calibration. Runs a 100-iter measurement after warmup and computes
- /// how many iterations are needed to reach wall-clock per sample.
- /// Returns iter rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and
- /// ceiling-capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode
- /// (Configuration.BenchmarkSamples <= 1) returns the global unchanged —
- /// calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop and
- /// does NOT count toward warmup; its sole purpose is to measure per-op cost.
- ///
- private static int CalibrateIterations(Action action, int targetMs)
- {
- if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path
-
- GC.Collect();
- GC.WaitForPendingFinalizers();
- GC.Collect();
-
- const int calibIter = 100;
- var sw = Stopwatch.StartNew();
- for (var i = 0; i < calibIter; i++) action();
- sw.Stop();
- var ms = sw.Elapsed.TotalMilliseconds;
-
- // Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help).
- if (ms <= 0.0001) return 200_000;
-
- var iterPerMs = calibIter / ms;
- var raw = (int)Math.Ceiling(targetMs * iterPerMs);
- // Round UP to nearest 1000 — keeps numbers human-readable in the markdown output.
- var rounded = ((raw + 999) / 1000) * 1000;
-
- return rounded switch
- {
- < 1000 => 1000,
- > 200_000 => 200_000,
- _ => rounded
- };
- }
-
- ///
- /// Measures per-call allocation in bytes after a clean GC. Single dedicated sample (no median) — keeps timing samples pure.
- ///
- private static long MeasureAllocation(Action action, int iterations, string? progressLabel = null)
- {
- GC.Collect();
- GC.WaitForPendingFinalizers();
- GC.Collect();
-
- var sw = Stopwatch.StartNew();
- var before = GC.GetAllocatedBytesForCurrentThread();
- RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
-
- var after = GC.GetAllocatedBytesForCurrentThread();
- sw.Stop();
- EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);
- return (after - before) / iterations;
- }
-
- ///
- /// Process-wide allocation measurement — needed for round-trip-only benchmarks (NamedPipe etc.) where
- /// the work happens across multiple threads. would
- /// only count the caller-thread allocations, missing the server-side new byte[len] buffers and
- /// any drain-pump-thread allocations. covers the entire process.
- /// Slightly noisier than the per-thread variant (background threads / GC bookkeeping leak in), but
- /// over 1000 iterations the signal dominates.
- ///
- private static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null)
- {
- GC.Collect();
- GC.WaitForPendingFinalizers();
- GC.Collect();
-
- var sw = Stopwatch.StartNew();
- var before = GC.GetTotalAllocatedBytes(precise: true);
- RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
-
- var after = GC.GetTotalAllocatedBytes(precise: true);
- sw.Stop();
- EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);
- return (after - before) / iterations;
- }
-
- // ============================================================================================
- // Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase
- // and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the
- // MeasureAllocation* helpers when the caller passes a non-null progressLabel.
- // ============================================================================================
-
- // Tracks the longest line written by the current progress session, so EndProgress can clear
- // any leftover characters from a prior longer line (avoids "ghost" trailing chars after \r).
- private static int _progressLastLineLen;
-
- ///
- /// Runs times, emitting \r-overwriting
- /// progress every ~10% (approx. 10 progress prints per sample). When
- /// is null, runs without any progress output (zero overhead beyond a null check per iter).
- ///
- private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex)
- {
- if (label is null)
- {
- for (var i = 0; i < iterations; i++) action();
- return;
- }
-
- // ~10 progress emits per sample run. Avoid emitting on every iter (Console.Write is
- // expensive enough to skew sub-µs benchmarks if overdone).
- var step = Math.Max(1, iterations / 10);
- for (var i = 0; i < iterations; i++)
- {
- action();
- if ((i + 1) % step == 0 || i == iterations - 1)
- {
- var pct = (int)((i + 1) * 100L / iterations);
- var line = samples > 1
- ? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({i + 1}/{iterations})"
- : $" > {label} {pct,3}% ({i + 1}/{iterations})";
-
- System.Console.Write('\r');
- System.Console.Write(line);
-
- if (line.Length < _progressLastLineLen)
- System.Console.Write(new string(' ', _progressLastLineLen - line.Length));
-
- _progressLastLineLen = line.Length;
- }
- }
- }
-
- ///
- /// Closes a progress line cleanly: clears any leftover chars and writes a final "done" line on
- /// the same row, terminated by \n so subsequent WriteLine calls render below.
- ///
- private static void EndProgress(string? label, double elapsedMs)
- {
- if (label is null) return;
- var done = $" > {label} done in {elapsedMs,7:F1} ms";
-
- System.Console.Write('\r');
- System.Console.Write(done);
-
- if (done.Length < _progressLastLineLen)
- System.Console.Write(new string(' ', _progressLastLineLen - done.Length));
-
- System.Console.WriteLine();
- _progressLastLineLen = 0;
- }
-
-#if !AYCODE_NATIVEAOT
- private static readonly JsonSerializerOptions VerifyJsonOpts = new()
- {
- WriteIndented = false,
-
- DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
- ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles
- };
-#endif
-
- ///
- /// Round-trip equality check: serialize both via System.Text.Json (canonical form) and compare strings.
- /// Slower than property-by-property compare, but universal — works for any object graph without custom comparer.
- ///
- ///
- /// AOT publish skip: System.Text.Json's reflection path uses runtime closed-generic instantiation
- /// (JsonPropertyInfo<TestStatus> et al.) that the trimmer drops, causing
- /// NotSupportedException: missing native code or metadata. The validation is JIT-only — the actual
- /// benchmark Serialize/Deserialize loops don't touch this path. Under AOT we return true so all
- /// VerifyRoundTrip() calls pass without running the cross-format validation.
- ///
- private static bool DeepEqualsViaJson(object? a, object? b)
- {
-#if AYCODE_NATIVEAOT
- // Skip cross-format validation under AOT — STJ reflection path is incompatible. The roundtrip
- // itself still runs (caller-side Serialize+Deserialize), just the JSON-canonical compare is bypassed.
- return true;
-#else
- if (a == null && b == null) return true;
- if (a == null || b == null) return false;
-
- var jsonA = JsonSerializer.Serialize(a, VerifyJsonOpts);
- var jsonB = JsonSerializer.Serialize(b, VerifyJsonOpts);
-
- return jsonA == jsonB;
-#endif
- }
-
- ///
- /// Validates MemoryPack setup at startup. Aborts the benchmark if TestOrder is not [MemoryPackable].
- /// Without this attribute, MemoryPack falls back to runtime resolver (slower) — comparison would be INVALID.
- ///
- private static void ValidateMemoryPackSetup()
- {
- var typesToCheck = new[] { typeof(TestOrder) };
-
- foreach (var type in typesToCheck)
- {
- var hasAttr = type.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true).Any();
- if (!hasAttr)
- {
- System.Console.Error.WriteLine($"❌ FATAL: {type.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim.");
- System.Console.Error.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it.");
-
- Environment.Exit(1);
- }
- }
- }
-
- ///
- /// Filters test data sets by layer keyword. Layered approach lets you run only what's needed for the iteration cadence.
- /// P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep). Comprehensive and Edge layers will be expanded in P2.
- ///
- private static List FilterByLayer(List all, string layer)
- {
- if (layer == "all") return all.ToList();
-
- var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" };
- // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc.
- var comprehensiveExtras = new string[] { /* P2 */ };
- // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc.
- var edgeExtras = new string[] { /* P3 */ };
-
- return layer switch
- {
- "core" => all.Where(t => StartsWithAny(t.Name, coreNames)).ToList(),
- "comprehensive" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras)).ToList(),
- "edge" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras) || StartsWithAny(t.Name, edgeExtras)).ToList(),
- // Single-cell A/B mini-suite filters — match by case-insensitive prefix on Name.
- // Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated`
- // or interactive menu shortcut), avoiding the full ~110 sec suite when only one cell is in scope.
- "small" => all.Where(t => t.Name.StartsWith("Small", StringComparison.OrdinalIgnoreCase)).ToList(),
- "medium" => all.Where(t => t.Name.StartsWith("Medium", StringComparison.OrdinalIgnoreCase)).ToList(),
- "large" => all.Where(t => t.Name.StartsWith("Large", StringComparison.OrdinalIgnoreCase)).ToList(),
- "repeated" => all.Where(t => t.Name.StartsWith("Repeated", StringComparison.OrdinalIgnoreCase)).ToList(),
- "deep" => all.Where(t => t.Name.StartsWith("Deep", StringComparison.OrdinalIgnoreCase)).ToList(),
- _ => all.ToList()
- };
-
- static bool StartsWithAny(string name, string[] prefixes) => prefixes.Any(name.StartsWith);
- }
-
#endregion
#region Serializer Implementations
@@ -1071,7 +725,7 @@ public static class Program
{
var bytes = AcBinarySerializer.Serialize(_order, _options);
var roundTripped = AcBinaryDeserializer.Deserialize(bytes, _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -1108,7 +762,7 @@ public static class Program
{
var bytes = MemoryPackSerializer.Serialize(_order, _options);
var roundTripped = MemoryPackSerializer.Deserialize(bytes, _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -1157,7 +811,7 @@ public static class Program
{
var bytes = MessagePackSerializer.Serialize(_order, _options);
var roundTripped = MessagePackSerializer.Deserialize(bytes, _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
#endif
@@ -1219,7 +873,7 @@ public static class Program
var abw = new ArrayBufferWriter();
AcBinarySerializer.Serialize(_order, abw, _options);
var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -1420,7 +1074,7 @@ public static class Program
{
Serialize();
var result = _lastResult as TestOrder;
- return result != null && DeepEqualsViaJson(_order, result);
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
}
finally
{
@@ -1605,7 +1259,7 @@ public static class Program
{
Serialize();
var result = _lastResult as TestOrder;
- return result != null && DeepEqualsViaJson(_order, result);
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
}
finally
{
@@ -1653,7 +1307,7 @@ public static class Program
///
/// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches
/// 's API contract); the receive-side scratch buffer is also allocated per-iter
- /// on the consumer-task (counted via GC.GetTotalAllocatedBytes in MeasureAllocationTotal).
+ /// on the consumer-task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal).
///
private sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
@@ -1747,7 +1401,7 @@ public static class Program
try
{
var size = _pendingReadSize;
- var bytes = new byte[size]; // per-iter alloc — counted by MeasureAllocationTotal
+ var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
var totalRead = 0;
while (totalRead < size)
{
@@ -1814,7 +1468,7 @@ public static class Program
{
Serialize();
var result = _lastResult as TestOrder;
- return result != null && DeepEqualsViaJson(_order, result);
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
}
finally
{
@@ -1981,7 +1635,7 @@ public static class Program
{
Serialize();
var result = _lastResult as TestOrder;
- return result != null && DeepEqualsViaJson(_order, result);
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
}
finally
{
@@ -2049,7 +1703,7 @@ public static class Program
var abw = new ArrayBufferWriter();
MemoryPackSerializer.Serialize(abw, _order, _options);
var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -2107,7 +1761,7 @@ public static class Program
AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -2163,7 +1817,7 @@ public static class Program
_bufferWriter.ResetWrittenCount();
MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}
@@ -2206,7 +1860,7 @@ public static class Program
{
var json = JsonSerializer.Serialize(_order, _options);
var roundTripped = JsonSerializer.Deserialize(json, _options);
- return DeepEqualsViaJson(_order, roundTripped);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
}
}