367 lines
18 KiB
C#
367 lines
18 KiB
C#
using AyCode.Core.Tests.TestModels;
|
|
using MemoryPack;
|
|
using System.Diagnostics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Text.Json;
|
|
|
|
namespace AyCode.Core.Serializers.Console;
|
|
|
|
/// <summary>
|
|
/// Benchmark execution helpers: timing (<see cref="RunTimed"/>), per-cell adaptive iteration
|
|
/// calibration (<see cref="CalibrateIterations"/>), allocation measurement
|
|
/// (<see cref="MeasureAllocation"/> + <see cref="MeasureAllocationTotal"/>), in-place
|
|
/// <c>\r</c>-progress reporting, full-GC phase-boundary helper (<see cref="ForceGcCollect"/>),
|
|
/// startup validation (<see cref="ValidateMemoryPackSetup"/>), and per-cell round-trip equality
|
|
/// (<see cref="DeepEqualsViaJson"/>). Pure benchmark-execution infrastructure — no display
|
|
/// formatting (that lives in <c>Output</c>) and no per-engine glue (which lives with the
|
|
/// individual <c>ISerializerBenchmark</c> implementations).
|
|
/// </summary>
|
|
internal static class BenchmarkLoop
|
|
{
|
|
/// <summary>
/// Brings the managed heap to a settled state at a phase boundary of the benchmark loop.
/// Sequence: a forced, blocking gen-2 collection; then <see cref="GC.WaitForPendingFinalizers"/> so queued
/// finalizers get to run; then a second forced, blocking gen-2 collection to reclaim whatever those
/// finalizers released. The intent is that the next warmup/measurement phase does not inherit garbage
/// left behind by the previous phase (write-buffer pools, intern cache, write-plan arrays, etc.).
/// Called between every Ser-phase / Des-phase boundary in <c>RunBenchmarksForTestData</c>.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
internal static void ForceGcCollect()
{
    CollectEverything();
    GC.WaitForPendingFinalizers();
    CollectEverything();

    // Generation 2 == full collection; Forced + blocking means the call returns only once it has finished.
    static void CollectEverything() => GC.Collect(2, GCCollectionMode.Forced, blocking: true);
}
|
|
|
|
/// <summary>
/// Runs <paramref name="action"/> <paramref name="iterations"/> times per sample, for
/// <see cref="Configuration.BenchmarkSamples"/> independent samples, and returns the median, min, max and
/// population standard deviation of the per-sample wall-clock time (all in milliseconds).
/// Taking several samples smooths transient effects (background activity, thermal/turbo state).
/// When <see cref="Configuration.BenchmarkSamples"/> &lt;= 1, falls back to single-sample timing (Debug / quick mode)
/// and reports that single value as median, min and max with a standard deviation of 0.
/// When <paramref name="progressLabel"/> is non-null, emits in-place <c>\r</c> progress updates so a
/// stuck benchmark (e.g. deadlocked NamedPipe row) is visibly stuck at a specific % rather than
/// silently hanging.
///
/// Stabilization (added 2026-05-07):
/// 1) A pilot sample is run BEFORE the recorded loop and discarded. The first measurement after
///    warmup tends to absorb residual JIT and GC bookkeeping; dropping it tightens the min/max
///    range without throwing away signal.
/// 2) A full GC settle (<see cref="ForceGcCollect"/>) runs BEFORE the pilot and before every recorded
///    sample, outside the Stopwatch window, so GC pressure from sample N does not surface as a
///    Gen-2 pause inside sample N+1.
/// 3) Returns (median, min, max, stdDev) so the caller can surface the inter-sample spread —
///    a visible noise floor for the row — instead of a "median only" view.
/// </summary>
/// <param name="action">The operation to time; invoked <paramref name="iterations"/> times per sample.</param>
/// <param name="iterations">Number of invocations per sample.</param>
/// <param name="progressLabel">Label for progress output, or <c>null</c> to run silently.</param>
/// <returns>Median, minimum, maximum and population standard deviation of the sample times, in milliseconds.</returns>
internal static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null)
{
    var samples = Configuration.BenchmarkSamples;
    if (samples <= 1)
    {
        // Single-sample fast path (Debug or trivial run) — no array, no sort, no stddev.
        var sw = Stopwatch.StartNew();
        RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
        sw.Stop();

        var ms = sw.Elapsed.TotalMilliseconds;
        EndProgress(progressLabel, ms);
        return (ms, ms, ms, 0);
    }

    // Pilot sample (discarded, so it is not timed). Counts as sample index 0 of (samples + 1) for the
    // progress display, so the user sees an extra "warmup-ish" tick before the recorded samples start.
    ForceGcCollect();
    RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0);

    var times = new double[samples];
    for (var s = 0; s < samples; s++)
    {
        // Per-sample GC settle: every sample starts from the same heap state. The cost is paid
        // OUTSIDE the Stopwatch window, so it does not affect the measurement.
        ForceGcCollect();

        var sw = Stopwatch.StartNew();
        RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1);
        sw.Stop();
        times[s] = sw.Elapsed.TotalMilliseconds;
    }

    // Sorting first makes min / max / median simple index lookups.
    Array.Sort(times);
    var minMs = times[0];
    var maxMs = times[samples - 1];

    // Population stddev (not sample stddev — the captured samples are treated as the population for
    // CV computation). Two-pass form: mean first, then the mean of squared deviations. Unlike
    // E[X²] - E[X]² this cannot go negative and does not lose precision when samples are nearly identical.
    var sum = 0.0;
    foreach (var t in times)
    {
        sum += t;
    }

    var mean = sum / samples;

    var squaredDeviations = 0.0;
    foreach (var t in times)
    {
        var delta = t - mean;
        squaredDeviations += delta * delta;
    }

    var stdDevMs = Math.Sqrt(squaredDeviations / samples);

    // Median: middle value for odd sample counts, average of the two middles for even counts.
    var mid = samples / 2;
    var medianMs = samples % 2 == 1 ? times[mid] : (times[mid - 1] + times[mid]) / 2.0;
    EndProgress(progressLabel, medianMs);

    return (medianMs, minMs, maxMs, stdDevMs);
}
|
|
|
|
/// <summary>
/// Per-cell adaptive iteration calibration. Times 100 invocations of <paramref name="action"/> and
/// extrapolates how many iterations are needed for one sample to take about <paramref name="targetMs"/>
/// milliseconds of wall-clock time (callers typically pass <see cref="Configuration.TargetSampleMs"/>).
/// The result is rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and
/// ceiling-capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode
/// (<c>Configuration.BenchmarkSamples &lt;= 1</c>) returns the global <see cref="Configuration.TestIterations"/>
/// unchanged — calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop;
/// its sole purpose is to measure per-op cost.
/// </summary>
/// <param name="action">The operation whose per-call cost is being estimated.</param>
/// <param name="targetMs">Desired wall-clock duration of one sample, in milliseconds.</param>
/// <returns>An iteration count that is a multiple of 1000 in the range [1000, 200_000], or
/// <see cref="Configuration.TestIterations"/> in single-sample mode.</returns>
internal static int CalibrateIterations(Action action, int targetMs)
{
    if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path

    const int calibrationIterations = 100;
    const int minIterations = 1000;
    const int maxIterations = 200_000;
    const int roundingStep = 1000;

    ForceGcCollect();

    var sw = Stopwatch.StartNew();
    for (var i = 0; i < calibrationIterations; i++) action();
    sw.Stop();
    var elapsedMs = sw.Elapsed.TotalMilliseconds;

    // Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help).
    if (elapsedMs <= 0.0001) return maxIterations;

    // Keep the estimate as a double until it is known to be in range: for a very fast op and a large
    // target the product exceeds int.MaxValue, and casting it (or adding the rounding offset) would
    // overflow and wrongly land on the floor instead of the ceiling.
    var iterPerMs = calibrationIterations / elapsedMs;
    var required = Math.Ceiling(targetMs * iterPerMs);

    if (required >= maxIterations) return maxIterations;
    if (required <= minIterations) return minIterations;

    // Round UP to the nearest 1000 — keeps numbers human-readable in the markdown output.
    var rounded = ((int)required + roundingStep - 1) / roundingStep * roundingStep;
    return Math.Min(rounded, maxIterations);
}
|
|
|
|
/// <summary>
/// Measures the average number of bytes allocated per call of <paramref name="action"/> on the calling
/// thread, after a full GC settle. Uses one dedicated run (no median) so the timing samples stay pure.
/// Note: when <paramref name="progressLabel"/> is non-null, the progress strings built while the loop
/// runs are allocated on this thread as well and are therefore part of the measured delta.
/// </summary>
/// <param name="action">The operation to measure.</param>
/// <param name="iterations">Number of invocations to average over.</param>
/// <param name="progressLabel">Label for progress output, or <c>null</c> to run silently.</param>
/// <returns>Allocated bytes per call (integer division); 0 when <paramref name="iterations"/> is not positive.</returns>
internal static long MeasureAllocation(Action action, int iterations, string? progressLabel = null)
{
    ForceGcCollect();

    var sw = Stopwatch.StartNew();
    var before = GC.GetAllocatedBytesForCurrentThread();
    RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
    var after = GC.GetAllocatedBytesForCurrentThread();
    sw.Stop();

    EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);

    // Nothing ran for a non-positive count; report 0 instead of dividing by zero.
    return iterations > 0 ? (after - before) / iterations : 0;
}
|
|
|
|
/// <summary>
/// Process-wide allocation measurement — needed for round-trip-only benchmarks (NamedPipe etc.) where
/// the work happens across multiple threads. <see cref="GC.GetAllocatedBytesForCurrentThread"/> would
/// only count the caller-thread allocations, missing the server-side <c>new byte[len]</c> buffers and
/// any drain-pump-thread allocations. <see cref="GC.GetTotalAllocatedBytes"/> covers the entire process.
/// Slightly noisier than the per-thread variant (background threads, GC bookkeeping and — when
/// <paramref name="progressLabel"/> is non-null — the progress strings leak in), but averaged over many
/// iterations the signal is expected to dominate.
/// </summary>
/// <param name="action">The operation to measure.</param>
/// <param name="iterations">Number of invocations to average over.</param>
/// <param name="progressLabel">Label for progress output, or <c>null</c> to run silently.</param>
/// <returns>Process-wide allocated bytes per call (integer division); 0 when <paramref name="iterations"/> is not positive.</returns>
internal static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null)
{
    ForceGcCollect();

    var sw = Stopwatch.StartNew();
    var before = GC.GetTotalAllocatedBytes(precise: true);
    RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
    var after = GC.GetTotalAllocatedBytes(precise: true);
    sw.Stop();

    EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);

    // Nothing ran for a non-positive count; report 0 instead of dividing by zero.
    return iterations > 0 ? (after - before) / iterations : 0;
}
|
|
|
|
// ============================================================================================
|
|
// Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase
|
|
// and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the
|
|
// MeasureAllocation* helpers when the caller passes a non-null progressLabel.
|
|
// ============================================================================================
|
|
|
|
// Length of the most recent progress line written in the current progress session. A shorter
// follow-up line is padded with spaces up to this length so no "ghost" trailing characters from
// the previous line remain after the \r overwrite. Reset to 0 by EndProgress.
private static int _progressLastLineLen;

/// <summary>
/// Invokes <paramref name="action"/> <paramref name="iterations"/> times. With a non-null
/// <paramref name="label"/> it also rewrites a single console line (via <c>\r</c>) roughly every 10%
/// of the run and after the final iteration. With a null <paramref name="label"/> nothing is
/// printed — the null check happens once, before the loop.
/// </summary>
/// <param name="action">The operation to invoke.</param>
/// <param name="iterations">How many times to invoke it.</param>
/// <param name="label">Text shown at the start of the progress line, or <c>null</c> for a silent run.</param>
/// <param name="samples">Total sample count for the display; the "sample x/y" part is shown only when this is above 1.</param>
/// <param name="sampleIndex">Zero-based index of the current sample (displayed one-based).</param>
private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex)
{
    if (label is null)
    {
        for (var n = 0; n < iterations; n++)
        {
            action();
        }

        return;
    }

    // About 10 console updates per run: Console.Write is expensive enough to skew sub-µs
    // benchmarks if it were emitted on every iteration.
    var reportEvery = Math.Max(1, iterations / 10);
    var showSample = samples > 1;

    for (var index = 0; index < iterations; index++)
    {
        action();

        var completed = index + 1;
        var isLast = index == iterations - 1;
        if (completed % reportEvery != 0 && !isLast)
        {
            continue;
        }

        // 100L forces 64-bit arithmetic so large iteration counts cannot overflow.
        var percent = (int)(completed * 100L / iterations);

        string text;
        if (showSample)
        {
            text = $" > {label} sample {sampleIndex + 1}/{samples} {percent,3}% ({completed}/{iterations})";
        }
        else
        {
            text = $" > {label} {percent,3}% ({completed}/{iterations})";
        }

        System.Console.Write('\r');
        System.Console.Write(text);

        // Blank out whatever the previous, longer line left behind.
        var stale = _progressLastLineLen - text.Length;
        if (stale > 0)
        {
            System.Console.Write(new string(' ', stale));
        }

        _progressLastLineLen = text.Length;
    }
}
|
|
|
|
/// <summary>
/// Finishes a progress line: overwrites the current row with a final "done in … ms" message, pads
/// over any characters left from a longer earlier line, then emits a newline so later
/// <c>WriteLine</c> calls render below. Also resets the tracked line length. Does nothing when
/// <paramref name="label"/> is null.
/// </summary>
/// <param name="label">The progress label used for this run, or <c>null</c> if progress was disabled.</param>
/// <param name="elapsedMs">Elapsed time to report, in milliseconds (printed with one decimal).</param>
private static void EndProgress(string? label, double elapsedMs)
{
    if (label is not null)
    {
        var summary = $" > {label} done in {elapsedMs,7:F1} ms";

        System.Console.Write('\r');
        System.Console.Write(summary);

        // Blank out the tail of a longer progress line that is still on screen.
        var stale = _progressLastLineLen - summary.Length;
        if (stale > 0)
        {
            System.Console.Write(new string(' ', stale));
        }

        System.Console.WriteLine();
        _progressLastLineLen = 0;
    }
}
|
|
|
|
#if !AYCODE_NATIVEAOT
// Serializer options used only for the round-trip comparison: compact output, null-valued
// properties omitted, object cycles ignored instead of throwing.
private static readonly JsonSerializerOptions VerifyJsonOpts = new JsonSerializerOptions
{
    ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles,
    DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    WriteIndented = false
};
#endif

/// <summary>
/// Round-trip equality check: both objects are serialized with System.Text.Json (used as a canonical
/// form) and the resulting strings are compared. Slower than a property-by-property compare, but
/// universal — works for any object graph without a custom comparer.
/// </summary>
/// <param name="a">First object; may be null.</param>
/// <param name="b">Second object; may be null.</param>
/// <returns><c>true</c> when both are null or both produce the same JSON text; <c>false</c> when exactly
/// one is null or the JSON differs. Always <c>true</c> when compiled with <c>AYCODE_NATIVEAOT</c>.</returns>
/// <remarks>
/// AOT publish skip: <c>System.Text.Json</c>'s reflection path uses runtime closed-generic instantiation
/// (<c>JsonPropertyInfo&lt;TestStatus&gt;</c> et al.) that the trimmer drops, causing
/// <c>NotSupportedException: missing native code or metadata</c>. The validation is JIT-only — the actual
/// benchmark Serialize/Deserialize loops don't touch this path. Under AOT we return <c>true</c> so all
/// <c>VerifyRoundTrip()</c> calls pass without running the cross-format validation.
/// </remarks>
internal static bool DeepEqualsViaJson(object? a, object? b)
{
#if AYCODE_NATIVEAOT
    // Cross-format validation is skipped under AOT — the STJ reflection path is incompatible. The
    // round trip itself still runs on the caller side; only the JSON-canonical compare is bypassed.
    return true;
#else
    if (a is null || b is null)
    {
        // Equal only when BOTH sides are null.
        return a is null && b is null;
    }

    var left = JsonSerializer.Serialize(a, VerifyJsonOpts);
    var right = JsonSerializer.Serialize(b, VerifyJsonOpts);

    return string.Equals(left, right, StringComparison.Ordinal);
#endif
}
|
|
|
|
/// <summary>
/// Startup guard for the MemoryPack comparison: checks that every type in the internal list
/// (currently only <c>TestOrder</c>) carries <c>[MemoryPackable]</c>. If one does not, two
/// diagnostic lines are written to stderr and the process exits with code 1 — without the attribute
/// MemoryPack falls back to its runtime resolver (slower), which would make the comparison INVALID.
/// </summary>
internal static void ValidateMemoryPackSetup()
{
    Type[] requiredTypes = { typeof(TestOrder) };

    foreach (var candidate in requiredTypes)
    {
        var markers = candidate.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true);
        if (markers.Length != 0)
        {
            continue;
        }

        var stderr = System.Console.Error;
        stderr.WriteLine($"❌ FATAL: {candidate.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim.");
        stderr.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it.");

        Environment.Exit(1);
    }
}
|
|
|
|
/// <summary>
/// Filters test data sets by layer keyword. The layered approach lets you run only what is needed for the
/// iteration cadence. P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep); the Comprehensive
/// and Edge layers will be expanded in P2/P3.
/// </summary>
/// <param name="all">Every available test data set. The list itself is never modified; a new list is returned.</param>
/// <param name="layer">
/// Layer keyword, matched case-insensitively: <c>core</c>, <c>comprehensive</c>, <c>edge</c>, or one of the
/// single-cell names (<c>small</c>, <c>medium</c>, <c>large</c>, <c>repeated</c>, <c>deep</c>).
/// <c>all</c> and any unrecognized keyword select everything.
/// </param>
/// <returns>A new list with the data sets whose <c>Name</c> starts with one of the selected prefixes
/// (ordinal, case-insensitive).</returns>
internal static List<TestDataSet> FilterByLayer(List<TestDataSet> all, string layer)
{
    // One comparison mode for keywords AND name prefixes, so "core" selects exactly the union of the
    // single-cell filters regardless of current culture or casing.
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    string[] coreNames = { "Small", "Medium", "Large", "Repeated", "Deep" };
    // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc.
    string[] comprehensiveExtras = { /* P2 */ };
    // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc.
    string[] edgeExtras = { /* P3 */ };

    string[]? prefixes;

    // Single-cell A/B mini-suite filters — the keyword is itself one of the core prefixes.
    // Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated`
    // or interactive menu shortcut), avoiding the full ~110 sec suite when only one cell is in scope.
    var singleCell = Array.Find(coreNames, name => name.Equals(layer, ignoreCase));
    if (singleCell is not null)
    {
        prefixes = new[] { singleCell };
    }
    else if (IsLayer("core"))
    {
        prefixes = coreNames;
    }
    else if (IsLayer("comprehensive"))
    {
        prefixes = coreNames.Concat(comprehensiveExtras).ToArray();
    }
    else if (IsLayer("edge"))
    {
        prefixes = coreNames.Concat(comprehensiveExtras).Concat(edgeExtras).ToArray();
    }
    else
    {
        // "all" and unknown keywords: no filtering.
        prefixes = null;
    }

    if (prefixes is null)
    {
        return all.ToList();
    }

    return all.Where(t => prefixes.Any(p => t.Name.StartsWith(p, ignoreCase))).ToList();

    bool IsLayer(string keyword) => string.Equals(layer, keyword, ignoreCase);
}
|
|
}
|