AyCode.Core/AyCode.Core.Serializers.Con.../Program.cs

3323 lines
188 KiB
C#
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using AyCode.Core.Compression;
using AyCode.Core.Serializers.Attributes;
using AyCode.Core.Serializers.Binaries;
using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark)
using AyCode.Core.Tests.TestModels;
using MemoryPack;
#if !AYCODE_NATIVEAOT
using MessagePack;
using MessagePack.Resolvers;
#endif
using Microsoft.Extensions.Options;
using System.Buffers;
using System.Diagnostics;
using System.IO.Pipelines;
using System.IO.Pipes;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
namespace AyCode.Core.Serializers.Console;
/// <summary>
/// Comprehensive benchmark application for all serializers.
/// Compares: AcBinary (all options), MemoryPack, MessagePack, Newtonsoft.Json, System.Text.Json
///
/// Usage:
/// dotnet run # Run all benchmarks
/// dotnet run -- quick # Quick mode (fewer iterations)
/// dotnet run -- serialize # Serialize only
/// dotnet run -- deserialize # Deserialize only
/// </summary>
public static class Program
{
// Configuration (constants, mutable state, attribute-flag aggregation) → Configuration.cs
/// <summary>
/// Common Options-column formatter for every AcBinary serializer benchmark row. Renders the
/// configured options-level value AND the effective attribute-level enable flag side-by-side
/// (e.g. <c>Interning=All(opt) | False (attr)</c>) so attribute-suppressed features cannot
/// silently mislead. Pass any benchmark-specific extras (e.g. <c>", BufferSize=4096B"</c>)
/// in <paramref name="extra"/> — they are appended after the common fields.
/// </summary>
private static string BuildAcBinaryOptionsDescription(AcBinarySerializerOptions options, string extra = "")
{
// PropertyFilter: opt-side is "Set"/"None" depending on whether a callback is registered (the callback
// itself isn't a meaningful display value); attr-side is the cross-type-aggregated bool (true = every
// tagged type has the feature enabled, false = at least one type opted out via
// [AcBinarySerializable(enablePropertyFilterFeature: false)] → SGen-emit + Runtime hot-loop both gate).
var propFilterOpt = options.PropertyFilter == null ? "None" : "Set";
return $"WireMode={options.WireMode}, " +
$"RefHandling={options.ReferenceHandling}(opt) | {Configuration.AttrFlags.refHandling} (attr), " +
$"Interning={options.UseStringInterning}(opt) | {Configuration.AttrFlags.internString} (attr), " +
$"Metadata={options.UseMetadata}(opt) | {Configuration.AttrFlags.metadata} (attr), " +
$"PropertyFilter={propFilterOpt}(opt) | {Configuration.AttrFlags.propertyFilter} (attr), " +
$"SGen={options.UseGeneratedCode}, " +
$"Compression={options.UseCompression}{extra}";
}
/// <summary>
/// Returns MemoryPack serializer options aligned with <see cref="Configuration.SelectedWireMode"/> for a fair
/// apples-to-apples wire-format comparison:
/// <list type="bullet">
/// <item><see cref="WireMode.Compact"/> → <see cref="MemoryPackSerializerOptions.Default"/> (UTF-8) — both
/// engines encode UTF-8, comparison is purely about header / tier / dispatch overhead.</item>
/// <item><see cref="WireMode.Fast"/> → <see cref="MemoryPackSerializerOptions.Utf16"/> (UTF-16 raw memcpy) —
/// both engines write UTF-16 raw bytes, so wire-size and CPU comparison reflect the same string-encoding family.</item>
/// </list>
/// Without this alignment the FastWire vs MemPack-default comparison conflates two unrelated dimensions
/// (UTF-16 raw vs UTF-8 encoded) and produces a misleading +40% wire-size delta that is structurally
/// the encoding-family difference, NOT an AcBinary-specific overhead.
/// </summary>
private static MemoryPackSerializerOptions GetMemPackOptions() =>
Configuration.SelectedWireMode == WireMode.Fast
? MemoryPackSerializerOptions.Utf16
: MemoryPackSerializerOptions.Default;
/// <summary>
/// Converts a total-time (in ms across <see cref="Configuration.TestIterations"/>) into per-operation microseconds.
/// Formula: <c>totalMs / iterations × 1000</c>. The benchmark stores <c>*TimeMs</c> as the cumulative
/// median over the timing run; the display layer renders per-op µs to make numbers iteration-count
/// independent (e.g. switching <c>Configuration.TestIterations</c> 1000 → 100 leaves the displayed µs/op unchanged
/// — only its sample noise grows). Symmetric with the already-per-op <c>*AllocBytesPerOp</c> fields.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
/// <summary>
/// Converts a total-time (in ms across <paramref name="iterations"/>) into per-operation microseconds.
/// Per-op µs is the iter-independent unit: 1000 iter and 50000 iter of the same operation should
/// produce the same per-op µs (within noise). Necessary because per-cell adaptive iteration makes
/// <c>iterations</c> a per-row property — there is no longer a single global Configuration.TestIterations to divide by.
/// </summary>
private static double ToPerOpMicros(double totalMs, int iterations) => iterations > 0 ? totalMs / iterations * 1000.0 : 0;
// Per-row per-op µs accessors — pull batch-time + iter from BenchmarkResult and convert. Used wherever
// averaging or comparison happens across rows with potentially different iter counts (Winners summary,
// Overall comparison, per-cell summary row). Keeping these as methods rather than properties on
// BenchmarkResult preserves the result-as-data-bag distinction.
private static double SerPerOp(BenchmarkResult r) => ToPerOpMicros(r.SerializeTimeMs, r.SerializeIterations);
private static double DesPerOp(BenchmarkResult r) => ToPerOpMicros(r.DeserializeTimeMs, r.DeserializeIterations);
private static double RtPerOp(BenchmarkResult r) => ToPerOpMicros(r.RoundTripTimeMs, r.RoundTripIterations);
/// <summary>
/// Per-cell-paired aggregation of an overall comparison. Captures three different aggregation
/// strategies so the reader can judge whether the headline delta is dominated by one large cell
/// (arithmetic mean) or representative of typical workload (geometric mean / median).
/// </summary>
/// <param name="ArithMeanPct">Arithmetic mean of µs/op — magnitude-weighted; biased toward Large cell.</param>
/// <param name="GeoMeanPct">Geometric mean of per-cell ratios — magnitude-neutral; each cell weighted equally.</param>
/// <param name="MedianPct">Median of per-cell ratios — outlier-resistant.</param>
/// <param name="AcAvg">Arithmetic mean AcBinary value (µs/op or bytes).</param>
/// <param name="MpAvg">Arithmetic mean MemPack value.</param>
/// <param name="CellCount">Number of paired cells contributing to the geo/median.</param>
private record OverallStats(double ArithMeanPct, double GeoMeanPct, double MedianPct, double AcAvg, double MpAvg, int CellCount);
/// <summary>
/// Computes arithmetic + geometric + median aggregation of an AcBinary-vs-MemPack comparison
/// across paired cells (joined by <c>TestDataName</c>). Per-cell pairing is required for the
/// geo/median variants — a cell where AcBinary or MemPack is missing is dropped from all stats.
/// Returns null when no paired cell has a valid value.
/// </summary>
private static OverallStats? ComputeOverallStats(List<BenchmarkResult> acResults, List<BenchmarkResult> mpResults, Func<BenchmarkResult, double> getValue)
{
if (acResults.Count == 0 || mpResults.Count == 0) return null;
var pairs = (from ac in acResults
join mp in mpResults on ac.TestDataName equals mp.TestDataName
let acV = getValue(ac)
let mpV = getValue(mp)
where acV > 0 && mpV > 0
select (ac: acV, mp: mpV)).ToList();
if (pairs.Count == 0) return null;
var acAvg = pairs.Average(p => p.ac);
var mpAvg = pairs.Average(p => p.mp);
var ratios = pairs.Select(p => p.ac / p.mp).ToList();
// Geometric mean: exp(avg(ln(ratios))) — numerically stable vs Π ratios then ^(1/N).
var geoMean = Math.Exp(ratios.Sum(Math.Log) / ratios.Count);
// Median (paired-ratio): for even N use the midpoint of the two middle values.
var sorted = ratios.OrderBy(r => r).ToList();
var median = sorted.Count % 2 == 1
? sorted[sorted.Count / 2]
: (sorted[sorted.Count / 2 - 1] + sorted[sorted.Count / 2]) / 2.0;
return new OverallStats(
ArithMeanPct: (acAvg / mpAvg - 1) * 100,
GeoMeanPct: (geoMean - 1) * 100,
MedianPct: (median - 1) * 100,
AcAvg: acAvg,
MpAvg: mpAvg,
CellCount: ratios.Count);
}
/// <summary>
/// Formats a per-op micros value with its inter-sample range and CV-threshold marker as
/// <c>"26.86 (24.5..29.1)"</c> or <c>"26.86 (24.5..29.1) ⚠5.2%"</c>. Median first, range in parentheses,
/// CV warning suffix only when CV > <see cref="Configuration.UnstableCVThreshold"/>. When min == max == median
/// (single-sample / Debug / quick mode), collapses to bare median to avoid visual clutter.
/// All time inputs are total-batch milliseconds; <paramref name="iterations"/> is the per-row iter
/// count (post-adaptive-calibration).
/// </summary>
private static string FormatMicrosWithRange(double medianMs, double minMs, double maxMs, double stdDevMs, int iterations, System.Globalization.CultureInfo inv)
{
var med = ToPerOpMicros(medianMs, iterations);
// No range data (single-sample fast path) — surface as bare median, identical to the prior format.
if (minMs <= 0 && maxMs <= 0) return med.ToString("F2", inv);
if (minMs >= medianMs && maxMs <= medianMs) return med.ToString("F2", inv);
var min = ToPerOpMicros(minMs, iterations);
var max = ToPerOpMicros(maxMs, iterations);
var range = $"{med.ToString("F2", inv)} ({min.ToString("F2", inv)}..{max.ToString("F2", inv)})";
// CV (coefficient of variation = stddev / mean) — flag rows above the unstable threshold so a
// small inter-engine delta on a high-CV row is easy to discount as noise.
if (medianMs > 0 && stdDevMs > 0)
{
var cv = stdDevMs / medianMs;
if (cv > Configuration.UnstableCVThreshold)
{
var cvPct = (cv * 100).ToString("F1", inv);
return $"{range} ⚠️{cvPct}%";
}
}
return range;
}
/// <summary>
/// Converts a byte count to KB (1 KB = 1024 B). Display-only helper so allocation columns can
/// render compact F2 KB values (e.g. <c>4.05 KB</c> instead of <c>4,144 B</c>) — header carries
/// the unit so per-row entries stay numbers-only. CSV / raw-data outputs keep the precise byte
/// integers untouched.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static double ToKilobytes(long bytes) => bytes / 1024.0;
public static void Main(string[] args)
{
// Set console encoding to UTF-8 for proper Unicode character display
System.Console.OutputEncoding = Encoding.UTF8;
// Setup validation — abort BEFORE any benchmark logic if MemoryPack baseline is invalid.
// Done early so user is told immediately, not after warmup.
ValidateMemoryPackSetup();
// CLI mode (args provided): run once, parse args, exit. Backward-compatible behaviour.
if (args.Length > 0)
{
if (!TryParseCliArgs(args, out var layer, out var opMode, out var serializerMode))
return; // invalid args
RunBenchmark(layer, opMode, serializerMode);
return;
}
// Interactive mode (no args): loop the menu so the user doesn't have to restart between runs.
// Q exits the menu (and the application).
while (true)
{
var selection = ShowInteractiveMenu();
if (selection == null) return; // user pressed Q
RunBenchmark(selection.Value.layer, "all", selection.Value.serializerMode);
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────────────────────────────");
System.Console.WriteLine("Returning to menu — press any key to continue, or Q to quit...");
var key = System.Console.ReadKey(intercept: true);
if (key.Key == ConsoleKey.Q) return;
System.Console.WriteLine();
}
}
/// <summary>
/// Parses CLI arguments into (layer, opMode, serializerMode). Returns <c>false</c> if the args
/// are invalid; the caller should then exit without running the standard benchmark.
/// </summary>
private static bool TryParseCliArgs(string[] args, out string layer, out string opMode, out string serializerMode)
{
layer = "all";
opMode = "all";
serializerMode = "standard";
var arg = args[0].ToLower();
// Quick mode: short warmup, few iterations, small sample count
if (arg == "quick")
{
Configuration.WarmupIterations = 5;
Configuration.TestIterations = 100;
Configuration.BenchmarkSamples = 3;
layer = "all";
}
else if (arg is "core" or "comprehensive" or "edge" or "all"
or "small" or "medium" or "large" or "repeated" or "deep")
{
layer = arg;
}
else if (arg is "asyncpipe" or "pipe")
{
// AsyncPipe-only mode: streaming I/O isolation across all test data.
layer = "all";
serializerMode = "asyncpipe";
}
else if (arg is "ser" or "serialize")
{
opMode = "serialize";
layer = "all";
}
else if (arg is "des" or "deserialize")
{
opMode = "deserialize";
layer = "all";
}
else
{
// Backwards compat: unknown arg → treat as layer keyword
layer = arg;
}
return true;
}
/// <summary>
/// Runs the benchmark suite end-to-end for the given configuration: pre-warmup → per-cell warmup
/// + measurement → grouped results print → save to disk. Used by both the CLI and interactive
/// menu paths; the interactive loop calls this repeatedly without restarting the process.
/// </summary>
private static void RunBenchmark(string layer, string opMode, string serializerMode)
{
System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════╗");
System.Console.WriteLine("║ COMPREHENSIVE SERIALIZER BENCHMARK SUITE ║");
System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════╝");
// Stabilization: pin the entire benchmark process to a single logical CPU and bump priority
// class. Single-core affinity stops Windows from migrating the bench thread between cores
// mid-sample (a migration evicts L1/L2 caches and corrupts a measurement); High priority
// reduces preemption by background tasks (Defender scans, indexer, etc.) that otherwise
// randomly inflate samples by 5-15%.
// Try/finally guarantees the original state is restored even if a benchmark throws — leaving
// a developer machine pinned to one core after a crashed run is a real foot-gun.
// Skipped on Debug single-sample mode (Configuration.BenchmarkSamples <= 1) where stabilization is moot.
var process = Process.GetCurrentProcess();
var origAffinity = (IntPtr)0;
var origPriority = ProcessPriorityClass.Normal;
var stabilizationApplied = false;
// ProcessorAffinity is only supported on Windows + Linux (CA1416). macOS would throw at
// runtime; skip the affinity step there but still raise priority class (which IS supported
// on macOS, just less effective for stabilization than affinity pinning).
if (Configuration.BenchmarkSamples > 1 && (OperatingSystem.IsWindows() || OperatingSystem.IsLinux()))
{
try
{
origAffinity = process.ProcessorAffinity;
origPriority = process.PriorityClass;
// Pin to CPU 0 (mask = 1). Choosing CPU 0 is arbitrary; what matters is "exactly one
// core, consistently" — not which one. If CPU 0 is heavily contended on the host
// (e.g. dedicated to system-wide IRQs on some Windows configs), the user can tweak
// the mask here. The benchmark is single-threaded for the in-memory rows so single
// core is sufficient; round-trip-only NamedPipe rows have a server-drain thread
// that will share the core (acceptable — the bench measures end-to-end RT anyway).
process.ProcessorAffinity = (IntPtr)1;
process.PriorityClass = ProcessPriorityClass.High;
stabilizationApplied = true;
System.Console.WriteLine($"Stabilization: pinned to CPU 0 (affinity=0x1), priority=High.");
}
catch (Exception ex)
{
// Affinity/priority changes may fail on locked-down hosts (group policies, containers
// without CAP_SYS_NICE on Linux, etc.). Surface and continue — the benchmark still
// works, just with the platform default scheduling.
System.Console.WriteLine($"Stabilization SKIPPED: {ex.GetType().Name}: {ex.Message}");
}
}
try
{
var allResults = new List<BenchmarkResult>();
var allTestDataSets = BenchmarkTestDataProvider.CreateTestDataSets();
var testDataSets = FilterByLayer(allTestDataSets, layer);
System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + pilot discard");
System.Console.WriteLine($"Build: {Configuration.BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"} | Test Cells: {testDataSets.Count}/{allTestDataSets.Count}");
System.Console.WriteLine();
// Global JIT pre-warmup — touches every (testdata × serializer) code path BEFORE any timing happens.
// Without this, the FIRST test data measured carries JIT-tier-promotion latency: the per-cell warmup
// alone doesn't ensure that every Serialize<T>/IBufferWriter overload is fully Tier 1 by the time we
// start measuring. Symptom: first cell's BufferWriter variants run ~2x slower than the SAME variants
// on later cells (e.g. Small BufWr reuse 9ms vs Medium BufWr reuse 4ms — even though Medium is bigger).
// Pre-warmup runs every overload at least once with each data shape so .NET 9's tiered JIT promotes
// them all in the background; the per-cell warmup that follows then locks in cache + branch state.
if (Configuration.BenchmarkSamples > 1) // skip in DEBUG (single-sample fast iteration)
{
System.Console.WriteLine($"Global JIT pre-warmup ({testDataSets.Count} cells × all serializers, light pass)...");
foreach (var testData in testDataSets)
{
var preSerializers = CreateSerializers(testData, serializerMode);
try
{
foreach (var s in preSerializers)
{
// Light warmup just to trigger Tier 0 → Tier 1 promotion. The per-cell 5000-iter warmup
// inside RunBenchmarksForTestData still runs afterwards for cache/BTB warming.
s.Warmup(2000);
}
}
finally
{
// Dispose any IDisposable serializers (NamedPipe / FileStream variants own OS resources).
foreach (var s in preSerializers) (s as IDisposable)?.Dispose();
}
}
// Let background tiered-JIT compilation drain before we begin measuring.
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
System.Console.WriteLine("✓ Global pre-warmup complete.\n");
}
foreach (var testData in testDataSets)
{
System.Console.WriteLine($"\n{'═'.ToString().PadRight(70, '═')}");
System.Console.WriteLine($"TEST DATA: {testData.DisplayName}");
System.Console.WriteLine($"{'═'.ToString().PadRight(70, '═')}");
var results = RunBenchmarksForTestData(testData, opMode, serializerMode);
allResults.AddRange(results);
}
// Print grouped results
PrintGroupedResults(allResults, testDataSets);
// Save results to file
SaveResults(allResults, testDataSets);
System.Console.WriteLine("\n✓ Benchmark complete!");
}
finally
{
// Restore process state — affinity/priority changes are process-wide and persist across
// interactive-mode iterations of the menu. Without restore, the second menu run would
// already be on CPU-0 + High priority before its own try-block applied them, masking
// any stabilization-disabled comparison.
if (stabilizationApplied && (OperatingSystem.IsWindows() || OperatingSystem.IsLinux()))
{
try { process.ProcessorAffinity = origAffinity; } catch { /* best-effort */ }
try { process.PriorityClass = origPriority; } catch { /* best-effort */ }
}
}
}
#region Benchmark Execution
/// <summary>
/// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain
/// in between: the first pass moves managed garbage to the finalization queue, <c>WaitForPendingFinalizers</c>
/// runs the finalizers, the second pass reclaims any objects the finalizers released. After this returns the
/// heap is in a known-quiescent state — the next warmup/measurement phase starts on a clean slate, isolated
/// from the previous phase's residual allocations (write-buffer pools, intern cache, write-plan arrays, etc.).
/// Called between every Ser-phase / Des-phase boundary in <see cref="RunBenchmarksForTestData"/>.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ForceGcCollect()
{
GC.Collect(2, GCCollectionMode.Forced, blocking: true);
GC.WaitForPendingFinalizers();
GC.Collect(2, GCCollectionMode.Forced, blocking: true);
}
private static List<BenchmarkResult> RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode)
{
var results = new List<BenchmarkResult>();
var serializers = CreateSerializers(testData, serializerMode);
// Round-trip correctness check — once per (cell × serializer), BEFORE warmup. Aborts the entire benchmark on failure.
System.Console.WriteLine("Verifying round-trip correctness...");
foreach (var serializer in serializers)
{
if (!serializer.VerifyRoundTrip())
{
System.Console.Error.WriteLine($"❌ FATAL: Round-trip verification FAILED for {serializer.Name} on {testData.DisplayName}");
System.Console.Error.WriteLine("Benchmark numbers from a serializer with broken round-trip would be meaningless. Aborting.");
Environment.Exit(1);
}
}
System.Console.WriteLine("✓ All serializers passed round-trip verification.");
// Per-serializer, PER-PHASE (warmup → calibrate → measurement) cycle: each serializer's Ser-path and
// Des-path get COMPLETELY ISOLATED warmup→measure rounds, with a GC.Collect at every phase boundary.
//
// Why phase-isolation: a combined warmup (Ser+Des interleaved) leaves the CPU I-cache + branch-predictor
// in a "compromise state" — neither Ser nor Des code-set dominates. The first phase to measure pays a
// cache-miss penalty as its code-set displaces the leftover-warmup-state. Isolated warmup→measure pairs
// keep the I-cache HOT for ONLY the measured path, both in the warmup (priming) and the measurement
// (steady-state). Branch-predictor history also stays clean per path.
//
// GC.Collect at every boundary: removes residual allocation pressure from the previous phase (write-buffer
// pool churn from Ser, deserialized object graph from Des) so the next phase starts with a quiescent
// heap — GC tier-promotion timing during measurement is then driven only by THAT phase's allocations.
//
// Configuration.JitSleep per-phase: tiered JIT background promotion drain after each warmup (mode-aware: 0 ms in AOT).
// Each phase's freshly-promoted methods settle before its timing starts.
System.Console.WriteLine($"Running benchmarks (target ~{Configuration.TargetSampleMs} ms/sample × {Configuration.BenchmarkSamples} samples median, phase-isolated warmup/measure per Ser/Des)...\n");
foreach (var serializer in serializers)
{
var result = new BenchmarkResult
{
TestDataName = testData.DisplayName, // Use DisplayName for IId% info
Engine = serializer.Engine,
IoMode = serializer.IoMode,
DispatchMode = serializer.DispatchMode,
OptionsPreset = serializer.OptionsPreset,
OptionsDescription = serializer.OptionsDescription,
SerializedSize = serializer.SerializedSize,
SetupSerializeAllocBytes = serializer.SetupSerializeAllocBytes,
SetupDeserializeAllocBytes = serializer.SetupDeserializeAllocBytes,
IsRoundTripOnly = serializer.IsRoundTripOnly
};
// Group label for in-place \r progress. Identifies (cell × serializer) so a stuck benchmark
// is visibly stuck on a specific row at a specific %% rather than silently hanging.
var groupLabel = $"{result.SerializerName}";
if (serializer.IsRoundTripOnly)
{
// Round-trip-only benchmarks (NamedPipe etc.): single phase — Serialize() performs the full RT,
// Deserialize() is a no-op. We use the Ser-phase entry-points (WarmupSerialize) to warm the
// entire round-trip path, then record into the RT result columns.
if (mode is "all" or "serialize" or "ser")
{
ForceGcCollect();
serializer.WarmupSerialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
var rtIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
var (rtMed, rtMin, rtMax, rtStd) = RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]");
result.RoundTripTimeMs = rtMed;
result.RoundTripTimeMinMs = rtMin;
result.RoundTripTimeMaxMs = rtMax;
result.RoundTripTimeStdDevMs = rtStd;
result.RoundTripIterations = rtIter;
// Process-wide allocation measurement: server-drain-thread allocations (server-side new byte[len])
// also show up — otherwise current-thread alloc would only count the client side and look ~halved.
result.RoundTripAllocBytesPerOp = MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]");
}
// mode == "deserialize" alone is meaningless for a round-trip-only benchmark; skip silently.
}
else
{
// ── Ser phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect.
if (mode is "all" or "serialize" or "ser")
{
ForceGcCollect();
serializer.WarmupSerialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
var serIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
var (serMed, serMin, serMax, serStd) = RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]");
result.SerializeTimeMs = serMed;
result.SerializeTimeMinMs = serMin;
result.SerializeTimeMaxMs = serMax;
result.SerializeTimeStdDevMs = serStd;
result.SerializeIterations = serIter;
// Dedicated alloc-only sample (separate from timing samples; keeps timing pure)
result.SerializeAllocBytesPerOp = MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]");
}
// ── Des phase ── isolated warmup → Configuration.JitSleep → calibrate → time → alloc; preceded by GC.Collect.
// The GC.Collect here is critical: it discards the Ser-phase's write-buffer pool churn so the
// Des-phase's allocation measurement reflects ONLY Des-side allocations (deserialized object graph).
if (mode is "all" or "deserialize" or "des")
{
ForceGcCollect();
serializer.WarmupDeserialize(Configuration.WarmupIterations);
if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
var desIter = CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs);
var (desMed, desMin, desMax, desStd) = RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]");
result.DeserializeTimeMs = desMed;
result.DeserializeTimeMinMs = desMin;
result.DeserializeTimeMaxMs = desMax;
result.DeserializeTimeStdDevMs = desStd;
result.DeserializeIterations = desIter;
result.DeserializeAllocBytesPerOp = MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]");
}
// Compose RT from Ser+Des. Because Ser and Des may have DIFFERENT iter counts post-calibration,
// batch-time addition would be misleading. Instead: compute per-op µs (iter-independent),
// then synthesize RoundTripTimeMs against RoundTripIterations = max(serIter, desIter) so that
// RoundTripTimeMs / RoundTripIterations * 1000 == SerPerOp + DesPerOp.
var serPerOp = ToPerOpMicros(result.SerializeTimeMs, result.SerializeIterations);
var desPerOp = ToPerOpMicros(result.DeserializeTimeMs, result.DeserializeIterations);
var rtPerOp = serPerOp + desPerOp;
result.RoundTripIterations = Math.Max(result.SerializeIterations, result.DeserializeIterations);
result.RoundTripTimeMs = rtPerOp / 1000.0 * result.RoundTripIterations;
result.RoundTripAllocBytesPerOp = result.SerializeAllocBytesPerOp + result.DeserializeAllocBytesPerOp;
}
results.Add(result);
PrintResult(result);
}
// Dispose any IDisposable serializers (NamedPipe / FileStream variants own OS resources that must be released
// before the next test data builds new ones — otherwise pipes / handles leak across test cells).
foreach (var s in serializers) (s as IDisposable)?.Dispose();
return results;
}
private static List<ISerializerBenchmark> CreateSerializers(TestDataSet testData, string serializerMode)
{
// FastestByte mode — focused 1:1 comparison on the "fastest Byte[]" path.
// TWO benchmarks: AcBinary FastMode Byte[] (Compact UTF-8) + MemoryPack Byte[].
// - Compact: smallest wire, UTF-8 encode/decode CPU cost vs MemPack head-to-head.
// Tight optimization-iteration loop: ~30-45 sec vs full 2-3 min.
//
// FastWire row (UTF-16 raw memcpy) commented out for the current optimization sprint —
// we are tuning Compact mode against MemPack directly; FastWire was used as a noise-floor
// reference earlier. Re-enable when revisiting Fast wire-mode performance.
if (serializerMode == "fastestbyte")
{
var fastestByteOptions = AcBinarySerializerOptions.FastMode;
fastestByteOptions.WireMode = Configuration.SelectedWireMode;
return new List<ISerializerBenchmark>
{
new AcBinaryBenchmark(testData.Order, fastestByteOptions, "FastMode"),
//new AcBinaryBenchmark(testData.Order, fastWireOptions, "FastMode (FastWire)"),
new MemoryPackBenchmark(testData.Order, "Default"),
};
}
// AsyncPipe-only mode — return ONLY the AsyncPipe streaming benchmark (no other serializer).
// Streaming I/O has long-lived pipe setup + kernel-buffer overhead that, when interleaved with
// the standard byte-array / IBufferWriter measurements, masks the steady-state numbers. Run it
// in isolation so the timing numbers reflect ONLY the streaming path.
if (serializerMode == "asyncpipe")
{
// NamedPipe — pipe-aligned chunk size for the long-lived IPC scenario. The chunkSize here
// drives the AsyncPipeWriterOutput's chunk-on-wire size (header + data, page-aligned thanks to
// the AcquireChunk fix) AND the kernel pipe buffer size (inBufferSize/outBufferSize on the
// NamedPipeServerStream ctor). Same value across both layers = one WriteFile(chunkSize) syscall
// fits blocking-free in one kernel pipe-buffer slot. Single source of truth for both app-level
// wire chunk AND kernel transfer unit; change ONLY this line when tuning.
var binaryFastModePipeChunkOnly = AcBinarySerializerOptions.FastMode;
binaryFastModePipeChunkOnly.BufferWriterChunkSize = Configuration.PipeChunkSize;
binaryFastModePipeChunkOnly.WireMode = Configuration.SelectedWireMode;
return new List<ISerializerBenchmark>
{
// Chunked-framed AsyncPipe: SerializeChunkedFramed + AsyncPipeReaderInput.DrainFromAsync.
// Measures the FULL streaming-I/O stack — wire framing + drain task + sliding-window buffer +
// MRES wait-on-byte-shortage — over a kernel NamedPipe.
new AcBinaryNamedPipeBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeChunk)"),
// Raw byte[] over NamedPipe (sync receive, no chunk-framing). Same kernel-pipe transport,
// same inBufferSize, but: serialize → byte[] → Stream.Write → Stream.Read → Deserialize<T>(byte[]).
// No drain task, no AsyncPipeReaderInput, no [201][UINT16][data]…[202] framing. Side-by-side with
// the chunked-row above this isolates AsyncPipe-framework-overhead (Δ vs raw) from
// kernel-transport-overhead (raw vs in-process Byte[]).
new AcBinaryNamedPipeRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeRaw)"),
// Chunked-framed AsyncPipe over an IN-MEMORY System.IO.Pipelines.Pipe (NO NamedPipe, NO kernel).
// Same chunked-streaming code path (SerializeChunkedFramed → AsyncPipeReaderInput) but with the
// kernel-pipe replaced by a managed-only Pipe. Eliminates per-chunk syscall overhead (~30 µs/chunk
// on NamedPipe → ~1-2 µs/chunk on in-memory Pipe). Side-by-side with the NamedPipe row above this
// isolates pure CPU cost of the chunked-streaming framework (vs kernel-pipe transport cost) — the
// in-memory Pipe row should be much closer to the raw-byte[] row, validating that NamedPipe loopback
// is the worst-case benchmark scenario for chunked-streaming and not representative of real network
// / file / cross-thread Pipe scenarios.
new AcBinaryInMemoryPipeBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeChunk)"),
// Raw byte[] over IN-MEMORY direct cross-thread handoff (no transport at all). Apples-to-apples
// baseline for the in-memory chunked row above: same in-memory transport (zero kernel), but raw
// byte[] vs chunked-streaming wire format. Completes the 2x2 matrix [chunked,raw] × [kernel,memory].
new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeRaw)"),
};
}
// Standard mode — all serializers EXCEPT AsyncPipe (the streaming benchmark is opt-in via the
// AsyncPipe menu / CLI mode, never bundled with the steady-state suite).
var binaryNoInternOption = AcBinarySerializerOptions.Default;
binaryNoInternOption.UseStringInterning = StringInterningMode.None;
binaryNoInternOption.WireMode = Configuration.SelectedWireMode;
var binaryDefaultNoSgenOption = AcBinarySerializerOptions.Default;
binaryDefaultNoSgenOption.UseGeneratedCode = false;
binaryDefaultNoSgenOption.WireMode = Configuration.SelectedWireMode;
var binaryFastModeNoSgenOption = AcBinarySerializerOptions.FastMode;
binaryFastModeNoSgenOption.UseGeneratedCode = false;
binaryFastModeNoSgenOption.WireMode = Configuration.SelectedWireMode;
var binaryFastModeOption = AcBinarySerializerOptions.FastMode;
binaryFastModeOption.WireMode = Configuration.SelectedWireMode;
// BufWr new — 4 KB chunk size for the FRESH ArrayBufferWriter scenario. The chunkSize here drives
// the serializer's GetSpan(N) request → the ArrayBufferWriter's internal allocation per call.
// Small chunk = small per-call allocation, optimum for one-shot serialization where each iteration
// allocates a fresh ABW. Independent of the AsyncPipe profile (different mechanism: alloc overhead
// vs syscall count).
var binaryFastModeBufWrChunk = AcBinarySerializerOptions.FastMode;
binaryFastModeBufWrChunk.BufferWriterChunkSize = Configuration.PipeChunkSize;
binaryFastModeBufWrChunk.WireMode = Configuration.SelectedWireMode;
// In-memory Pipe variant — same 4 KB chunkSize as the AsyncPipe mode, no kernel-pipe alignment
// concern (managed slabs are not page-aligned anyway). Drives SerializeChunkedFramed via the in-memory
// System.IO.Pipelines.Pipe (zero-copy slab handoff between producer and drain task).
var binaryFastModePipeChunkInMem = AcBinarySerializerOptions.FastMode;
binaryFastModePipeChunkInMem.BufferWriterChunkSize = Configuration.PipeChunkSize;
binaryFastModePipeChunkInMem.WireMode = Configuration.SelectedWireMode;
var defaultOptions = AcBinarySerializerOptions.Default;
defaultOptions.UseStringInterning = StringInterningMode.None;
defaultOptions.ReferenceHandling = ReferenceHandlingMode.OnlyId;
defaultOptions.WireMode = Configuration.SelectedWireMode;
return new List<ISerializerBenchmark>
{
// ============================================================
// AcBinary — Byte[] API (uncomment to compare option presets side-by-side)
// ============================================================
// Fastest Byte[] — SGen path (UseGeneratedCode=true, default).
new AcBinaryBenchmark(testData.Order, binaryFastModeOption, "FastMode"),
// Fastest Byte[] — Runtime path (UseGeneratedCode=false). Same wire/options, no source-generated dispatch.
// Always paired with the SGen variant so every layer can compare the SGen speed-up apples-to-apples.
// NativeAOT-safe: AcSerializerCommon.Create*Getter/Setter falls back to reflection-based delegates
// when RuntimeFeature.IsDynamicCodeSupported is false (slower but works under AOT publish).
new AcBinaryBenchmark(testData.Order, binaryFastModeNoSgenOption, "FastMode"),
// Default preset Byte[] — RefHandling=OnlyId (deduplicates IId-shared references on the wire) +
// UseStringInterning=All (deduplicates repeated strings). Showcases the Default preset's wire-size
// and CPU trade-off vs FastMode on the ~20% IId-ref / repeated-string test data.
new AcBinaryBenchmark(testData.Order, defaultOptions, "Default"),
//new AcBinaryBenchmark(testData.Order, binaryDefaultNoSgenOption, "Default"),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.WithoutReferenceHandling, "NoRef"),
//new AcBinaryBenchmark(testData.Order, binaryNoInternOption, "NoIntern"),
// AcBinary via IBufferWriter (reused ArrayBufferWriter — long-running service / batch scenario)
new AcBinaryBufferWriterBenchmark(testData.Order, binaryFastModeOption, "FastMode"),
// AcBinary via IBufferWriter (FRESH ArrayBufferWriter per call — one-shot scenario).
// 4 KB chunk size from binaryFastModeBufWrChunk — minimises the per-call ArrayBufferWriter
// allocation. Optimum for this scenario.
new AcBinaryFreshBufferWriterBenchmark(testData.Order, binaryFastModeBufWrChunk, "FastMode (4KB)"),
// AcBinary chunked-streaming over an IN-MEMORY Pipe (no kernel transport). Side-by-side with the
// Byte[] / IBufferWriter rows above this shows the chunked-streaming framework's pure CPU cost
// (no NamedPipe loopback noise) vs the simpler in-process serialize-then-deserialize patterns.
// The IO column shows "Pipe(in-mem)" — distinct from the NamedPipe AsyncPipe rows in [P] mode.
new AcBinaryInMemoryPipeBenchmark(testData.Order, binaryFastModePipeChunkInMem, "FastMode (PipeChunk)"),
// Raw byte[] over IN-MEMORY direct cross-thread handoff (no transport, no kernel, no Pipe). Apples-to-
// apples baseline for the in-memory chunked row above: same in-memory pattern, but raw byte[] vs
// chunked-streaming wire format. The IO column shows "Bytes(in-mem)".
new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkInMem, "FastMode (PipeRaw)"),
// AsyncPipe streaming over kernel NamedPipe (AcBinaryNamedPipeBenchmark) is intentionally OMITTED
// here — run it via the dedicated AsyncPipe menu [P] / CLI mode for isolated kernel-transport
// measurements.
// ============================================================
// MemoryPack — three I/O modes for apples-to-apples comparison
// ============================================================
new MemoryPackBenchmark(testData.Order, "Default"),
new MemoryPackBufferWriterBenchmark(testData.Order, "Default"),
new MemoryPackFreshBufferWriterBenchmark(testData.Order, "Default"),
// ============================================================
// MessagePack — for legacy comparison
// ============================================================
#if !AYCODE_NATIVEAOT
// MessagePack v3's DynamicGenericResolver uses Activator.CreateInstance on trimmed
// ListFormatter<T> et al. — fails under NativeAOT publish with "No parameterless constructor".
// Excluded from the AOT build; available for regular JIT runs only.
new MessagePackBenchmark(testData.Order, "ContractBased"),
#endif
// System.Text.Json (commented — JSON serializer for reference; not in active suite)
//new SystemTextJsonBenchmark(testData.Order, "Default")
};
}
/// <summary>
/// Runs the action <paramref name="iterations"/> times for <see cref="Configuration.BenchmarkSamples"/> independent samples,
/// returning the median, min, and max elapsed time. Multi-sample design reduces single-run variance
/// from ~±15% to ~±5% by smoothing transient effects (background activity, thermal/turbo state).
/// When <see cref="Configuration.BenchmarkSamples"/> &lt;= 1, falls back to single-sample timing (Debug / quick mode).
/// When <paramref name="progressLabel"/> is non-null, emits in-place <c>\r</c> progress updates so a
/// stuck benchmark (e.g. deadlocked NamedPipe row) is visibly stuck at a specific %% rather than
/// silently hanging.
///
/// Stabilization (added 2026-05-07):
/// 1) Pilot sample is run BEFORE the recorded loop and discarded. The first measurement after
/// warmup tends to absorb residual JIT bookkeeping and GC bookkeeping; dropping it tightens
/// the min/max range without throwing away signal (the median is the SAME data as before).
/// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs BEFORE every recorded sample.
/// Without this, GC pressure from sample N occasionally triggered a Gen-2 pause inside
/// sample N+1, painting it as an outlier; collecting up-front gives every sample the
/// same starting heap shape.
/// 3) Returns (median, min, max) so the caller can surface the inter-sample range — visible
/// noise floor for the row, replacing the previous "median only" view.
/// </summary>
private static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null)
{
var samples = Configuration.BenchmarkSamples;
if (samples <= 1)
{
// Single-sample fast path (Debug or trivial run) — no allocation, no sort, no stddev.
var sw = Stopwatch.StartNew();
RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
sw.Stop();
var ms = sw.Elapsed.TotalMilliseconds;
EndProgress(progressLabel, ms);
return (ms, ms, ms, 0);
}
// Pilot sample (discarded). Counts as sample index 0 of (samples + 1) for progress display
// so the user sees an extra "warmup-ish" tick before the recorded samples start.
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
var pilotSw = Stopwatch.StartNew();
RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0);
pilotSw.Stop();
// intentionally not stored
var times = new double[samples];
for (var s = 0; s < samples; s++)
{
// Per-sample GC settle. Forces every sample to start from the same heap state, so
// a Gen-2 pause caused by the previous sample doesn't bleed into the next sample's
// timing. Cost is paid OUTSIDE the Stopwatch window — no impact on the measurement.
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
var sw = Stopwatch.StartNew();
RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1);
sw.Stop();
times[s] = sw.Elapsed.TotalMilliseconds;
}
// Capture min/max/sum/sumSq BEFORE sort to avoid order ambiguity (Array.Sort is in-place).
var minMs = double.MaxValue;
var maxMs = double.MinValue;
var sum = 0.0;
var sumSq = 0.0;
for (var i = 0; i < times.Length; i++)
{
var t = times[i];
sum += t;
sumSq += t * t;
if (t < minMs) minMs = t;
if (t > maxMs) maxMs = t;
}
// Population stddev (not sample-stddev — we treat the captured samples as the population for
// CV computation). variance = E[X²] - E[X]² with Math.Max(0, ...) guard against tiny negative
// values from FP rounding when samples are nearly identical.
var mean = sum / times.Length;
var variance = (sumSq / times.Length) - (mean * mean);
var stdDevMs = Math.Sqrt(Math.Max(0.0, variance));
Array.Sort(times);
// Median: middle value for odd sample counts, average of two middles for even counts.
var medianMs = samples % 2 == 1 ? times[samples / 2] : (times[samples / 2 - 1] + times[samples / 2]) / 2.0;
EndProgress(progressLabel, medianMs);
return (medianMs, minMs, maxMs, stdDevMs);
}
/// <summary>
/// Per-cell adaptive iteration calibration. Runs a 100-iter measurement after warmup and computes
/// how many iterations are needed to reach <see cref="Configuration.TargetSampleMs"/> wall-clock per sample.
/// Returns iter rounded UP to the nearest 1000, floored at 1000 (the prior fixed minimum) and
/// ceiling-capped at 200_000 (sanity bound for pathologically fast ops). In Debug single-sample mode
/// (<c>Configuration.BenchmarkSamples &lt;= 1</c>) returns the global <see cref="Configuration.TestIterations"/> unchanged —
/// calibration overhead is unjustified there. Calibration runs OUTSIDE the timed sample loop and
/// does NOT count toward warmup; its sole purpose is to measure per-op cost.
/// </summary>
private static int CalibrateIterations(Action action, int targetMs)
{
if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
const int calibIter = 100;
var sw = Stopwatch.StartNew();
for (var i = 0; i < calibIter; i++) action();
sw.Stop();
var ms = sw.Elapsed.TotalMilliseconds;
// Pathologically-fast op below Stopwatch resolution — cap at ceiling (further calibration won't help).
if (ms <= 0.0001) return 200_000;
var iterPerMs = calibIter / ms;
var raw = (int)Math.Ceiling(targetMs * iterPerMs);
// Round UP to nearest 1000 — keeps numbers human-readable in the markdown output.
var rounded = ((raw + 999) / 1000) * 1000;
return rounded switch
{
< 1000 => 1000,
> 200_000 => 200_000,
_ => rounded
};
}
/// <summary>
/// Measures per-call allocation in bytes after a clean GC. Single dedicated sample (no median) — keeps timing samples pure.
/// </summary>
private static long MeasureAllocation(Action action, int iterations, string? progressLabel = null)
{
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
var sw = Stopwatch.StartNew();
var before = GC.GetAllocatedBytesForCurrentThread();
RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
var after = GC.GetAllocatedBytesForCurrentThread();
sw.Stop();
EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);
return (after - before) / iterations;
}
/// <summary>
/// Process-wide allocation measurement — needed for round-trip-only benchmarks (NamedPipe etc.) where
/// the work happens across multiple threads. <see cref="GC.GetAllocatedBytesForCurrentThread"/> would
/// only count the caller-thread allocations, missing the server-side <c>new byte[len]</c> buffers and
/// any drain-pump-thread allocations. <see cref="GC.GetTotalAllocatedBytes"/> covers the entire process.
/// Slightly noisier than the per-thread variant (background threads / GC bookkeeping leak in), but
/// over 1000 iterations the signal dominates.
/// </summary>
private static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null)
{
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
var sw = Stopwatch.StartNew();
var before = GC.GetTotalAllocatedBytes(precise: true);
RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
var after = GC.GetTotalAllocatedBytes(precise: true);
sw.Stop();
EndProgress(progressLabel, sw.Elapsed.TotalMilliseconds);
return (after - before) / iterations;
}
// ============================================================================================
// Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase
// and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the
// MeasureAllocation* helpers when the caller passes a non-null progressLabel.
// ============================================================================================
// Tracks the longest line written by the current progress session, so EndProgress can clear
// any leftover characters from a prior longer line (avoids "ghost" trailing chars after \r).
private static int _progressLastLineLen;
/// <summary>
/// Runs <paramref name="action"/> <paramref name="iterations"/> times, emitting \r-overwriting
/// progress every ~10% (approx. 10 progress prints per sample). When <paramref name="label"/>
/// is null, runs without any progress output (zero overhead beyond a null check per iter).
/// </summary>
private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex)
{
if (label is null)
{
for (var i = 0; i < iterations; i++) action();
return;
}
// ~10 progress emits per sample run. Avoid emitting on every iter (Console.Write is
// expensive enough to skew sub-µs benchmarks if overdone).
var step = Math.Max(1, iterations / 10);
for (var i = 0; i < iterations; i++)
{
action();
if ((i + 1) % step == 0 || i == iterations - 1)
{
var pct = (int)((i + 1) * 100L / iterations);
var line = samples > 1
? $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({i + 1}/{iterations})"
: $" > {label} {pct,3}% ({i + 1}/{iterations})";
System.Console.Write('\r');
System.Console.Write(line);
if (line.Length < _progressLastLineLen)
System.Console.Write(new string(' ', _progressLastLineLen - line.Length));
_progressLastLineLen = line.Length;
}
}
}
/// <summary>
/// Closes a progress line cleanly: clears any leftover chars and writes a final "done" line on
/// the same row, terminated by \n so subsequent <c>WriteLine</c> calls render below.
/// </summary>
private static void EndProgress(string? label, double elapsedMs)
{
if (label is null) return;
var done = $" > {label} done in {elapsedMs,7:F1} ms";
System.Console.Write('\r');
System.Console.Write(done);
if (done.Length < _progressLastLineLen)
System.Console.Write(new string(' ', _progressLastLineLen - done.Length));
System.Console.WriteLine();
_progressLastLineLen = 0;
}
#if !AYCODE_NATIVEAOT
private static readonly JsonSerializerOptions VerifyJsonOpts = new()
{
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles
};
#endif
/// <summary>
/// Round-trip equality check: serialize both via System.Text.Json (canonical form) and compare strings.
/// Slower than property-by-property compare, but universal — works for any object graph without custom comparer.
/// </summary>
/// <remarks>
/// AOT publish skip: <c>System.Text.Json</c>'s reflection path uses runtime closed-generic instantiation
/// (<c>JsonPropertyInfo&lt;TestStatus&gt;</c> et al.) that the trimmer drops, causing
/// <c>NotSupportedException: missing native code or metadata</c>. The validation is JIT-only — the actual
/// benchmark Serialize/Deserialize loops don't touch this path. Under AOT we return <c>true</c> so all
/// <c>VerifyRoundTrip()</c> calls pass without running the cross-format validation.
/// </remarks>
private static bool DeepEqualsViaJson(object? a, object? b)
{
#if AYCODE_NATIVEAOT
// Skip cross-format validation under AOT — STJ reflection path is incompatible. The roundtrip
// itself still runs (caller-side Serialize+Deserialize), just the JSON-canonical compare is bypassed.
return true;
#else
if (a == null && b == null) return true;
if (a == null || b == null) return false;
var jsonA = JsonSerializer.Serialize(a, VerifyJsonOpts);
var jsonB = JsonSerializer.Serialize(b, VerifyJsonOpts);
return jsonA == jsonB;
#endif
}
/// <summary>
/// Validates MemoryPack setup at startup. Aborts the benchmark if TestOrder is not [MemoryPackable].
/// Without this attribute, MemoryPack falls back to runtime resolver (slower) — comparison would be INVALID.
/// </summary>
private static void ValidateMemoryPackSetup()
{
var typesToCheck = new[] { typeof(TestOrder) };
foreach (var type in typesToCheck)
{
var hasAttr = type.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true).Any();
if (!hasAttr)
{
System.Console.Error.WriteLine($"❌ FATAL: {type.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim.");
System.Console.Error.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it.");
Environment.Exit(1);
}
}
}
/// <summary>
/// Interactive menu shown when no CLI args. Returns the layer keyword (core/comprehensive/edge/all) or null on Quit.
/// Loops on settings-changes ([S]) — user is returned to this menu after modifying iteration counts.
/// </summary>
private static (string layer, string serializerMode)? ShowInteractiveMenu()
{
while (true)
{
System.Console.WriteLine();
System.Console.WriteLine("╔══════════════════════════════════════════════════════════╗");
System.Console.WriteLine("║ AcBinary Benchmark Suite ║");
System.Console.WriteLine("╚══════════════════════════════════════════════════════════╝");
System.Console.WriteLine();
System.Console.WriteLine("Select benchmark layer:");
System.Console.WriteLine();
System.Console.WriteLine(" [1] Core — daily iteration");
System.Console.WriteLine(" [2] Comprehensive — release validation");
System.Console.WriteLine(" [3] Edge cases — refactor verification");
System.Console.WriteLine(" [A] All layers");
System.Console.WriteLine(" [F] FastestByte — AcBinary FastMode Byte[] vs MemoryPack Byte[] only (tight optimization loop)");
System.Console.WriteLine(" [P] AsyncPipe — streaming I/O isolation (only AsyncPipe, all test data)");
System.Console.WriteLine($" [S] Settings — Iteration / WireMode (current: {Configuration.SelectedWireMode})");
System.Console.WriteLine(" [Q] Quit");
System.Console.Write("\nSelection: ");
var key = System.Console.ReadKey(intercept: false).KeyChar;
System.Console.WriteLine();
switch (char.ToLower(key))
{
case '1': return ("core", "standard");
case '2': return ("comprehensive", "standard");
case '3': return ("edge", "standard");
case 'a': return ("all", "standard");
case 'f': return ("all", "fastestbyte");
case 'p': return ("all", "asyncpipe");
case 's':
ShowSettingsMenu();
continue; // re-display the main menu after settings update
case 'q': return null;
default: return ("all", "standard");
}
}
}
/// <summary>
/// Settings sub-menu — prompts for Warmup / Iterations / Samples values. Empty input keeps the
/// current value. Validation: Configuration.WarmupIterations ≥ 0; Configuration.TestIterations ≥ 1; Configuration.BenchmarkSamples ≥ 1.
/// Returns to the caller (which re-displays the main menu).
/// </summary>
private static void ShowSettingsMenu()
{
while (true)
{
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine("Settings");
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine(" [1] Iteration — Warmup / Iterations / Samples");
System.Console.WriteLine($" [2] WireMode — current: {Configuration.SelectedWireMode}");
System.Console.WriteLine($" [3] Charset — current: {GetCurrentCharsetName()}");
System.Console.WriteLine(" [B] Back");
System.Console.Write("\nSelection: ");
var key = System.Console.ReadKey(intercept: false).KeyChar;
System.Console.WriteLine();
switch (char.ToLower(key))
{
case '1':
ShowIterationSettingsMenu();
break;
case '2':
ShowWireModeSettingsMenu();
break;
case '3':
ShowCharsetSettingsMenu();
break;
case 'b':
return;
default:
continue;
}
}
}
/// <summary>
/// Returns a human-readable name for the currently-active <c>BenchmarkTestDataProvider.LongStringSuffix</c>
/// charset. Returns "Custom" when the suffix doesn't match any of the predefined
/// <see cref="CharsetSuffixes"/> constants. Used in menu state display, console run header, and
/// the .LLM markdown output header so per-charset bench files are self-documenting.
/// </summary>
private static string GetCurrentCharsetName()
{
var s = BenchmarkTestDataProvider.LongStringSuffix;
return s switch
{
CharsetSuffixes.Latin1FixAscii => "Latin1FixAscii",
CharsetSuffixes.Latin1Short => "Latin1Short",
CharsetSuffixes.Latin1Long => "Latin1Long",
CharsetSuffixes.CjkBmp => "CjkBmp",
CharsetSuffixes.Cyrillic => "Cyrillic",
CharsetSuffixes.Mixed => "Mixed",
_ => "Custom"
};
}
private static void ShowCharsetSettingsMenu()
{
while (true)
{
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine("Charset settings — long-string suffix profile");
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine($"Current: {GetCurrentCharsetName()}");
System.Console.WriteLine();
System.Console.WriteLine(" [1] Latin1FixAscii — empty suffix; short FixStr-fast-path stress (Latin1 baseline values stay short)");
System.Console.WriteLine(" [2] Latin1Short — \" árvíztűrő tükörfúrógép\" (~24 char Hungarian mixed)");
System.Console.WriteLine(" [3] Latin1Long — ~47-char Latin1 mixed (default; exceeds FixStr boundary)");
System.Console.WriteLine(" [4] CjkBmp — CJK BMP (long 3-byte runs)");
System.Console.WriteLine(" [5] Cyrillic — Russian Cyrillic (long 2-byte runs)");
System.Console.WriteLine(" [6] Mixed — Hungarian + CJK + Cyrillic + emoji (full-spectrum + surrogate pairs)");
System.Console.WriteLine(" [B] Back");
System.Console.Write("\nSelection: ");
var key = System.Console.ReadKey(intercept: false).KeyChar;
System.Console.WriteLine();
switch (char.ToLower(key))
{
case '1':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1FixAscii;
System.Console.WriteLine("✓ Charset set to Latin1FixAscii");
return;
case '2':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1Short;
System.Console.WriteLine("✓ Charset set to Latin1Short");
return;
case '3':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1Long;
System.Console.WriteLine("✓ Charset set to Latin1Long");
return;
case '4':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.CjkBmp;
System.Console.WriteLine("✓ Charset set to CjkBmp");
return;
case '5':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Cyrillic;
System.Console.WriteLine("✓ Charset set to Cyrillic");
return;
case '6':
BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Mixed;
System.Console.WriteLine("✓ Charset set to Mixed");
return;
case 'b':
return;
default:
continue;
}
}
}
private static void ShowIterationSettingsMenu()
{
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine("Iteration settings — press Enter to keep current value");
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine();
Configuration.WarmupIterations = PromptInt("Configuration.WarmupIterations", Configuration.WarmupIterations, min: 0);
Configuration.TestIterations = PromptInt("Configuration.TestIterations ", Configuration.TestIterations, min: 1);
Configuration.BenchmarkSamples = PromptInt("Configuration.BenchmarkSamples", Configuration.BenchmarkSamples, min: 1);
System.Console.WriteLine();
System.Console.WriteLine($"✓ Iteration settings updated: Warmup={Configuration.WarmupIterations} | Iterations={Configuration.TestIterations} | Samples={Configuration.BenchmarkSamples}");
}
private static void ShowWireModeSettingsMenu()
{
while (true)
{
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine("WireMode settings");
System.Console.WriteLine("─────────────────────────────────────────────");
System.Console.WriteLine($"Current: {Configuration.SelectedWireMode}");
System.Console.WriteLine(" [1] Compact");
System.Console.WriteLine(" [2] Fast");
System.Console.WriteLine(" [B] Back");
System.Console.Write("\nSelection: ");
var key = System.Console.ReadKey(intercept: false).KeyChar;
System.Console.WriteLine();
switch (char.ToLower(key))
{
case '1':
Configuration.SelectedWireMode = WireMode.Compact;
System.Console.WriteLine("✓ WireMode set to Compact");
return;
case '2':
Configuration.SelectedWireMode = WireMode.Fast;
System.Console.WriteLine("✓ WireMode set to Fast");
return;
case 'b':
return;
default:
continue;
}
}
}
/// <summary>
/// Prompts the user for an integer with a default (current value). Returns the current value if
/// the user presses Enter on empty input or if parsing fails / value is below the minimum.
/// </summary>
private static int PromptInt(string name, int currentValue, int min)
{
System.Console.Write($" {name} [{currentValue}]: ");
var input = System.Console.ReadLine()?.Trim() ?? "";
if (input.Length == 0) return currentValue;
if (int.TryParse(input, out var newValue) && newValue >= min) return newValue;
System.Console.WriteLine($" ! Invalid value (need int ≥ {min}) — keeping {currentValue}");
return currentValue;
}
/// <summary>
/// Filters test data sets by layer keyword. Layered approach lets you run only what's needed for the iteration cadence.
/// P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep). Comprehensive and Edge layers will be expanded in P2.
/// </summary>
private static List<TestDataSet> FilterByLayer(List<TestDataSet> all, string layer)
{
if (layer == "all") return all.ToList();
var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" };
// P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc.
var comprehensiveExtras = new string[] { /* P2 */ };
// P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc.
var edgeExtras = new string[] { /* P3 */ };
return layer switch
{
"core" => all.Where(t => StartsWithAny(t.Name, coreNames)).ToList(),
"comprehensive" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras)).ToList(),
"edge" => all.Where(t => StartsWithAny(t.Name, coreNames) || StartsWithAny(t.Name, comprehensiveExtras) || StartsWithAny(t.Name, edgeExtras)).ToList(),
// Single-cell A/B mini-suite filters — match by case-insensitive prefix on Name.
// Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated`
// or interactive menu shortcut), avoiding the full ~110 sec suite when only one cell is in scope.
"small" => all.Where(t => t.Name.StartsWith("Small", StringComparison.OrdinalIgnoreCase)).ToList(),
"medium" => all.Where(t => t.Name.StartsWith("Medium", StringComparison.OrdinalIgnoreCase)).ToList(),
"large" => all.Where(t => t.Name.StartsWith("Large", StringComparison.OrdinalIgnoreCase)).ToList(),
"repeated" => all.Where(t => t.Name.StartsWith("Repeated", StringComparison.OrdinalIgnoreCase)).ToList(),
"deep" => all.Where(t => t.Name.StartsWith("Deep", StringComparison.OrdinalIgnoreCase)).ToList(),
_ => all.ToList()
};
static bool StartsWithAny(string name, string[] prefixes) => prefixes.Any(name.StartsWith);
}
#endregion
#region Serializer Implementations
private interface ISerializerBenchmark
{
/// <summary>Serializer engine — e.g. "AcBinary", "MemoryPack", "MessagePack".</summary>
string Engine { get; }
/// <summary>I/O mode — e.g. "Byte[]", "BufWr reuse", "BufWr new", "NamedPipe", "FileStream".</summary>
string IoMode { get; }
/// <summary>Dispatch mode — "SGen", "Runtime", or "Hybrid". For AcBinary derived from <c>UseGeneratedCode</c> + child-type SGen coverage; non-AcBinary engines report their own native dispatch model.</summary>
string DispatchMode { get; }
/// <summary>Options preset name — e.g. "FastMode", "Default", "NoIntern", "WithCompression".</summary>
string OptionsPreset { get; }
/// <summary>Synthesized display name from Engine + IoMode + OptionsPreset.</summary>
string Name => $"{Engine} ({IoMode}, {OptionsPreset})";
int SerializedSize { get; }
string? OptionsDescription => null;
/// <summary>One-time SERIALIZER-side setup allocation cost (e.g., pre-allocated ArrayBufferWriter with internal buffer). Captured in constructor; 0 for byte[] API and Fresh-BufWriter variants.</summary>
long SetupSerializeAllocBytes { get; }
/// <summary>One-time DESERIALIZER-side setup allocation cost (e.g., long-lived AsyncPipeReaderInput's ArrayPool rent + ManualResetEventSlim, drain-task scaffolding). Captured in constructor; 0 for byte[] API and any setup-free deserialize path.</summary>
long SetupDeserializeAllocBytes { get; }
/// <summary>True when Serialize() does a full round-trip (e.g. NamedPipe) and Deserialize() is a no-op.
/// Used by the SUMMARY: WINNERS section to skip such cells from "Fastest Serialize" and "Fastest Deserialize"
/// rankings (because both metrics are misleading there) — they still participate in "Fastest Round-trip".
/// Default false for in-memory IO modes which measure Ser and Des separately.</summary>
bool IsRoundTripOnly => false;
/// <summary>Combined warmup (Ser + Deser interleaved). Currently unused — kept as a legacy entry point
/// for any external caller that still wants single-call warmup. The benchmark loop uses the split
/// <see cref="WarmupSerialize"/> + <see cref="WarmupDeserialize"/> pair for cache-isolated measurements.</summary>
void Warmup(int iterations);
/// <summary>Warm only the Serialize path. Default body iterates <see cref="Serialize"/> N times.
/// Overrides are only needed when the implementor wants Ser-specific warmup-state (e.g. pre-allocate buffers).
/// On <see cref="IsRoundTripOnly"/> benchmarks (NamedPipe-style) <see cref="Serialize"/> performs the full RT,
/// so this warms the entire round-trip path.</summary>
void WarmupSerialize(int iterations)
{
for (var i = 0; i < iterations; i++) Serialize();
}
/// <summary>Warm only the Deserialize path. Default body iterates <see cref="Deserialize"/> N times.
/// On <see cref="IsRoundTripOnly"/> benchmarks <see cref="Deserialize"/> is a no-op, so the bench loop
/// skips the Des-phase entirely for those cells.</summary>
void WarmupDeserialize(int iterations)
{
for (var i = 0; i < iterations; i++) Deserialize();
}
void Serialize();
void Deserialize();
/// <summary>Round-trip correctness check — called once per cell before warmup. Returns true if Serialize+Deserialize preserves data.</summary>
bool VerifyRoundTrip();
}
private sealed class AcBinaryBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoByteArray;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options);
public AcBinaryBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, options);
//_options.UseCompression = Lz4CompressionMode.Block;
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
AcBinarySerializer.Serialize(_order, _options);
//if (_options.ReferenceHandling != ReferenceHandlingMode.None || _options.UseStringInterning != StringInterningMode.None)
//{
// AcBinarySerializer.ScanOnly(_order, _options);
//}
//else AcBinarySerializer.Serialize(_order, _options);
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => AcBinaryDeserializer.Deserialize<TestOrder>(_serialized, _options);
public bool VerifyRoundTrip()
{
var bytes = AcBinarySerializer.Serialize(_order, _options);
var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
private sealed class MemoryPackBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly MemoryPackSerializerOptions _options;
private readonly byte[] _serialized;
public string Engine => Configuration.EngineMemoryPack;
public string IoMode => Configuration.IoByteArray;
public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
public MemoryPackBenchmark(TestOrder order, string optionsPreset)
{
_order = order;
OptionsPreset = optionsPreset;
_options = GetMemPackOptions();
_serialized = MemoryPackSerializer.Serialize(order, _options);
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize() => MemoryPackSerializer.Serialize(_order, _options);
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(_serialized, _options);
public bool VerifyRoundTrip()
{
var bytes = MemoryPackSerializer.Serialize(_order, _options);
var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(bytes, _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
#if !AYCODE_NATIVEAOT
// MessagePack benchmark — excluded from NativeAOT build because v3's StandardResolver falls back
// to DynamicGenericResolver for closed-generic types (List<TestOrderItem> et al.), which uses
// Activator.CreateInstance on formatter types the AOT trimmer drops → MissingMethodException at runtime.
// Available for regular JIT runs (`dotnet run`) only.
private sealed class MessagePackBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly MessagePackSerializerOptions _options;
private readonly byte[] _serialized;
public string Engine => Configuration.EngineMessagePack;
public string IoMode => Configuration.IoByteArray;
public string DispatchMode => Configuration.ModeSGen; // MessagePack uses [MessagePackObject] source-generated formatters (StandardResolver)
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public string OptionsDescription { get; }
public MessagePackBenchmark(TestOrder order, string optionsPreset)
{
_order = order;
OptionsPreset = optionsPreset;
//_options = ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.None);
//_options = ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.Lz4Block);
_options = MessagePackSerializerOptions.Standard.WithCompression(MessagePackCompression.None);
var isContractless = _options.Resolver is ContractlessStandardResolver;
OptionsDescription = $"Mode={( isContractless ? "Contractless" : "ContractBased")}, Compression={_options.Compression}";
_serialized = MessagePackSerializer.Serialize(order, _options);
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize() => MessagePackSerializer.Serialize(_order, _options);
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => MessagePackSerializer.Deserialize<TestOrder>(_serialized, _options);
public bool VerifyRoundTrip()
{
var bytes = MessagePackSerializer.Serialize(_order, _options);
var roundTripped = MessagePackSerializer.Deserialize<TestOrder>(bytes, _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
#endif
/// <summary>
/// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
/// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup).
/// </summary>
/// <summary>
/// Benchmarks AcBinary via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
/// One-shot scenario — represents code that doesn't reuse a writer across calls.
/// Uses BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) instead of the 65535 default —
/// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB
/// regardless of payload size (heavy over-allocation for small payloads).
/// </summary>
private sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoBufWrNew;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B");
public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
// BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
// — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
// size in CreateSerializers only.
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
var abw = new ArrayBufferWriter<byte>(); // FRESH every call — alloc + grow as needed
AcBinarySerializer.Serialize(_order, abw, _options);
}
// BufWr semantic: read from a ReadOnlySequence<byte> (the ROS overload), NOT from byte[] —
// single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which
// redirects to the byte[] overload. This means the bench actually exercises the ROS-input path
// (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing
// byte[] Deser under the BufWr label.
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
public bool VerifyRoundTrip()
{
var abw = new ArrayBufferWriter<byte>();
AcBinarySerializer.Serialize(_order, abw, _options);
var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
/// <summary>
/// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API
/// (<see cref="AcBinarySerializer.SerializeChunked{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
/// + <see cref="AsyncPipeReaderInput"/> + <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>).
/// Mirrors what a real consumer (e.g. <c>DeserializeFromPipeReaderAsync</c>) does per message:
/// long-lived <see cref="AsyncPipeReaderInput"/> with multi-message wire framing on top of a long-lived NamedPipe.
///
/// <para><b>Architecture</b>:</para>
/// <list type="bullet">
/// <item>Constructor (NOT timed): sets up <see cref="NamedPipeServerStream"/> + <see cref="NamedPipeClientStream"/>,
/// waits for connection, creates one long-lived <see cref="System.IO.Pipelines.PipeWriter"/> /
/// <see cref="System.IO.Pipelines.PipeReader"/> pair, ONE long-lived <see cref="AsyncPipeReaderInput"/>
/// in <c>multiMessage = true</c> mode, ONE drain Task that pumps <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>
/// forever, and ONE deserialize Task that loops <c>AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts)</c>
/// producing into a <see cref="System.Threading.Channels.Channel{T}"/>.</item>
/// <item>Per-iteration <see cref="Serialize"/> (timed): sender writes via
/// <see cref="AcBinarySerializer.SerializeChunkedFramed{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
/// — multi-message wire (<c>[201][UINT16][data]...[202]</c>); the <c>[202]</c> end marker arms the input's
/// <c>_readPos = -1</c> sentinel, so the next message's first <c>AppendToBuffer</c> recycles the buffer to 0.
/// Then receiver awaits the channel for the deserialized result.</item>
/// <item><see cref="Deserialize"/> is a no-op (full round-trip captured in <see cref="Serialize"/>);
/// <see cref="IsRoundTripOnly"/>=true → Ser ms / SerAlloc oszlopok N/A, RT ms = full round-trip.</item>
/// </list>
///
/// <para><b>Per-iter overhead</b>: 0 new <c>Task.Run</c>, 0 new <c>AsyncPipeReaderInput</c>, 0 new <c>CancellationTokenSource</c>.
/// Pure cost = <c>SerializeChunkedFramed</c> (CPU + chunk-onkénti flush) + kernel write/read syscalls + 1 sync barrier
/// (channel) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: <c>_readPos = -1</c>
/// sentinel + <c>AppendToBuffer</c> sliding-window cycling).</para>
///
/// <para><b>Approximation note</b>: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR
/// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound.</para>
/// </summary>
private sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting only
// Long-lived pipe lifecycle (set up once in ctor — NOT timed).
private readonly NamedPipeServerStream _pipeServer;
private readonly NamedPipeClientStream _pipeClient;
private readonly PipeWriter _pipeWriter;
private readonly PipeReader _pipeReader;
// Long-lived multi-message receive infrastructure (set up once in ctor).
private readonly AsyncPipeReaderInput _input;
private readonly CancellationTokenSource _cts;
private readonly Task _drainTask; // BG: PipeReader → input.Feed (continuous pump)
private readonly Task _consumerTask; // BG: per-iter Deserialize<T>(input) loop, signaled by calling thread
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
private bool _captureResult; // toggle: when true, ConsumeLoop stores result; otherwise discards
private bool _disposed;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoNamedPipe;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");
public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
// BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
// — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
// size in CreateSerializers only.
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
// 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the
// client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize
// exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header +
// data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned,
// no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source.
var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}";
// === SERIALIZE-side setup measurement ===
// pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSer = GC.GetAllocatedBytesForCurrentThread();
_pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
System.IO.Pipes.PipeOptions.Asynchronous,
inBufferSize: _options.BufferWriterChunkSize,
outBufferSize: _options.BufferWriterChunkSize);
_pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
var serverWait = _pipeServer.WaitForConnectionAsync();
_pipeClient.Connect();
serverWait.GetAwaiter().GetResult();
_pipeWriter = PipeWriter.Create(_pipeClient);
var afterSer = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSer - beforeSer;
// === DESERIALIZE-side setup measurement ===
// PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain
// task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the
// kernel pipe into input; consumer drives Deserialize<T>(input) per iter on signal.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_pipeReader = PipeReader.Create(_pipeServer);
_input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
_cts = new CancellationTokenSource();
// Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for
// the full benchmark lifetime — its overhead is amortised across all messages.
_drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
// Consumer task: per-iter Deserialize<T>(input) loop. Started here once; signaled per-iter via
// _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed
// while THIS task simultaneously runs Deserialize<T>, both consuming/producing through the
// sliding-window buffer pipelined by the drain task.
_consumerTask = Task.Run(ConsumeLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
// The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough
// bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser)
// happens.
private void ConsumeLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
_consumeRequest.Wait(ct);
if (ct.IsCancellationRequested) return;
_consumeRequest.Reset();
try
{
var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
if (_captureResult) _lastResult = result;
}
catch
{
// Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
// or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
}
finally
{
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// 2-task streaming pipeline:
// 1. Calling thread signals consumer task to begin Deserialize<T>(input). Consumer immediately
// starts; first read blocks on input's MRES because no bytes flowed yet.
// 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe →
// drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize<T> consumes bytes
// chunk by chunk. Ser↔Des truly overlap here.
// 3. Calling thread waits for _consumeDone (signaling Deserialize<T> returned).
_consumeDone.Reset();
_consumeRequest.Set();
AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
public bool VerifyRoundTrip()
{
// Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
_captureResult = false;
_lastResult = null;
}
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
// Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
// Complete writer + dispose pipe lifecycle.
try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
try { _input.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
/// is an in-memory <see cref="System.IO.Pipelines.Pipe"/> instead of a kernel <c>NamedPipe</c>. The Pipe's
/// <c>Writer</c>/<c>Reader</c> pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
/// buffer copy, no IRP queueing.
///
/// <para><b>Why this benchmark matters</b>: by holding ALL other variables constant (same SerializeChunkedFramed,
/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
/// row isolates the <b>kernel-NamedPipe transport overhead</b> from the chunked-streaming framework's pure
/// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
/// should converge dramatically toward <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>.</para>
///
/// <para><b>Real-world relevance</b>: in-memory Pipe is the typical primitive used for cross-thread serializer
/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
/// of the NamedPipe benchmark.</para>
/// </summary>
private sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting only
// Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
private readonly Pipe _pipe;
private readonly PipeWriter _pipeWriter;
private readonly PipeReader _pipeReader;
// Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
// variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize<T>(input).
private readonly AsyncPipeReaderInput _input;
private readonly CancellationTokenSource _cts;
private readonly Task _drainTask;
private readonly Task _consumerTask;
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
private object? _lastResult;
private bool _captureResult;
private bool _disposed;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoInMemoryPipe;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
// === SERIALIZE-side setup measurement ===
// In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object
// and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter).
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSer = GC.GetAllocatedBytesForCurrentThread();
_pipe = new Pipe();
_pipeWriter = _pipe.Writer;
var afterSer = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSer - beforeSer;
// === DESERIALIZE-side setup measurement ===
// PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task +
// consumer task scaffolding. Identical to the NamedPipe variant on the receive side.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_pipeReader = _pipe.Reader;
_input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
_cts = new CancellationTokenSource();
_drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
_consumerTask = Task.Run(ConsumeLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
// Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol.
private void ConsumeLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
_consumeRequest.Wait(ct);
if (ct.IsCancellationRequested) return;
_consumeRequest.Reset();
try
{
var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
if (_captureResult) _lastResult = result;
}
catch
{
// Swallow — see ConsumeLoop in NamedPipe variant for rationale.
}
finally
{
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++) Serialize();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe
// instead of kernel NamedPipe). Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task
// reads from PipeReader → input.Feed → consumer Deserialize<T> consumes byte-by-byte.
//
// Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is
// exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await
// FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to
// PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk
// is functionally equivalent to the PipeWriter-overload (both internally route to
// SerializeToPipeWriterCore with FlushPolicy.PerChunk).
_consumeDone.Reset();
_consumeRequest.Set();
AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
public bool VerifyRoundTrip()
{
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
_captureResult = false;
_lastResult = null;
}
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
// Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
// Complete writer + reader (in-memory Pipe — no underlying stream to dispose).
try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
try { _input.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
/// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
/// avoids the kernel-buffer-full deadlock when <c>bytes.Length &gt; inBufferSize</c>.
///
/// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
/// isolates two cost components on the SAME kernel-pipe transport with the SAME <c>inBufferSize</c>:
/// <list type="bullet">
/// <item><description><b>This row vs <see cref="AcBinaryBenchmark"/> (Byte[])</b> — pure kernel-NamedPipe
/// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
/// <item><description><b>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed)</b> — pure
/// AsyncPipe-framework overhead (chunk header writes + sliding-window <c>Feed</c> + MRES wait inside
/// <c>AsyncPipeReaderInput</c>) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
/// raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).</description></item>
/// </list>
/// Per-iter <c>byte[]</c> allocation from <c>AcBinarySerializer.Serialize</c> is part of the cost (matches
/// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated per-iter
/// on the consumer-task (counted via <c>GC.GetTotalAllocatedBytes</c> in <c>MeasureAllocationTotal</c>).
/// </summary>
private sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting + receive-side size known upfront
// Long-lived pipe lifecycle (set up once in ctor — NOT timed).
private readonly NamedPipeServerStream _pipeServer;
private readonly NamedPipeClientStream _pipeClient;
// Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
// Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an
// intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes
// → Deserialize<T>(bytes) → signal done. Calling thread's Ser↔Write overlaps with this BG Read+Des
// through kernel-pipe pipelining.
private readonly CancellationTokenSource _cts;
private readonly Task _consumerTask;
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
private int _pendingReadSize;
private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards
private bool _disposed;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoNamedPipeRaw;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
// BufferWriterChunkSize comes from the caller — same source-of-truth contract as
// AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
// raw-vs-chunked comparison runs on identical transport conditions.
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";
// === SERIALIZE-side setup measurement ===
// pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
// Stream.Write API directly, matching the no-framing semantics of this benchmark.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSer = GC.GetAllocatedBytesForCurrentThread();
_pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
System.IO.Pipes.PipeOptions.Asynchronous,
inBufferSize: _options.BufferWriterChunkSize,
outBufferSize: _options.BufferWriterChunkSize);
_pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
var serverWait = _pipeServer.WaitForConnectionAsync();
_pipeClient.Connect();
serverWait.GetAwaiter().GetResult();
var afterSer = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSer - beforeSer;
// === DESERIALIZE-side setup measurement ===
// 1× background consumer-task + 2× MRES (request / done) + cancellation source. Matches the
// chunked benchmark's deserialize-side setup cost shape.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_cts = new CancellationTokenSource();
_consumerTask = Task.Run(ConsumerLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize<T>(bytes), signals
// _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens
// sequentially after Read completes (raw byte[] needs the full message to deserialize).
private void ConsumerLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
_consumeRequest.Wait(ct);
if (ct.IsCancellationRequested) return;
_consumeRequest.Reset();
try
{
var size = _pendingReadSize;
var bytes = new byte[size]; // per-iter alloc — counted by MeasureAllocationTotal
var totalRead = 0;
while (totalRead < size)
{
var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
if (n == 0) break; // pipe closed / EOF — partial read swallowed
totalRead += n;
}
var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
if (_captureResult) _lastResult = result;
}
catch
{
// Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
// or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
}
finally
{
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// 2-task streaming pipeline:
// 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
// 2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop
// on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap
// through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes).
// 3. Calling thread waits for _consumeDone (consumer task finished Read+Des).
//
// Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before
// starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes →
// Des the full graph → signal done. This is the architectural difference between raw and chunked.
var bytes = AcBinarySerializer.Serialize(_order, _options);
_pendingReadSize = bytes.Length;
_consumeDone.Reset();
_consumeRequest.Set();
_pipeClient.Write(bytes, 0, bytes.Length);
_pipeClient.Flush();
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
public bool VerifyRoundTrip()
{
// Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
_captureResult = false;
_lastResult = null;
}
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
// Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
// Symmetric teardown — close client first (writer side), then server.
try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Raw <c>byte[]</c> over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
/// Channel<see langword="&lt;T&gt;"/>). Calling thread serialises into a fresh <c>byte[]</c>, hands it to a
/// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
///
/// <para><b>Why this benchmark matters</b>: completes the 2x2 transport × wire-format matrix:</para>
/// <list type="bullet">
/// <item><description><b>NamedPipe + Chunked</b> = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
/// <item><description><b>NamedPipe + Raw</b> = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
/// <item><description><b>In-memory Pipe + Chunked</b> = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
/// <item><description><b>In-memory + Raw</b> = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
/// </list>
/// <para>Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
/// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
/// overhead on the raw-byte[] side.</para>
/// </summary>
private sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting only
// Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
// No transport — just a byte[] slot for handoff between calling thread and consumer task.
private readonly CancellationTokenSource _cts;
private readonly Task _consumerTask;
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
private byte[]? _pendingBytes; // calling thread → consumer task handoff slot
private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
private bool _captureResult;
private bool _disposed;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoInMemoryRaw;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
// === SERIALIZE-side setup measurement ===
// Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
SetupSerializeAllocBytes = 0;
// === DESERIALIZE-side setup measurement ===
// 1× background consumer-task + 2× MRES (request / done) + cancellation source.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_cts = new CancellationTokenSource();
_consumerTask = Task.Run(ConsumerLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize<T>(bytes),
// signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
// reference itself (zero-copy by reference).
private void ConsumerLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
_consumeRequest.Wait(ct);
if (ct.IsCancellationRequested) return;
_consumeRequest.Reset();
try
{
var bytes = _pendingBytes;
if (bytes != null)
{
var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
if (_captureResult) _lastResult = result;
}
}
catch
{
// Swallow — see ConsumerLoop in NamedPipe variant for rationale.
}
finally
{
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++) Serialize();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// 2-task in-memory pipeline:
// 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
// 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
// picks up the reference (zero-copy) and runs Deserialize<T>(bytes).
// 3. Calling thread waits for _consumeDone (consumer task finished Des).
//
// Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
// are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
// signalling and waiting while consumer thread takes the byte[]).
var bytes = AcBinarySerializer.Serialize(_order, _options);
_pendingBytes = bytes;
_consumeDone.Reset();
_consumeRequest.Set();
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
public bool VerifyRoundTrip()
{
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
_captureResult = false;
_lastResult = null;
}
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
/// Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark.
/// </summary>
private sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly MemoryPackSerializerOptions _options;
private readonly byte[] _serialized;
public string Engine => Configuration.EngineMemoryPack;
public string IoMode => Configuration.IoBufWrNew;
public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
{
_order = order;
OptionsPreset = optionsPreset;
_options = GetMemPackOptions();
_serialized = MemoryPackSerializer.Serialize(order, _options);
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
var abw = new ArrayBufferWriter<byte>();
MemoryPackSerializer.Serialize(abw, _order, _options);
}
// BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
// BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
public bool VerifyRoundTrip()
{
var abw = new ArrayBufferWriter<byte>();
MemoryPackSerializer.Serialize(abw, _order, _options);
var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
private sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized;
private readonly ArrayBufferWriter<byte> _bufferWriter;
public string Engine => Configuration.EngineAcBinary;
public string IoMode => Configuration.IoBufWrReuse;
public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes => 0;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options);
public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, options);
// Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the
// helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no
// dedicated setup allocation, hence SetupDeserializeAllocBytes = 0.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
_bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
var afterSetup = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSetup - beforeSetup;
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
_bufferWriter.ResetWrittenCount(); // reuse — no alloc, no zeroing
AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
}
// BufWr semantic: read from a ReadOnlySequence<byte> (the ROS overload), NOT from byte[] —
// single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which
// redirects to the byte[] overload. This means the bench actually exercises the ROS-input path
// (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing
// byte[] Deser under the BufWr label.
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
public bool VerifyRoundTrip()
{
_bufferWriter.ResetWrittenCount();
AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
/// <summary>
/// Benchmarks MemoryPack via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
/// Apples-to-apples counterpart to AcBinaryBufferWriterBenchmark — MemoryPack's IBufferWriter is the path it's designed for.
/// </summary>
private sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly MemoryPackSerializerOptions _options;
private readonly byte[] _serialized;
private readonly ArrayBufferWriter<byte> _bufferWriter;
public string Engine => Configuration.EngineMemoryPack;
public string IoMode => Configuration.IoBufWrReuse;
public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
public string OptionsPreset { get; }
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes => 0;
public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
{
_order = order;
OptionsPreset = optionsPreset;
_options = GetMemPackOptions();
_serialized = MemoryPackSerializer.Serialize(order, _options);
// Serialize-side setup only — see AcBinaryBufferWriterBenchmark for the full rationale.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
_bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
var afterSetup = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSetup - beforeSetup;
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
_bufferWriter.ResetWrittenCount();
MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
}
// BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
// BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
public bool VerifyRoundTrip()
{
_bufferWriter.ResetWrittenCount();
MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
private sealed class SystemTextJsonBenchmark : ISerializerBenchmark
{
private readonly TestOrder _order;
private readonly JsonSerializerOptions _options;
private readonly string _serialized;
private readonly byte[] _serializedUtf8;
public string Engine => Configuration.EngineSystemTextJson;
public string IoMode => Configuration.IoString;
public string DispatchMode => Configuration.ModeRuntime; // System.Text.Json default uses reflection-based metadata (no source generator opt-in here)
public string OptionsPreset { get; }
public int SerializedSize => _serializedUtf8.Length;
public long SetupSerializeAllocBytes => 0;
public long SetupDeserializeAllocBytes => 0;
public SystemTextJsonBenchmark(TestOrder order, string optionsPreset)
{
_order = order;
OptionsPreset = optionsPreset;
_options = new JsonSerializerOptions
{
WriteIndented = false,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles
};
_serialized = JsonSerializer.Serialize(order, _options);
_serializedUtf8 = Configuration.Utf8NoBom.GetBytes(_serialized);
}
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++)
{
Serialize();
Deserialize();
}
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize() => JsonSerializer.Serialize(_order, _options);
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize() => JsonSerializer.Deserialize<TestOrder>(_serialized, _options);
public bool VerifyRoundTrip()
{
var json = JsonSerializer.Serialize(_order, _options);
var roundTripped = JsonSerializer.Deserialize<TestOrder>(json, _options);
return DeepEqualsViaJson(_order, roundTripped);
}
}
#endregion
#region Results
private sealed class BenchmarkResult
{
public string TestDataName { get; set; } = "";
public string Engine { get; set; } = "";
public string IoMode { get; set; } = "";
public string DispatchMode { get; set; } = "";
public string OptionsPreset { get; set; } = "";
/// <summary>True if Serialize() captures a full round-trip and Deserialize() is a no-op
/// (single-use streaming transports like NamedPipe). Excluded from "Fastest Serialize" / "Fastest Deserialize"
/// winners rankings; still ranked in "Fastest Round-trip". Display-side: Ser µs/op / SerAlloc / Des µs/op / DesAlloc
/// all show "N/A" since they were never measured separately; RT µs/op / RT Alloc carry the full round-trip values.</summary>
public bool IsRoundTripOnly { get; set; }
/// <summary>Synthesized display name for backwards compatibility / single-string-row scenarios. Includes DispatchMode so SGen and Runtime variants of the same preset don't collide in grouping (e.g. SUMMARY: WINNERS).</summary>
public string SerializerName => $"{Engine} ({IoMode}, {OptionsPreset}, {DispatchMode})";
public string? OptionsDescription { get; set; }
public int SerializedSize { get; set; }
public double SerializeTimeMs { get; set; }
public double DeserializeTimeMs { get; set; }
// Per-sample min/max alongside the median (median is the *Time*Ms field above). Surfaces
// inter-sample range — the visible noise floor for the row. 0 when the operation was skipped
// (mode != "all"/"ser"/"des") or when a single-sample fast path was used (min == max == median).
public double SerializeTimeMinMs { get; set; }
public double SerializeTimeMaxMs { get; set; }
public double DeserializeTimeMinMs { get; set; }
public double DeserializeTimeMaxMs { get; set; }
// Sample-population stddev (ms). Used by FormatMicrosWithRange to compute CV (stddev/mean)
// and emit the ⚠️ marker on rows above Configuration.UnstableCVThreshold. 0 in single-sample mode.
public double SerializeTimeStdDevMs { get; set; }
public double DeserializeTimeStdDevMs { get; set; }
// Per-row adaptive iteration count (post-CalibrateIterations). Each Ser and Des function calibrates
// independently to land its sample window at ~Configuration.TargetSampleMs; per-op µs is then iter-independent
// (`SerializeTimeMs / SerializeIterations * 1000`). For round-trip-only rows (NamedPipe etc.),
// RoundTripIterations carries the calibrated iter count; SerializeIterations and DeserializeIterations
// stay 0 (Ser and Des are not separately measurable on those rows).
public int SerializeIterations { get; set; }
public int DeserializeIterations { get; set; }
public int RoundTripIterations { get; set; }
public long SerializeAllocBytesPerOp { get; set; }
public long DeserializeAllocBytesPerOp { get; set; }
public long SetupSerializeAllocBytes { get; set; }
public long SetupDeserializeAllocBytes { get; set; }
/// <summary>Total round-trip time. For in-memory benchmarks: synthesized so that
/// <c>RoundTripTimeMs / RoundTripIterations</c> yields the correct <c>SerPerOp + DesPerOp</c> µs/op
/// (necessary because Ser and Des may have different iter counts post-calibration).
/// For round-trip-only benchmarks (NamedPipe etc.): the directly-measured pipe round-trip time.</summary>
public double RoundTripTimeMs { get; set; }
// Round-trip min/max + stddev — only populated for round-trip-only benchmarks (NamedPipe etc.) where
// RT is directly measured. For in-memory rows RT = Ser + Des, which has no single-sample
// distribution; surface Ser/Des range separately instead.
public double RoundTripTimeMinMs { get; set; }
public double RoundTripTimeMaxMs { get; set; }
public double RoundTripTimeStdDevMs { get; set; }
/// <summary>Total round-trip allocation per op. For in-memory benchmarks: <c>SerializeAlloc + DeserializeAlloc</c>.
/// For round-trip-only benchmarks: process-wide allocation measured via <see cref="GC.GetTotalAllocatedBytes"/>
/// (covers ALL threads — client, server-drain, channel internals — not just the caller).</summary>
public long RoundTripAllocBytesPerOp { get; set; }
}
private static void PrintResult(BenchmarkResult result)
{
// Numbers-only per-row entries; the column-headers carry units (µs/op, KB/op).
var ser = result.SerializeTimeMs > 0 ? $"{SerPerOp(result),7:F2}" : " N/A";
var des = result.DeserializeTimeMs > 0 ? $"{DesPerOp(result),7:F2}" : " N/A";
var serAlloc = result.SerializeTimeMs > 0 ? $"{ToKilobytes(result.SerializeAllocBytesPerOp),7:F2}" : " N/A";
var desAlloc = result.DeserializeTimeMs > 0 ? $"{ToKilobytes(result.DeserializeAllocBytesPerOp),7:F2}" : " N/A";
System.Console.WriteLine($" {result.SerializerName,-40} | Size: {result.SerializedSize,8:N0} B | Ser: {ser} µs/op ({serAlloc} KB/op) | Des: {des} µs/op ({desAlloc} KB/op)");
}
private static void PrintGroupedResults(List<BenchmarkResult> results, List<TestDataSet> testDataSets)
{
System.Console.WriteLine("\n");
System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
System.Console.WriteLine("║ GROUPED RESULTS BY TEST DATA ║");
System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
// Print serializer options
var optionsMap = results
.Where(r => r.OptionsDescription != null)
.Select(r => (r.SerializerName, r.OptionsDescription!))
.Distinct()
.ToList();
if (optionsMap.Count > 0)
{
System.Console.WriteLine();
System.Console.WriteLine(" Serializer Options:");
foreach (var (name, opts) in optionsMap)
System.Console.WriteLine($" {name}: {opts}");
}
foreach (var testData in testDataSets)
{
// Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
// Baseline switched MessagePack → MemoryPack: MemoryPack is the SOTA performance leader.
var memPackResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray));
// Pin the comparison to AcBinary's SGen variant — apples-to-apples vs MemoryPack (also source-generated).
// The Runtime variant is shown alongside in the table for context, not used as the headline number.
var acBinaryResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen));
System.Console.WriteLine($"\n┌─ {testData.DisplayName} ─".PadRight(172, '─') + "┐");
// Header-only units; per-row entries are numbers (µs/op for time, KB/op for alloc, KB pair "ser / des" for Setup, B for Size).
System.Console.WriteLine($"│ {"#",-4} │ {"Engine",-11} │ {"Options",-22} │ {"IO",-12} │ {"Mode",-8} │ {"Setup S/D KB",-14} │ {"Size B",-8} │ {"Ser µs/op",-10} │ {"SerAlc KB",-10} │ {"Des µs/op",-10} │ {"DesAlc KB",-10} │ {"RT µs/op",-10} │ {"RTAlc KB",-10} │");
System.Console.WriteLine($"├{"".PadRight(6, '─')}┼{"".PadRight(13, '─')}┼{"".PadRight(24, '─')}┼{"".PadRight(14, '─')}┼{"".PadRight(10, '─')}┼{"".PadRight(16, '─')}┼{"".PadRight(10, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┤");
var rank = 1;
foreach (var result in testResults)
{
var size = $"{result.SerializedSize:N0}";
var setup = $"{ToKilobytes(result.SetupSerializeAllocBytes):F2} / {ToKilobytes(result.SetupDeserializeAllocBytes):F2}";
var ser = result.SerializeTimeMs > 0 ? $"{SerPerOp(result):F2}" : "N/A";
var des = result.DeserializeTimeMs > 0 ? $"{DesPerOp(result):F2}" : "N/A";
var rt = result.RoundTripTimeMs > 0 ? $"{RtPerOp(result):F2}" : "N/A";
var serAlloc = result.SerializeTimeMs > 0 ? $"{ToKilobytes(result.SerializeAllocBytesPerOp):F2}" : "N/A";
var desAlloc = result.DeserializeTimeMs > 0 ? $"{ToKilobytes(result.DeserializeAllocBytesPerOp):F2}" : "N/A";
var rtAlloc = result.RoundTripAllocBytesPerOp > 0 ? $"{ToKilobytes(result.RoundTripAllocBytesPerOp):F2}" : "N/A";
// Highlight MemoryPack baseline (any Byte[]) and AcBinary headline contender (Byte[] + SGen) with win/lose colors.
// The AcBinary Byte[]+Runtime variant is shown unhighlighted — it's contextual (SGen speed-up reference), not the headline.
var isHighlighted = (result.Engine == Configuration.EngineMemoryPack && result.IoMode == Configuration.IoByteArray)
|| (result.Engine == Configuration.EngineAcBinary && result.IoMode == Configuration.IoByteArray && result.DispatchMode == Configuration.ModeSGen);
var prefix = isHighlighted ? "│►" : "│ ";
var suffix = isHighlighted ? "◄│" : " │";
// Color logic: Green = winner (faster), Red = loser (slower)
if (isHighlighted && memPackResult != null && acBinaryResult != null)
{
var isMemPack = (result.Engine == Configuration.EngineMemoryPack && result.IoMode == Configuration.IoByteArray);
var memPackFaster = RtPerOp(memPackResult) < RtPerOp(acBinaryResult);
if (isMemPack)
{
System.Console.ForegroundColor = memPackFaster ? ConsoleColor.Green : ConsoleColor.Red;
}
else
{
System.Console.ForegroundColor = memPackFaster ? ConsoleColor.Red : ConsoleColor.Green;
}
}
System.Console.WriteLine($"{prefix}{rank++,4} │ {result.Engine,-11} │ {result.OptionsPreset,-22} │ {result.IoMode,-12} │ {result.DispatchMode,-8} │ {setup,14} │ {size,8} │ {ser,10} │ {serAlloc,10} │ {des,10} │ {desAlloc,10} │ {rt,10} │ {rtAlloc,10}{suffix}");
if (isHighlighted)
{
System.Console.ResetColor();
}
}
// Footer row: AcBinary (Byte[]) vs MemoryPack (Byte[]) comparison per column
if (memPackResult != null && acBinaryResult != null)
{
var sizePct = (acBinaryResult.SerializedSize / (double)memPackResult.SerializedSize - 1) * 100;
// Per-op µs ratio (iter-independent) — Ser/Des may have different iter counts on the two rows.
var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0;
var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0;
var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0;
var serAllocPct = memPackResult.SerializeAllocBytesPerOp > 0 ? (acBinaryResult.SerializeAllocBytesPerOp / (double)memPackResult.SerializeAllocBytesPerOp - 1) * 100 : 0;
var desAllocPct = memPackResult.DeserializeAllocBytesPerOp > 0 ? (acBinaryResult.DeserializeAllocBytesPerOp / (double)memPackResult.DeserializeAllocBytesPerOp - 1) * 100 : 0;
var rtAllocPct = memPackResult.RoundTripAllocBytesPerOp > 0 ? (acBinaryResult.RoundTripAllocBytesPerOp / (double)memPackResult.RoundTripAllocBytesPerOp - 1) * 100 : 0;
// Footer separator: merge first 5 cols (#, Engine, Options, IO, Mode) → comparison label;
// remaining 8 cols stay aligned (Setup S/D KB, Size, Ser µs/op, SerAlc KB, Des µs/op, DesAlc KB, RT µs/op, RTAlc KB).
System.Console.WriteLine($"├{"".PadRight(6, '─')}┴{"".PadRight(13, '─')}┴{"".PadRight(24, '─')}┴{"".PadRight(14, '─')}┴{"".PadRight(10, '─')}┼{"".PadRight(16, '─')}┼{"".PadRight(10, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┼{"".PadRight(12, '─')}┤");
// Merged label cell width = 4 + 11 + 22 + 12 + 8 + 4*3 (dropped separators) = 69
System.Console.Write($"│ {" AcBinary (Byte[]) vs MemoryPack (Byte[])",-69} │ ");
// Setup S/D KB (n/a for Byte[] vs Byte[] — neither pre-allocates)
System.Console.Write($"{"",14}");
System.Console.Write(" │ ");
// Size
System.Console.ForegroundColor = sizePct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{sizePct,+7:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Serialize
System.Console.ForegroundColor = serPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{serPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Serialize Alloc
System.Console.ForegroundColor = serAllocPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{serAllocPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Deserialize
System.Console.ForegroundColor = desPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{desPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Deserialize Alloc
System.Console.ForegroundColor = desAllocPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{desAllocPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Round-trip
System.Console.ForegroundColor = rtPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{rtPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.Write(" │ ");
// Round-trip Alloc
System.Console.ForegroundColor = rtAllocPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.Write($"{rtAllocPct,+9:+0;-0}%");
System.Console.ResetColor();
System.Console.WriteLine(" │");
}
// Closing line: merged on left (─ between cols 1-5), ┴ on the right (cols 6-13 boundary, 8 unmerged cells).
System.Console.WriteLine($"└{"".PadRight(6, '─')}─{"".PadRight(13, '─')}─{"".PadRight(24, '─')}─{"".PadRight(14, '─')}─{"".PadRight(10, '─')}┴{"".PadRight(16, '─')}┴{"".PadRight(10, '─')}┴{"".PadRight(12, '─')}┴{"".PadRight(12, '─')}┴{"".PadRight(12, '─')}┴{"".PadRight(12, '─')}┴{"".PadRight(12, '─')}┴{"".PadRight(12, '─')}┘");
//System.Console.WriteLine($"GrowBufferCount: {AcBinarySerializer.GrowBufferCount}");
//System.Console.WriteLine($"GrowBufferTotalBytes: {AcBinarySerializer.GrowBufferTotalBytes:N0} bytes");
}
// Summary: Best serializer for each category
System.Console.WriteLine("\n");
System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
System.Console.WriteLine("║ SUMMARY: WINNERS ║");
System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
System.Console.WriteLine($"\n{"Category",-20} │ {"Winner",-40} │ {"Avg Value",-18}");
System.Console.WriteLine($"{"".PadRight(20, '─')}─┼─{"".PadRight(40, '─')}─┼─{"".PadRight(18, '─')}");
// Fastest Serialize — round-trip-only serializers (NamedPipe etc.) excluded:
// their Serialize() captures the full round-trip and isn't comparable to a pure Ser metric.
// Average is over per-op µs (iter-independent) instead of batch-time, since rows may now
// have different iter counts post-calibration.
var fastestSer = results.Where(r => r.SerializeTimeMs > 0 && !r.IsRoundTripOnly)
.GroupBy(r => r.SerializerName)
.Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => SerPerOp(r)) })
.OrderBy(x => x.AvgPerOp)
.FirstOrDefault();
if (fastestSer != null)
System.Console.WriteLine($"{"Fastest Serialize",-20} │ {fastestSer.Name,-40} │ {fastestSer.AvgPerOp,12:F2} µs/op");
// Fastest Deserialize — round-trip-only serializers excluded (their Deserialize() is a no-op).
var fastestDes = results.Where(r => r.DeserializeTimeMs > 0 && !r.IsRoundTripOnly)
.GroupBy(r => r.SerializerName)
.Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => DesPerOp(r)) })
.OrderBy(x => x.AvgPerOp)
.FirstOrDefault();
if (fastestDes != null)
System.Console.WriteLine($"{"Fastest Deserialize",-20} │ {fastestDes.Name,-40} │ {fastestDes.AvgPerOp,12:F2} µs/op");
// Smallest Size
var smallestSize = results
.GroupBy(r => r.SerializerName)
.Select(g => new { Name = g.Key, AvgSize = g.Average(r => r.SerializedSize) })
.OrderBy(x => x.AvgSize)
.FirstOrDefault();
if (smallestSize != null)
System.Console.WriteLine($"{"Smallest Size",-20} │ {smallestSize.Name,-40} │ {smallestSize.AvgSize,15:F0} B");
// Fastest Round-trip — iter-independent per-op average.
var fastestRt = results.Where(r => r.RoundTripTimeMs > 0)
.GroupBy(r => r.SerializerName)
.Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => RtPerOp(r)) })
.OrderBy(x => x.AvgPerOp)
.FirstOrDefault();
if (fastestRt != null)
System.Console.WriteLine($"{"Fastest Round-trip",-20} │ {fastestRt.Name,-40} │ {fastestRt.AvgPerOp,12:F2} µs/op");
// Overall AcBinary (SGen) vs MemoryPack comparison (baseline switched MessagePack → MemoryPack as SOTA reference).
// AcBinary side is restricted to DispatchMode == SGen — apples-to-apples vs MemoryPack which is also source-generated.
// The Runtime variant is shown side-by-side in each per-test fancy table for SGen-speedup context, but excluded from this headline.
var memPackSerResults = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.SerializeTimeMs > 0).ToList();
var memPackDesResults = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.DeserializeTimeMs > 0).ToList();
var memPackRtResults = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.RoundTripTimeMs > 0).ToList();
var acBinarySerResults = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.SerializeTimeMs > 0).ToList();
var acBinaryDesResults = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.DeserializeTimeMs > 0).ToList();
var acBinaryRtResults = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.RoundTripTimeMs > 0).ToList();
// Skip comparison if no data available
if (memPackRtResults.Count == 0 || acBinaryRtResults.Count == 0)
{
System.Console.WriteLine();
System.Console.WriteLine("── AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ──");
System.Console.WriteLine(" (Comparison requires both serialize and deserialize data)");
return;
}
// All averages are over per-op µs (iter-independent). Batch-time averaging would mix rows
// measured with different iter counts (post-calibration), producing meaningless numbers.
// Three aggregations per metric:
// - Arithmetic mean (current behavior) — magnitude-weighted, biased toward Large cell.
// - Geometric mean of per-cell ratios — magnitude-neutral, each cell weighted equally.
// - Median of per-cell ratios — outlier-resistant.
// The geo/median variants surface when a single cell dominates the arithmetic average
// (typical when one cell's µs-per-op is an order of magnitude larger than the others).
var sizeAcResults = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen)).ToList();
var sizeMpResults = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray)).ToList();
var serStats = ComputeOverallStats(acBinarySerResults, memPackSerResults, SerPerOp);
var desStats = ComputeOverallStats(acBinaryDesResults, memPackDesResults, DesPerOp);
var rtStats = ComputeOverallStats(acBinaryRtResults, memPackRtResults, RtPerOp);
var sizeStats = ComputeOverallStats(sizeAcResults, sizeMpResults, r => r.SerializedSize);
var serAllocStats = ComputeOverallStats(acBinarySerResults, memPackSerResults, r => r.SerializeAllocBytesPerOp);
var desAllocStats = ComputeOverallStats(acBinaryDesResults, memPackDesResults, r => r.DeserializeAllocBytesPerOp);
System.Console.WriteLine();
System.Console.WriteLine("── AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ──");
WriteOverallLine("Serialize", "µs/op", serStats);
WriteOverallLine("Deserialize", "µs/op", desStats);
WriteOverallLine("Round-trip", "µs/op", rtStats);
WriteOverallLine("Size", "B", sizeStats, "F0");
WriteOverallLine("Ser Alloc", "B/op", serAllocStats, "F0");
WriteOverallLine("Des Alloc", "B/op", desAllocStats, "F0");
}
/// <summary>
/// Formats a signed percent delta with explicit sign for positive values (`+1.5%`, `-3.0%`, `0.0%`).
/// Padded to 7 chars (e.g. ` +12.3%`, `-100.0%`) for column alignment in the Overall block.
/// </summary>
private static string FormatPctSigned(double pct) => pct.ToString("+0.0;-0.0;0.0", System.Globalization.CultureInfo.InvariantCulture).PadLeft(6) + "%";
/// <summary>
/// Renders one Overall row with arith / geo / median deltas + AcBinary/MemPack absolute means.
/// Color is driven by the geometric-mean delta (magnitude-neutral signal). Skips silently when
/// stats is null (no paired data).
/// </summary>
private static void WriteOverallLine(string label, string unit, OverallStats? stats, string fmt = "F2")
{
if (stats == null) return;
// Color follows geo-mean (the magnitude-neutral signal). The arith-mean column may show a
// different sign when a single big cell dominates — that's exactly the signal we want to surface.
System.Console.ForegroundColor = stats.GeoMeanPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
System.Console.WriteLine($" {label,-12} arith {FormatPctSigned(stats.ArithMeanPct)} │ geo {FormatPctSigned(stats.GeoMeanPct)} │ median {FormatPctSigned(stats.MedianPct)} ({stats.AcAvg.ToString(fmt, System.Globalization.CultureInfo.InvariantCulture)} {unit} vs {stats.MpAvg.ToString(fmt, System.Globalization.CultureInfo.InvariantCulture)} {unit}, {stats.CellCount} cells)");
System.Console.ResetColor();
}
/// <summary>
/// Same as <see cref="WriteOverallLine"/> but appends to a <see cref="StringBuilder"/> (no color).
/// Used by the .log and .LLM file writers.
/// </summary>
private static void AppendOverallLine(StringBuilder sb, string label, string unit, OverallStats? stats, string fmt = "F2")
{
if (stats == null) return;
sb.AppendLine($" {label,-12} arith {FormatPctSigned(stats.ArithMeanPct)} | geo {FormatPctSigned(stats.GeoMeanPct)} | median {FormatPctSigned(stats.MedianPct)} ({stats.AcAvg.ToString(fmt, System.Globalization.CultureInfo.InvariantCulture)} {unit} vs {stats.MpAvg.ToString(fmt, System.Globalization.CultureInfo.InvariantCulture)} {unit}, {stats.CellCount} cells)");
}
private static void SaveResults(List<BenchmarkResult> results, List<TestDataSet> testDataSets)
{
Directory.CreateDirectory(Configuration.ResultsDirectory);
var timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss");
var baseFileName = $"Console.FullBenchmark_{Configuration.BuildConfiguration}_{timestamp}";
var logFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.log");
var outputFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.output");
// Save binary output to separate .output file
var largeTestData = testDataSets.FirstOrDefault(t => t.Name.StartsWith("Large"));
if (largeTestData != null)
{
var outputSb = new StringBuilder();
outputSb.AppendLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
outputSb.AppendLine("║ SERIALIZED BINARY OUTPUT ║");
outputSb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║");
outputSb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
outputSb.AppendLine();
outputSb.AppendLine("=== SERIALIZED BYTES: Large (5x5x5x10) - AcBinary (Default) ===");
var serializedBytes = AcBinarySerializer.Serialize(largeTestData.Order, AcBinarySerializerOptions.Default);
outputSb.AppendLine($"Size: {serializedBytes.Length:N0} bytes");
outputSb.AppendLine();
outputSb.AppendLine("Hex dump:");
outputSb.AppendLine(FormatHexDump(serializedBytes));
File.WriteAllText(outputFilePath, outputSb.ToString(), Configuration.Utf8NoBom);
System.Console.WriteLine($"✓ Binary output saved to: {outputFilePath}");
}
// Save benchmark results to .log file
var sb = new StringBuilder();
sb.AppendLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
sb.AppendLine("║ SERIALIZER BENCHMARK RESULTS ║");
sb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║");
sb.AppendLine($"║ Build: {Configuration.BuildConfiguration}".PadRight(100) + "║");
sb.AppendLine($"║ Charset: {GetCurrentCharsetName()}".PadRight(100) + "║");
sb.AppendLine($"║ Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target)".PadRight(100) + "║");
sb.AppendLine($"║ Samples: {Configuration.BenchmarkSamples} (median) + 1 pilot discarded".PadRight(100) + "║");
sb.AppendLine($"║ Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"}".PadRight(100) + "║");
sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
sb.AppendLine();
// Serializer options summary
var optionsMap = results
.Where(r => r.OptionsDescription != null)
.Select(r => (r.SerializerName, r.OptionsDescription!))
.Distinct()
.ToList();
if (optionsMap.Count > 0)
{
sb.AppendLine("=== SERIALIZER OPTIONS ===");
foreach (var (name, opts) in optionsMap)
sb.AppendLine($" {name}: {opts}");
sb.AppendLine();
}
// CSV-like data for easy import — keeps raw byte integers (no KB rounding) so external tools can compute precisely.
sb.AppendLine("=== RAW DATA (CSV) ===");
sb.AppendLine("TestData,Engine,IO,Mode,Options,Size,SerializeMicrosPerOp,DeserializeMicrosPerOp,RoundTripMicrosPerOp,SerializeAllocBytesPerOp,DeserializeAllocBytesPerOp,RoundTripAllocBytesPerOp,SetupSerializeAllocBytes,SetupDeserializeAllocBytes");
foreach (var testData in testDataSets)
{
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).ToList();
foreach (var result in testResults)
{
sb.AppendLine($"{result.TestDataName},{result.Engine},{result.IoMode},{result.DispatchMode},{result.OptionsPreset},{result.SerializedSize},{SerPerOp(result):F2},{DesPerOp(result):F2},{RtPerOp(result):F2},{result.SerializeAllocBytesPerOp},{result.DeserializeAllocBytesPerOp},{result.RoundTripAllocBytesPerOp},{result.SetupSerializeAllocBytes},{result.SetupDeserializeAllocBytes}");
}
}
sb.AppendLine();
// Formatted results
sb.AppendLine("=== FORMATTED RESULTS BY TEST DATA ===");
sb.AppendLine("(►) = Highlighted: MemoryPack (Byte[]) (baseline) and AcBinary (Byte[])");
sb.AppendLine();
foreach (var testData in testDataSets)
{
// Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
var memPackResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray));
// Pin the comparison to AcBinary's SGen variant — apples-to-apples vs MemoryPack (also source-generated).
// The Runtime variant is shown alongside in the table for context, not used as the headline number.
var acBinaryResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen));
sb.AppendLine();
sb.AppendLine($"--- {testData.DisplayName} ---");
sb.AppendLine($"{"#",-4} {"Serializer",-42} {"Size B",-12} {"Setup S/D KB",-14} {"Ser µs/op",-12} {"Des µs/op",-12} {"RT µs/op",-12} {"SerAlc KB",-11} {"DesAlc KB",-11}");
sb.AppendLine(new string('-', 140));
var rank = 1;
foreach (var result in testResults)
{
var isHighlighted = ((result.Engine == Configuration.EngineMemoryPack || result.Engine == Configuration.EngineAcBinary) && result.IoMode == Configuration.IoByteArray);
var prefix = isHighlighted ? "► " : " ";
var size = $"{result.SerializedSize:N0}";
var setup = $"{ToKilobytes(result.SetupSerializeAllocBytes):F2} / {ToKilobytes(result.SetupDeserializeAllocBytes):F2}";
var ser = result.SerializeTimeMs > 0 ? $"{SerPerOp(result):F2}" : "N/A";
var des = result.DeserializeTimeMs > 0 ? $"{DesPerOp(result):F2}" : "N/A";
var rt = result.RoundTripTimeMs > 0 ? $"{RtPerOp(result):F2}" : "N/A";
var serAlloc = result.SerializeTimeMs > 0 ? $"{ToKilobytes(result.SerializeAllocBytesPerOp):F2}" : "N/A";
var desAlloc = result.DeserializeTimeMs > 0 ? $"{ToKilobytes(result.DeserializeAllocBytesPerOp):F2}" : "N/A";
sb.AppendLine($"{rank++,2} {prefix}{result.SerializerName,-40} {size,-12} {setup,-14} {ser,-12} {des,-12} {rt,-12} {serAlloc,-11} {desAlloc,-11}");
}
// Summary row for this test data (vs MemoryPack — baseline switched MessagePack → MemoryPack)
if (memPackResult != null && acBinaryResult != null)
{
var sizePct = (acBinaryResult.SerializedSize / (double)memPackResult.SerializedSize - 1) * 100;
// Per-op µs ratio (iter-independent) — Ser/Des may have different iter counts on the two rows.
var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0;
var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0;
var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0;
sb.AppendLine($" AcBinary (Byte[]) vs MemoryPack (Byte[]): Size {sizePct:+0;-0}% │ Ser {serPct:+0;-0}% │ Des {desPct:+0;-0}% │ RT {rtPct:+0;-0}%");
}
//sb.AppendLine($"GrowBufferCount: {AcBinarySerializer.GrowBufferCount}");
//sb.AppendLine($"GrowBufferTotalBytes: {AcBinarySerializer.GrowBufferTotalBytes:N0} bytes");
}
// Summary comparison (vs MemoryPack)
// Restrict AcBinary side to SGen — the SGen vs Runtime variants are shown side-by-side
// in the per-test fancy table; the headline should compare apples-to-apples (both source-generated).
sb.AppendLine();
sb.AppendLine("=== AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ===");
var memPackSerResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.SerializeTimeMs > 0).ToList();
var memPackDesResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.DeserializeTimeMs > 0).ToList();
var memPackRtResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.RoundTripTimeMs > 0).ToList();
var acBinarySerResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.SerializeTimeMs > 0).ToList();
var acBinaryDesResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.DeserializeTimeMs > 0).ToList();
var acBinaryRtResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.RoundTripTimeMs > 0).ToList();
// Skip comparison block if either side has no Byte[] data — happens in AsyncPipe-only mode
// where only NamedPipe rows exist (no MemoryPack baseline, no AcBinary Byte[] reference).
// Mirrors the same early-return guard in PrintGroupedResults.
if (memPackRtResults2.Count == 0 || acBinaryRtResults2.Count == 0)
{
sb.AppendLine(" (Comparison requires both serialize and deserialize data)");
File.WriteAllText(logFilePath, sb.ToString(), Configuration.Utf8NoBom);
System.Console.WriteLine($"✓ Results saved to: {logFilePath}");
var llmFilePathEarly = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.LLM");
SaveLlmResults(llmFilePathEarly, results, testDataSets);
return;
}
// Per-cell-paired aggregation: arithmetic / geometric / median. See PrintSummary's parallel
// block + the OverallStats record for the rationale (per-cell ratio vs magnitude-weighted mean).
var sizeAcResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen)).ToList();
var sizeMpResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray)).ToList();
AppendOverallLine(sb, "Serialize", "µs/op", ComputeOverallStats(acBinarySerResults2, memPackSerResults2, SerPerOp));
AppendOverallLine(sb, "Ser Alloc", "B/op", ComputeOverallStats(acBinarySerResults2, memPackSerResults2, r => r.SerializeAllocBytesPerOp), "F0");
AppendOverallLine(sb, "Deserialize", "µs/op", ComputeOverallStats(acBinaryDesResults2, memPackDesResults2, DesPerOp));
AppendOverallLine(sb, "Des Alloc", "B/op", ComputeOverallStats(acBinaryDesResults2, memPackDesResults2, r => r.DeserializeAllocBytesPerOp), "F0");
AppendOverallLine(sb, "Round-trip", "µs/op", ComputeOverallStats(acBinaryRtResults2, memPackRtResults2, RtPerOp));
AppendOverallLine(sb, "Size", "B", ComputeOverallStats(sizeAcResults2, sizeMpResults2, r => r.SerializedSize), "F0");
File.WriteAllText(logFilePath, sb.ToString(), Configuration.Utf8NoBom);
System.Console.WriteLine($"✓ Results saved to: {logFilePath}");
// Save LLM-optimized results
var llmFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.LLM");
SaveLlmResults(llmFilePath, results, testDataSets);
}
private static void SaveLlmResults(string filePath, List<BenchmarkResult> results, List<TestDataSet> testDataSets)
{
var sb = new StringBuilder();
var testTypeName = testDataSets.FirstOrDefault()?.TypeName ?? "unknown";
sb.AppendLine($"# AcBinary Benchmark {Configuration.BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
sb.AppendLine($"Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (target ~{Configuration.TargetSampleMs} ms/sample) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | TestType: {testTypeName} | UnstableCV threshold: {Configuration.UnstableCVThreshold * 100:F0}%");
sb.AppendLine("Baseline: MemoryPack (Byte[]) (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup");
// Options summary
var optionsMap = results
.Where(r => r.OptionsDescription != null)
.Select(r => (r.SerializerName, r.OptionsDescription!))
.Distinct()
.ToList();
if (optionsMap.Count > 0)
{
sb.AppendLine();
sb.AppendLine("## Options");
sb.AppendLine();
foreach (var (name, opts) in optionsMap)
sb.AppendLine($"- **{name}**: {opts}");
}
// Flat results table sorted by test data then round-trip (now includes Alloc + Iter columns).
// Iter column shows per-row Ser/Des iteration counts (post-adaptive-calibration), so the reader
// can verify that each cell's batch sample landed near the Configuration.TargetSampleMs window.
sb.AppendLine();
sb.AppendLine("## Results");
sb.AppendLine();
sb.AppendLine("TestData | Engine | IO | Mode | Options | Size(B) | Ser(µs/op) | Deser(µs/op) | RT(µs/op) | SerAlloc(KB/op) | DesAlloc(KB/op) | RTAlloc(KB/op) | Setup S/D(KB) | Iter Ser/Des");
sb.AppendLine("---|---|---|---|---|---|---|---|---|---|---|---|---|---");
foreach (var testData in testDataSets)
{
var testResults = results
.Where(r => r.TestDataName == testData.DisplayName)
// Per-op µs (iter-independent) ordering — mixing iter counts within a cell is now expected.
.OrderBy(RtPerOp)
.ToList();
foreach (var r in testResults)
{
var inv = System.Globalization.CultureInfo.InvariantCulture;
// Per-cell median + inter-sample range (min..max) + CV-threshold marker (⚠X.X% when CV > 3%).
// Range surfaces the noise floor for each row so a small inter-engine delta is easy to
// judge against the row's noise. Format: "26.86 (24.50..29.10)" or
// "26.86 (24.50..29.10) ⚠5.2%" when stddev/mean exceeds the unstable threshold.
// When only one sample was taken (Debug / quick mode) min == max == median; collapse
// to bare median to avoid visual clutter.
var ser = r.SerializeTimeMs > 0 ? FormatMicrosWithRange(r.SerializeTimeMs, r.SerializeTimeMinMs, r.SerializeTimeMaxMs, r.SerializeTimeStdDevMs, r.SerializeIterations, inv) : "-";
var des = r.DeserializeTimeMs > 0 ? FormatMicrosWithRange(r.DeserializeTimeMs, r.DeserializeTimeMinMs, r.DeserializeTimeMaxMs, r.DeserializeTimeStdDevMs, r.DeserializeIterations, inv) : "-";
var rt = r.RoundTripTimeMs > 0
? (r.IsRoundTripOnly
? FormatMicrosWithRange(r.RoundTripTimeMs, r.RoundTripTimeMinMs, r.RoundTripTimeMaxMs, r.RoundTripTimeStdDevMs, r.RoundTripIterations, inv)
: RtPerOp(r).ToString("F2", inv))
: "-";
var serAlloc = r.SerializeTimeMs > 0 ? ToKilobytes(r.SerializeAllocBytesPerOp).ToString("F2", inv) : "-";
var desAlloc = r.DeserializeTimeMs > 0 ? ToKilobytes(r.DeserializeAllocBytesPerOp).ToString("F2", inv) : "-";
var rtAlloc = r.RoundTripAllocBytesPerOp > 0 ? ToKilobytes(r.RoundTripAllocBytesPerOp).ToString("F2", inv) : "-";
var setupAlloc = $"{ToKilobytes(r.SetupSerializeAllocBytes).ToString("F2", inv)} / {ToKilobytes(r.SetupDeserializeAllocBytes).ToString("F2", inv)}";
// Iter Ser/Des column — per-row adaptive iter counts. RT-only rows show Iter for RT.
var iterCol = r.IsRoundTripOnly
? r.RoundTripIterations.ToString(inv)
: $"{(r.SerializeIterations > 0 ? r.SerializeIterations.ToString(inv) : "-")} / {(r.DeserializeIterations > 0 ? r.DeserializeIterations.ToString(inv) : "-")}";
sb.AppendLine($"{r.TestDataName} | {r.Engine} | {r.IoMode} | {r.DispatchMode} | {r.OptionsPreset} | {r.SerializedSize} | {ser} | {des} | {rt} | {serAlloc} | {desAlloc} | {rtAlloc} | {setupAlloc} | {iterCol}");
}
}
// Overall AcBinary (SGen, Byte[]) vs MemoryPack (Byte[]) comparison — same three aggregations
// as the .log / console output (arithmetic / geometric / median of per-cell ratios). The
// arith mean is magnitude-weighted (Large cell dominates); geo/median are per-cell-equal
// signals. Adding this lets an LLM diagnose whether a headline delta is a real overall
// win/loss or a single-cell artifact.
var memPackByteArrayResults = results.Where(r => r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray).ToList();
var acBinarySGenByteArrayResults = results.Where(r => r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen).ToList();
var memPackSerResultsLlm = memPackByteArrayResults.Where(r => r.SerializeTimeMs > 0).ToList();
var memPackDesResultsLlm = memPackByteArrayResults.Where(r => r.DeserializeTimeMs > 0).ToList();
var memPackRtResultsLlm = memPackByteArrayResults.Where(r => r.RoundTripTimeMs > 0).ToList();
var acBinarySerResultsLlm = acBinarySGenByteArrayResults.Where(r => r.SerializeTimeMs > 0).ToList();
var acBinaryDesResultsLlm = acBinarySGenByteArrayResults.Where(r => r.DeserializeTimeMs > 0).ToList();
var acBinaryRtResultsLlm = acBinarySGenByteArrayResults.Where(r => r.RoundTripTimeMs > 0).ToList();
if (memPackRtResultsLlm.Count > 0 && acBinaryRtResultsLlm.Count > 0)
{
sb.AppendLine();
sb.AppendLine("## Overall: AcBinary (Byte[], SGen) vs MemoryPack (Byte[])");
sb.AppendLine();
sb.AppendLine("Three aggregations of per-cell results: **arith** = arithmetic mean of µs/op (magnitude-weighted, Large cell dominates); **geo** = geometric mean of per-cell ratios (each cell weighted equally); **median** = median of per-cell ratios (outlier-resistant). Negative % = AcBinary faster/smaller; positive % = MemPack faster/smaller. The geo/median variants surface when a single big cell skews the arithmetic mean.");
sb.AppendLine();
sb.AppendLine("```");
AppendOverallLine(sb, "Serialize", "µs/op", ComputeOverallStats(acBinarySerResultsLlm, memPackSerResultsLlm, SerPerOp));
AppendOverallLine(sb, "Ser Alloc", "B/op", ComputeOverallStats(acBinarySerResultsLlm, memPackSerResultsLlm, r => r.SerializeAllocBytesPerOp), "F0");
AppendOverallLine(sb, "Deserialize", "µs/op", ComputeOverallStats(acBinaryDesResultsLlm, memPackDesResultsLlm, DesPerOp));
AppendOverallLine(sb, "Des Alloc", "B/op", ComputeOverallStats(acBinaryDesResultsLlm, memPackDesResultsLlm, r => r.DeserializeAllocBytesPerOp), "F0");
AppendOverallLine(sb, "Round-trip", "µs/op", ComputeOverallStats(acBinaryRtResultsLlm, memPackRtResultsLlm, RtPerOp));
AppendOverallLine(sb, "Size", "B", ComputeOverallStats(acBinarySGenByteArrayResults, memPackByteArrayResults, r => r.SerializedSize), "F0");
sb.AppendLine("```");
}
File.WriteAllText(filePath, sb.ToString(), Configuration.Utf8NoBom);
System.Console.WriteLine($"✓ LLM results saved to: {filePath}");
}
/// <summary>
/// Formats byte array as hex dump with offset, hex values, and ASCII representation.
/// </summary>
private static string FormatHexDump(byte[] bytes, int bytesPerLine = 16)
{
var sb = new StringBuilder();
for (var i = 0; i < bytes.Length; i += bytesPerLine)
{
// Offset
sb.Append($"{i:X8} ");
// Hex bytes
for (var j = 0; j < bytesPerLine; j++)
{
if (i + j < bytes.Length)
sb.Append($"{bytes[i + j]:X2} ");
else
sb.Append(" ");
if (j == 7) sb.Append(' '); // Extra space in middle
}
sb.Append(" |");
// ASCII representation
for (var j = 0; j < bytesPerLine && i + j < bytes.Length; j++)
{
var b = bytes[i + j];
sb.Append(b is >= 32 and < 127 ? (char)b : '.');
}
sb.AppendLine("|");
}
return sb.ToString();
}
#endregion
}