3323 lines
188 KiB
C#
3323 lines
188 KiB
C#
using AyCode.Core.Compression;
|
||
using AyCode.Core.Serializers.Attributes;
|
||
using AyCode.Core.Serializers.Binaries;
|
||
using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark)
|
||
using AyCode.Core.Tests.TestModels;
|
||
using MemoryPack;
|
||
#if !AYCODE_NATIVEAOT
|
||
using MessagePack;
|
||
using MessagePack.Resolvers;
|
||
#endif
|
||
using Microsoft.Extensions.Options;
|
||
using System.Buffers;
|
||
using System.Diagnostics;
|
||
using System.IO.Pipelines;
|
||
using System.IO.Pipes;
|
||
using System.Reflection;
|
||
using System.Runtime.CompilerServices;
|
||
using System.Text;
|
||
using System.Text.Json;
|
||
|
||
namespace AyCode.Core.Serializers.Console;
|
||
|
||
/// <summary>
|
||
/// Comprehensive benchmark application for all serializers.
|
||
/// Compares: AcBinary (all options), MemoryPack, MessagePack, Newtonsoft.Json, System.Text.Json
|
||
///
|
||
/// Usage:
|
||
/// dotnet run # Run all benchmarks
|
||
/// dotnet run -- quick # Quick mode (fewer iterations)
|
||
/// dotnet run -- serialize # Serialize only
|
||
/// dotnet run -- deserialize # Deserialize only
|
||
/// </summary>
|
||
public static class Program
|
||
{
|
||
// Configuration (constants, mutable state, attribute-flag aggregation) → Configuration.cs
|
||
|
||
/// <summary>
/// Builds the shared "Options" column text for an AcBinary benchmark row. Every feature that has
/// an attribute-level switch is rendered as <c>Name=value(opt) | flag (attr)</c>, pairing the value
/// configured on <paramref name="options"/> with the matching <c>Configuration.AttrFlags</c> entry,
/// so a feature enabled in the options but suppressed by an attribute stays visible to the reader.
/// </summary>
/// <param name="options">Serializer options whose settings are rendered.</param>
/// <param name="extra">
/// Optional benchmark-specific suffix (e.g. <c>", BufferSize=4096B"</c>). It is appended verbatim
/// after the common fields, so the caller supplies its own leading separator.
/// </param>
/// <returns>The single-line options description.</returns>
private static string BuildAcBinaryOptionsDescription(AcBinarySerializerOptions options, string extra = "")
{
    // A registered filter callback has no meaningful textual form, so only its presence is shown.
    string filterState;
    if (options.PropertyFilter == null)
    {
        filterState = "None";
    }
    else
    {
        filterState = "Set";
    }

    // One entry per displayed field, in display order; joined with ", " below.
    string[] fields =
    {
        $"WireMode={options.WireMode}",
        $"RefHandling={options.ReferenceHandling}(opt) | {Configuration.AttrFlags.refHandling} (attr)",
        $"Interning={options.UseStringInterning}(opt) | {Configuration.AttrFlags.internString} (attr)",
        $"Metadata={options.UseMetadata}(opt) | {Configuration.AttrFlags.metadata} (attr)",
        $"PropertyFilter={filterState}(opt) | {Configuration.AttrFlags.propertyFilter} (attr)",
        $"SGen={options.UseGeneratedCode}",
        $"Compression={options.UseCompression}{extra}",
    };

    return string.Join(", ", fields);
}
|
||
|
||
/// <summary>
/// Picks the MemoryPack options that match <see cref="Configuration.SelectedWireMode"/>, so the
/// AcBinary-vs-MemoryPack rows compare the same string-encoding family:
/// <list type="bullet">
/// <item><see cref="WireMode.Fast"/> → <see cref="MemoryPackSerializerOptions.Utf16"/>.</item>
/// <item>Any other wire mode → <see cref="MemoryPackSerializerOptions.Default"/> (UTF-8).</item>
/// </list>
/// Without this alignment a Fast-wire row measured against MemoryPack's default would mix the
/// UTF-16-vs-UTF-8 size difference into the reported wire-size delta.
/// </summary>
/// <returns>The MemoryPack options instance to use for every MemoryPack benchmark row.</returns>
private static MemoryPackSerializerOptions GetMemPackOptions()
{
    if (Configuration.SelectedWireMode == WireMode.Fast)
    {
        return MemoryPackSerializerOptions.Utf16;
    }

    return MemoryPackSerializerOptions.Default;
}
|
||
|
||
/// <summary>
/// Converts a total batch time (milliseconds spent on <paramref name="iterations"/> operations)
/// into per-operation microseconds: <c>totalMs / iterations × 1000</c>.
/// Per-op µs is the iteration-independent unit — the same operation measured with different
/// iteration counts should yield the same value (within noise). That matters because the
/// iteration count is calibrated per row, so there is no single global count to divide by.
/// </summary>
/// <param name="totalMs">Total elapsed time of the batch, in milliseconds.</param>
/// <param name="iterations">Number of operations executed in that batch.</param>
/// <returns>
/// Microseconds per operation, or <c>0</c> when <paramref name="iterations"/> is zero or negative
/// (e.g. a phase that was never measured).
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static double ToPerOpMicros(double totalMs, int iterations) => iterations > 0 ? totalMs / iterations * 1000.0 : 0;
|
||
|
||
// Per-row per-op µs accessors — pull batch-time + iter from BenchmarkResult and convert. Used wherever
|
||
// averaging or comparison happens across rows with potentially different iter counts (Winners summary,
|
||
// Overall comparison, per-cell summary row). Keeping these as methods rather than properties on
|
||
// BenchmarkResult preserves the result-as-data-bag distinction.
|
||
/// <summary>Per-operation serialize time (µs) of <paramref name="r"/>, using the row's own serialize iteration count.</summary>
private static double SerPerOp(BenchmarkResult r)
{
    return ToPerOpMicros(r.SerializeTimeMs, r.SerializeIterations);
}
|
||
/// <summary>Per-operation deserialize time (µs) of <paramref name="r"/>, using the row's own deserialize iteration count.</summary>
private static double DesPerOp(BenchmarkResult r)
{
    return ToPerOpMicros(r.DeserializeTimeMs, r.DeserializeIterations);
}
|
||
/// <summary>Per-operation round-trip time (µs) of <paramref name="r"/>, using the row's own round-trip iteration count.</summary>
private static double RtPerOp(BenchmarkResult r)
{
    return ToPerOpMicros(r.RoundTripTimeMs, r.RoundTripIterations);
}
|
||
|
||
/// <summary>
/// Result of one AcBinary-vs-MemPack overall comparison, aggregated over paired cells. Three
/// different aggregations are kept side by side so the reader can tell whether the headline delta
/// is driven by one large cell (arithmetic mean) or reflects the typical cell (geometric mean /
/// median of the per-cell ratios).
/// </summary>
/// <param name="ArithMeanPct">Percent delta of the arithmetic means, <c>(AcAvg / MpAvg − 1) × 100</c>; magnitude-weighted, so large cells dominate.</param>
/// <param name="GeoMeanPct">Percent delta of the geometric mean of the per-cell AcBinary/MemPack ratios; every cell weighs equally.</param>
/// <param name="MedianPct">Percent delta of the median per-cell ratio; resistant to outlier cells.</param>
/// <param name="AcAvg">Arithmetic mean of the AcBinary values (µs/op or bytes, depending on the metric).</param>
/// <param name="MpAvg">Arithmetic mean of the MemPack values, same unit as <paramref name="AcAvg"/>.</param>
/// <param name="CellCount">Number of paired cells that contributed to the statistics.</param>
private record OverallStats(
    double ArithMeanPct,
    double GeoMeanPct,
    double MedianPct,
    double AcAvg,
    double MpAvg,
    int CellCount);
|
||
|
||
/// <summary>
/// Aggregates an AcBinary-vs-MemPack comparison over paired cells. Rows are paired by
/// <c>TestDataName</c>; a pair counts only when both extracted values are strictly positive, so a
/// cell missing on either side (or carrying an unmeasured 0) drops out of every statistic.
/// </summary>
/// <param name="acResults">AcBinary rows.</param>
/// <param name="mpResults">MemPack rows.</param>
/// <param name="getValue">Extracts the compared metric (e.g. per-op µs or bytes) from a row.</param>
/// <returns>
/// Arithmetic / geometric / median aggregation of the pairs, or <c>null</c> when either list is
/// empty or no pair has two positive values.
/// </returns>
private static OverallStats? ComputeOverallStats(List<BenchmarkResult> acResults, List<BenchmarkResult> mpResults, Func<BenchmarkResult, double> getValue)
{
    if (acResults.Count == 0 || mpResults.Count == 0)
    {
        return null;
    }

    // Inner join on the cell name; the metric is read once per side for every joined pair.
    var paired = acResults
        .Join(
            mpResults,
            acRow => acRow.TestDataName,
            mpRow => mpRow.TestDataName,
            (acRow, mpRow) => (ac: getValue(acRow), mp: getValue(mpRow)))
        .Where(pair => pair.ac > 0 && pair.mp > 0)
        .ToList();

    if (paired.Count == 0)
    {
        return null;
    }

    var acMean = paired.Average(pair => pair.ac);
    var mpMean = paired.Average(pair => pair.mp);

    var cellRatios = new List<double>(paired.Count);
    foreach (var pair in paired)
    {
        cellRatios.Add(pair.ac / pair.mp);
    }

    // Geometric mean as exp(mean(ln r)) — avoids the overflow/underflow of multiplying all ratios
    // together before taking the N-th root.
    var logTotal = 0.0;
    foreach (var ratio in cellRatios)
    {
        logTotal += Math.Log(ratio);
    }

    var geometricMean = Math.Exp(logTotal / cellRatios.Count);

    // Median of the ratios; an even count takes the midpoint of the two central values.
    var ordered = new List<double>(cellRatios);
    ordered.Sort();

    var middle = ordered.Count / 2;
    double medianRatio;
    if (ordered.Count % 2 == 1)
    {
        medianRatio = ordered[middle];
    }
    else
    {
        medianRatio = (ordered[middle - 1] + ordered[middle]) / 2.0;
    }

    return new OverallStats(
        ArithMeanPct: (acMean / mpMean - 1) * 100,
        GeoMeanPct: (geometricMean - 1) * 100,
        MedianPct: (medianRatio - 1) * 100,
        AcAvg: acMean,
        MpAvg: mpMean,
        CellCount: cellRatios.Count);
}
|
||
|
||
/// <summary>
/// Renders a per-operation microsecond value together with its inter-sample spread, e.g.
/// <c>"26.86 (24.50..29.10)"</c>, and appends a <c>⚠️x.y%</c> marker when the relative spread
/// (<paramref name="stdDevMs"/> / <paramref name="medianMs"/>) exceeds
/// <see cref="Configuration.UnstableCVThreshold"/>. When no usable range exists — both bounds are
/// non-positive, or the bounds do not extend beyond the median — only the bare median is printed.
/// </summary>
/// <param name="medianMs">Median batch time in milliseconds.</param>
/// <param name="minMs">Fastest batch time in milliseconds.</param>
/// <param name="maxMs">Slowest batch time in milliseconds.</param>
/// <param name="stdDevMs">Standard deviation of the batch times in milliseconds.</param>
/// <param name="iterations">Operations per batch for this row (used to convert to µs/op).</param>
/// <param name="inv">Culture used for number formatting.</param>
/// <returns>The formatted cell text.</returns>
private static string FormatMicrosWithRange(double medianMs, double minMs, double maxMs, double stdDevMs, int iterations, System.Globalization.CultureInfo inv)
{
    var medianText = ToPerOpMicros(medianMs, iterations).ToString("F2", inv);

    // Single-sample runs carry no spread information: print the median alone.
    var hasNoRangeData = minMs <= 0 && maxMs <= 0;
    var rangeCollapsesToMedian = minMs >= medianMs && maxMs <= medianMs;
    if (hasNoRangeData || rangeCollapsesToMedian)
    {
        return medianText;
    }

    var minText = ToPerOpMicros(minMs, iterations).ToString("F2", inv);
    var maxText = ToPerOpMicros(maxMs, iterations).ToString("F2", inv);
    var withRange = $"{medianText} ({minText}..{maxText})";

    // The instability marker needs a positive median and a positive deviation.
    if (!(medianMs > 0 && stdDevMs > 0))
    {
        return withRange;
    }

    // Relative spread; rows above the threshold are flagged so that a small inter-engine delta
    // on a noisy row is easy to discount.
    var relativeSpread = stdDevMs / medianMs;
    return relativeSpread > Configuration.UnstableCVThreshold
        ? $"{withRange} ⚠️{(relativeSpread * 100).ToString("F1", inv)}%"
        : withRange;
}
|
||
|
||
/// <summary>
/// Converts a byte count to kilobytes using the binary convention (1 KB = 1024 B). Intended for
/// display columns that show compact values such as <c>4.05</c> under a "KB" header; raw-data
/// outputs should keep using the exact byte count.
/// </summary>
/// <param name="bytes">Size in bytes.</param>
/// <returns>The size in kilobytes as a floating-point value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static double ToKilobytes(long bytes)
{
    const double bytesPerKilobyte = 1024.0;
    return bytes / bytesPerKilobyte;
}
|
||
|
||
/// <summary>
/// Application entry point. With arguments: parses them, runs the benchmark once and exits.
/// Without arguments: shows the interactive menu in a loop until the user quits with Q.
/// </summary>
/// <param name="args">Command-line arguments; only the first one is interpreted.</param>
public static void Main(string[] args)
{
    // UTF-8 output so the box-drawing and symbol characters used in the report render correctly.
    System.Console.OutputEncoding = Encoding.UTF8;

    // Validate the MemoryPack baseline before any benchmark work, so a broken setup is reported
    // immediately rather than after warmup.
    ValidateMemoryPackSetup();

    // CLI mode: a single run driven by the arguments, then exit.
    if (args.Length > 0)
    {
        if (TryParseCliArgs(args, out var cliLayer, out var cliOpMode, out var cliSerializerMode))
        {
            RunBenchmark(cliLayer, cliOpMode, cliSerializerMode);
        }

        return;
    }

    // Interactive mode: keep offering the menu so consecutive runs need no process restart.
    for (;;)
    {
        var choice = ShowInteractiveMenu();
        if (choice == null)
        {
            return; // user pressed Q in the menu
        }

        var picked = choice.Value;
        RunBenchmark(picked.layer, "all", picked.serializerMode);

        System.Console.WriteLine();
        System.Console.WriteLine("─────────────────────────────────────────────────────────────────────");
        System.Console.WriteLine("Returning to menu — press any key to continue, or Q to quit...");

        var pressed = System.Console.ReadKey(intercept: true);
        if (pressed.Key == ConsoleKey.Q)
        {
            return;
        }

        System.Console.WriteLine();
    }
}
|
||
|
||
/// <summary>
/// Translates the first command-line argument into the benchmark selection
/// (<paramref name="layer"/>, <paramref name="opMode"/>, <paramref name="serializerMode"/>).
/// Matching is case-insensitive and culture-independent.
/// </summary>
/// <param name="args">Command-line arguments; only <c>args[0]</c> is interpreted.</param>
/// <param name="layer">Receives the layer / cell keyword; defaults to <c>"all"</c>.</param>
/// <param name="opMode">Receives <c>"all"</c>, <c>"serialize"</c> or <c>"deserialize"</c>.</param>
/// <param name="serializerMode">Receives <c>"standard"</c> or <c>"asyncpipe"</c>.</param>
/// <returns>
/// <c>true</c> when the benchmark should run. The current implementation always returns
/// <c>true</c>: an unrecognised keyword is passed through as a layer name for backwards
/// compatibility. Callers should still honour <c>false</c>.
/// </returns>
/// <remarks>
/// The <c>quick</c> keyword has a side effect: it lowers the warmup, iteration and sample counts on
/// <c>Configuration</c> for the rest of the process.
/// </remarks>
private static bool TryParseCliArgs(string[] args, out string layer, out string opMode, out string serializerMode)
{
    layer = "all";
    opMode = "all";
    serializerMode = "standard";

    // Defensive: with nothing to parse, the defaults above already mean "run everything".
    if (args is null || args.Length == 0)
    {
        return true;
    }

    // Invariant lower-casing: the culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
    // Turkish/Azeri cultures, which would stop "QUICK", "SERIALIZE", "PIPE"... from matching.
    var arg = args[0].ToLowerInvariant();

    switch (arg)
    {
        case "quick":
            // Quick mode: short warmup, few iterations, small sample count.
            Configuration.WarmupIterations = 5;
            Configuration.TestIterations = 100;
            Configuration.BenchmarkSamples = 3;
            break;

        case "core" or "comprehensive" or "edge" or "all"
            or "small" or "medium" or "large" or "repeated" or "deep":
            // Known layer / cell keywords.
            layer = arg;
            break;

        case "asyncpipe" or "pipe":
            // AsyncPipe-only mode: streaming I/O isolation across all test data.
            serializerMode = "asyncpipe";
            break;

        case "ser" or "serialize":
            opMode = "serialize";
            break;

        case "des" or "deserialize":
            opMode = "deserialize";
            break;

        default:
            // Backwards compatibility: an unknown argument is treated as a layer keyword.
            layer = arg;
            break;
    }

    return true;
}
|
||
|
||
/// <summary>
/// Runs the benchmark suite end-to-end for one configuration: optional process stabilization →
/// global JIT pre-warmup → per-cell verification and measurement → grouped results print → save to
/// disk. Shared by the CLI path and the interactive menu, which calls it repeatedly in one process.
/// </summary>
/// <param name="layer">Layer / cell keyword used to filter the test data sets.</param>
/// <param name="opMode">Operation mode forwarded to <see cref="RunBenchmarksForTestData"/>.</param>
/// <param name="serializerMode">Serializer-set selector forwarded to the serializer factory.</param>
private static void RunBenchmark(string layer, string opMode, string serializerMode)
{
    System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════╗");
    System.Console.WriteLine("║ COMPREHENSIVE SERIALIZER BENCHMARK SUITE ║");
    System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════╝");

    // Stabilization: pin the whole process to one logical CPU and raise its priority class.
    // Single-core affinity stops the OS from migrating the bench thread between cores mid-sample
    // (a migration evicts L1/L2 caches and corrupts a measurement); High priority reduces
    // preemption by background tasks that otherwise randomly inflate samples.
    // The original state is restored in the finally block below even if a benchmark throws.
    // Skipped in single-sample mode (Configuration.BenchmarkSamples <= 1), where it is moot.
    using var process = Process.GetCurrentProcess();
    var origAffinity = (IntPtr)0;
    var origPriority = ProcessPriorityClass.Normal;
    var stabilizationApplied = false;

    // ProcessorAffinity is only supported on Windows + Linux (CA1416). On any other OS the whole
    // stabilization step — affinity and priority — is skipped.
    if (Configuration.BenchmarkSamples > 1 && (OperatingSystem.IsWindows() || OperatingSystem.IsLinux()))
    {
        var affinityPinned = false;
        try
        {
            origAffinity = process.ProcessorAffinity;
            origPriority = process.PriorityClass;

            // Pin to CPU 0 (mask = 1). Which core is arbitrary; what matters is "exactly one core,
            // consistently". Round-trip-only NamedPipe rows have a server-drain thread that will
            // share the core — acceptable, since those rows measure end-to-end round trip anyway.
            process.ProcessorAffinity = (IntPtr)1;
            affinityPinned = true;

            process.PriorityClass = ProcessPriorityClass.High;
            stabilizationApplied = true;
            System.Console.WriteLine("Stabilization: pinned to CPU 0 (affinity=0x1), priority=High.");
        }
        catch (Exception ex)
        {
            // Affinity/priority changes may fail on locked-down hosts (group policies, containers
            // without CAP_SYS_NICE on Linux, etc.). If the affinity was already pinned when the
            // priority change failed, roll it back now: stabilizationApplied stays false, so the
            // finally block would not restore it and the process would stay on one core while
            // the log claims stabilization was skipped.
            if (affinityPinned)
            {
                try { process.ProcessorAffinity = origAffinity; } catch { /* best-effort */ }
            }

            System.Console.WriteLine($"Stabilization SKIPPED: {ex.GetType().Name}: {ex.Message}");
        }
    }

    try
    {
        var allResults = new List<BenchmarkResult>();
        var allTestDataSets = BenchmarkTestDataProvider.CreateTestDataSets();
        var testDataSets = FilterByLayer(allTestDataSets, layer);

        System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + pilot discard");
        System.Console.WriteLine($"Build: {Configuration.BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"} | Test Cells: {testDataSets.Count}/{allTestDataSets.Count}");
        System.Console.WriteLine();

        // Global JIT pre-warmup — touches every (test data × serializer) code path BEFORE any
        // timing happens. Without it the FIRST measured cell carries tier-promotion latency: the
        // per-cell warmup alone does not guarantee that every overload is fully optimized by the
        // time measuring starts. Skipped in single-sample mode (fast iteration).
        if (Configuration.BenchmarkSamples > 1)
        {
            System.Console.WriteLine($"Global JIT pre-warmup ({testDataSets.Count} cells × all serializers, light pass)...");

            foreach (var testData in testDataSets)
            {
                var preSerializers = CreateSerializers(testData, serializerMode);
                try
                {
                    foreach (var s in preSerializers)
                    {
                        // Light pass only, to trigger tier promotion. The per-cell warmup inside
                        // RunBenchmarksForTestData still runs afterwards.
                        s.Warmup(2000);
                    }
                }
                finally
                {
                    // Dispose any IDisposable serializers (NamedPipe / FileStream variants own OS resources).
                    foreach (var s in preSerializers) (s as IDisposable)?.Dispose();
                }
            }

            // Let background tiered-JIT compilation drain before measuring begins.
            if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);
            System.Console.WriteLine("✓ Global pre-warmup complete.\n");
        }

        var rule = new string('═', 70);
        foreach (var testData in testDataSets)
        {
            System.Console.WriteLine($"\n{rule}");
            System.Console.WriteLine($"TEST DATA: {testData.DisplayName}");
            System.Console.WriteLine($"{rule}");

            var results = RunBenchmarksForTestData(testData, opMode, serializerMode);
            allResults.AddRange(results);
        }

        // Print grouped results
        PrintGroupedResults(allResults, testDataSets);

        // Save results to file
        SaveResults(allResults, testDataSets);

        System.Console.WriteLine("\n✓ Benchmark complete!");
    }
    finally
    {
        // Restore process state — affinity/priority changes are process-wide and would otherwise
        // persist into the next interactive-menu run, which would then start already pinned and
        // at High priority.
        if (stabilizationApplied && (OperatingSystem.IsWindows() || OperatingSystem.IsLinux()))
        {
            try { process.ProcessorAffinity = origAffinity; } catch { /* best-effort */ }
            try { process.PriorityClass = origPriority; } catch { /* best-effort */ }
        }
    }
}
|
||
|
||
#region Benchmark Execution
|
||
|
||
/// <summary>
/// Forces a full garbage-collection cycle at a phase boundary of the benchmark loop: a forced,
/// blocking generation-2 collection, then a wait for pending finalizers, then a second identical
/// collection to reclaim whatever the finalizers released. The intent is that the next
/// warmup/measurement phase starts without residual garbage from the previous phase.
/// Called before each warmup phase in <see cref="RunBenchmarksForTestData"/>.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ForceGcCollect()
{
    FullBlockingCollect();
    GC.WaitForPendingFinalizers();
    FullBlockingCollect();

    // One forced, blocking collection of generations 0 through 2.
    static void FullBlockingCollect() => GC.Collect(2, GCCollectionMode.Forced, blocking: true);
}
|
||
|
||
/// <summary>
/// Verifies and measures every serializer for one test-data cell. Each serializer first passes a
/// round-trip correctness check (a failure aborts the process with exit code 1). It is then
/// measured phase by phase, and each phase runs: forced GC → warmup → JIT settle sleep →
/// iteration calibration → timed samples → allocation sample.
/// </summary>
/// <param name="testData">The test-data cell to benchmark.</param>
/// <param name="mode">
/// <c>"all"</c>, <c>"serialize"</c>/<c>"ser"</c> or <c>"deserialize"</c>/<c>"des"</c>; selects which
/// phases run. Round-trip-only serializers are measured only when the serialize phase is selected.
/// </param>
/// <param name="serializerMode">Serializer-set selector forwarded to the serializer factory.</param>
/// <returns>One <see cref="BenchmarkResult"/> per serializer, in creation order.</returns>
private static List<BenchmarkResult> RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode)
{
    var results = new List<BenchmarkResult>();
    var serializers = CreateSerializers(testData, serializerMode);

    var runSerialize = mode is "all" or "serialize" or "ser";
    var runDeserialize = mode is "all" or "deserialize" or "des";

    try
    {
        // Round-trip correctness check — once per (cell × serializer), BEFORE warmup.
        System.Console.WriteLine("Verifying round-trip correctness...");

        foreach (var serializer in serializers)
        {
            if (!serializer.VerifyRoundTrip())
            {
                System.Console.Error.WriteLine($"❌ FATAL: Round-trip verification FAILED for {serializer.Name} on {testData.DisplayName}");
                System.Console.Error.WriteLine("Benchmark numbers from a serializer with broken round-trip would be meaningless. Aborting.");

                Environment.Exit(1);
            }
        }

        System.Console.WriteLine("✓ All serializers passed round-trip verification.");

        // Per-serializer, PER-PHASE cycle: the Ser path and the Des path each get their own
        // isolated warmup → calibrate → measure round, with a forced GC at every phase boundary.
        //
        // Why phase isolation: a combined (interleaved) warmup leaves instruction cache and
        // branch predictor in a compromise state where neither code path dominates, so the first
        // measured phase pays a penalty. Isolated pairs keep only the measured path hot.
        //
        // Why GC at every boundary: it removes allocation pressure left by the previous phase so
        // GC activity during a measurement is driven by that phase's own allocations only.
        //
        // Configuration.JitSleep after each warmup lets background tiered-JIT promotion settle
        // before timing starts.
        System.Console.WriteLine($"Running benchmarks (target ~{Configuration.TargetSampleMs} ms/sample × {Configuration.BenchmarkSamples} samples median, phase-isolated warmup/measure per Ser/Des)...\n");

        foreach (var serializer in serializers)
        {
            var result = new BenchmarkResult
            {
                TestDataName = testData.DisplayName, // Use DisplayName for IId% info
                Engine = serializer.Engine,
                IoMode = serializer.IoMode,
                DispatchMode = serializer.DispatchMode,
                OptionsPreset = serializer.OptionsPreset,
                OptionsDescription = serializer.OptionsDescription,
                SerializedSize = serializer.SerializedSize,
                SetupSerializeAllocBytes = serializer.SetupSerializeAllocBytes,
                SetupDeserializeAllocBytes = serializer.SetupDeserializeAllocBytes,
                IsRoundTripOnly = serializer.IsRoundTripOnly
            };

            // Label for the in-place progress output, so a stuck benchmark is visibly stuck on a
            // specific row rather than silently hanging.
            var groupLabel = $"{result.SerializerName}";

            if (serializer.IsRoundTripOnly)
            {
                // Round-trip-only benchmarks (NamedPipe etc.): single phase — Serialize() performs
                // the full round trip. The Ser-phase entry points warm the whole path and the
                // numbers are recorded into the RT columns. Requesting the deserialize phase alone
                // is meaningless for such a row, so it is skipped silently.
                if (runSerialize)
                {
                    ForceGcCollect();
                    serializer.WarmupSerialize(Configuration.WarmupIterations);
                    if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);

                    var rtIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
                    var (rtMed, rtMin, rtMax, rtStd) = RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]");
                    result.RoundTripTimeMs = rtMed;
                    result.RoundTripTimeMinMs = rtMin;
                    result.RoundTripTimeMaxMs = rtMax;
                    result.RoundTripTimeStdDevMs = rtStd;
                    result.RoundTripIterations = rtIter;

                    // Process-wide allocation measurement, so allocations made on the server-drain
                    // thread are counted too instead of only the client side.
                    result.RoundTripAllocBytesPerOp = MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]");
                }
            }
            else
            {
                // ── Ser phase ──
                if (runSerialize)
                {
                    ForceGcCollect();
                    serializer.WarmupSerialize(Configuration.WarmupIterations);
                    if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);

                    var serIter = CalibrateIterations(() => serializer.Serialize(), Configuration.TargetSampleMs);
                    var (serMed, serMin, serMax, serStd) = RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]");
                    result.SerializeTimeMs = serMed;
                    result.SerializeTimeMinMs = serMin;
                    result.SerializeTimeMaxMs = serMax;
                    result.SerializeTimeStdDevMs = serStd;
                    result.SerializeIterations = serIter;

                    // Dedicated alloc-only sample, separate from the timing samples.
                    result.SerializeAllocBytesPerOp = MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]");
                }

                // ── Des phase ── the forced GC discards the Ser phase's leftovers so the Des
                // allocation sample reflects Des-side allocations only.
                if (runDeserialize)
                {
                    ForceGcCollect();
                    serializer.WarmupDeserialize(Configuration.WarmupIterations);
                    if (Configuration.JitSleep > 0) Thread.Sleep(Configuration.JitSleep);

                    var desIter = CalibrateIterations(() => serializer.Deserialize(), Configuration.TargetSampleMs);
                    var (desMed, desMin, desMax, desStd) = RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]");
                    result.DeserializeTimeMs = desMed;
                    result.DeserializeTimeMinMs = desMin;
                    result.DeserializeTimeMaxMs = desMax;
                    result.DeserializeTimeStdDevMs = desStd;
                    result.DeserializeIterations = desIter;
                    result.DeserializeAllocBytesPerOp = MeasureAllocation(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des alloc]");
                }

                // Compose RT from Ser + Des. The two phases may have DIFFERENT calibrated iteration
                // counts, so adding batch times would mislead. Instead add the per-op values and
                // synthesize RoundTripTimeMs against RoundTripIterations = max(serIter, desIter),
                // so that RoundTripTimeMs / RoundTripIterations * 1000 == SerPerOp + DesPerOp.
                // A phase that did not run contributes 0 (its iteration count is 0).
                var rtPerOp = SerPerOp(result) + DesPerOp(result);
                result.RoundTripIterations = Math.Max(result.SerializeIterations, result.DeserializeIterations);
                result.RoundTripTimeMs = rtPerOp / 1000.0 * result.RoundTripIterations;
                result.RoundTripAllocBytesPerOp = result.SerializeAllocBytesPerOp + result.DeserializeAllocBytesPerOp;
            }

            results.Add(result);
            PrintResult(result);
        }
    }
    finally
    {
        // Dispose any IDisposable serializers (NamedPipe / FileStream variants own OS resources).
        // Done in a finally block so pipes / handles are released even when a warmup or
        // measurement throws, not only after a fully successful cell.
        foreach (var s in serializers) (s as IDisposable)?.Dispose();
    }

    return results;
}
|
||
|
||
/// <summary>
/// Builds the benchmark rows to run against one test-data cell. Every AcBinary option set created
/// here gets its <c>WireMode</c> from <c>Configuration.SelectedWireMode</c>.
/// </summary>
/// <param name="testData">Test cell; its <c>Order</c> is handed to every benchmark constructor.</param>
/// <param name="serializerMode">
/// <c>"fastestbyte"</c> — AcBinary FastMode Byte[] and MemoryPack Byte[] only;
/// <c>"asyncpipe"</c> — the four pipe / in-memory streaming rows only;
/// any other value — the standard suite (no kernel NamedPipe rows).
/// </param>
/// <returns>A newly created list of benchmark rows, in display order.</returns>
private static List<ISerializerBenchmark> CreateSerializers(TestDataSet testData, string serializerMode)
{
    switch (serializerMode)
    {
        case "fastestbyte":
        {
            // Focused 1:1 comparison on the fastest Byte[] path: AcBinary FastMode Byte[] against
            // MemoryPack Byte[] and nothing else. Meant as a tight optimization-iteration loop
            // (~30-45 sec vs the full 2-3 min suite).
            //
            // The FastWire row (UTF-16 raw memcpy) is parked for the current optimization sprint;
            // re-enable it when revisiting Fast wire-mode performance.
            var fastestByteOpts = AcBinarySerializerOptions.FastMode;
            fastestByteOpts.WireMode = Configuration.SelectedWireMode;

            var fastestByteRows = new List<ISerializerBenchmark>();
            fastestByteRows.Add(new AcBinaryBenchmark(testData.Order, fastestByteOpts, "FastMode"));
            //fastestByteRows.Add(new AcBinaryBenchmark(testData.Order, fastWireOptions, "FastMode (FastWire)"));
            fastestByteRows.Add(new MemoryPackBenchmark(testData.Order, "Default"));
            return fastestByteRows;
        }

        case "asyncpipe":
        {
            // Streaming-only mode. Pipe setup and kernel-buffer overhead would mask the steady-state
            // numbers of the byte-array / IBufferWriter rows, so these rows run in isolation.
            //
            // One option set drives all four rows. Its BufferWriterChunkSize comes from
            // Configuration.PipeChunkSize, which is meant to be the single tuning knob for both the
            // app-level wire chunk and the kernel pipe buffer size.
            var pipeOpts = AcBinarySerializerOptions.FastMode;
            pipeOpts.BufferWriterChunkSize = Configuration.PipeChunkSize;
            pipeOpts.WireMode = Configuration.SelectedWireMode;

            var pipeRows = new List<ISerializerBenchmark>();

            // [chunked, kernel] Chunked-framed AsyncPipe over a kernel NamedPipe: the full streaming
            // stack (wire framing + drain task + sliding-window buffer + wait-on-byte-shortage).
            pipeRows.Add(new AcBinaryNamedPipeBenchmark(testData.Order, pipeOpts, "FastMode (PipeChunk)"));

            // [raw, kernel] Raw byte[] over the same NamedPipe transport, no chunk framing and no drain
            // task. Against the row above this separates framework overhead from transport overhead.
            pipeRows.Add(new AcBinaryNamedPipeRawByteArrayBenchmark(testData.Order, pipeOpts, "FastMode (PipeRaw)"));

            // [chunked, memory] Same chunked-streaming path, but over an in-memory
            // System.IO.Pipelines.Pipe instead of a kernel pipe — isolates the pure CPU cost of the
            // chunked-streaming framework from the per-chunk syscall cost.
            pipeRows.Add(new AcBinaryInMemoryPipeBenchmark(testData.Order, pipeOpts, "FastMode (PipeChunk)"));

            // [raw, memory] Raw byte[] handed across threads in memory (no transport). Completes the
            // 2x2 matrix [chunked, raw] x [kernel, memory].
            pipeRows.Add(new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, pipeOpts, "FastMode (PipeRaw)"));

            return pipeRows;
        }
    }

    // Standard mode — everything EXCEPT the kernel NamedPipe rows, which are opt-in through the
    // AsyncPipe menu entry / CLI mode and never bundled with the steady-state suite.

    // Default preset without string interning. Only referenced by the parked "NoIntern" row below.
    var noInternOpts = AcBinarySerializerOptions.Default;
    noInternOpts.UseStringInterning = StringInterningMode.None;
    noInternOpts.WireMode = Configuration.SelectedWireMode;

    // Default preset on the runtime (non-generated) path. Only referenced by a parked row below.
    var defaultRuntimeOpts = AcBinarySerializerOptions.Default;
    defaultRuntimeOpts.UseGeneratedCode = false;
    defaultRuntimeOpts.WireMode = Configuration.SelectedWireMode;

    // FastMode on the runtime (non-generated) path.
    var fastRuntimeOpts = AcBinarySerializerOptions.FastMode;
    fastRuntimeOpts.UseGeneratedCode = false;
    fastRuntimeOpts.WireMode = Configuration.SelectedWireMode;

    // FastMode with the preset's own code-generation setting.
    var fastOpts = AcBinarySerializerOptions.FastMode;
    fastOpts.WireMode = Configuration.SelectedWireMode;

    // FastMode for the FRESH ArrayBufferWriter row. The chunk size drives the size of the
    // serializer's span requests and therefore the writer's per-call allocation.
    var freshBufferWriterOpts = AcBinarySerializerOptions.FastMode;
    freshBufferWriterOpts.BufferWriterChunkSize = Configuration.PipeChunkSize;
    freshBufferWriterOpts.WireMode = Configuration.SelectedWireMode;

    // FastMode for the in-memory Pipe rows; same chunk size as the AsyncPipe mode.
    var inMemoryPipeOpts = AcBinarySerializerOptions.FastMode;
    inMemoryPipeOpts.BufferWriterChunkSize = Configuration.PipeChunkSize;
    inMemoryPipeOpts.WireMode = Configuration.SelectedWireMode;

    // Options behind the "Default" row: interning switched off, reference handling = OnlyId.
    var defaultRowOpts = AcBinarySerializerOptions.Default;
    defaultRowOpts.UseStringInterning = StringInterningMode.None;
    defaultRowOpts.ReferenceHandling = ReferenceHandlingMode.OnlyId;
    defaultRowOpts.WireMode = Configuration.SelectedWireMode;

    var suite = new List<ISerializerBenchmark>();

    // ---- AcBinary, Byte[] API ---------------------------------------------------------------
    // Fastest Byte[] on the source-generated path.
    suite.Add(new AcBinaryBenchmark(testData.Order, fastOpts, "FastMode"));

    // Fastest Byte[] on the runtime path (UseGeneratedCode=false): same options otherwise, so the
    // pair shows the source-generation speed-up side by side.
    suite.Add(new AcBinaryBenchmark(testData.Order, fastRuntimeOpts, "FastMode"));

    // "Default" row: ReferenceHandling=OnlyId with UseStringInterning=None, as configured on
    // defaultRowOpts above. Shows the wire-size / CPU trade-off against FastMode.
    suite.Add(new AcBinaryBenchmark(testData.Order, defaultRowOpts, "Default"));

    // Parked rows — uncomment to compare further option presets side by side.
    //suite.Add(new AcBinaryBenchmark(testData.Order, defaultRuntimeOpts, "Default"));
    //suite.Add(new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.WithoutReferenceHandling, "NoRef"));
    //suite.Add(new AcBinaryBenchmark(testData.Order, noInternOpts, "NoIntern"));

    // ---- AcBinary, IBufferWriter API --------------------------------------------------------
    // Reused ArrayBufferWriter — long-running service / batch scenario.
    suite.Add(new AcBinaryBufferWriterBenchmark(testData.Order, fastOpts, "FastMode"));

    // Fresh ArrayBufferWriter per call — one-shot scenario. The chunk size comes from
    // Configuration.PipeChunkSize. NOTE(review): the row label says "4KB" — confirm it matches.
    suite.Add(new AcBinaryFreshBufferWriterBenchmark(testData.Order, freshBufferWriterOpts, "FastMode (4KB)"));

    // ---- AcBinary, in-memory streaming ------------------------------------------------------
    // Chunked streaming over an in-memory Pipe (no kernel transport): the framework's CPU cost
    // next to the simpler in-process serialize-then-deserialize rows above.
    suite.Add(new AcBinaryInMemoryPipeBenchmark(testData.Order, inMemoryPipeOpts, "FastMode (PipeChunk)"));

    // Raw byte[] cross-thread handoff in memory — baseline for the chunked in-memory row above.
    suite.Add(new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, inMemoryPipeOpts, "FastMode (PipeRaw)"));

    // AcBinaryNamedPipeBenchmark is intentionally omitted here; run it through the dedicated
    // AsyncPipe mode for isolated kernel-transport measurements.

    // ---- MemoryPack — three I/O modes mirroring the AcBinary rows -----------------------------
    suite.Add(new MemoryPackBenchmark(testData.Order, "Default"));
    suite.Add(new MemoryPackBufferWriterBenchmark(testData.Order, "Default"));
    suite.Add(new MemoryPackFreshBufferWriterBenchmark(testData.Order, "Default"));

    // ---- MessagePack — legacy comparison ------------------------------------------------------
#if !AYCODE_NATIVEAOT
    // Excluded from the NativeAOT build: MessagePack v3's DynamicGenericResolver relies on
    // Activator.CreateInstance for trimmed formatters and fails there with
    // "No parameterless constructor". Available for regular JIT runs only.
    suite.Add(new MessagePackBenchmark(testData.Order, "ContractBased"));
#endif

    // System.Text.Json — reference only, not part of the active suite.
    //suite.Add(new SystemTextJsonBenchmark(testData.Order, "Default"));

    return suite;
}
|
||
|
||
/// <summary>
/// Times <paramref name="action"/> by running it <paramref name="iterations"/> times per sample for
/// <c>Configuration.BenchmarkSamples</c> independent samples and summarising the per-sample
/// elapsed times. Taking several samples smooths transient effects (background activity,
/// thermal / turbo state) that a single run would absorb.
/// </summary>
/// <remarks>
/// Stabilization (added 2026-05-07):
/// 1) One pilot sample runs BEFORE the recorded loop and is discarded, so residual JIT / GC
///    bookkeeping after warmup does not land in a recorded sample.
/// 2) GC.Collect / WaitForPendingFinalizers / GC.Collect runs before the pilot and before every
///    recorded sample, OUTSIDE the Stopwatch window, so each sample starts from the same heap shape.
/// 3) Min, max and standard deviation are returned next to the median so the caller can surface
///    the inter-sample noise floor.
/// When <c>Configuration.BenchmarkSamples &lt;= 1</c> a single sample is timed with no pilot and
/// no GC settle (Debug / quick mode).
/// </remarks>
/// <param name="action">Operation under test.</param>
/// <param name="iterations">Number of calls per sample.</param>
/// <param name="progressLabel">
/// When non-null, in-place <c>\r</c> progress lines are written so a stuck benchmark is visibly
/// stuck at a specific percentage instead of hanging silently.
/// </param>
/// <returns>
/// Median, minimum, maximum and population standard deviation of the per-sample elapsed time in
/// milliseconds. In single-sample mode all three times are the one measurement and the standard
/// deviation is 0.
/// </returns>
private static (double medianMs, double minMs, double maxMs, double stdDevMs) RunTimed(Action action, int iterations, string? progressLabel = null)
{
    var samples = Configuration.BenchmarkSamples;
    if (samples <= 1)
    {
        // Single-sample fast path (Debug or trivial run) — no array, no sort, no stddev.
        var sw = Stopwatch.StartNew();
        RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);
        sw.Stop();

        var ms = sw.Elapsed.TotalMilliseconds;
        EndProgress(progressLabel, ms);
        return (ms, ms, ms, 0);
    }

    // Pilot sample: executed for its side effects only, never timed or stored. It is shown as
    // sample 1 of (samples + 1) in the progress display.
    SettleHeap();
    RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: 0);

    var times = new double[samples];
    for (var s = 0; s < samples; s++)
    {
        // Settle the heap outside the Stopwatch window so a collection triggered by the previous
        // sample's garbage does not bleed into this sample's timing.
        SettleHeap();

        var sw = Stopwatch.StartNew();
        RunWithProgress(action, iterations, progressLabel, samples + 1, sampleIndex: s + 1);
        sw.Stop();
        times[s] = sw.Elapsed.TotalMilliseconds;
    }

    // First pass: min / max / mean.
    var minMs = double.MaxValue;
    var maxMs = double.MinValue;
    var sum = 0.0;
    foreach (var t in times)
    {
        sum += t;
        if (t < minMs) minMs = t;
        if (t > maxMs) maxMs = t;
    }

    var mean = sum / times.Length;

    // Second pass: population variance as the mean squared deviation from the mean. Unlike
    // E[X²] - E[X]² this form cannot go negative and does not lose precision when the samples
    // are nearly identical. Population (not sample) stddev: the captured samples are treated as
    // the whole population for CV computation.
    var sumSquaredDeviation = 0.0;
    foreach (var t in times)
    {
        var deviation = t - mean;
        sumSquaredDeviation += deviation * deviation;
    }

    var stdDevMs = Math.Sqrt(sumSquaredDeviation / times.Length);

    // Median: middle value for odd sample counts, average of the two middles for even counts.
    Array.Sort(times);
    var mid = samples / 2;
    var medianMs = samples % 2 == 1 ? times[mid] : (times[mid - 1] + times[mid]) / 2.0;

    EndProgress(progressLabel, medianMs);
    return (medianMs, minMs, maxMs, stdDevMs);

    // Full blocking collection, finalizer drain, then a second collection for finalized objects.
    static void SettleHeap()
    {
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
    }
}
|
||
|
||
/// <summary>
/// Per-cell adaptive iteration calibration. After a full GC settle, times 100 back-to-back calls of
/// <paramref name="action"/> and extrapolates how many iterations one sample needs to last roughly
/// <paramref name="targetMs"/> milliseconds of wall-clock time. The 100 calibration calls are not
/// part of any recorded sample.
/// </summary>
/// <param name="action">Operation whose per-call cost is measured.</param>
/// <param name="targetMs">Desired wall-clock duration of one sample, in milliseconds.</param>
/// <returns>
/// The iteration count rounded UP to the nearest 1000 and clamped to the range 1000..200_000.
/// When <c>Configuration.BenchmarkSamples &lt;= 1</c> (Debug single-sample mode) nothing is measured
/// and <c>Configuration.TestIterations</c> is returned unchanged.
/// </returns>
private static int CalibrateIterations(Action action, int targetMs)
{
    const int calibIter = 100;
    const int minIterations = 1000;
    const int maxIterations = 200_000;

    if (Configuration.BenchmarkSamples <= 1) return Configuration.TestIterations; // Debug fast path

    GC.Collect();
    GC.WaitForPendingFinalizers();
    GC.Collect();

    var sw = Stopwatch.StartNew();
    for (var i = 0; i < calibIter; i++) action();
    sw.Stop();
    var elapsedMs = sw.Elapsed.TotalMilliseconds;

    // Pathologically fast op, effectively below timer resolution — more calibration won't help.
    if (elapsedMs <= 0.0001) return maxIterations;

    // Range-check the estimate while it is still a double. A very fast op combined with a large
    // target can exceed int.MaxValue; casting such a value to int is unspecified in an unchecked
    // context and used to collapse the result to the 1000 floor instead of the ceiling.
    var required = Math.Ceiling(targetMs * (calibIter / elapsedMs));
    if (required >= maxIterations) return maxIterations;
    if (required <= minIterations) return minIterations;

    // Round UP to the nearest 1000 — keeps numbers human-readable in the markdown output. The
    // value is strictly between the floor and the ceiling here, so the result stays in range.
    return ((int)required + 999) / 1000 * 1000;
}
|
||
|
||
/// <summary>
/// Measures the average number of bytes allocated on the CALLING thread per call of
/// <paramref name="action"/>. Runs as one dedicated pass after a full GC settle, separate from the
/// timing samples.
/// </summary>
/// <param name="action">Operation under test.</param>
/// <param name="iterations">Number of calls in the measurement pass; also the divisor of the result.</param>
/// <param name="progressLabel">When non-null, progress lines are written for this pass.</param>
/// <returns>Allocated bytes divided by <paramref name="iterations"/> (integer division).</returns>
private static long MeasureAllocation(Action action, int iterations, string? progressLabel = null)
{
    GC.Collect();
    GC.WaitForPendingFinalizers();
    GC.Collect();

    // The stopwatch only feeds the "done in … ms" progress line; it is not a benchmark result.
    var timer = Stopwatch.StartNew();
    var bytesAtStart = GC.GetAllocatedBytesForCurrentThread();

    RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);

    var allocatedBytes = GC.GetAllocatedBytesForCurrentThread() - bytesAtStart;
    timer.Stop();

    EndProgress(progressLabel, timer.Elapsed.TotalMilliseconds);
    return allocatedBytes / iterations;
}
|
||
|
||
/// <summary>
/// Process-wide variant of the per-call allocation measurement, for round-trip-only benchmarks
/// (NamedPipe etc.) whose work is spread over several threads. It reads
/// <see cref="GC.GetTotalAllocatedBytes"/>, which covers the whole process, where the per-thread
/// counter would only see allocations made on the calling thread. The price is some extra noise
/// from unrelated background allocations.
/// </summary>
/// <param name="action">Operation under test.</param>
/// <param name="iterations">Number of calls in the measurement pass; also the divisor of the result.</param>
/// <param name="progressLabel">When non-null, progress lines are written for this pass.</param>
/// <returns>Process-wide allocated bytes divided by <paramref name="iterations"/> (integer division).</returns>
private static long MeasureAllocationTotal(Action action, int iterations, string? progressLabel = null)
{
    GC.Collect();
    GC.WaitForPendingFinalizers();
    GC.Collect();

    // The stopwatch only feeds the "done in … ms" progress line; it is not a benchmark result.
    var timer = Stopwatch.StartNew();
    var bytesAtStart = GC.GetTotalAllocatedBytes(precise: true);

    RunWithProgress(action, iterations, progressLabel, samples: 1, sampleIndex: 0);

    var allocatedBytes = GC.GetTotalAllocatedBytes(precise: true) - bytesAtStart;
    timer.Stop();

    EndProgress(progressLabel, timer.Elapsed.TotalMilliseconds);
    return allocatedBytes / iterations;
}
|
||
|
||
// ============================================================================================
|
||
// Progress reporting — \r-driven in-place updates so a stuck benchmark surfaces the exact phase
|
||
// and % where it stopped, instead of appearing as a silent hang. Used by RunTimed and the
|
||
// MeasureAllocation* helpers when the caller passes a non-null progressLabel.
|
||
// ============================================================================================
|
||
|
||
// Tracks the longest line written by the current progress session, so EndProgress can clear
|
||
// any leftover characters from a prior longer line (avoids "ghost" trailing chars after \r).
|
||
// Set by RunWithProgress to the length of each progress line it writes; reset to 0 by EndProgress.
private static int _progressLastLineLen;
|
||
|
||
/// <summary>
/// Invokes <paramref name="action"/> <paramref name="iterations"/> times. With a non-null
/// <paramref name="label"/> it also rewrites an in-place (<c>\r</c>) progress line roughly every
/// 10% of the run and after the final iteration. With a null label nothing is written; the label
/// is checked once, before the loop.
/// </summary>
/// <param name="action">Operation to invoke.</param>
/// <param name="iterations">Number of invocations.</param>
/// <param name="label">Progress label, or null to run silently.</param>
/// <param name="samples">Total sample count shown in the progress line; values of 1 or less hide the sample counter.</param>
/// <param name="sampleIndex">Zero-based index of the current sample; displayed one-based.</param>
private static void RunWithProgress(Action action, int iterations, string? label, int samples, int sampleIndex)
{
    if (label is null)
    {
        for (var i = 0; i < iterations; i++) action();
        return;
    }

    // About ten progress writes per run — Console.Write is costly enough to skew sub-µs
    // benchmarks if it ran on every iteration.
    var reportEvery = Math.Max(1, iterations / 10);
    var showSampleCounter = samples > 1;

    for (var i = 0; i < iterations; i++)
    {
        action();

        var completed = i + 1;
        var isReportPoint = completed % reportEvery == 0 || i == iterations - 1;
        if (!isReportPoint) continue;

        // Widen to long so completed * 100 cannot overflow for large iteration counts.
        var pct = (int)(completed * 100L / iterations);

        string line;
        if (showSampleCounter)
            line = $" > {label} sample {sampleIndex + 1}/{samples} {pct,3}% ({completed}/{iterations})";
        else
            line = $" > {label} {pct,3}% ({completed}/{iterations})";

        System.Console.Write('\r');
        System.Console.Write(line);

        // Blank out whatever a longer previous line left behind.
        var leftover = _progressLastLineLen - line.Length;
        if (leftover > 0)
            System.Console.Write(new string(' ', leftover));

        _progressLastLineLen = line.Length;
    }
}
|
||
|
||
/// <summary>
/// Finishes a progress session: overwrites the current progress row with a final "done" line,
/// blanks any characters left over from a longer progress line, ends the row with a newline and
/// resets the tracked line length. Does nothing when <paramref name="label"/> is null.
/// </summary>
/// <param name="label">Progress label used for the session, or null when progress was off.</param>
/// <param name="elapsedMs">Elapsed time to report, in milliseconds.</param>
private static void EndProgress(string? label, double elapsedMs)
{
    if (label is not null)
    {
        var summary = $" > {label} done in {elapsedMs,7:F1} ms";

        System.Console.Write('\r');
        System.Console.Write(summary);

        var leftover = _progressLastLineLen - summary.Length;
        if (leftover > 0)
            System.Console.Write(new string(' ', leftover));

        System.Console.WriteLine();
        _progressLastLineLen = 0;
    }
}
|
||
|
||
#if !AYCODE_NATIVEAOT
// System.Text.Json options used by DeepEqualsViaJson to render both object graphs in one form:
// compact output, null-valued members omitted, reference cycles ignored instead of throwing.
private static readonly JsonSerializerOptions VerifyJsonOpts = new JsonSerializerOptions
{
    ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles,
    DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    WriteIndented = false,
};
#endif
|
||
|
||
/// <summary>
/// Round-trip equality check: renders both objects with System.Text.Json (using
/// <c>VerifyJsonOpts</c>) and compares the resulting strings. Slower than a property-by-property
/// compare, but works for any object graph without a custom comparer.
/// </summary>
/// <remarks>
/// NativeAOT builds skip the comparison and always return <c>true</c>: the System.Text.Json
/// reflection path needs runtime closed-generic instantiation
/// (<c>JsonPropertyInfo&lt;TestStatus&gt;</c> et al.) that the trimmer drops, which surfaces as
/// <c>NotSupportedException: missing native code or metadata</c>. The caller-side
/// Serialize + Deserialize still runs there; only this cross-format comparison is bypassed.
/// </remarks>
/// <param name="a">First object, may be null.</param>
/// <param name="b">Second object, may be null.</param>
/// <returns>
/// <c>true</c> when both are null or both serialize to the same JSON text; <c>false</c> when
/// exactly one is null or the JSON differs. Always <c>true</c> under NativeAOT.
/// </returns>
private static bool DeepEqualsViaJson(object? a, object? b)
{
#if AYCODE_NATIVEAOT
    // Cross-format validation is not available under AOT — see remarks.
    return true;
#else
    // Null handling first: equal only when BOTH sides are null.
    if (a is null || b is null) return a is null && b is null;

    var left = JsonSerializer.Serialize(a, VerifyJsonOpts);
    var right = JsonSerializer.Serialize(b, VerifyJsonOpts);

    return string.Equals(left, right);
#endif
}
|
||
|
||
/// <summary>
/// Startup guard for the MemoryPack comparison. Checks that <c>TestOrder</c> carries
/// <c>[MemoryPackable]</c>; if it does not, writes a fatal message to stderr and terminates the
/// process with exit code 1. Per the message, a missing attribute would make MemoryPack fall
/// back to its runtime resolver and invalidate the comparison.
/// </summary>
private static void ValidateMemoryPackSetup()
{
    foreach (var candidate in new[] { typeof(TestOrder) })
    {
        var markers = candidate.GetCustomAttributes(typeof(MemoryPackableAttribute), inherit: true);
        if (markers.Length > 0) continue;

        System.Console.Error.WriteLine($"❌ FATAL: {candidate.FullName} is not [MemoryPackable] — MemoryPack would fall back to runtime resolver, comparison is INVALID for SGen-vs-SGen claim.");
        System.Console.Error.WriteLine("Add [MemoryPackable] to the type and any nested types referenced from it.");

        Environment.Exit(1);
    }
}
|
||
|
||
/// <summary>
/// Interactive main menu, shown when no CLI args are given. Reads one key and maps it to the
/// benchmark selection. [S] opens the settings menu and then shows this menu again.
/// </summary>
/// <returns>
/// The selected <c>(layer, serializerMode)</c> pair, or <c>null</c> when the user quits with [Q].
/// Any unrecognised key selects <c>("all", "standard")</c>.
/// </returns>
private static (string layer, string serializerMode)? ShowInteractiveMenu()
{
    while (true)
    {
        var output = System.Console.Out;

        output.WriteLine();
        output.WriteLine("╔══════════════════════════════════════════════════════════╗");
        output.WriteLine("║ AcBinary Benchmark Suite ║");
        output.WriteLine("╚══════════════════════════════════════════════════════════╝");
        output.WriteLine();
        output.WriteLine("Select benchmark layer:");
        output.WriteLine();
        output.WriteLine(" [1] Core — daily iteration");
        output.WriteLine(" [2] Comprehensive — release validation");
        output.WriteLine(" [3] Edge cases — refactor verification");
        output.WriteLine(" [A] All layers");
        output.WriteLine(" [F] FastestByte — AcBinary FastMode Byte[] vs MemoryPack Byte[] only (tight optimization loop)");
        output.WriteLine(" [P] AsyncPipe — streaming I/O isolation (only AsyncPipe, all test data)");
        output.WriteLine($" [S] Settings — Iteration / WireMode (current: {Configuration.SelectedWireMode})");
        output.WriteLine(" [Q] Quit");
        output.Write("\nSelection: ");

        var pressed = System.Console.ReadKey(intercept: false).KeyChar;
        output.WriteLine();
        var choice = char.ToLower(pressed);

        if (choice == 's')
        {
            // Settings changed (or not) — show the main menu again.
            ShowSettingsMenu();
            continue;
        }

        if (choice == 'q') return null;

        return choice switch
        {
            '1' => ("core", "standard"),
            '2' => ("comprehensive", "standard"),
            '3' => ("edge", "standard"),
            'f' => ("all", "fastestbyte"),
            'p' => ("all", "asyncpipe"),
            // 'a' and every unrecognised key both run all layers in standard mode.
            _ => ("all", "standard"),
        };
    }
}
|
||
|
||
/// <summary>
/// Settings menu. Lists the three settings groups (Iteration, WireMode, Charset) with the current
/// WireMode and charset name, reads one key and opens the matching sub-menu. The menu is shown
/// again after a sub-menu returns or after an unrecognised key; [B] returns to the caller.
/// </summary>
private static void ShowSettingsMenu()
{
    while (true)
    {
        var output = System.Console.Out;

        output.WriteLine();
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine("Settings");
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine(" [1] Iteration — Warmup / Iterations / Samples");
        output.WriteLine($" [2] WireMode — current: {Configuration.SelectedWireMode}");
        output.WriteLine($" [3] Charset — current: {GetCurrentCharsetName()}");
        output.WriteLine(" [B] Back");
        output.Write("\nSelection: ");

        var pressed = System.Console.ReadKey(intercept: false).KeyChar;
        output.WriteLine();
        var choice = char.ToLower(pressed);

        if (choice == 'b') return;

        if (choice == '1') ShowIterationSettingsMenu();
        else if (choice == '2') ShowWireModeSettingsMenu();
        else if (choice == '3') ShowCharsetSettingsMenu();
        // Any other key: fall through and show the menu again.
    }
}
|
||
|
||
/// <summary>
/// Maps the current <c>BenchmarkTestDataProvider.LongStringSuffix</c> value to the name of the
/// matching <see cref="CharsetSuffixes"/> constant.
/// </summary>
/// <returns>
/// One of "Latin1FixAscii", "Latin1Short", "Latin1Long", "CjkBmp", "Cyrillic", "Mixed", or
/// "Custom" when the suffix equals none of the predefined constants.
/// </returns>
private static string GetCurrentCharsetName()
{
    var suffix = BenchmarkTestDataProvider.LongStringSuffix;

    if (suffix == CharsetSuffixes.Latin1FixAscii) return "Latin1FixAscii";
    if (suffix == CharsetSuffixes.Latin1Short) return "Latin1Short";
    if (suffix == CharsetSuffixes.Latin1Long) return "Latin1Long";
    if (suffix == CharsetSuffixes.CjkBmp) return "CjkBmp";
    if (suffix == CharsetSuffixes.Cyrillic) return "Cyrillic";
    if (suffix == CharsetSuffixes.Mixed) return "Mixed";

    return "Custom";
}
|
||
|
||
/// <summary>
/// Charset sub-menu. Shows the current charset name and the six predefined long-string suffix
/// profiles, reads one key and, for [1]-[6], stores the chosen <see cref="CharsetSuffixes"/>
/// constant in <c>BenchmarkTestDataProvider.LongStringSuffix</c>, prints a confirmation and
/// returns. [B] returns without changes; any other key shows the menu again.
/// </summary>
private static void ShowCharsetSettingsMenu()
{
    while (true)
    {
        var output = System.Console.Out;

        output.WriteLine();
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine("Charset settings — long-string suffix profile");
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine($"Current: {GetCurrentCharsetName()}");
        output.WriteLine();
        output.WriteLine(" [1] Latin1FixAscii — empty suffix; short FixStr-fast-path stress (Latin1 baseline values stay short)");
        output.WriteLine(" [2] Latin1Short — \" árvíztűrő tükörfúrógép\" (~24 char Hungarian mixed)");
        output.WriteLine(" [3] Latin1Long — ~47-char Latin1 mixed (default; exceeds FixStr boundary)");
        output.WriteLine(" [4] CjkBmp — CJK BMP (long 3-byte runs)");
        output.WriteLine(" [5] Cyrillic — Russian Cyrillic (long 2-byte runs)");
        output.WriteLine(" [6] Mixed — Hungarian + CJK + Cyrillic + emoji (full-spectrum + surrogate pairs)");
        output.WriteLine(" [B] Back");
        output.Write("\nSelection: ");

        var pressed = System.Console.ReadKey(intercept: false).KeyChar;
        output.WriteLine();
        var choice = char.ToLower(pressed);

        if (choice == 'b') return;

        // Key → (suffix to store, confirmation to print); null for keys that are not a profile.
        (string Suffix, string Confirmation)? selection = choice switch
        {
            '1' => (CharsetSuffixes.Latin1FixAscii, "✓ Charset set to Latin1FixAscii"),
            '2' => (CharsetSuffixes.Latin1Short, "✓ Charset set to Latin1Short"),
            '3' => (CharsetSuffixes.Latin1Long, "✓ Charset set to Latin1Long"),
            '4' => (CharsetSuffixes.CjkBmp, "✓ Charset set to CjkBmp"),
            '5' => (CharsetSuffixes.Cyrillic, "✓ Charset set to Cyrillic"),
            '6' => (CharsetSuffixes.Mixed, "✓ Charset set to Mixed"),
            _ => null,
        };

        if (selection is null) continue;

        var (suffix, confirmation) = selection.Value;
        BenchmarkTestDataProvider.LongStringSuffix = suffix;
        output.WriteLine(confirmation);
        return;
    }
}
|
||
|
||
/// <summary>
/// Iteration sub-menu. Prompts in turn for the warmup iteration count (minimum 0), the test
/// iteration count (minimum 1) and the sample count (minimum 1), stores each answer in
/// <c>Configuration</c> as soon as it is read, then prints a summary of the three values.
/// An empty or invalid answer keeps the current value (see <c>PromptInt</c>).
/// </summary>
private static void ShowIterationSettingsMenu()
{
    var output = System.Console.Out;

    output.WriteLine();
    output.WriteLine("─────────────────────────────────────────────");
    output.WriteLine("Iteration settings — press Enter to keep current value");
    output.WriteLine("─────────────────────────────────────────────");
    output.WriteLine();

    var warmup = PromptInt("Configuration.WarmupIterations", Configuration.WarmupIterations, min: 0);
    Configuration.WarmupIterations = warmup;

    var iterations = PromptInt("Configuration.TestIterations ", Configuration.TestIterations, min: 1);
    Configuration.TestIterations = iterations;

    var sampleCount = PromptInt("Configuration.BenchmarkSamples", Configuration.BenchmarkSamples, min: 1);
    Configuration.BenchmarkSamples = sampleCount;

    output.WriteLine();
    output.WriteLine($"✓ Iteration settings updated: Warmup={Configuration.WarmupIterations} | Iterations={Configuration.TestIterations} | Samples={Configuration.BenchmarkSamples}");
}
|
||
|
||
/// <summary>
/// WireMode sub-menu. Shows the current <c>Configuration.SelectedWireMode</c>, reads one key and
/// sets the mode to Compact for [1] or Fast for [2], prints a confirmation and returns.
/// [B] returns without changes; any other key shows the menu again.
/// </summary>
private static void ShowWireModeSettingsMenu()
{
    while (true)
    {
        var output = System.Console.Out;

        output.WriteLine();
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine("WireMode settings");
        output.WriteLine("─────────────────────────────────────────────");
        output.WriteLine($"Current: {Configuration.SelectedWireMode}");
        output.WriteLine(" [1] Compact");
        output.WriteLine(" [2] Fast");
        output.WriteLine(" [B] Back");
        output.Write("\nSelection: ");

        var pressed = System.Console.ReadKey(intercept: false).KeyChar;
        output.WriteLine();
        var choice = char.ToLower(pressed);

        if (choice == 'b') return;

        if (choice == '1')
        {
            Configuration.SelectedWireMode = WireMode.Compact;
            output.WriteLine("✓ WireMode set to Compact");
            return;
        }

        if (choice == '2')
        {
            Configuration.SelectedWireMode = WireMode.Fast;
            output.WriteLine("✓ WireMode set to Fast");
            return;
        }

        // Unrecognised key: show the menu again.
    }
}
|
||
|
||
/// <summary>
/// Asks the user for an integer, showing <paramref name="currentValue"/> as the default.
/// </summary>
/// <param name="name">Label printed in front of the prompt.</param>
/// <param name="currentValue">Value returned when the input is empty, unparsable or below <paramref name="min"/>.</param>
/// <param name="min">Smallest accepted value (inclusive).</param>
/// <returns>The parsed value when it is a valid integer ≥ <paramref name="min"/>; otherwise <paramref name="currentValue"/>.</returns>
private static int PromptInt(string name, int currentValue, int min)
{
    System.Console.Write($" {name} [{currentValue}]: ");

    // ReadLine yields null at end-of-input; treat that like an empty line.
    var raw = System.Console.ReadLine();
    var text = raw is null ? "" : raw.Trim();

    if (text.Length == 0)
    {
        return currentValue;
    }

    var accepted = int.TryParse(text, out var parsed) && parsed >= min;
    if (!accepted)
    {
        System.Console.WriteLine($" ! Invalid value (need int ≥ {min}) — keeping {currentValue}");
        return currentValue;
    }

    return parsed;
}
|
||
|
||
/// <summary>
/// Filters test data sets by layer keyword, so a run can be limited to what the current iteration cadence needs.
/// P1: only "Core" data exists (Small/Medium/Large/Repeated/Deep). The Comprehensive and Edge layers are
/// placeholders that will be expanded in P2 / P3; until then they select the same sets as "core".
/// </summary>
/// <param name="all">Every available test data set.</param>
/// <param name="layer">
/// Layer keyword, matched exactly (case-sensitive): <c>"all"</c>, <c>"core"</c>, <c>"comprehensive"</c>,
/// <c>"edge"</c>, or one of the single-cell keywords <c>"small"</c>, <c>"medium"</c>, <c>"large"</c>,
/// <c>"repeated"</c>, <c>"deep"</c>. Any other value selects everything.
/// </param>
/// <returns>A new list with the matching data sets (never the <paramref name="all"/> instance itself).</returns>
private static List<TestDataSet> FilterByLayer(List<TestDataSet> all, string layer)
{
    if (layer == "all") return all.ToList();

    var coreNames = new[] { "Small", "Medium", "Large", "Repeated", "Deep" };
    // P2 will add: "Flat", "Polymorphic", "Collection", "Numeric", "NonAscii", etc.
    var comprehensiveExtras = new string[] { /* P2 */ };
    // P3 will add: "ColdStart", "VeryLarge", "PathologicalString", etc.
    var edgeExtras = new string[] { /* P3 */ };

    return layer switch
    {
        "core" => WithPrefixFrom(coreNames),
        "comprehensive" => WithPrefixFrom(coreNames, comprehensiveExtras),
        "edge" => WithPrefixFrom(coreNames, comprehensiveExtras, edgeExtras),

        // Single-cell A/B mini-suite filters — match by case-insensitive prefix on Name.
        // Use case: tight optimization-iteration loop on one specific cell (e.g. `dotnet run -- repeated`
        // or interactive menu shortcut), avoiding the full suite when only one cell is in scope.
        // The keyword itself is the prefix: the comparison ignores case, so "small" matches "Small…".
        "small" or "medium" or "large" or "repeated" or "deep"
            => all.Where(t => t.Name.StartsWith(layer, StringComparison.OrdinalIgnoreCase)).ToList(),

        _ => all.ToList()
    };

    // Keeps every data set whose name starts with a prefix from at least one of the given groups.
    List<TestDataSet> WithPrefixFrom(params string[][] prefixGroups) =>
        all.Where(t => prefixGroups.Any(group => StartsWithAny(t.Name, group))).ToList();

    // Ordinal (case-sensitive, culture-independent) prefix test. The parameterless string.StartsWith(string)
    // overload compares with the current culture, which is wrong for identifier-like names.
    static bool StartsWithAny(string name, string[] prefixes) =>
        prefixes.Any(prefix => name.StartsWith(prefix, StringComparison.Ordinal));
}
|
||
|
||
#endregion
|
||
|
||
#region Serializer Implementations
|
||
|
||
/// <summary>
/// Contract implemented by every benchmark cell: identification columns, size/allocation metadata,
/// warmup hooks, the timed <see cref="Serialize"/> / <see cref="Deserialize"/> operations and a
/// correctness check.
/// </summary>
private interface ISerializerBenchmark
{
    /// <summary>Name of the serializer engine, e.g. "AcBinary", "MemoryPack", "MessagePack".</summary>
    string Engine { get; }

    /// <summary>I/O mode of the cell, e.g. "Byte[]", "BufWr reuse", "BufWr new", "NamedPipe", "FileStream".</summary>
    string IoMode { get; }

    /// <summary>
    /// Dispatch mode: "SGen", "Runtime", or "Hybrid". AcBinary cells derive it from <c>UseGeneratedCode</c>
    /// plus child-type SGen coverage; other engines report their own native dispatch model.
    /// </summary>
    string DispatchMode { get; }

    /// <summary>Name of the options preset, e.g. "FastMode", "Default", "NoIntern", "WithCompression".</summary>
    string OptionsPreset { get; }

    /// <summary>Display name composed from <see cref="Engine"/>, <see cref="IoMode"/> and <see cref="OptionsPreset"/>.</summary>
    string Name => $"{Engine} ({IoMode}, {OptionsPreset})";

    /// <summary>Size in bytes of the serialized payload for this cell.</summary>
    int SerializedSize { get; }

    /// <summary>Optional human-readable description of the effective options; <c>null</c> unless an implementation supplies one.</summary>
    string? OptionsDescription => null;

    /// <summary>
    /// One-time allocation cost of the SERIALIZER-side setup (e.g. a pre-allocated ArrayBufferWriter with
    /// its internal buffer). Captured in the constructor; 0 for the byte[] API and the fresh-BufWriter variants.
    /// </summary>
    long SetupSerializeAllocBytes { get; }

    /// <summary>
    /// One-time allocation cost of the DESERIALIZER-side setup (e.g. a long-lived AsyncPipeReaderInput's
    /// ArrayPool rent + ManualResetEventSlim, drain-task scaffolding). Captured in the constructor; 0 for the
    /// byte[] API and any setup-free deserialize path.
    /// </summary>
    long SetupDeserializeAllocBytes { get; }

    /// <summary>
    /// <c>true</c> when <see cref="Serialize"/> performs a full round-trip (e.g. NamedPipe) and
    /// <see cref="Deserialize"/> is a no-op. The SUMMARY: WINNERS section uses this to leave such cells out of
    /// the "Fastest Serialize" and "Fastest Deserialize" rankings (both metrics would be misleading there);
    /// they still take part in "Fastest Round-trip". Defaults to <c>false</c>, which fits in-memory I/O modes
    /// that measure Ser and Des separately.
    /// </summary>
    bool IsRoundTripOnly => false;

    /// <summary>
    /// Combined warmup (Ser + Deser interleaved). Currently unused — retained as a legacy single-call entry
    /// point for external callers. The benchmark loop uses the split <see cref="WarmupSerialize"/> +
    /// <see cref="WarmupDeserialize"/> pair for cache-isolated measurements.
    /// </summary>
    void Warmup(int iterations);

    /// <summary>
    /// Warms only the Serialize path by calling <see cref="Serialize"/> <paramref name="iterations"/> times.
    /// Override only when Ser-specific warmup state is needed (e.g. pre-allocating buffers). For
    /// <see cref="IsRoundTripOnly"/> cells <see cref="Serialize"/> is the full round-trip, so this warms
    /// the whole path.
    /// </summary>
    void WarmupSerialize(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
        }
    }

    /// <summary>
    /// Warms only the Deserialize path by calling <see cref="Deserialize"/> <paramref name="iterations"/> times.
    /// For <see cref="IsRoundTripOnly"/> cells <see cref="Deserialize"/> is a no-op, so the bench loop skips
    /// the Des phase entirely for them.
    /// </summary>
    void WarmupDeserialize(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Deserialize();
        }
    }

    /// <summary>Timed serialize operation.</summary>
    void Serialize();

    /// <summary>Timed deserialize operation.</summary>
    void Deserialize();

    /// <summary>
    /// Round-trip correctness check, called once per cell before warmup.
    /// Returns <c>true</c> when Serialize + Deserialize preserves the data.
    /// </summary>
    bool VerifyRoundTrip();
}
|
||
|
||
/// <summary>
/// AcBinary benchmark cell using the byte[] API: <c>AcBinarySerializer.Serialize</c> produces a new array,
/// <c>AcBinaryDeserializer.Deserialize</c> reads from a payload serialized once in the constructor.
/// </summary>
private sealed class AcBinaryBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Payload produced once at construction; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    public AcBinaryBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        _options = options;
        OptionsPreset = optionsPreset;
        _serialized = AcBinarySerializer.Serialize(order, options);
    }

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoByteArray;

    public string DispatchMode
    {
        get
        {
            if (_options.UseGeneratedCode)
            {
                return Configuration.ModeSGen;
            }

            return Configuration.ModeRuntime;
        }
    }

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    // The byte[] API needs no long-lived setup on either side.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options);

    /// <summary>Legacy combined warmup: alternates Serialize and Deserialize.</summary>
    public void Warmup(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
            Deserialize();
        }
    }

    // NoInlining keeps the measured call a real call frame.
    // (An experimental ScanOnly variant for reference-handling / interning modes used to be toggled here.)
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        _ = AcBinarySerializer.Serialize(_order, _options);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        _ = AcBinaryDeserializer.Deserialize<TestOrder>(_serialized, _options);
    }

    /// <summary>Serializes and deserializes once, then compares the result with the source via <c>DeepEqualsViaJson</c>.</summary>
    public bool VerifyRoundTrip()
    {
        var payload = AcBinarySerializer.Serialize(_order, _options);
        var restored = AcBinaryDeserializer.Deserialize<TestOrder>(payload, _options);
        return DeepEqualsViaJson(_order, restored);
    }
}
|
||
|
||
/// <summary>
/// MemoryPack benchmark cell using the byte[] API. Options come from <c>GetMemPackOptions()</c>;
/// the payload used by <see cref="Deserialize"/> is serialized once in the constructor.
/// </summary>
private sealed class MemoryPackBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly MemoryPackSerializerOptions _options;

    // Payload produced once at construction; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    public MemoryPackBenchmark(TestOrder order, string optionsPreset)
    {
        _order = order;
        OptionsPreset = optionsPreset;
        _options = GetMemPackOptions();
        _serialized = MemoryPackSerializer.Serialize(order, _options);
    }

    public string Engine => Configuration.EngineMemoryPack;

    public string IoMode => Configuration.IoByteArray;

    // MemoryPack always uses [MemoryPackable] source-generated formatters.
    public string DispatchMode => Configuration.ModeSGen;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    // The byte[] API needs no long-lived setup on either side.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";

    /// <summary>Legacy combined warmup: alternates Serialize and Deserialize.</summary>
    public void Warmup(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
            Deserialize();
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        _ = MemoryPackSerializer.Serialize(_order, _options);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        _ = MemoryPackSerializer.Deserialize<TestOrder>(_serialized, _options);
    }

    /// <summary>Serializes and deserializes once, then compares the result with the source via <c>DeepEqualsViaJson</c>.</summary>
    public bool VerifyRoundTrip()
    {
        var payload = MemoryPackSerializer.Serialize(_order, _options);
        var restored = MemoryPackSerializer.Deserialize<TestOrder>(payload, _options);
        return DeepEqualsViaJson(_order, restored);
    }
}
|
||
|
||
#if !AYCODE_NATIVEAOT
// MessagePack benchmark — left out of the NativeAOT build. Per the original note: v3's StandardResolver
// falls back to DynamicGenericResolver for closed-generic types (List<TestOrderItem> et al.), which uses
// Activator.CreateInstance on formatter types the AOT trimmer drops → MissingMethodException at runtime.
// Available for regular JIT runs (`dotnet run`) only.
/// <summary>
/// MessagePack benchmark cell using the byte[] API with <c>MessagePackSerializerOptions.Standard</c>
/// and compression disabled.
/// </summary>
private sealed class MessagePackBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly MessagePackSerializerOptions _options;

    // Payload produced once at construction; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    public MessagePackBenchmark(TestOrder order, string optionsPreset)
    {
        _order = order;
        OptionsPreset = optionsPreset;

        // Contract-based standard options, no compression. To benchmark other variants, start from
        // ContractlessStandardResolver.Options and/or use MessagePackCompression.Lz4Block instead.
        _options = MessagePackSerializerOptions.Standard.WithCompression(MessagePackCompression.None);

        var mode = _options.Resolver is ContractlessStandardResolver ? "Contractless" : "ContractBased";
        OptionsDescription = $"Mode={mode}, Compression={_options.Compression}";

        _serialized = MessagePackSerializer.Serialize(order, _options);
    }

    public string Engine => Configuration.EngineMessagePack;

    public string IoMode => Configuration.IoByteArray;

    // MessagePack uses [MessagePackObject] source-generated formatters (StandardResolver).
    public string DispatchMode => Configuration.ModeSGen;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    // The byte[] API needs no long-lived setup on either side.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    public string OptionsDescription { get; }

    /// <summary>Legacy combined warmup: alternates Serialize and Deserialize.</summary>
    public void Warmup(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
            Deserialize();
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        _ = MessagePackSerializer.Serialize(_order, _options);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        _ = MessagePackSerializer.Deserialize<TestOrder>(_serialized, _options);
    }

    /// <summary>Serializes and deserializes once, then compares the result with the source via <c>DeepEqualsViaJson</c>.</summary>
    public bool VerifyRoundTrip()
    {
        var payload = MessagePackSerializer.Serialize(_order, _options);
        var restored = MessagePackSerializer.Deserialize<TestOrder>(payload, _options);
        return DeepEqualsViaJson(_order, restored);
    }
}
#endif
|
||
|
||
/// <summary>
/// Benchmarks AcBinary through the IBufferWriter overload, allocating a FRESH
/// <see cref="ArrayBufferWriter{T}"/> on EVERY <see cref="Serialize"/> call — the one-shot scenario of
/// code that does not reuse a writer across calls.
/// The options instance (including <c>BufferWriterChunkSize</c>) is supplied by the caller. Per the
/// original notes the intended chunk size is 4096 (production-realistic, SignalR-aligned) rather than the
/// 65535 default, because otherwise AcBinary would request 64KB upfront via GetSpan() and force the fresh
/// writer to allocate 64KB regardless of payload size.
/// </summary>
private sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Payload produced once at construction; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;

        // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
        // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
        // size in CreateSerializers only.
        _options = options;
        OptionsPreset = optionsPreset;
        _serialized = AcBinarySerializer.Serialize(order, _options);
    }

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoBufWrNew;

    public string DispatchMode
    {
        get
        {
            if (_options.UseGeneratedCode)
            {
                return Configuration.ModeSGen;
            }

            return Configuration.ModeRuntime;
        }
    }

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    // No long-lived setup: the writer is created inside the timed Serialize() call.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B");

    /// <summary>Legacy combined warmup: alternates Serialize and Deserialize.</summary>
    public void Warmup(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
            Deserialize();
        }
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // FRESH writer on every call — its allocation and growth are part of the measurement.
        var writer = new ArrayBufferWriter<byte>();
        AcBinarySerializer.Serialize(_order, writer, _options);
    }

    // BufWr semantic: the input is handed over as a ReadOnlySequence<byte> (the ROS overload), not as byte[].
    // NOTE(review): the original comment states that a single-segment array-backed sequence hits a fast path
    // in AcBinaryDeserializer (around line 298) that redirects to the byte[] overload — confirm against that
    // file. The intent is to exercise the ROS-input surface used by SignalR / Pipe consumers.
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        var sequence = new ReadOnlySequence<byte>(_serialized);
        _ = AcBinaryDeserializer.Deserialize<TestOrder>(sequence, _options);
    }

    /// <summary>
    /// Serializes into a fresh writer, deserializes from its written memory wrapped in a
    /// <see cref="ReadOnlySequence{T}"/>, and compares with the source via <c>DeepEqualsViaJson</c>.
    /// </summary>
    public bool VerifyRoundTrip()
    {
        var writer = new ArrayBufferWriter<byte>();
        AcBinarySerializer.Serialize(_order, writer, _options);

        var sequence = new ReadOnlySequence<byte>(writer.WrittenMemory);
        var restored = AcBinaryDeserializer.Deserialize<TestOrder>(sequence, _options);
        return DeepEqualsViaJson(_order, restored);
    }
}
|
||
|
||
/// <summary>
/// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary streaming API
/// (<c>AcBinarySerializer.SerializeChunkedFramed</c> on the sending side, <see cref="AsyncPipeReaderInput"/>
/// fed by <c>DrainFromAsync</c> on the receiving side). This mirrors what a real consumer does per message:
/// one long-lived <see cref="AsyncPipeReaderInput"/> in multi-message mode on top of one long-lived pipe.
///
/// <para><b>Architecture</b>:</para>
/// <list type="bullet">
///   <item>Constructor (NOT timed): creates the <see cref="NamedPipeServerStream"/> /
///   <see cref="NamedPipeClientStream"/> pair, waits for the connection, wraps the two ends in one
///   <see cref="System.IO.Pipelines.PipeWriter"/> / <see cref="System.IO.Pipelines.PipeReader"/> pair, creates
///   ONE <see cref="AsyncPipeReaderInput"/> with <c>multiMessage: true</c>, starts ONE drain task running
///   <c>DrainFromAsync</c>, and starts ONE consumer task that runs
///   <c>AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts)</c> each time it is signaled.</item>
///   <item>Per-iteration <see cref="Serialize"/> (timed): signals the consumer, writes the message with
///   <c>SerializeChunkedFramed</c>, then blocks until the consumer reports completion. Request and
///   completion are signaled through two <see cref="ManualResetEventSlim"/> instances. Per the original
///   notes the multi-message wire is <c>[201][UINT16][data]...[202]</c>, and the <c>[202]</c> end marker arms
///   the input's <c>_readPos = -1</c> sentinel so the next message's first <c>AppendToBuffer</c> recycles the
///   buffer to 0.</item>
///   <item><see cref="Deserialize"/> is a no-op (the full round-trip is captured in <see cref="Serialize"/>);
///   <see cref="IsRoundTripOnly"/> is <c>true</c>, so the Ser ms / SerAlloc columns are N/A and RT ms is the
///   full round-trip.</item>
/// </list>
///
/// <para><b>Per-iteration overhead</b>: no new <c>Task.Run</c>, no new <c>AsyncPipeReaderInput</c>, no new
/// <c>CancellationTokenSource</c>. What remains is <c>SerializeChunkedFramed</c> (CPU + per-chunk flush),
/// the kernel write/read syscalls, one synchronization barrier (the request/done events) and the allocation
/// of the deserialized graph. The original notes attribute the multi-message reuse pattern to the Q4T8 fix
/// (R5K2 minimum: <c>_readPos = -1</c> sentinel + <c>AppendToBuffer</c> sliding-window cycling).</para>
///
/// <para><b>Approximation note</b>: single-process loopback NamedPipe. Real cross-process / cross-machine
/// SignalR adds further transport latency (TCP, WebSocket framing) on top, so this gives a lower bound.</para>
/// </summary>
private sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;
    private readonly byte[] _serialized; // for SerializedSize reporting only

    // Long-lived pipe lifecycle (set up once in the ctor — NOT timed).
    private readonly NamedPipeServerStream _pipeServer;
    private readonly NamedPipeClientStream _pipeClient;
    private readonly PipeWriter _pipeWriter;
    private readonly PipeReader _pipeReader;

    // Long-lived multi-message receive infrastructure (set up once in the ctor).
    private readonly AsyncPipeReaderInput _input;
    private readonly CancellationTokenSource _cts;
    private readonly Task _drainTask;    // background: pumps the PipeReader into _input
    private readonly Task _consumerTask; // background: runs Deserialize<T>(_input) once per signal
    private readonly ManualResetEventSlim _consumeRequest = new(false); // caller → consumer: "start one Deserialize"
    private readonly ManualResetEventSlim _consumeDone = new(false);    // consumer → caller: "Deserialize returned"
    private object? _lastResult;  // filled only while _captureResult is true (VerifyRoundTrip)
    private bool _captureResult;  // when true, ConsumeLoop stores the result; otherwise it is discarded
    private bool _disposed;

    public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;

        // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
        // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
        // size in CreateSerializers only.
        _options = options;
        OptionsPreset = optionsPreset;

        _serialized = AcBinarySerializer.Serialize(order, _options);

        // One-time pipe setup. The server's in/out buffer sizes are set to BufferWriterChunkSize. Per the
        // original notes the client inherits the server-defined size at connect time, and the chunk size is
        // the total on-wire chunk size (header + data), so one chunk write fills exactly one kernel buffer
        // slot. _options.BufferWriterChunkSize is the single tunable source.
        var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}";

        // === SERIALIZE-side setup measurement ===
        // Pipe pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var beforeSer = GC.GetAllocatedBytesForCurrentThread();

        _pipeServer = new NamedPipeServerStream(
            pipeName,
            PipeDirection.In,
            1,
            PipeTransmissionMode.Byte,
            System.IO.Pipes.PipeOptions.Asynchronous,
            inBufferSize: _options.BufferWriterChunkSize,
            outBufferSize: _options.BufferWriterChunkSize);

        _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);

        // Start listening first, then connect, then block until the server side has accepted.
        var serverWait = _pipeServer.WaitForConnectionAsync();
        _pipeClient.Connect();
        serverWait.GetAwaiter().GetResult();

        _pipeWriter = PipeWriter.Create(_pipeClient);
        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - beforeSer;

        // === DESERIALIZE-side setup measurement ===
        // PipeReader wrapper + AsyncPipeReaderInput + cancellation source + the two background tasks.
        // Only allocations made on the constructing thread are counted.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var beforeDes = GC.GetAllocatedBytesForCurrentThread();

        _pipeReader = PipeReader.Create(_pipeServer);
        _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
        _cts = new CancellationTokenSource();

        // Drain task: a single Task.Run for the whole benchmark lifetime, pumping the reader into _input
        // until cancelled — its start-up cost is amortised across all messages.
        _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));

        // Consumer task: started once, signaled per iteration through _consumeRequest. This lets the calling
        // thread run SerializeChunkedFramed while the consumer runs Deserialize<T> at the same time.
        _consumerTask = Task.Run(ConsumeLoop);

        SetupDeserializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - beforeDes;
    }

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoNamedPipe;

    public string DispatchMode
    {
        get
        {
            if (_options.UseGeneratedCode)
            {
                return Configuration.ModeSGen;
            }

            return Configuration.ModeRuntime;
        }
    }

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    public long SetupDeserializeAllocBytes { get; }

    public bool IsRoundTripOnly => true;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");

    // Background consumer: parks on _consumeRequest, runs one Deserialize<T>(_input) per signal, then sets
    // _consumeDone. The original notes state that Deserialize blocks inside the input whenever the drain
    // task has not yet fed enough bytes — that is where the overlap with the serializing thread happens.
    private void ConsumeLoop()
    {
        var token = _cts.Token;

        try
        {
            for (;;)
            {
                _consumeRequest.Wait(token);

                // Dispose() pulses the request event after cancelling; leave without deserializing.
                if (token.IsCancellationRequested)
                {
                    return;
                }

                _consumeRequest.Reset();

                try
                {
                    var message = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
                    if (_captureResult)
                    {
                        _lastResult = message;
                    }
                }
                catch
                {
                    // Deliberately swallowed: VerifyRoundTrip notices the failure through a missing or
                    // mismatching _lastResult; in the benchmark loop only the timing is affected.
                }
                finally
                {
                    // Always release the caller blocked in Serialize(), even when Deserialize failed.
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Cooperative cancel from Dispose — nothing to do.
        }
    }

    /// <summary>Warmup for a round-trip-only cell: <see cref="Serialize"/> already covers the full round-trip.</summary>
    public void Warmup(int iterations)
    {
        for (var remaining = iterations; remaining > 0; remaining--)
        {
            Serialize();
        }
    }

    /// <summary>
    /// One full round-trip. The order of the steps is essential:
    /// (1) clear the done flag and signal the consumer so it starts Deserialize and waits for bytes;
    /// (2) write the message with SerializeChunkedFramed, so chunks flow writer → kernel pipe → drain task →
    ///     input while the consumer deserializes them;
    /// (3) block until the consumer signals that Deserialize returned.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        _consumeDone.Reset();
        _consumeRequest.Set();

        AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);

        _consumeDone.Wait();
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // No-op: the per-iteration round-trip is captured in Serialize(). See the IsRoundTripOnly contract.
    }

    /// <summary>
    /// Runs one round-trip through the same 2-task streaming path as the benchmark, with result capture
    /// switched on, and compares the received graph with the source via <c>DeepEqualsViaJson</c>.
    /// Returns <c>false</c> when no result was captured.
    /// </summary>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            return _lastResult is TestOrder received && DeepEqualsViaJson(_order, received);
        }
        finally
        {
            // Back to discard mode so benchmark iterations do not keep the graph alive.
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Best-effort teardown: cancels and briefly waits for both background tasks, completes the pipe
    /// reader/writer, then disposes every owned resource. Each step swallows its own exception so that one
    /// failure does not prevent the remaining cleanup. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Cancel drain + consumer, and pulse the request event in case the consumer is parked on it.
        BestEffort(() => _cts.Cancel());
        BestEffort(() => _consumeRequest.Set());
        BestEffort(() => _drainTask.Wait(TimeSpan.FromSeconds(2)));
        BestEffort(() => _consumerTask.Wait(TimeSpan.FromSeconds(2)));

        // Complete the writer/reader, then dispose the pipe ends and the remaining resources.
        BestEffort(() => _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)));
        BestEffort(() => _pipeReader.Complete());
        BestEffort(() => _pipeClient.Dispose());
        BestEffort(() => _pipeServer.Dispose());
        BestEffort(() => _input.Dispose());
        BestEffort(() => _consumeRequest.Dispose());
        BestEffort(() => _consumeDone.Dispose());
        BestEffort(() => _cts.Dispose());

        // Runs one teardown step and ignores any exception it throws.
        static void BestEffort(Action step)
        {
            try
            {
                step();
            }
            catch
            {
                /* swallow on teardown */
            }
        }
    }
}
|
||
|
||
/// <summary>
|
||
/// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
|
||
/// is an in-memory <see cref="System.IO.Pipelines.Pipe"/> instead of a kernel <c>NamedPipe</c>. The Pipe's
|
||
/// <c>Writer</c>/<c>Reader</c> pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
|
||
/// buffer copy, no IRP queueing.
|
||
///
|
||
/// <para><b>Why this benchmark matters</b>: by holding ALL other variables constant (same SerializeChunkedFramed,
|
||
/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
|
||
/// row isolates the <b>kernel-NamedPipe transport overhead</b> from the chunked-streaming framework's pure
|
||
/// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
|
||
/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
|
||
/// should converge dramatically toward <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>.</para>
|
||
///
|
||
/// <para><b>Real-world relevance</b>: in-memory Pipe is the typical primitive used for cross-thread serializer
|
||
/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
|
||
/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
|
||
/// of the NamedPipe benchmark.</para>
|
||
/// </summary>
|
||
private sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Reference payload serialised once in the ctor; only its length is reported (SerializedSize).
    private readonly byte[] _serialized;

    // The in-memory pipe and both of its ends. Built once in the ctor, outside the timed region.
    private readonly Pipe _pipe;
    private readonly PipeWriter _pipeWriter;
    private readonly PipeReader _pipeReader;

    // Long-lived receive side, also built once in the ctor: the drain task pumps _pipeReader into
    // _input, and the consumer task runs Deserialize<T>(_input) whenever it is signalled.
    private readonly AsyncPipeReaderInput _input;
    private readonly CancellationTokenSource _cts;
    private readonly Task _drainTask;
    private readonly Task _consumerTask;

    // Per-iteration handshake: Serialize() raises _consumeRequest, the consumer answers via _consumeDone.
    private readonly ManualResetEventSlim _consumeRequest = new(false);
    private readonly ManualResetEventSlim _consumeDone = new(false);

    // The deserialised graph is retained only while VerifyRoundTrip has _captureResult switched on.
    private object? _lastResult;
    private bool _captureResult;
    private bool _disposed;

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoInMemoryPipe;

    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    public long SetupDeserializeAllocBytes { get; }

    // Serialize() performs the whole round trip; Deserialize() is a no-op.
    public bool IsRoundTripOnly => true;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");

    /// <summary>
    /// Builds the pipe, the receive-side input and both background tasks, recording the bytes allocated
    /// on the constructing thread for the send-side and receive-side setup separately.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">AcBinary options used for both serialisation and deserialisation.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        _options = options;
        OptionsPreset = optionsPreset;

        _serialized = AcBinarySerializer.Serialize(order, _options);

        // --- Send-side setup cost ---
        // Just a managed Pipe object plus a reference to its own Writer: no kernel pipe pair and no
        // connect handshake are involved.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var sendBaseline = GC.GetAllocatedBytesForCurrentThread();
        _pipe = new Pipe();
        _pipeWriter = _pipe.Writer;
        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - sendBaseline;

        // --- Receive-side setup cost ---
        // Reader reference, the AsyncPipeReaderInput, the cancellation source and the two tasks
        // (drain + consumer), i.e. the same receive-side scaffolding as the NamedPipe variant.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var receiveBaseline = GC.GetAllocatedBytesForCurrentThread();

        _pipeReader = _pipe.Reader;
        _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
        _cts = new CancellationTokenSource();
        _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
        _consumerTask = Task.Run(ConsumeLoop);

        SetupDeserializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - receiveBaseline;
    }

    // Background consumer. Each pass: wait for _consumeRequest, deserialise one message from _input,
    // then raise _consumeDone. Uses the same MRES protocol as AcBinaryNamedPipeBenchmark.ConsumeLoop.
    private void ConsumeLoop()
    {
        var cancellation = _cts.Token;

        try
        {
            for (;;)
            {
                _consumeRequest.Wait(cancellation);
                if (cancellation.IsCancellationRequested)
                {
                    return;
                }

                _consumeRequest.Reset();

                try
                {
                    var graph = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
                    if (_captureResult)
                    {
                        _lastResult = graph;
                    }
                }
                catch
                {
                    // Deliberately swallowed (same policy as the NamedPipe variant's loop); the
                    // finally block below still releases the waiting caller.
                }
                finally
                {
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Wait(cancellation) was interrupted by Dispose: a normal, cooperative exit.
        }
    }

    /// <summary>Runs <paramref name="iterations"/> untimed round trips.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
        }
    }

    /// <summary>
    /// One full round trip: signals the consumer, streams the graph into the pipe and blocks until the
    /// consumer reports that it has finished deserialising.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // Same two-task streaming round trip as the NamedPipe variant; only the transport differs
        // (managed Pipe instead of a kernel NamedPipe). Chunks written by SerializeChunkedFramed land in
        // the PipeWriter, the drain task moves them from the PipeReader into _input, and the consumer's
        // Deserialize<T> reads them from there.
        //
        // The Pipe overload is used (rather than the PipeWriter overload) because it exposes the
        // FlushPolicy argument for A/B tuning of the streaming-pipeline overhead:
        //   - FlushPolicy.PerChunk  : awaits FlushAsync per chunk (bounded peak memory). Per the original
        //                             author's note it is functionally equivalent to the PipeWriter
        //                             overload (both route to SerializeToPipeWriterCore with PerChunk).
        //   - FlushPolicy.Coalesced : fire-and-forget per chunk, letting the pipe coalesce flushes up to
        //                             PauseWriterThreshold (~64 KB). This is the policy selected below.
        _consumeDone.Reset();
        _consumeRequest.Set();

        AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);

        _consumeDone.Wait();
    }

    /// <summary>Intentionally empty: the round trip is measured inside <see cref="Serialize"/>.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
    }

    /// <summary>
    /// Performs one captured round trip and compares the received graph with the source graph.
    /// </summary>
    /// <returns><c>true</c> when a graph was received and it is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            return _lastResult is TestOrder received && DeepEqualsViaJson(_order, received);
        }
        finally
        {
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Stops both background tasks, completes the pipe ends and releases the synchronisation objects.
    /// Each step is best-effort; a failure in one step never prevents the following ones.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        var grace = TimeSpan.FromSeconds(2);

        // Cancel drain + consumer; pulse _consumeRequest in case the consumer is parked on it.
        BestEffort(() => _cts.Cancel());
        BestEffort(() => _consumeRequest.Set());
        BestEffort(() => _drainTask.Wait(grace));
        BestEffort(() => _consumerTask.Wait(grace));

        // Complete writer, then reader (in-memory Pipe: there is no underlying stream to dispose).
        BestEffort(() => _pipeWriter.CompleteAsync().AsTask().Wait(grace));
        BestEffort(() => _pipeReader.Complete());
        BestEffort(() => _input.Dispose());
        BestEffort(() => _consumeRequest.Dispose());
        BestEffort(() => _consumeDone.Dispose());
        BestEffort(() => _cts.Dispose());

        static void BestEffort(Action step)
        {
            try
            {
                step();
            }
            catch
            {
                /* swallow on teardown */
            }
        }
    }
}
|
||
|
||
/// <summary>
/// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
/// NO sliding-window buffer. The calling thread serialises and writes; a long-lived background consumer
/// task reads and deserialises. The two-task pattern lets the caller's Write overlap with the consumer's
/// Read (kernel-pipe-pipelined) AND avoids the kernel-buffer-full deadlock when
/// <c>bytes.Length &gt; inBufferSize</c>.
///
/// <para>Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
/// isolates two cost components on the SAME kernel-pipe transport with the SAME <c>inBufferSize</c>:</para>
/// <list type="bullet">
/// <item><description><b>This row vs <see cref="AcBinaryBenchmark"/> (Byte[])</b> — pure kernel-NamedPipe
/// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
/// <item><description><b>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed)</b> — pure
/// AsyncPipe-framework overhead (chunk header writes + sliding-window <c>Feed</c> + MRES wait inside
/// <c>AsyncPipeReaderInput</c>) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
/// raw lacks — raw can only overlap Write with Read, with Des sequential after Read completes).</description></item>
/// </list>
/// <para>The per-iter <c>byte[]</c> allocation from <c>AcBinarySerializer.Serialize</c> is part of the cost
/// (matches <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated
/// per-iter on the consumer task (counted via <c>GC.GetTotalAllocatedBytes</c> in <c>MeasureAllocationTotal</c>).</para>
/// </summary>
private sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Reference payload serialised once in the ctor; used for SerializedSize reporting.
    private readonly byte[] _serialized;

    // Long-lived pipe pair (set up once in ctor — NOT timed). The server end is read by the consumer
    // task, the client end is written by the calling thread.
    private readonly NamedPipeServerStream _pipeServer;
    private readonly NamedPipeClientStream _pipeClient;

    // Long-lived consumer-task infrastructure (Read + Deserialize on a BG thread, signalled per iter).
    // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] has no intermediate
    // sliding-window buffer, so Read and Des happen sequentially in one BG task:
    // read N bytes → Deserialize<T>(bytes) → signal done.
    private readonly CancellationTokenSource _cts;
    private readonly Task _consumerTask;
    private readonly ManualResetEventSlim _consumeRequest = new(false);
    private readonly ManualResetEventSlim _consumeDone = new(false);

    // Size of the message the consumer must read next; written by Serialize() before it signals.
    private int _pendingReadSize;
    private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
    private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards
    private bool _disposed;

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoNamedPipeRaw;

    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    public long SetupDeserializeAllocBytes { get; }

    // Serialize() performs the whole round trip; Deserialize() is a no-op.
    public bool IsRoundTripOnly => true;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");

    /// <summary>
    /// Creates and connects the pipe pair and starts the consumer task, recording the bytes allocated on
    /// the constructing thread for the send-side and receive-side setup separately.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">AcBinary options; <c>BufferWriterChunkSize</c> also sizes the kernel pipe buffers.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        // BufferWriterChunkSize comes from the caller — same source-of-truth contract as
        // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
        // raw-vs-chunked comparison runs on identical transport conditions.
        _options = options;
        OptionsPreset = optionsPreset;

        _serialized = AcBinarySerializer.Serialize(order, _options);

        // Unique name per instance so concurrently alive benchmarks never share a pipe.
        var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";

        // === SERIALIZE-side setup measurement ===
        // Pipe pair (server + client) + connect handshake. NO PipeWriter wrapper — the raw
        // Stream.Write API is used directly, matching the no-framing semantics of this benchmark.
        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
        _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
            System.IO.Pipes.PipeOptions.Asynchronous,
            inBufferSize: _options.BufferWriterChunkSize,
            outBufferSize: _options.BufferWriterChunkSize);
        _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);

        // Start the server-side wait first so the client's blocking Connect can complete against it.
        var serverWait = _pipeServer.WaitForConnectionAsync();
        _pipeClient.Connect();
        serverWait.GetAwaiter().GetResult();
        var afterSer = GC.GetAllocatedBytesForCurrentThread();
        SetupSerializeAllocBytes = afterSer - beforeSer;

        // === DESERIALIZE-side setup measurement ===
        // Cancellation source + background consumer task; mirrors the chunked benchmark's
        // deserialize-side setup cost shape.
        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
        _cts = new CancellationTokenSource();
        _consumerTask = Task.Run(ConsumerLoop);
        var afterDes = GC.GetAllocatedBytesForCurrentThread();
        SetupDeserializeAllocBytes = afterDes - beforeDes;
    }

    // BG consumer: parks on _consumeRequest, reads _pendingReadSize bytes from the pipe, runs
    // Deserialize<T>(bytes), signals _consumeDone. The Read overlaps with the calling thread's Write
    // through the kernel pipe; Des happens sequentially after Read completes (raw byte[] needs the
    // full message to deserialize).
    private void ConsumerLoop()
    {
        var ct = _cts.Token;
        try
        {
            while (true)
            {
                _consumeRequest.Wait(ct);
                if (ct.IsCancellationRequested) return;
                _consumeRequest.Reset();

                try
                {
                    var size = _pendingReadSize;
                    var bytes = new byte[size]; // per-iter alloc — counted by MeasureAllocationTotal
                    var totalRead = 0;
                    while (totalRead < size)
                    {
                        var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
                        if (n == 0)
                        {
                            // Pipe closed / EOF before the whole message arrived. Abort this iteration
                            // instead of handing a truncated, zero-padded buffer to the deserializer;
                            // the catch below swallows this and the finally still releases the caller.
                            throw new System.IO.EndOfStreamException(
                                $"Pipe closed after {totalRead} of {size} expected bytes.");
                        }

                        totalRead += n;
                    }

                    var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
                    if (_captureResult) _lastResult = result;
                }
                catch
                {
                    // Swallow — the calling thread sees the failure via a missing/incorrect _lastResult
                    // during VerifyRoundTrip, or the benchmark loop just continues (timing impacted).
                    // Teardown is handled in Dispose.
                }
                finally
                {
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Cooperative cancel — Dispose path. Swallow.
        }
    }

    /// <summary>Runs <paramref name="iterations"/> untimed round trips.</summary>
    public void Warmup(int iterations)
    {
        for (var i = 0; i < iterations; i++)
        {
            Serialize();
        }
    }

    /// <summary>
    /// One full round trip: serialises to a fresh <c>byte[]</c>, hands the expected size to the consumer,
    /// writes the bytes to the pipe and blocks until the consumer has read and deserialised them.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // 2-task pipeline:
        //   1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
        //   2. Calling thread publishes the expected size and signals the consumer task, which starts its
        //      Read loop on the pipe (BG thread). The calling thread then writes the bytes — Read and
        //      Write overlap through the kernel pipe (buffer fills, drains as the consumer reads, sender
        //      resumes).
        //   3. Calling thread waits for _consumeDone (consumer finished Read + Des).
        //
        // Unlike the chunked variant, raw byte[] cannot overlap Ser with Des (Des needs the full bytes
        // before starting); only Write↔Read overlaps here. That is the architectural difference between
        // raw and chunked.
        var bytes = AcBinarySerializer.Serialize(_order, _options);

        // The size must be published before the request is raised; the consumer reads it after waking.
        _pendingReadSize = bytes.Length;
        _consumeDone.Reset();
        _consumeRequest.Set();

        _pipeClient.Write(bytes, 0, bytes.Length);
        _pipeClient.Flush();

        _consumeDone.Wait();
    }

    /// <summary>Intentionally empty: the round trip is measured inside <see cref="Serialize"/>.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
    }

    /// <summary>
    /// Performs one captured round trip through the same 2-task path as the benchmark and compares the
    /// received graph with the source graph.
    /// </summary>
    /// <returns><c>true</c> when a graph was received and it is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            var result = _lastResult as TestOrder;
            return result != null && DeepEqualsViaJson(_order, result);
        }
        finally
        {
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Stops the consumer task, closes both pipe ends and releases the synchronisation objects.
    /// Each step is best-effort; a failure in one step never prevents the following ones.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }

        // Symmetric teardown — close client first (writer side), then server.
        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
    }
}
|
||
|
||
/// <summary>
/// Raw <c>byte[]</c> over an in-memory cross-thread handoff — NO transport at all (no NamedPipe, no Pipe,
/// no <c>Channel&lt;T&gt;</c>). The calling thread serialises into a fresh <c>byte[]</c> and passes it to a
/// background consumer task through a single <c>byte[]</c> slot plus an MRES pair; the consumer
/// deserialises and signals completion.
///
/// <para><b>Why this benchmark matters</b>: it completes the 2x2 transport × wire-format matrix:</para>
/// <list type="bullet">
/// <item><description><b>NamedPipe + Chunked</b> = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
/// <item><description><b>NamedPipe + Raw</b> = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
/// <item><description><b>In-memory Pipe + Chunked</b> = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
/// <item><description><b>In-memory + Raw</b> = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
/// </list>
/// <para>Compared with <see cref="AcBinaryInMemoryPipeBenchmark"/> it isolates the chunked-streaming
/// framework's pure CPU cost (in-memory transport, zero kernel involvement, on both sides). Compared with
/// <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> it isolates the kernel-NamedPipe overhead on the
/// raw-byte[] side.</para>
/// </summary>
private sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Reference payload serialised once in the ctor; only its length is reported (SerializedSize).
    private readonly byte[] _serialized;

    // Long-lived consumer task (runs Deserialize on a BG thread, signalled once per iteration).
    private readonly CancellationTokenSource _cts;
    private readonly Task _consumerTask;

    // Per-iteration handshake: Serialize() raises _consumeRequest, the consumer answers via _consumeDone.
    private readonly ManualResetEventSlim _consumeRequest = new(false);
    private readonly ManualResetEventSlim _consumeDone = new(false);

    // The only "transport": the caller parks the freshly serialised array here for the consumer.
    private byte[]? _pendingBytes;

    // The deserialised graph is retained only while VerifyRoundTrip has _captureResult switched on.
    private object? _lastResult;
    private bool _captureResult;
    private bool _disposed;

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoInMemoryRaw;

    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    public long SetupDeserializeAllocBytes { get; }

    // Serialize() performs the whole round trip; Deserialize() is a no-op.
    public bool IsRoundTripOnly => true;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");

    /// <summary>
    /// Starts the consumer task and records the bytes allocated on the constructing thread while doing so.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">AcBinary options used for both serialisation and deserialisation.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        _options = options;
        OptionsPreset = optionsPreset;

        _serialized = AcBinarySerializer.Serialize(order, _options);

        // --- Send-side setup cost ---
        // There is nothing to build: the byte[] is produced per iteration by AcBinarySerializer.Serialize.
        SetupSerializeAllocBytes = 0;

        // --- Receive-side setup cost ---
        // Cancellation source + the background consumer task.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var receiveBaseline = GC.GetAllocatedBytesForCurrentThread();
        _cts = new CancellationTokenSource();
        _consumerTask = Task.Run(ConsumerLoop);
        SetupDeserializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - receiveBaseline;
    }

    // Background consumer. Each pass: wait for _consumeRequest, take the array reference out of
    // _pendingBytes, deserialise it, then raise _consumeDone. Only the reference crosses threads —
    // there is no transport call and no copy of the payload.
    private void ConsumerLoop()
    {
        var cancellation = _cts.Token;

        try
        {
            for (;;)
            {
                _consumeRequest.Wait(cancellation);
                if (cancellation.IsCancellationRequested)
                {
                    return;
                }

                _consumeRequest.Reset();

                try
                {
                    var payload = _pendingBytes;
                    if (payload != null)
                    {
                        var graph = AcBinaryDeserializer.Deserialize<TestOrder>(payload, _options);
                        if (_captureResult)
                        {
                            _lastResult = graph;
                        }
                    }
                }
                catch
                {
                    // Deliberately swallowed (same policy as the NamedPipe variant's loop); the
                    // finally block below still releases the waiting caller.
                }
                finally
                {
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Wait(cancellation) was interrupted by Dispose: a normal, cooperative exit.
        }
    }

    /// <summary>Runs <paramref name="iterations"/> untimed round trips.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
        }
    }

    /// <summary>
    /// One full round trip: serialises to a fresh <c>byte[]</c>, parks it for the consumer and blocks
    /// until the consumer has deserialised it.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // 2-task in-memory pipeline:
        //   1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
        //   2. Calling thread stores the array in _pendingBytes and signals the consumer, which picks
        //      up the reference (zero-copy) and runs Deserialize<T>(bytes).
        //   3. Calling thread waits for _consumeDone (consumer finished Des).
        //
        // Same architectural limitation as the NamedPipe-raw variant: Des cannot start before the full
        // bytes exist. Only the thread handoff itself overlaps slightly (the caller signals and starts
        // waiting while the consumer takes the byte[]).
        var payload = AcBinarySerializer.Serialize(_order, _options);

        // The slot must be filled before the request is raised; the consumer reads it after waking.
        _pendingBytes = payload;
        _consumeDone.Reset();
        _consumeRequest.Set();

        _consumeDone.Wait();
    }

    /// <summary>Intentionally empty: the round trip is measured inside <see cref="Serialize"/>.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
    }

    /// <summary>
    /// Performs one captured round trip and compares the received graph with the source graph.
    /// </summary>
    /// <returns><c>true</c> when a graph was received and it is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            return _lastResult is TestOrder received && DeepEqualsViaJson(_order, received);
        }
        finally
        {
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Stops the consumer task and releases the synchronisation objects. Each step is best-effort;
    /// a failure in one step never prevents the following ones.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Cancel the consumer; pulse _consumeRequest in case it is parked on it; give it 2 s to exit.
        BestEffort(() => _cts.Cancel());
        BestEffort(() => _consumeRequest.Set());
        BestEffort(() => _consumerTask.Wait(TimeSpan.FromSeconds(2)));

        BestEffort(() => _consumeRequest.Dispose());
        BestEffort(() => _consumeDone.Dispose());
        BestEffort(() => _cts.Dispose());

        static void BestEffort(Action step)
        {
            try
            {
                step();
            }
            catch
            {
                /* swallow on teardown */
            }
        }
    }
}
|
||
|
||
/// <summary>
/// MemoryPack through its IBufferWriter overload, with a brand-new <c>ArrayBufferWriter</c> allocated on
/// EVERY serialize call. Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark.
/// </summary>
private sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly MemoryPackSerializerOptions _options;

    // Reference payload produced once in the ctor; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    public string Engine => Configuration.EngineMemoryPack;

    public string IoMode => Configuration.IoBufWrNew;

    // MemoryPack always uses [MemoryPackable] source-generated formatters.
    public string DispatchMode => Configuration.ModeSGen;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    // Nothing is pre-built for either direction, so both setup costs are reported as zero.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";

    /// <summary>Captures the graph and pre-serialises it once with the shared MemoryPack options.</summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
    {
        _order = order;
        OptionsPreset = optionsPreset;
        _options = GetMemPackOptions();
        _serialized = MemoryPackSerializer.Serialize(order, _options);
    }

    /// <summary>Runs <paramref name="iterations"/> untimed serialize + deserialize pairs.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
            Deserialize();
        }
    }

    /// <summary>Serialises the graph into a writer that is created for this call only.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        var freshWriter = new ArrayBufferWriter<byte>();
        MemoryPackSerializer.Serialize(freshWriter, _order, _options);
    }

    /// <summary>
    /// Deserialises the reference payload through the <c>ReadOnlySequence&lt;byte&gt;</c> overload, to stay
    /// apples-to-apples with AcBinary's BufWr deserialize path. (Per the original author's note,
    /// MemoryPack's ROS overload also fast-paths single-segment input internally.)
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
    }

    /// <summary>Serialises into a fresh writer, reads the written bytes back and compares the graphs.</summary>
    /// <returns><c>true</c> when the round-tripped graph is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        var freshWriter = new ArrayBufferWriter<byte>();
        MemoryPackSerializer.Serialize(freshWriter, _order, _options);

        var written = new ReadOnlySequence<byte>(freshWriter.WrittenMemory);
        var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(written, _options);
        return DeepEqualsViaJson(_order, roundTripped);
    }
}
|
||
|
||
// AcBinary through its IBufferWriter overload, writing into ONE pre-allocated ArrayBufferWriter that is
// reset and reused on every serialize call.
private sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly AcBinarySerializerOptions _options;

    // Reference payload produced once in the ctor; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    // The reused output buffer; its allocation is what SetupSerializeAllocBytes measures.
    private readonly ArrayBufferWriter<byte> _bufferWriter;

    public string Engine => Configuration.EngineAcBinary;

    public string IoMode => Configuration.IoBufWrReuse;

    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    // Deserialize() reads straight from _serialized, so no receive-side infrastructure is allocated.
    public long SetupDeserializeAllocBytes => 0;

    public string OptionsDescription => BuildAcBinaryOptionsDescription(_options);

    /// <summary>
    /// Pre-serialises the graph once, then allocates the reusable writer while recording the bytes
    /// allocated on the constructing thread for that allocation alone.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">AcBinary options used for both serialisation and deserialisation.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        _options = options;
        OptionsPreset = optionsPreset;
        _serialized = AcBinarySerializer.Serialize(order, options);

        // Measure ONLY the writer allocation: the helper Serialize call above happens before the
        // baseline is taken, so it is excluded from the reported setup cost.
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var setupBaseline = GC.GetAllocatedBytesForCurrentThread();
        _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - setupBaseline;
    }

    /// <summary>Runs <paramref name="iterations"/> untimed serialize + deserialize pairs.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
            Deserialize();
        }
    }

    /// <summary>Rewinds the shared writer and serialises the graph into it.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // Reuse: ResetWrittenCount rewinds without allocating and without zeroing the buffer.
        _bufferWriter.ResetWrittenCount();
        AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
    }

    // BufWr semantic: the input is handed over as a ReadOnlySequence<byte> (the ROS overload), NOT as a
    // byte[]. Per the original author's note, a single-segment array-backed sequence triggers the
    // fast-path in AcBinaryDeserializer.cs:298, which redirects to the byte[] overload. The bench
    // therefore exercises the ROS-input surface (the production-realistic one for SignalR / Pipe
    // consumers) rather than secretly testing byte[] Deser under the BufWr label.
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
    }

    /// <summary>Serialises into the shared writer, reads the written bytes back and compares the graphs.</summary>
    /// <returns><c>true</c> when the round-tripped graph is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        _bufferWriter.ResetWrittenCount();
        AcBinarySerializer.Serialize(_order, _bufferWriter, _options);

        var written = new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory);
        var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(written, _options);
        return DeepEqualsViaJson(_order, roundTripped);
    }
}
|
||
|
||
/// <summary>
/// MemoryPack through its IBufferWriter overload, writing into a pre-allocated <c>ArrayBufferWriter</c>
/// that is reset and reused on every call. Apples-to-apples counterpart to AcBinaryBufferWriterBenchmark —
/// IBufferWriter is the path MemoryPack is designed for.
/// </summary>
private sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly MemoryPackSerializerOptions _options;

    // Reference payload produced once in the ctor; input for Deserialize() and source of SerializedSize.
    private readonly byte[] _serialized;

    // The reused output buffer; its allocation is what SetupSerializeAllocBytes measures.
    private readonly ArrayBufferWriter<byte> _bufferWriter;

    public string Engine => Configuration.EngineMemoryPack;

    public string IoMode => Configuration.IoBufWrReuse;

    // MemoryPack always uses [MemoryPackable] source-generated formatters.
    public string DispatchMode => Configuration.ModeSGen;

    public string OptionsPreset { get; }

    public int SerializedSize => _serialized.Length;

    public long SetupSerializeAllocBytes { get; }

    // Deserialize() reads straight from _serialized, so no receive-side infrastructure is allocated.
    public long SetupDeserializeAllocBytes => 0;

    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";

    /// <summary>
    /// Pre-serialises the graph once, then allocates the reusable writer while recording the bytes
    /// allocated on the constructing thread for that allocation alone.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
    {
        _order = order;
        OptionsPreset = optionsPreset;
        _options = GetMemPackOptions();
        _serialized = MemoryPackSerializer.Serialize(order, _options);

        // Serialize-side setup only: the baseline is taken after the helper Serialize call above,
        // so just the writer allocation is counted (same approach as AcBinaryBufferWriterBenchmark).
        GC.Collect();
        GC.WaitForPendingFinalizers();
        GC.Collect();
        var setupBaseline = GC.GetAllocatedBytesForCurrentThread();
        _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - setupBaseline;
    }

    /// <summary>Runs <paramref name="iterations"/> untimed serialize + deserialize pairs.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
            Deserialize();
        }
    }

    /// <summary>Rewinds the shared writer and serialises the graph into it.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        _bufferWriter.ResetWrittenCount();
        MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
    }

    /// <summary>
    /// Deserialises the reference payload through the <c>ReadOnlySequence&lt;byte&gt;</c> overload, to stay
    /// apples-to-apples with AcBinary's BufWr deserialize path. (Per the original author's note,
    /// MemoryPack's ROS overload also fast-paths single-segment input internally.)
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
    }

    /// <summary>Serialises into the shared writer, reads the written bytes back and compares the graphs.</summary>
    /// <returns><c>true</c> when the round-tripped graph is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        _bufferWriter.ResetWrittenCount();
        MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);

        var written = new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory);
        var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(written, _options);
        return DeepEqualsViaJson(_order, roundTripped);
    }
}
|
||
|
||
// System.Text.Json benchmark working on strings: Serialize() produces a JSON string, Deserialize()
// parses the JSON string prepared in the ctor. The reported size is the UTF-8 byte length of that string.
private sealed class SystemTextJsonBenchmark : ISerializerBenchmark
{
    private readonly TestOrder _order;
    private readonly JsonSerializerOptions _options;

    // JSON text produced once in the ctor (input for Deserialize) and its UTF-8 encoding (size only).
    private readonly string _serialized;
    private readonly byte[] _serializedUtf8;

    public string Engine => Configuration.EngineSystemTextJson;

    public string IoMode => Configuration.IoString;

    // System.Text.Json default uses reflection-based metadata (no source generator opt-in here).
    public string DispatchMode => Configuration.ModeRuntime;

    public string OptionsPreset { get; }

    public int SerializedSize => _serializedUtf8.Length;

    // Nothing is pre-built for either direction, so both setup costs are reported as zero.
    public long SetupSerializeAllocBytes => 0;

    public long SetupDeserializeAllocBytes => 0;

    /// <summary>
    /// Configures compact JSON output (no indentation, nulls omitted, reference cycles ignored) and
    /// pre-serialises the graph once.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="optionsPreset">Preset label reported through <see cref="OptionsPreset"/>.</param>
    public SystemTextJsonBenchmark(TestOrder order, string optionsPreset)
    {
        _order = order;
        OptionsPreset = optionsPreset;

        _options = new JsonSerializerOptions
        {
            WriteIndented = false,
            DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
            ReferenceHandler = System.Text.Json.Serialization.ReferenceHandler.IgnoreCycles
        };

        var json = JsonSerializer.Serialize(order, _options);
        _serialized = json;
        _serializedUtf8 = Configuration.Utf8NoBom.GetBytes(json);
    }

    /// <summary>Runs <paramref name="iterations"/> untimed serialize + deserialize pairs.</summary>
    public void Warmup(int iterations)
    {
        for (var left = iterations; left > 0; left--)
        {
            Serialize();
            Deserialize();
        }
    }

    /// <summary>Serialises the graph to a JSON string; the string itself is discarded.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        JsonSerializer.Serialize(_order, _options);
    }

    /// <summary>Parses the JSON string prepared in the ctor; the resulting graph is discarded.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        JsonSerializer.Deserialize<TestOrder>(_serialized, _options);
    }

    /// <summary>Serialises to a fresh JSON string, parses it back and compares the graphs.</summary>
    /// <returns><c>true</c> when the round-tripped graph is deep-equal to the original.</returns>
    public bool VerifyRoundTrip()
    {
        var freshJson = JsonSerializer.Serialize(_order, _options);
        var roundTripped = JsonSerializer.Deserialize<TestOrder>(freshJson, _options);
        return DeepEqualsViaJson(_order, roundTripped);
    }
}
|
||
|
||
#endregion
|
||
|
||
#region Results
|
||
|
||
/// <summary>
/// One measured benchmark row: identifies the serializer variant (engine, IO mode, dispatch mode,
/// options preset) for a test data set and carries its size, timing and allocation figures.
/// </summary>
private sealed class BenchmarkResult
{
    // ── Identity ────────────────────────────────────────────────────────────────────────────
    public string TestDataName { get; set; } = string.Empty;
    public string Engine { get; set; } = string.Empty;
    public string IoMode { get; set; } = string.Empty;
    public string DispatchMode { get; set; } = string.Empty;
    public string OptionsPreset { get; set; } = string.Empty;

    /// <summary>
    /// True when Serialize() captures a complete round-trip and Deserialize() is a no-op
    /// (single-use streaming transports such as NamedPipe). Such rows are left out of the
    /// "Fastest Serialize" / "Fastest Deserialize" rankings but still compete in "Fastest Round-trip".
    /// On display, Ser µs/op / SerAlloc / Des µs/op / DesAlloc all read "N/A" because they were never
    /// measured separately; RT µs/op / RT Alloc hold the full round-trip values.
    /// </summary>
    public bool IsRoundTripOnly { get; set; }

    /// <summary>
    /// Single-string display name kept for backwards compatibility / single-string-row scenarios.
    /// DispatchMode is part of the name so the SGen and Runtime variants of one preset do not
    /// collapse into the same group (e.g. in SUMMARY: WINNERS).
    /// </summary>
    public string SerializerName => $"{Engine} ({IoMode}, {OptionsPreset}, {DispatchMode})";

    public string? OptionsDescription { get; set; }
    public int SerializedSize { get; set; }

    // ── Timing (median across samples) ──────────────────────────────────────────────────────
    public double SerializeTimeMs { get; set; }
    public double DeserializeTimeMs { get; set; }

    // Per-sample minimum / maximum that accompany the medians above, exposing the inter-sample
    // range (the row's visible noise floor). 0 when the operation was skipped
    // (mode != "all"/"ser"/"des") or when a single-sample fast path ran (min == max == median).
    public double SerializeTimeMinMs { get; set; }
    public double SerializeTimeMaxMs { get; set; }
    public double DeserializeTimeMinMs { get; set; }
    public double DeserializeTimeMaxMs { get; set; }

    // Sample-population standard deviation in ms. FormatMicrosWithRange derives CV (stddev/mean)
    // from it and prints the ⚠️ marker for rows above Configuration.UnstableCVThreshold.
    // 0 in single-sample mode.
    public double SerializeTimeStdDevMs { get; set; }
    public double DeserializeTimeStdDevMs { get; set; }

    // ── Iteration counts ────────────────────────────────────────────────────────────────────
    // Adaptive per-row counts (post-CalibrateIterations). Ser and Des calibrate independently so
    // each sample window lands at ~Configuration.TargetSampleMs; per-op µs is then independent of
    // the count (`SerializeTimeMs / SerializeIterations * 1000`). Round-trip-only rows (NamedPipe
    // etc.) store the calibrated count in RoundTripIterations and leave SerializeIterations /
    // DeserializeIterations at 0, because Ser and Des cannot be measured separately there.
    public int SerializeIterations { get; set; }
    public int DeserializeIterations { get; set; }
    public int RoundTripIterations { get; set; }

    // ── Allocations ─────────────────────────────────────────────────────────────────────────
    public long SerializeAllocBytesPerOp { get; set; }
    public long DeserializeAllocBytesPerOp { get; set; }
    public long SetupSerializeAllocBytes { get; set; }
    public long SetupDeserializeAllocBytes { get; set; }

    /// <summary>
    /// Total round-trip time. In-memory benchmarks: a synthesized value chosen so that
    /// <c>RoundTripTimeMs / RoundTripIterations</c> equals <c>SerPerOp + DesPerOp</c> µs/op
    /// (required because Ser and Des may end up with different iteration counts after calibration).
    /// Round-trip-only benchmarks (NamedPipe etc.): the directly measured pipe round-trip time.
    /// </summary>
    public double RoundTripTimeMs { get; set; }

    // Round-trip min / max / stddev are filled in only for round-trip-only benchmarks (NamedPipe
    // etc.), where RT is measured directly. In-memory rows have RT = Ser + Des, which has no
    // single-sample distribution — their Ser and Des ranges are reported separately instead.
    public double RoundTripTimeMinMs { get; set; }
    public double RoundTripTimeMaxMs { get; set; }
    public double RoundTripTimeStdDevMs { get; set; }

    /// <summary>
    /// Round-trip allocation per operation. In-memory benchmarks: <c>SerializeAlloc + DeserializeAlloc</c>.
    /// Round-trip-only benchmarks: process-wide allocation taken from <see cref="GC.GetTotalAllocatedBytes"/>
    /// (includes ALL threads — client, server-drain, channel internals — not only the caller).
    /// </summary>
    public long RoundTripAllocBytesPerOp { get; set; }
}
|
||
|
||
/// <summary>
/// Writes a one-line console summary for a single row: serializer name, serialized size, and the
/// per-op time / allocation for serialize and deserialize. A side whose measured time is not
/// positive is rendered as N/A for both its time and its allocation.
/// </summary>
/// <param name="result">Row to print.</param>
private static void PrintResult(BenchmarkResult result)
{
    // Entries are numbers only; the surrounding labels carry the units (µs/op, KB/op).
    const string notAvailable = " N/A";

    var serTime = notAvailable;
    var serAlloc = notAvailable;
    if (result.SerializeTimeMs > 0)
    {
        serTime = $"{SerPerOp(result),7:F2}";
        serAlloc = $"{ToKilobytes(result.SerializeAllocBytesPerOp),7:F2}";
    }

    var desTime = notAvailable;
    var desAlloc = notAvailable;
    if (result.DeserializeTimeMs > 0)
    {
        desTime = $"{DesPerOp(result),7:F2}";
        desAlloc = $"{ToKilobytes(result.DeserializeAllocBytesPerOp),7:F2}";
    }

    System.Console.WriteLine($" {result.SerializerName,-40} | Size: {result.SerializedSize,8:N0} B | Ser: {serTime} µs/op ({serAlloc} KB/op) | Des: {desTime} µs/op ({desAlloc} KB/op)");
}
|
||
|
||
/// <summary>
/// Prints the full console report in three parts:
/// (1) one ranked table per test data set, ordered by round-trip µs/op, with a footer comparing
/// the AcBinary (Byte[], SGen) row against the MemoryPack (Byte[]) baseline column by column;
/// (2) the "SUMMARY: WINNERS" block (fastest serialize / deserialize / round-trip, smallest size);
/// (3) the overall AcBinary-vs-MemoryPack comparison rendered through <c>WriteOverallLine</c>.
/// </summary>
/// <param name="results">All measured rows across every test data set.</param>
/// <param name="testDataSets">Test data sets; rows are matched to a set via <c>DisplayName</c>.</param>
private static void PrintGroupedResults(List<BenchmarkResult> results, List<TestDataSet> testDataSets)
{
    // Baseline rows: MemoryPack over Byte[].
    static bool IsMemPackBaseline(BenchmarkResult r) =>
        r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray;

    // Headline contender rows: AcBinary over Byte[] pinned to the SGen variant — apples-to-apples
    // vs MemoryPack (also source-generated). The Runtime variant is context only.
    static bool IsAcBinaryHeadline(BenchmarkResult r) =>
        r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen;

    // Writes one footer delta cell: green when AcBinary is equal or better (pct <= 0), red otherwise.
    // The number is right-aligned to `width` and followed by '%', so the cell is width + 1 chars.
    static void WriteDeltaCell(double pct, int width)
    {
        System.Console.ForegroundColor = pct <= 0 ? ConsoleColor.Green : ConsoleColor.Red;
        System.Console.Write($"{pct:+0;-0}".PadLeft(width) + "%");
        System.Console.ResetColor();
    }

    System.Console.WriteLine("\n");
    System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
    System.Console.WriteLine("║ GROUPED RESULTS BY TEST DATA ║");
    System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");

    // Print serializer options
    var optionsMap = results
        .Where(r => r.OptionsDescription != null)
        .Select(r => (r.SerializerName, r.OptionsDescription!))
        .Distinct()
        .ToList();

    if (optionsMap.Count > 0)
    {
        System.Console.WriteLine();
        System.Console.WriteLine(" Serializer Options:");
        foreach (var (name, opts) in optionsMap)
        {
            System.Console.WriteLine($" {name}: {opts}");
        }
    }

    foreach (var testData in testDataSets)
    {
        // Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
        var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
        // Baseline switched MessagePack → MemoryPack: MemoryPack is the SOTA performance leader.
        var memPackResult = testResults.FirstOrDefault(IsMemPackBaseline);
        var acBinaryResult = testResults.FirstOrDefault(IsAcBinaryHeadline);

        // Win/lose verdict is identical for every row of this table, so compute it once.
        // null = one side is missing, highlighted rows then stay uncoloured.
        bool? memPackFaster = memPackResult != null && acBinaryResult != null
            ? RtPerOp(memPackResult) < RtPerOp(acBinaryResult)
            : null;

        System.Console.WriteLine($"\n┌─ {testData.DisplayName} ─".PadRight(172, '─') + "┐");
        // Header-only units; per-row entries are numbers (µs/op for time, KB/op for alloc, KB pair "ser / des" for Setup, B for Size).
        System.Console.WriteLine($"│ {"#",-4} │ {"Engine",-11} │ {"Options",-22} │ {"IO",-12} │ {"Mode",-8} │ {"Setup S/D KB",-14} │ {"Size B",-8} │ {"Ser µs/op",-10} │ {"SerAlc KB",-10} │ {"Des µs/op",-10} │ {"DesAlc KB",-10} │ {"RT µs/op",-10} │ {"RTAlc KB",-10} │");
        System.Console.WriteLine($"├{"─".PadRight(6, '─')}┼{"─".PadRight(13, '─')}┼{"─".PadRight(24, '─')}┼{"─".PadRight(14, '─')}┼{"─".PadRight(10, '─')}┼{"─".PadRight(16, '─')}┼{"─".PadRight(10, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┤");

        var rank = 1;
        foreach (var result in testResults)
        {
            var size = $"{result.SerializedSize:N0}";
            var setup = $"{ToKilobytes(result.SetupSerializeAllocBytes):F2} / {ToKilobytes(result.SetupDeserializeAllocBytes):F2}";
            var ser = result.SerializeTimeMs > 0 ? $"{SerPerOp(result):F2}" : "N/A";
            var des = result.DeserializeTimeMs > 0 ? $"{DesPerOp(result):F2}" : "N/A";
            var rt = result.RoundTripTimeMs > 0 ? $"{RtPerOp(result):F2}" : "N/A";
            var serAlloc = result.SerializeTimeMs > 0 ? $"{ToKilobytes(result.SerializeAllocBytesPerOp):F2}" : "N/A";
            var desAlloc = result.DeserializeTimeMs > 0 ? $"{ToKilobytes(result.DeserializeAllocBytesPerOp):F2}" : "N/A";
            var rtAlloc = result.RoundTripAllocBytesPerOp > 0 ? $"{ToKilobytes(result.RoundTripAllocBytesPerOp):F2}" : "N/A";

            // Highlight MemoryPack baseline (any Byte[]) and AcBinary headline contender (Byte[] + SGen) with win/lose colors.
            // The AcBinary Byte[]+Runtime variant is shown unhighlighted — it's contextual (SGen speed-up reference), not the headline.
            var isMemPack = IsMemPackBaseline(result);
            var isHighlighted = isMemPack || IsAcBinaryHeadline(result);

            var prefix = isHighlighted ? "│►" : "│ ";
            var suffix = isHighlighted ? "◄│" : " │";

            // Color logic: Green = winner (faster), Red = loser (slower)
            if (isHighlighted && memPackFaster is bool baselineWins)
            {
                var rowWins = isMemPack ? baselineWins : !baselineWins;
                System.Console.ForegroundColor = rowWins ? ConsoleColor.Green : ConsoleColor.Red;
            }

            System.Console.WriteLine($"{prefix}{rank++,4} │ {result.Engine,-11} │ {result.OptionsPreset,-22} │ {result.IoMode,-12} │ {result.DispatchMode,-8} │ {setup,14} │ {size,8} │ {ser,10} │ {serAlloc,10} │ {des,10} │ {desAlloc,10} │ {rt,10} │ {rtAlloc,10}{suffix}");

            if (isHighlighted)
            {
                System.Console.ResetColor();
            }
        }

        // Footer row: AcBinary (Byte[]) vs MemoryPack (Byte[]) comparison per column
        if (memPackResult != null && acBinaryResult != null)
        {
            // Every ratio is guarded against a zero baseline; an unguarded division would print
            // ∞/NaN and break the fixed-width footer cells.
            var sizePct = memPackResult.SerializedSize > 0 ? (acBinaryResult.SerializedSize / (double)memPackResult.SerializedSize - 1) * 100 : 0;
            // Per-op µs ratio (iter-independent) — Ser/Des may have different iter counts on the two rows.
            var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0;
            var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0;
            var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0;
            var serAllocPct = memPackResult.SerializeAllocBytesPerOp > 0 ? (acBinaryResult.SerializeAllocBytesPerOp / (double)memPackResult.SerializeAllocBytesPerOp - 1) * 100 : 0;
            var desAllocPct = memPackResult.DeserializeAllocBytesPerOp > 0 ? (acBinaryResult.DeserializeAllocBytesPerOp / (double)memPackResult.DeserializeAllocBytesPerOp - 1) * 100 : 0;
            var rtAllocPct = memPackResult.RoundTripAllocBytesPerOp > 0 ? (acBinaryResult.RoundTripAllocBytesPerOp / (double)memPackResult.RoundTripAllocBytesPerOp - 1) * 100 : 0;

            // Footer separator: merge first 5 cols (#, Engine, Options, IO, Mode) → comparison label;
            // remaining 8 cols stay aligned (Setup S/D KB, Size, Ser µs/op, SerAlc KB, Des µs/op, DesAlc KB, RT µs/op, RTAlc KB).
            System.Console.WriteLine($"├{"─".PadRight(6, '─')}┴{"─".PadRight(13, '─')}┴{"─".PadRight(24, '─')}┴{"─".PadRight(14, '─')}┴{"─".PadRight(10, '─')}┼{"─".PadRight(16, '─')}┼{"─".PadRight(10, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(12, '─')}┤");
            // Merged label cell width = 4 + 11 + 22 + 12 + 8 + 4*3 (dropped separators) = 69
            System.Console.Write($"│ {"► AcBinary (Byte[]) vs MemoryPack (Byte[])",-69} │ ");

            // Setup S/D KB (n/a for Byte[] vs Byte[] — neither pre-allocates)
            System.Console.Write($"{"—",14}");
            System.Console.Write(" │ ");

            // Size (8-wide column: 7 + '%')
            WriteDeltaCell(sizePct, 7);
            System.Console.Write(" │ ");

            // Serialize / Serialize Alloc / Deserialize / Deserialize Alloc / Round-trip (10-wide columns: 9 + '%')
            WriteDeltaCell(serPct, 9);
            System.Console.Write(" │ ");
            WriteDeltaCell(serAllocPct, 9);
            System.Console.Write(" │ ");
            WriteDeltaCell(desPct, 9);
            System.Console.Write(" │ ");
            WriteDeltaCell(desAllocPct, 9);
            System.Console.Write(" │ ");
            WriteDeltaCell(rtPct, 9);
            System.Console.Write(" │ ");

            // Round-trip Alloc closes the row.
            WriteDeltaCell(rtAllocPct, 9);
            System.Console.WriteLine(" │");
        }

        // Closing line: merged on left (─ between cols 1-5), ┴ on the right (cols 6-13 boundary, 8 unmerged cells).
        System.Console.WriteLine($"└{"─".PadRight(6, '─')}─{"─".PadRight(13, '─')}─{"─".PadRight(24, '─')}─{"─".PadRight(14, '─')}─{"─".PadRight(10, '─')}┴{"─".PadRight(16, '─')}┴{"─".PadRight(10, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(12, '─')}┘");
    }

    // Summary: Best serializer for each category
    System.Console.WriteLine("\n");
    System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
    System.Console.WriteLine("║ SUMMARY: WINNERS ║");
    System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");

    System.Console.WriteLine($"\n{"Category",-20} │ {"Winner",-40} │ {"Avg Value",-18}");
    System.Console.WriteLine($"{"─".PadRight(20, '─')}─┼─{"─".PadRight(40, '─')}─┼─{"─".PadRight(18, '─')}");

    // Fastest Serialize — round-trip-only serializers (NamedPipe etc.) excluded:
    // their Serialize() captures the full round-trip and isn't comparable to a pure Ser metric.
    // Average is over per-op µs (iter-independent) instead of batch-time, since rows may now
    // have different iter counts post-calibration.
    var fastestSer = results.Where(r => r.SerializeTimeMs > 0 && !r.IsRoundTripOnly)
        .GroupBy(r => r.SerializerName)
        .Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => SerPerOp(r)) })
        .OrderBy(x => x.AvgPerOp)
        .FirstOrDefault();

    if (fastestSer != null)
    {
        System.Console.WriteLine($"{"Fastest Serialize",-20} │ {fastestSer.Name,-40} │ {fastestSer.AvgPerOp,12:F2} µs/op");
    }

    // Fastest Deserialize — round-trip-only serializers excluded (their Deserialize() is a no-op).
    var fastestDes = results.Where(r => r.DeserializeTimeMs > 0 && !r.IsRoundTripOnly)
        .GroupBy(r => r.SerializerName)
        .Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => DesPerOp(r)) })
        .OrderBy(x => x.AvgPerOp)
        .FirstOrDefault();

    if (fastestDes != null)
    {
        System.Console.WriteLine($"{"Fastest Deserialize",-20} │ {fastestDes.Name,-40} │ {fastestDes.AvgPerOp,12:F2} µs/op");
    }

    // Smallest Size
    var smallestSize = results
        .GroupBy(r => r.SerializerName)
        .Select(g => new { Name = g.Key, AvgSize = g.Average(r => r.SerializedSize) })
        .OrderBy(x => x.AvgSize)
        .FirstOrDefault();

    if (smallestSize != null)
    {
        System.Console.WriteLine($"{"Smallest Size",-20} │ {smallestSize.Name,-40} │ {smallestSize.AvgSize,15:F0} B");
    }

    // Fastest Round-trip — iter-independent per-op average.
    var fastestRt = results.Where(r => r.RoundTripTimeMs > 0)
        .GroupBy(r => r.SerializerName)
        .Select(g => new { Name = g.Key, AvgPerOp = g.Average(r => RtPerOp(r)) })
        .OrderBy(x => x.AvgPerOp)
        .FirstOrDefault();

    if (fastestRt != null)
    {
        System.Console.WriteLine($"{"Fastest Round-trip",-20} │ {fastestRt.Name,-40} │ {fastestRt.AvgPerOp,12:F2} µs/op");
    }

    // Overall AcBinary (SGen) vs MemoryPack comparison (baseline switched MessagePack → MemoryPack as SOTA reference).
    // AcBinary side is restricted to DispatchMode == SGen — apples-to-apples vs MemoryPack which is also source-generated.
    // The Runtime variant is shown side-by-side in each per-test fancy table for SGen-speedup context, but excluded from this headline.
    var memPackSerResults = results.Where(r => IsMemPackBaseline(r) && r.SerializeTimeMs > 0).ToList();
    var memPackDesResults = results.Where(r => IsMemPackBaseline(r) && r.DeserializeTimeMs > 0).ToList();
    var memPackRtResults = results.Where(r => IsMemPackBaseline(r) && r.RoundTripTimeMs > 0).ToList();

    var acBinarySerResults = results.Where(r => IsAcBinaryHeadline(r) && r.SerializeTimeMs > 0).ToList();
    var acBinaryDesResults = results.Where(r => IsAcBinaryHeadline(r) && r.DeserializeTimeMs > 0).ToList();
    var acBinaryRtResults = results.Where(r => IsAcBinaryHeadline(r) && r.RoundTripTimeMs > 0).ToList();

    // Skip comparison if no data available
    if (memPackRtResults.Count == 0 || acBinaryRtResults.Count == 0)
    {
        System.Console.WriteLine();
        System.Console.WriteLine("── AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ──");
        System.Console.WriteLine(" (Comparison requires both serialize and deserialize data)");
        return;
    }

    // All averages are over per-op µs (iter-independent). Batch-time averaging would mix rows
    // measured with different iter counts (post-calibration), producing meaningless numbers.
    // Three aggregations per metric:
    // - Arithmetic mean (current behavior) — magnitude-weighted, biased toward Large cell.
    // - Geometric mean of per-cell ratios — magnitude-neutral, each cell weighted equally.
    // - Median of per-cell ratios — outlier-resistant.
    // The geo/median variants surface when a single cell dominates the arithmetic average
    // (typical when one cell's µs-per-op is an order of magnitude larger than the others).
    var sizeAcResults = results.Where(IsAcBinaryHeadline).ToList();
    var sizeMpResults = results.Where(IsMemPackBaseline).ToList();

    var serStats = ComputeOverallStats(acBinarySerResults, memPackSerResults, SerPerOp);
    var desStats = ComputeOverallStats(acBinaryDesResults, memPackDesResults, DesPerOp);
    var rtStats = ComputeOverallStats(acBinaryRtResults, memPackRtResults, RtPerOp);
    var sizeStats = ComputeOverallStats(sizeAcResults, sizeMpResults, r => r.SerializedSize);
    var serAllocStats = ComputeOverallStats(acBinarySerResults, memPackSerResults, r => r.SerializeAllocBytesPerOp);
    var desAllocStats = ComputeOverallStats(acBinaryDesResults, memPackDesResults, r => r.DeserializeAllocBytesPerOp);

    System.Console.WriteLine();
    System.Console.WriteLine("── AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ──");

    WriteOverallLine("Serialize", "µs/op", serStats);
    WriteOverallLine("Deserialize", "µs/op", desStats);
    WriteOverallLine("Round-trip", "µs/op", rtStats);
    WriteOverallLine("Size", "B", sizeStats, "F0");
    WriteOverallLine("Ser Alloc", "B/op", serAllocStats, "F0");
    WriteOverallLine("Des Alloc", "B/op", desAllocStats, "F0");
}
|
||
|
||
/// <summary>
/// Renders a percent delta with an explicit sign on positive values (`+1.5%`, `-3.0%`, `0.0%`),
/// one decimal place, invariant culture. The numeric part is left-padded to 6 characters and a
/// trailing `%` is appended, giving 7 characters in total (e.g. ` +12.3%`, `-100.0%`) so the
/// Overall block columns line up.
/// </summary>
/// <param name="pct">Percent delta to format.</param>
/// <returns>The padded, signed percentage text.</returns>
private static string FormatPctSigned(double pct)
{
    // Three format sections: positive ; negative ; zero.
    var signedNumber = pct.ToString("+0.0;-0.0;0.0", System.Globalization.CultureInfo.InvariantCulture);
    return signedNumber.PadLeft(6) + "%";
}
|
||
|
||
/// <summary>
/// Writes one Overall row to the console: the arith / geo / median percent deltas followed by the
/// AcBinary and MemoryPack absolute means and the cell count. The whole line is green when the
/// geometric-mean delta is &lt;= 0 and red otherwise. Returns without output when
/// <paramref name="stats"/> is null (no paired data).
/// </summary>
/// <param name="label">Row label, left-aligned in 12 characters.</param>
/// <param name="unit">Unit text printed after each absolute mean.</param>
/// <param name="stats">Aggregated statistics, or null to skip the row.</param>
/// <param name="fmt">Numeric format applied to the two absolute means (invariant culture).</param>
private static void WriteOverallLine(string label, string unit, OverallStats? stats, string fmt = "F2")
{
    if (stats is null)
    {
        return;
    }

    // The geo-mean (magnitude-neutral signal) picks the colour. The arith column can carry the
    // opposite sign when one large cell dominates — exposing that disagreement is intentional.
    var geoNotWorse = stats.GeoMeanPct <= 0;
    System.Console.ForegroundColor = geoNotWorse ? ConsoleColor.Green : ConsoleColor.Red;

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var arith = FormatPctSigned(stats.ArithMeanPct);
    var geo = FormatPctSigned(stats.GeoMeanPct);
    var median = FormatPctSigned(stats.MedianPct);
    var acMean = stats.AcAvg.ToString(fmt, invariant);
    var mpMean = stats.MpAvg.ToString(fmt, invariant);

    System.Console.WriteLine($" {label,-12} arith {arith} │ geo {geo} │ median {median} ({acMean} {unit} vs {mpMean} {unit}, {stats.CellCount} cells)");
    System.Console.ResetColor();
}
|
||
|
||
/// <summary>
/// Colour-free counterpart of <see cref="WriteOverallLine"/>: appends the same Overall row
/// (arith / geo / median deltas, absolute means, cell count) to a <see cref="StringBuilder"/>,
/// using ASCII <c>|</c> column separators. Used by the .log and .LLM file writers.
/// Appends nothing when <paramref name="stats"/> is null.
/// </summary>
/// <param name="sb">Builder receiving the line.</param>
/// <param name="label">Row label, left-aligned in 12 characters.</param>
/// <param name="unit">Unit text printed after each absolute mean.</param>
/// <param name="stats">Aggregated statistics, or null to skip the row.</param>
/// <param name="fmt">Numeric format applied to the two absolute means (invariant culture).</param>
private static void AppendOverallLine(StringBuilder sb, string label, string unit, OverallStats? stats, string fmt = "F2")
{
    if (stats is null)
    {
        return;
    }

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var arith = FormatPctSigned(stats.ArithMeanPct);
    var geo = FormatPctSigned(stats.GeoMeanPct);
    var median = FormatPctSigned(stats.MedianPct);
    var acMean = stats.AcAvg.ToString(fmt, invariant);
    var mpMean = stats.MpAvg.ToString(fmt, invariant);

    sb.AppendLine($" {label,-12} arith {arith} | geo {geo} | median {median} ({acMean} {unit} vs {mpMean} {unit}, {stats.CellCount} cells)");
}
|
||
|
||
private static void SaveResults(List<BenchmarkResult> results, List<TestDataSet> testDataSets)
|
||
{
|
||
Directory.CreateDirectory(Configuration.ResultsDirectory);
|
||
|
||
var timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss");
|
||
var baseFileName = $"Console.FullBenchmark_{Configuration.BuildConfiguration}_{timestamp}";
|
||
var logFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.log");
|
||
var outputFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.output");
|
||
|
||
// Save binary output to separate .output file
|
||
var largeTestData = testDataSets.FirstOrDefault(t => t.Name.StartsWith("Large"));
|
||
if (largeTestData != null)
|
||
{
|
||
var outputSb = new StringBuilder();
|
||
outputSb.AppendLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
|
||
outputSb.AppendLine("║ SERIALIZED BINARY OUTPUT ║");
|
||
outputSb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║");
|
||
outputSb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
|
||
outputSb.AppendLine();
|
||
|
||
outputSb.AppendLine("=== SERIALIZED BYTES: Large (5x5x5x10) - AcBinary (Default) ===");
|
||
var serializedBytes = AcBinarySerializer.Serialize(largeTestData.Order, AcBinarySerializerOptions.Default);
|
||
outputSb.AppendLine($"Size: {serializedBytes.Length:N0} bytes");
|
||
outputSb.AppendLine();
|
||
outputSb.AppendLine("Hex dump:");
|
||
outputSb.AppendLine(FormatHexDump(serializedBytes));
|
||
|
||
File.WriteAllText(outputFilePath, outputSb.ToString(), Configuration.Utf8NoBom);
|
||
System.Console.WriteLine($"✓ Binary output saved to: {outputFilePath}");
|
||
}
|
||
|
||
// Save benchmark results to .log file
|
||
var sb = new StringBuilder();
|
||
sb.AppendLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗");
|
||
sb.AppendLine("║ SERIALIZER BENCHMARK RESULTS ║");
|
||
sb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║");
|
||
sb.AppendLine($"║ Build: {Configuration.BuildConfiguration}".PadRight(100) + "║");
|
||
sb.AppendLine($"║ Charset: {GetCurrentCharsetName()}".PadRight(100) + "║");
|
||
sb.AppendLine($"║ Iterations: per-cell adaptive (~{Configuration.TargetSampleMs} ms target)".PadRight(100) + "║");
|
||
sb.AppendLine($"║ Samples: {Configuration.BenchmarkSamples} (median) + 1 pilot discarded".PadRight(100) + "║");
|
||
sb.AppendLine($"║ Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"}".PadRight(100) + "║");
|
||
sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
|
||
sb.AppendLine();
|
||
|
||
// Serializer options summary
|
||
var optionsMap = results
|
||
.Where(r => r.OptionsDescription != null)
|
||
.Select(r => (r.SerializerName, r.OptionsDescription!))
|
||
.Distinct()
|
||
.ToList();
|
||
if (optionsMap.Count > 0)
|
||
{
|
||
sb.AppendLine("=== SERIALIZER OPTIONS ===");
|
||
foreach (var (name, opts) in optionsMap)
|
||
sb.AppendLine($" {name}: {opts}");
|
||
sb.AppendLine();
|
||
}
|
||
|
||
// CSV-like data for easy import — keeps raw byte integers (no KB rounding) so external tools can compute precisely.
|
||
sb.AppendLine("=== RAW DATA (CSV) ===");
|
||
sb.AppendLine("TestData,Engine,IO,Mode,Options,Size,SerializeMicrosPerOp,DeserializeMicrosPerOp,RoundTripMicrosPerOp,SerializeAllocBytesPerOp,DeserializeAllocBytesPerOp,RoundTripAllocBytesPerOp,SetupSerializeAllocBytes,SetupDeserializeAllocBytes");
|
||
|
||
foreach (var testData in testDataSets)
|
||
{
|
||
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).ToList();
|
||
foreach (var result in testResults)
|
||
{
|
||
sb.AppendLine($"{result.TestDataName},{result.Engine},{result.IoMode},{result.DispatchMode},{result.OptionsPreset},{result.SerializedSize},{SerPerOp(result):F2},{DesPerOp(result):F2},{RtPerOp(result):F2},{result.SerializeAllocBytesPerOp},{result.DeserializeAllocBytesPerOp},{result.RoundTripAllocBytesPerOp},{result.SetupSerializeAllocBytes},{result.SetupDeserializeAllocBytes}");
|
||
}
|
||
}
|
||
sb.AppendLine();
|
||
|
||
// Formatted results
|
||
sb.AppendLine("=== FORMATTED RESULTS BY TEST DATA ===");
|
||
sb.AppendLine("(►) = Highlighted: MemoryPack (Byte[]) (baseline) and AcBinary (Byte[])");
|
||
sb.AppendLine();
|
||
|
||
foreach (var testData in testDataSets)
|
||
{
|
||
// Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
|
||
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
|
||
var memPackResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray));
|
||
// Pin the comparison to AcBinary's SGen variant — apples-to-apples vs MemoryPack (also source-generated).
|
||
// The Runtime variant is shown alongside in the table for context, not used as the headline number.
|
||
var acBinaryResult = testResults.FirstOrDefault(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen));
|
||
|
||
sb.AppendLine();
|
||
sb.AppendLine($"--- {testData.DisplayName} ---");
|
||
sb.AppendLine($"{"#",-4} {"Serializer",-42} {"Size B",-12} {"Setup S/D KB",-14} {"Ser µs/op",-12} {"Des µs/op",-12} {"RT µs/op",-12} {"SerAlc KB",-11} {"DesAlc KB",-11}");
|
||
sb.AppendLine(new string('-', 140));
|
||
|
||
var rank = 1;
|
||
foreach (var result in testResults)
|
||
{
|
||
var isHighlighted = ((result.Engine == Configuration.EngineMemoryPack || result.Engine == Configuration.EngineAcBinary) && result.IoMode == Configuration.IoByteArray);
|
||
var prefix = isHighlighted ? "► " : " ";
|
||
|
||
var size = $"{result.SerializedSize:N0}";
|
||
var setup = $"{ToKilobytes(result.SetupSerializeAllocBytes):F2} / {ToKilobytes(result.SetupDeserializeAllocBytes):F2}";
|
||
var ser = result.SerializeTimeMs > 0 ? $"{SerPerOp(result):F2}" : "N/A";
|
||
var des = result.DeserializeTimeMs > 0 ? $"{DesPerOp(result):F2}" : "N/A";
|
||
var rt = result.RoundTripTimeMs > 0 ? $"{RtPerOp(result):F2}" : "N/A";
|
||
var serAlloc = result.SerializeTimeMs > 0 ? $"{ToKilobytes(result.SerializeAllocBytesPerOp):F2}" : "N/A";
|
||
var desAlloc = result.DeserializeTimeMs > 0 ? $"{ToKilobytes(result.DeserializeAllocBytesPerOp):F2}" : "N/A";
|
||
|
||
sb.AppendLine($"{rank++,2} {prefix}{result.SerializerName,-40} {size,-12} {setup,-14} {ser,-12} {des,-12} {rt,-12} {serAlloc,-11} {desAlloc,-11}");
|
||
}
|
||
|
||
// Summary row for this test data (vs MemoryPack — baseline switched MessagePack → MemoryPack)
|
||
if (memPackResult != null && acBinaryResult != null)
|
||
{
|
||
var sizePct = (acBinaryResult.SerializedSize / (double)memPackResult.SerializedSize - 1) * 100;
|
||
// Per-op µs ratio (iter-independent) — Ser/Des may have different iter counts on the two rows.
|
||
var serPct = SerPerOp(memPackResult) > 0 ? (SerPerOp(acBinaryResult) / SerPerOp(memPackResult) - 1) * 100 : 0;
|
||
var desPct = DesPerOp(memPackResult) > 0 ? (DesPerOp(acBinaryResult) / DesPerOp(memPackResult) - 1) * 100 : 0;
|
||
var rtPct = RtPerOp(memPackResult) > 0 ? (RtPerOp(acBinaryResult) / RtPerOp(memPackResult) - 1) * 100 : 0;
|
||
|
||
sb.AppendLine($" AcBinary (Byte[]) vs MemoryPack (Byte[]): Size {sizePct:+0;-0}% │ Ser {serPct:+0;-0}% │ Des {desPct:+0;-0}% │ RT {rtPct:+0;-0}%");
|
||
}
|
||
|
||
//sb.AppendLine($"GrowBufferCount: {AcBinarySerializer.GrowBufferCount}");
|
||
//sb.AppendLine($"GrowBufferTotalBytes: {AcBinarySerializer.GrowBufferTotalBytes:N0} bytes");
|
||
}
|
||
|
||
|
||
// Summary comparison (vs MemoryPack)
|
||
// Restrict AcBinary side to SGen — the SGen vs Runtime variants are shown side-by-side
|
||
// in the per-test fancy table; the headline should compare apples-to-apples (both source-generated).
|
||
sb.AppendLine();
|
||
sb.AppendLine("=== AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) (Overall) ===");
|
||
|
||
var memPackSerResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.SerializeTimeMs > 0).ToList();
|
||
var memPackDesResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.DeserializeTimeMs > 0).ToList();
|
||
var memPackRtResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray) && r.RoundTripTimeMs > 0).ToList();
|
||
|
||
var acBinarySerResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.SerializeTimeMs > 0).ToList();
|
||
var acBinaryDesResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.DeserializeTimeMs > 0).ToList();
|
||
var acBinaryRtResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen) && r.RoundTripTimeMs > 0).ToList();
|
||
|
||
// Skip comparison block if either side has no Byte[] data — happens in AsyncPipe-only mode
|
||
// where only NamedPipe rows exist (no MemoryPack baseline, no AcBinary Byte[] reference).
|
||
// Mirrors the same early-return guard in PrintGroupedResults.
|
||
if (memPackRtResults2.Count == 0 || acBinaryRtResults2.Count == 0)
|
||
{
|
||
sb.AppendLine(" (Comparison requires both serialize and deserialize data)");
|
||
File.WriteAllText(logFilePath, sb.ToString(), Configuration.Utf8NoBom);
|
||
System.Console.WriteLine($"✓ Results saved to: {logFilePath}");
|
||
|
||
var llmFilePathEarly = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.LLM");
|
||
SaveLlmResults(llmFilePathEarly, results, testDataSets);
|
||
return;
|
||
}
|
||
|
||
// Per-cell-paired aggregation: arithmetic / geometric / median. See PrintSummary's parallel
|
||
// block + the OverallStats record for the rationale (per-cell ratio vs magnitude-weighted mean).
|
||
var sizeAcResults2 = results.Where(r => (r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen)).ToList();
|
||
var sizeMpResults2 = results.Where(r => (r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray)).ToList();
|
||
|
||
AppendOverallLine(sb, "Serialize", "µs/op", ComputeOverallStats(acBinarySerResults2, memPackSerResults2, SerPerOp));
|
||
AppendOverallLine(sb, "Ser Alloc", "B/op", ComputeOverallStats(acBinarySerResults2, memPackSerResults2, r => r.SerializeAllocBytesPerOp), "F0");
|
||
AppendOverallLine(sb, "Deserialize", "µs/op", ComputeOverallStats(acBinaryDesResults2, memPackDesResults2, DesPerOp));
|
||
AppendOverallLine(sb, "Des Alloc", "B/op", ComputeOverallStats(acBinaryDesResults2, memPackDesResults2, r => r.DeserializeAllocBytesPerOp), "F0");
|
||
AppendOverallLine(sb, "Round-trip", "µs/op", ComputeOverallStats(acBinaryRtResults2, memPackRtResults2, RtPerOp));
|
||
AppendOverallLine(sb, "Size", "B", ComputeOverallStats(sizeAcResults2, sizeMpResults2, r => r.SerializedSize), "F0");
|
||
|
||
File.WriteAllText(logFilePath, sb.ToString(), Configuration.Utf8NoBom);
|
||
System.Console.WriteLine($"✓ Results saved to: {logFilePath}");
|
||
|
||
// Save LLM-optimized results
|
||
var llmFilePath = Path.Combine(Configuration.ResultsDirectory, $"{baseFileName}.LLM");
|
||
SaveLlmResults(llmFilePath, results, testDataSets);
|
||
}
|
||
|
||
/// <summary>
/// Writes the LLM-oriented Markdown report: a header block, an optional "## Options" list,
/// a flat "## Results" table (one row per benchmark result, grouped by test data set and ordered
/// by per-op round-trip time) and — when both sides have round-trip data — an overall
/// AcBinary (Byte[], SGen) vs MemoryPack (Byte[]) comparison.
/// </summary>
/// <param name="filePath">Destination file; written (and overwritten) via <c>File.WriteAllText</c>.</param>
/// <param name="results">All benchmark rows to report.</param>
/// <param name="testDataSets">
/// Test data sets; their order drives the grouping of the results table and the first entry's
/// <c>TypeName</c> is reported in the header ("unknown" when the list is empty).
/// </param>
private static void SaveLlmResults(string filePath, List<BenchmarkResult> results, List<TestDataSet> testDataSets)
{
    // The whole report is rendered with the invariant culture so the file is byte-stable across
    // machines (decimal separator, calendar, time separator) — the header included.
    var inv = System.Globalization.CultureInfo.InvariantCulture;

    var sb = new StringBuilder();
    var testTypeName = testDataSets.FirstOrDefault()?.TypeName ?? "unknown";

    sb.AppendLine(FormattableString.Invariant($"# AcBinary Benchmark {Configuration.BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}"));
    sb.AppendLine(FormattableString.Invariant($"Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (target ~{Configuration.TargetSampleMs} ms/sample) | Warmup: {Configuration.WarmupIterations} per phase (Ser/Des isolated) | Samples: {Configuration.BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | TestType: {testTypeName} | UnstableCV threshold: {Configuration.UnstableCVThreshold * 100:F0}%"));
    sb.AppendLine("Baseline: MemoryPack (Byte[]) (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup");

    // Options summary: one bullet per distinct (serializer name, options description) pair.
    var optionsMap = results
        .Where(r => r.OptionsDescription != null)
        .Select(r => (r.SerializerName, r.OptionsDescription!))
        .Distinct()
        .ToList();

    if (optionsMap.Count > 0)
    {
        sb.AppendLine();
        sb.AppendLine("## Options");
        sb.AppendLine();

        foreach (var (name, opts) in optionsMap)
        {
            sb.AppendLine($"- **{name}**: {opts}");
        }
    }

    // Flat results table sorted by test data then round-trip (includes Alloc + Iter columns).
    // The Iter column shows per-row Ser/Des iteration counts (post-adaptive-calibration), so the
    // reader can verify that each cell's batch sample landed near the Configuration.TargetSampleMs window.
    sb.AppendLine();
    sb.AppendLine("## Results");
    sb.AppendLine();
    sb.AppendLine("TestData | Engine | IO | Mode | Options | Size(B) | Ser(µs/op) | Deser(µs/op) | RT(µs/op) | SerAlloc(KB/op) | DesAlloc(KB/op) | RTAlloc(KB/op) | Setup S/D(KB) | Iter Ser/Des");
    sb.AppendLine("---|---|---|---|---|---|---|---|---|---|---|---|---|---");

    foreach (var testData in testDataSets)
    {
        // Per-op µs (iter-independent) ordering — mixing iter counts within a cell is expected.
        var testResults = results
            .Where(r => r.TestDataName == testData.DisplayName)
            .OrderBy(RtPerOp)
            .ToList();

        foreach (var r in testResults)
        {
            // Timing cells: per the original notes, FormatMicrosWithRange renders the per-cell median
            // plus the inter-sample range (min..max) and an instability marker when the coefficient
            // of variation exceeds the unstable threshold, e.g. "26.86 (24.50..29.10) ⚠️5.2%".
            // (Its implementation lives outside this block.) A phase that was not measured
            // (time <= 0) is rendered as "-".
            var ser = r.SerializeTimeMs > 0
                ? FormatMicrosWithRange(r.SerializeTimeMs, r.SerializeTimeMinMs, r.SerializeTimeMaxMs, r.SerializeTimeStdDevMs, r.SerializeIterations, inv)
                : "-";
            var des = r.DeserializeTimeMs > 0
                ? FormatMicrosWithRange(r.DeserializeTimeMs, r.DeserializeTimeMinMs, r.DeserializeTimeMaxMs, r.DeserializeTimeStdDevMs, r.DeserializeIterations, inv)
                : "-";

            // Round-trip: RT-only rows carry their own sample statistics; for all other rows only
            // the bare per-op value from RtPerOp is printed.
            string rt;
            if (r.RoundTripTimeMs > 0)
            {
                rt = r.IsRoundTripOnly
                    ? FormatMicrosWithRange(r.RoundTripTimeMs, r.RoundTripTimeMinMs, r.RoundTripTimeMaxMs, r.RoundTripTimeStdDevMs, r.RoundTripIterations, inv)
                    : RtPerOp(r).ToString("F2", inv);
            }
            else
            {
                rt = "-";
            }

            var serAlloc = r.SerializeTimeMs > 0 ? ToKilobytes(r.SerializeAllocBytesPerOp).ToString("F2", inv) : "-";
            var desAlloc = r.DeserializeTimeMs > 0 ? ToKilobytes(r.DeserializeAllocBytesPerOp).ToString("F2", inv) : "-";
            var rtAlloc = r.RoundTripAllocBytesPerOp > 0 ? ToKilobytes(r.RoundTripAllocBytesPerOp).ToString("F2", inv) : "-";

            var setupSerKb = ToKilobytes(r.SetupSerializeAllocBytes).ToString("F2", inv);
            var setupDesKb = ToKilobytes(r.SetupDeserializeAllocBytes).ToString("F2", inv);
            var setupAlloc = $"{setupSerKb} / {setupDesKb}";

            // Iter Ser/Des column — per-row adaptive iter counts. RT-only rows show the RT iteration count.
            string iterCol;
            if (r.IsRoundTripOnly)
            {
                iterCol = r.RoundTripIterations.ToString(inv);
            }
            else
            {
                var serIter = r.SerializeIterations > 0 ? r.SerializeIterations.ToString(inv) : "-";
                var desIter = r.DeserializeIterations > 0 ? r.DeserializeIterations.ToString(inv) : "-";
                iterCol = $"{serIter} / {desIter}";
            }

            sb.AppendLine(FormattableString.Invariant($"{r.TestDataName} | {r.Engine} | {r.IoMode} | {r.DispatchMode} | {r.OptionsPreset} | {r.SerializedSize} | {ser} | {des} | {rt} | {serAlloc} | {desAlloc} | {rtAlloc} | {setupAlloc} | {iterCol}"));
        }
    }

    // Overall AcBinary (SGen, Byte[]) vs MemoryPack (Byte[]) comparison — the same three aggregations
    // as the .log / console output (arithmetic / geometric / median of per-cell ratios). The arith
    // mean is magnitude-weighted (Large cell dominates); geo/median are per-cell-equal signals, which
    // lets a reader tell a real overall win/loss from a single-cell artifact.
    var memPackByteArrayResults = results
        .Where(r => r.Engine == Configuration.EngineMemoryPack && r.IoMode == Configuration.IoByteArray)
        .ToList();
    var acBinarySGenByteArrayResults = results
        .Where(r => r.Engine == Configuration.EngineAcBinary && r.IoMode == Configuration.IoByteArray && r.DispatchMode == Configuration.ModeSGen)
        .ToList();

    // Each metric only considers rows where the corresponding phase was actually measured.
    var memPackSerResultsLlm = memPackByteArrayResults.Where(r => r.SerializeTimeMs > 0).ToList();
    var memPackDesResultsLlm = memPackByteArrayResults.Where(r => r.DeserializeTimeMs > 0).ToList();
    var memPackRtResultsLlm = memPackByteArrayResults.Where(r => r.RoundTripTimeMs > 0).ToList();
    var acBinarySerResultsLlm = acBinarySGenByteArrayResults.Where(r => r.SerializeTimeMs > 0).ToList();
    var acBinaryDesResultsLlm = acBinarySGenByteArrayResults.Where(r => r.DeserializeTimeMs > 0).ToList();
    var acBinaryRtResultsLlm = acBinarySGenByteArrayResults.Where(r => r.RoundTripTimeMs > 0).ToList();

    // The comparison is emitted only when both engines contributed round-trip rows.
    if (memPackRtResultsLlm.Count > 0 && acBinaryRtResultsLlm.Count > 0)
    {
        sb.AppendLine();
        sb.AppendLine("## Overall: AcBinary (Byte[], SGen) vs MemoryPack (Byte[])");
        sb.AppendLine();
        sb.AppendLine("Three aggregations of per-cell results: **arith** = arithmetic mean of µs/op (magnitude-weighted, Large cell dominates); **geo** = geometric mean of per-cell ratios (each cell weighted equally); **median** = median of per-cell ratios (outlier-resistant). Negative % = AcBinary faster/smaller; positive % = MemPack faster/smaller. The geo/median variants surface when a single big cell skews the arithmetic mean.");
        sb.AppendLine();
        sb.AppendLine("```");
        AppendOverallLine(sb, "Serialize", "µs/op", ComputeOverallStats(acBinarySerResultsLlm, memPackSerResultsLlm, SerPerOp));
        AppendOverallLine(sb, "Ser Alloc", "B/op", ComputeOverallStats(acBinarySerResultsLlm, memPackSerResultsLlm, r => r.SerializeAllocBytesPerOp), "F0");
        AppendOverallLine(sb, "Deserialize", "µs/op", ComputeOverallStats(acBinaryDesResultsLlm, memPackDesResultsLlm, DesPerOp));
        AppendOverallLine(sb, "Des Alloc", "B/op", ComputeOverallStats(acBinaryDesResultsLlm, memPackDesResultsLlm, r => r.DeserializeAllocBytesPerOp), "F0");
        AppendOverallLine(sb, "Round-trip", "µs/op", ComputeOverallStats(acBinaryRtResultsLlm, memPackRtResultsLlm, RtPerOp));
        AppendOverallLine(sb, "Size", "B", ComputeOverallStats(acBinarySGenByteArrayResults, memPackByteArrayResults, r => r.SerializedSize), "F0");
        sb.AppendLine("```");
    }

    File.WriteAllText(filePath, sb.ToString(), Configuration.Utf8NoBom);
    System.Console.WriteLine($"✓ LLM results saved to: {filePath}");
}
|
||
|
||
/// <summary>
/// Formats a byte array as a hex dump. Each output row covers <paramref name="bytesPerLine"/> bytes and
/// contains the 8-digit hexadecimal offset, the two-digit hex value of every byte (short final rows are
/// space-padded so the columns stay aligned) and the ASCII rendering between <c>|</c> markers, where
/// bytes outside the printable range 32..126 are shown as <c>.</c>.
/// </summary>
/// <param name="bytes">Bytes to dump. An empty array yields an empty string.</param>
/// <param name="bytesPerLine">Number of bytes rendered per row; must be positive. Defaults to 16.</param>
/// <returns>The dump text, one row per line, each terminated by the environment's newline.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bytesPerLine"/> is zero or negative.</exception>
private static string FormatHexDump(byte[] bytes, int bytesPerLine = 16)
{
    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    // A non-positive width would never advance the row offset below, i.e. loop forever.
    if (bytesPerLine <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(bytesPerLine), bytesPerLine, "Bytes per line must be positive.");
    }

    var sb = new StringBuilder();

    for (var rowStart = 0; rowStart < bytes.Length; rowStart += bytesPerLine)
    {
        // Offset column
        sb.Append($"{rowStart:X8}  ");

        // Hex column: always emits bytesPerLine cells so the ASCII column lines up on a short last row.
        for (var col = 0; col < bytesPerLine; col++)
        {
            var index = rowStart + col;
            if (index < bytes.Length)
            {
                sb.Append($"{bytes[index]:X2} ");
            }
            else
            {
                sb.Append("   ");
            }

            // Extra gap after the 8th byte — the visual midpoint for the default width of 16.
            if (col == 7)
            {
                sb.Append(' ');
            }
        }

        sb.Append(" |");

        // ASCII column: only the bytes that actually exist on this row.
        for (var col = 0; col < bytesPerLine && rowStart + col < bytes.Length; col++)
        {
            var value = bytes[rowStart + col];
            sb.Append(value is >= 32 and < 127 ? (char)value : '.');
        }

        sb.AppendLine("|");
    }

    return sb.ToString();
}
|
||
|
||
#endregion
|
||
}
|