Stabilize BDN runs; improve benchmark output ordering

Added WithProcessStabilization to pin CPU affinity and raise process priority for all BDN entry points, matching Console runner stabilization. Benchmark results are now ordered by Engine then RtPerOp for stable, diff-friendly output. Report headers clarify when BDN manages run parameters. Enhanced comments for clarity; no changes to benchmark logic.
This commit is contained in:
Loretta 2026-05-15 23:05:06 +02:00
parent c611d4b535
commit f68b797a9f
2 changed files with 92 additions and 13 deletions

View File

@ -106,14 +106,18 @@ namespace AyCode.Benchmark
// Byte[] vs MemoryPack Default Byte[] across 5 TestData cells. BdnSummaryAdapter translates
// the BDN Summary into BenchmarkResult rows and emits the Bdn.FullBenchmark_*.{log,LLM,output}
// triplet to <results>/Benchmark/ (BDN-native artifacts go under .../BDN/ via the global config).
var serializerSummary = BenchmarkRunner.Run<AcBinaryVsMemPackBenchmark>(config);
BdnSummaryAdapter.WriteResults(serializerSummary);
WithProcessStabilization(() =>
{
var serializerSummary = BenchmarkRunner.Run<AcBinaryVsMemPackBenchmark>(config);
BdnSummaryAdapter.WriteResults(serializerSummary);
});
return;
}
if (args.Length > 0 && args[0] == "--jitasm")
{
RunBenchmark<JitDisassemblyBenchmark>(config, benchmarkDir, memDiagDir, "JitDisassemblyBenchmark");
WithProcessStabilization(() =>
RunBenchmark<JitDisassemblyBenchmark>(config, benchmarkDir, memDiagDir, "JitDisassemblyBenchmark"));
return;
}
@ -129,8 +133,67 @@ namespace AyCode.Benchmark
// args → BDN parses them as benchmark filters / job options). Same code path either way — the
// known custom switches above (--serializers, --jitasm, --quick, --test, --testmsgpack,
// --save-coverage) return early before reaching this point.
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);
CollectBenchmarkArtifacts(benchmarkDir, memDiagDir, "SwitcherRun");
WithProcessStabilization(() =>
{
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);
CollectBenchmarkArtifacts(benchmarkDir, memDiagDir, "SwitcherRun");
});
}
/// <summary>
/// Runs <paramref name="action"/> with the current process pinned to CPU 0 and its priority class
/// raised to <see cref="ProcessPriorityClass.High"/>, then restores whatever was changed in
/// <c>finally</c> (also when <paramref name="action"/> throws).
/// <para>Intended to mirror the stabilization block of the Console runner
/// (<c>AyCode.Core.Serializers.Console.BenchmarkLoop.RunBenchmark</c>) so BDN-side measurements get
/// comparable OS-scheduler insulation.</para>
/// <para><b>Independent steps:</b> affinity and priority are applied, tracked and restored
/// separately. A host that allows pinning but refuses the priority raise (e.g. Linux without
/// CAP_SYS_NICE) therefore still gets its original affinity back afterwards; only the step that
/// actually succeeded is undone.</para>
/// <para><b>Worker process inheritance:</b> BDN spawns a per-job child process to host the
/// workload. The affinity mask is inherited by child processes on Windows and is preserved across
/// fork/execve on Linux, so pinning the orchestrator process here is expected to pin the
/// measurement loop too, not just the BDN driver.</para>
/// <para><b>Other platforms:</b> on anything other than Windows or Linux (e.g. macOS, where
/// <see cref="Process.ProcessorAffinity"/> is unsupported) neither affinity nor priority is
/// touched and <paramref name="action"/> simply runs as-is.</para>
/// </summary>
/// <param name="action">The benchmark entry point to execute while stabilization is in effect.</param>
static void WithProcessStabilization(Action action)
{
    // Process wraps an OS handle; dispose it once the original state has been restored.
    using var process = Process.GetCurrentProcess();
    var origAffinity = IntPtr.Zero;
    var origPriority = ProcessPriorityClass.Normal;
    var affinityPinned = false;
    var priorityRaised = false;

    if (OperatingSystem.IsWindows() || OperatingSystem.IsLinux())
    {
        try
        {
            origAffinity = process.ProcessorAffinity;
            // Pin to CPU 0 (mask = 1). The choice is arbitrary: what matters is "exactly one
            // core, consistently", not which one.
            process.ProcessorAffinity = (IntPtr)1;
            affinityPinned = true;
        }
        catch (Exception ex)
        {
            // Affinity changes may fail on locked-down hosts (group policies, restricted
            // containers). Surface and continue: BDN still works, just without pinning.
            Console.WriteLine($"Stabilization SKIPPED (affinity): {ex.GetType().Name}: {ex.Message}");
        }

        try
        {
            origPriority = process.PriorityClass;
            process.PriorityClass = ProcessPriorityClass.High;
            priorityRaised = true;
        }
        catch (Exception ex)
        {
            // Raising priority commonly needs elevated rights (e.g. CAP_SYS_NICE on Linux).
            // Handled separately so a failure here cannot orphan an already-applied affinity pin.
            Console.WriteLine($"Stabilization SKIPPED (priority): {ex.GetType().Name}: {ex.Message}");
        }

        if (affinityPinned && priorityRaised)
        {
            Console.WriteLine("Stabilization: pinned to CPU 0 (affinity=0x1), priority=High (BDN workers inherit affinity).");
        }
        else if (affinityPinned)
        {
            Console.WriteLine("Stabilization (partial): pinned to CPU 0 (affinity=0x1), priority unchanged (BDN workers inherit affinity).");
        }
        else if (priorityRaised)
        {
            Console.WriteLine("Stabilization (partial): priority=High, affinity unchanged.");
        }
    }

    try
    {
        action();
    }
    finally
    {
        // Platform guard repeated so the platform-compatibility analyzer accepts the affinity setter.
        if (OperatingSystem.IsWindows() || OperatingSystem.IsLinux())
        {
            if (affinityPinned)
            {
                try
                {
                    process.ProcessorAffinity = origAffinity;
                }
                catch (Exception ex)
                {
                    // Best-effort: report but never mask an exception thrown by the action.
                    Console.WriteLine($"Stabilization restore FAILED (affinity): {ex.GetType().Name}: {ex.Message}");
                }
            }

            if (priorityRaised)
            {
                try
                {
                    process.PriorityClass = origPriority;
                }
                catch (Exception ex)
                {
                    // Best-effort: report but never mask an exception thrown by the action.
                    Console.WriteLine($"Stabilization restore FAILED (priority): {ex.GetType().Name}: {ex.Message}");
                }
            }
        }
    }
}
/// <summary>

View File

@ -194,8 +194,11 @@ public static class BenchmarkReportWriter
foreach (var testData in testDataSets)
{
// Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
// Order by Engine (so the same engine column-position stays stable across cells, especially
// when two engines are within noise floor on a given cell — flip-flopping speed-rank produces
// diff-hostile output across runs). RtPerOp is the secondary tiebreaker for cells where
// multiple variants of the same engine exist (e.g. AcBinary SGen vs Runtime).
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.Engine).ThenBy(r => RtPerOp(r)).ToList();
// Baseline switched MessagePack → MemoryPack: MemoryPack is the SOTA performance leader.
var memPackResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.MemoryPack && r.IoMode == BenchmarkIoMode.ByteArray));
// Pin the comparison to AcBinary's SGen variant — apples-to-apples vs MemoryPack (also source-generated).
@ -459,8 +462,14 @@ public static class BenchmarkReportWriter
sb.AppendLine($"║ Source: {ctx.SourceTag}".PadRight(100) + "║");
sb.AppendLine($"║ Build: {ctx.BuildConfiguration}".PadRight(100) + "║");
sb.AppendLine($"║ Charset: {ctx.CharsetName}".PadRight(100) + "║");
sb.AppendLine($"║ Iterations: per-cell adaptive (~{ctx.TargetSampleMs} ms target)".PadRight(100) + "║");
sb.AppendLine($"║ Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded".PadRight(100) + "║");
// For BDN-sourced contexts, warmup / samples / target are managed inside BDN's job config (not by
// our adaptive engine) — surfacing the placeholder zeros as concrete numbers would be misleading.
// Print "BDN-managed" instead; raw BDN config is recoverable from the BDN-native artifacts under .../BDN/.
var isBdn = ctx.SourceTag == "Bdn";
var iterationsHeader = isBdn ? "Iterations: BDN-managed" : $"Iterations: per-cell adaptive (~{ctx.TargetSampleMs} ms target)";
var samplesHeader = isBdn ? "Samples: BDN-managed" : $"Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded";
sb.AppendLine($"║ {iterationsHeader}".PadRight(100) + "║");
sb.AppendLine($"║ {samplesHeader}".PadRight(100) + "║");
sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝");
sb.AppendLine();
@ -507,8 +516,9 @@ public static class BenchmarkReportWriter
foreach (var testData in testDataSets)
{
// Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration.
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList();
// Order by Engine (stable column-position across cells, see PrintGroupedResults for rationale);
// RtPerOp is the secondary tiebreaker between same-engine variants (SGen vs Runtime).
var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.Engine).ThenBy(r => RtPerOp(r)).ToList();
var memPackResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.MemoryPack && r.IoMode == BenchmarkIoMode.ByteArray));
var acBinaryResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.AcBinary && r.IoMode == BenchmarkIoMode.ByteArray && r.DispatchMode == BenchmarkDispatchMode.SGen));
@ -590,7 +600,12 @@ public static class BenchmarkReportWriter
{
var sb = new StringBuilder();
sb.AppendLine($"# AcBinary Benchmark [{ctx.SourceTag}] {ctx.BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
sb.AppendLine($"Charset: {ctx.CharsetName} | Iterations: per-cell adaptive (target ~{ctx.TargetSampleMs} ms/sample) | Warmup: {ctx.WarmupIterations} per phase (Ser/Des isolated) | Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | UnstableCV threshold: {ctx.UnstableCVThreshold * 100:F0}%");
// BDN-sourced: warmup / iter / samples are BDN-job-config-managed (see .../BDN/ artifacts for raw N).
// Console-sourced: our adaptive engine emits real numbers.
var runStatsHeader = ctx.SourceTag == "Bdn"
? "Iterations: BDN-managed | Warmup: BDN-managed | Samples: BDN-managed"
: $"Iterations: per-cell adaptive (target ~{ctx.TargetSampleMs} ms/sample) | Warmup: {ctx.WarmupIterations} per phase (Ser/Des isolated) | Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded";
sb.AppendLine($"Charset: {ctx.CharsetName} | {runStatsHeader} | .NET: {Environment.Version} | UnstableCV threshold: {ctx.UnstableCVThreshold * 100:F0}%");
sb.AppendLine("Baseline: MemoryPack (Byte[]) (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup");
// Options summary. Bracketed [OrderType] surfaces the TestOrder variant each preset serialised —
@ -619,9 +634,10 @@ public static class BenchmarkReportWriter
foreach (var testData in testDataSets)
{
// Order by Engine for stable column-position across cells (see PrintGroupedResults for rationale).
var testResults = results
.Where(r => r.TestDataName == testData.DisplayName)
.OrderBy(RtPerOp)
.OrderBy(r => r.Engine).ThenBy(RtPerOp)
.ToList();
foreach (var r in testResults)