diff --git a/AyCode.Benchmark/Program.cs b/AyCode.Benchmark/Program.cs index 844dbac..f36379a 100644 --- a/AyCode.Benchmark/Program.cs +++ b/AyCode.Benchmark/Program.cs @@ -106,14 +106,18 @@ namespace AyCode.Benchmark // Byte[] vs MemoryPack Default Byte[] across 5 TestData cells. BdnSummaryAdapter translates // the BDN Summary into BenchmarkResult rows and emits the Bdn.FullBenchmark_*.{log,LLM,output} // triplet to /Benchmark/ (BDN-native artifacts go under .../BDN/ via the global config). - var serializerSummary = BenchmarkRunner.Run(config); - BdnSummaryAdapter.WriteResults(serializerSummary); + WithProcessStabilization(() => + { + var serializerSummary = BenchmarkRunner.Run(config); + BdnSummaryAdapter.WriteResults(serializerSummary); + }); return; } if (args.Length > 0 && args[0] == "--jitasm") { - RunBenchmark(config, benchmarkDir, memDiagDir, "JitDisassemblyBenchmark"); + WithProcessStabilization(() => + RunBenchmark(config, benchmarkDir, memDiagDir, "JitDisassemblyBenchmark")); return; } @@ -129,8 +133,67 @@ namespace AyCode.Benchmark // args → BDN parses them as benchmark filters / job options). Same code path either way — the // known custom switches above (--serializers, --jitasm, --quick, --test, --testmsgpack, // --save-coverage) return early before reaching this point. - BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config); - CollectBenchmarkArtifacts(benchmarkDir, memDiagDir, "SwitcherRun"); + WithProcessStabilization(() => + { + BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config); + CollectBenchmarkArtifacts(benchmarkDir, memDiagDir, "SwitcherRun"); + }); + } + + /// + /// Runs the given action with CPU affinity pinned to CPU 0 and process priority raised to High, + /// restoring the original state in finally. 
Matches the stabilization block in + /// AyCode.Core.Serializers.Console.BenchmarkLoop.RunBenchmark so BDN-side measurements + /// receive the same OS-scheduler insulation the Console runner enjoys. + /// Worker process inheritance: BDN spawns a per-job child process to host the + /// workload. CPU affinity propagates from parent → child on both Windows (CreateProcess inherits + /// affinity by default) and Linux (the mask set by sched_setaffinity is inherited across fork and preserved across exec). So pinning the + /// orchestrator process here pins the actual measurement loop too — not just the BDN driver. + /// Skipped entirely on macOS, where <see cref="Process.ProcessorAffinity"/> throws (priority is not + /// raised there either — both changes sit behind the same Windows/Linux check). Failures inside the try block fall through to a best-effort restore in finally. + /// + static void WithProcessStabilization(Action action) + { + var process = Process.GetCurrentProcess(); + var origAffinity = (IntPtr)0; + var origPriority = ProcessPriorityClass.Normal; + var stabilizationApplied = false; + + if (OperatingSystem.IsWindows() || OperatingSystem.IsLinux()) + { + try + { + origAffinity = process.ProcessorAffinity; + origPriority = process.PriorityClass; + // Pin to CPU 0 (mask = 1). The choice is arbitrary — what matters is "exactly one + // core, consistently" — not which one. BDN's child worker process inherits the + // affinity, so the measurement loop itself runs pinned. Mirrors Console's pinning. + process.ProcessorAffinity = (IntPtr)1; + process.PriorityClass = ProcessPriorityClass.High; + stabilizationApplied = true; + Console.WriteLine("Stabilization: pinned to CPU 0 (affinity=0x1), priority=High (BDN workers inherit affinity)."); + } + catch (Exception ex) + { + // Affinity / priority changes may fail on locked-down hosts (group policies, containers + // without CAP_SYS_NICE on Linux). Surface and continue — BDN still works, just without + // scheduler insulation. NOTE(review): if the affinity assignment succeeds but the priority assignment throws, the process stays pinned to CPU 0 and is never restored, because stabilizationApplied is still false. 
+ Console.WriteLine($"Stabilization SKIPPED: {ex.GetType().Name}: {ex.Message}"); + } + } + + try + { + action(); + } + finally + { + if (stabilizationApplied && (OperatingSystem.IsWindows() || OperatingSystem.IsLinux())) + { + try { process.ProcessorAffinity = origAffinity; } catch { /* best-effort */ } + try { process.PriorityClass = origPriority; } catch { /* best-effort */ } + } + } } /// diff --git a/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs b/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs index 98248db..97c8dd1 100644 --- a/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs +++ b/AyCode.Benchmark/Reporting/BenchmarkReportWriter.cs @@ -194,8 +194,11 @@ public static class BenchmarkReportWriter foreach (var testData in testDataSets) { - // Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration. - var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList(); + // Order by Engine (so the same engine column-position stays stable across cells, especially + // when two engines are within noise floor on a given cell — flip-flopping speed-rank produces + // diff-hostile output across runs). RtPerOp is the secondary tiebreaker for cells where + // multiple variants of the same engine exist (e.g. AcBinary SGen vs Runtime). + var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.Engine).ThenBy(r => RtPerOp(r)).ToList(); // Baseline switched MessagePack → MemoryPack: MemoryPack is the SOTA performance leader. var memPackResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.MemoryPack && r.IoMode == BenchmarkIoMode.ByteArray)); // Pin the comparison to AcBinary's SGen variant — apples-to-apples vs MemoryPack (also source-generated). 
@@ -459,8 +462,14 @@ public static class BenchmarkReportWriter sb.AppendLine($"║ Source: {ctx.SourceTag}".PadRight(100) + "║"); sb.AppendLine($"║ Build: {ctx.BuildConfiguration}".PadRight(100) + "║"); sb.AppendLine($"║ Charset: {ctx.CharsetName}".PadRight(100) + "║"); - sb.AppendLine($"║ Iterations: per-cell adaptive (~{ctx.TargetSampleMs} ms target)".PadRight(100) + "║"); - sb.AppendLine($"║ Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded".PadRight(100) + "║"); + // For BDN-sourced contexts, warmup / samples / target are managed inside BDN's job config (not by + // our adaptive engine) — surfacing the placeholder zeros as concrete numbers would be misleading. + // Print "BDN-managed" instead; raw BDN config is recoverable from the BDN-native artifacts under .../BDN/. + var isBdn = ctx.SourceTag == "Bdn"; + var iterationsHeader = isBdn ? "Iterations: BDN-managed" : $"Iterations: per-cell adaptive (~{ctx.TargetSampleMs} ms target)"; + var samplesHeader = isBdn ? "Samples: BDN-managed" : $"Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded"; + sb.AppendLine($"║ {iterationsHeader}".PadRight(100) + "║"); + sb.AppendLine($"║ {samplesHeader}".PadRight(100) + "║"); sb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝"); sb.AppendLine(); @@ -507,8 +516,9 @@ public static class BenchmarkReportWriter foreach (var testData in testDataSets) { - // Order by per-op µs (iter-independent) — rows may have different iter counts post-calibration. - var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => RtPerOp(r)).ToList(); + // Order by Engine (stable column-position across cells, see PrintGroupedResults for rationale); + // RtPerOp is the secondary tiebreaker between same-engine variants (SGen vs Runtime). 
+ var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.Engine).ThenBy(r => RtPerOp(r)).ToList(); var memPackResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.MemoryPack && r.IoMode == BenchmarkIoMode.ByteArray)); var acBinaryResult = testResults.FirstOrDefault(r => (r.Engine == BenchmarkEngine.AcBinary && r.IoMode == BenchmarkIoMode.ByteArray && r.DispatchMode == BenchmarkDispatchMode.SGen)); @@ -590,7 +600,12 @@ public static class BenchmarkReportWriter { var sb = new StringBuilder(); sb.AppendLine($"# AcBinary Benchmark [{ctx.SourceTag}] {ctx.BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}"); - sb.AppendLine($"Charset: {ctx.CharsetName} | Iterations: per-cell adaptive (target ~{ctx.TargetSampleMs} ms/sample) | Warmup: {ctx.WarmupIterations} per phase (Ser/Des isolated) | Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | UnstableCV threshold: {ctx.UnstableCVThreshold * 100:F0}%"); + // BDN-sourced: warmup / iter / samples are BDN-job-config-managed (see .../BDN/ artifacts for raw N). + // Console-sourced: our adaptive engine emits real numbers. + var runStatsHeader = ctx.SourceTag == "Bdn" + ? "Iterations: BDN-managed | Warmup: BDN-managed | Samples: BDN-managed" + : $"Iterations: per-cell adaptive (target ~{ctx.TargetSampleMs} ms/sample) | Warmup: {ctx.WarmupIterations} per phase (Ser/Des isolated) | Samples: {ctx.BenchmarkSamples} (median) + 1 pilot discarded"; + sb.AppendLine($"Charset: {ctx.CharsetName} | {runStatsHeader} | .NET: {Environment.Version} | UnstableCV threshold: {ctx.UnstableCVThreshold * 100:F0}%"); sb.AppendLine("Baseline: MemoryPack (Byte[]) (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup"); // Options summary. 
Bracketed [OrderType] surfaces the TestOrder variant each preset serialised — @@ -619,9 +634,10 @@ public static class BenchmarkReportWriter foreach (var testData in testDataSets) { + // Order by Engine for stable column-position across cells (see PrintGroupedResults for rationale). var testResults = results .Where(r => r.TestDataName == testData.DisplayName) - .OrderBy(RtPerOp) + .OrderBy(r => r.Engine).ThenBy(RtPerOp) .ToList(); foreach (var r in testResults)