diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs
index 103679b..c954b11 100644
--- a/AyCode.Core.Serializers.Console/Program.cs
+++ b/AyCode.Core.Serializers.Console/Program.cs
@@ -47,7 +47,7 @@ public static class Program
private static int TestIterations = 1;
private static int BenchmarkSamples = 1; // Debug: single sample, fast iteration
#else
- private static int WarmupIterations = 10000; //5000
+ private static int WarmupIterations = 5000; //10000 — per-phase (Ser AND Des get their own warmup separately)
private static int TestIterations = 1000; //1000
private static int BenchmarkSamples = 10;
#endif
@@ -462,7 +462,7 @@ public static class Program
var allTestDataSets = BenchmarkTestDataProvider.CreateTestDataSets();
var testDataSets = FilterByLayer(allTestDataSets, layer);
- System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{TargetSampleMs} ms target) | Warmup: {WarmupIterations} | Samples: {BenchmarkSamples} (median) + pilot discard");
+ System.Console.WriteLine($"Layer: {layer} | OpMode: {opMode} | SerializerMode: {serializerMode} | Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (~{TargetSampleMs} ms target) | Warmup: {WarmupIterations} per phase (Ser/Des isolated) | Samples: {BenchmarkSamples} (median) + pilot discard");
System.Console.WriteLine($"Build: {BuildConfiguration} | .NET: {Environment.Version} | Test Type: {testDataSets.FirstOrDefault()?.TypeName ?? "unknown"} | Test Cells: {testDataSets.Count}/{allTestDataSets.Count}");
System.Console.WriteLine();
@@ -589,6 +589,22 @@ public static class Program
#region Benchmark Execution
+ /// <summary>
+ /// Forces a full GC cycle at a phase boundary in the benchmark loop. Two-pass collect with finalizer drain
+ /// in between: the first pass queues unreachable finalizable objects for finalization, WaitForPendingFinalizers
+ /// blocks until those finalizers have run, the second pass reclaims the objects the finalizers released. After this
+ /// returns the heap is in a known-quiescent state — the next warmup/measurement phase starts on a clean slate, isolated
+ /// from the previous phase's residual allocations (write-buffer pools, intern cache, write-plan arrays, etc.).
+ /// Called before every Ser-phase / Des-phase / round-trip-only warmup in <see cref="RunBenchmarksForTestData"/>.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static void ForceGcCollect()
+ {
+ GC.Collect(2, GCCollectionMode.Forced, blocking: true); // pass 1: full (gen 2) blocking collection
+ GC.WaitForPendingFinalizers(); // let the finalizer thread drain before the second pass
+ GC.Collect(2, GCCollectionMode.Forced, blocking: true); // pass 2: reclaim what the finalizers released
+ }
+
private static List RunBenchmarksForTestData(TestDataSet testData, string mode, string serializerMode)
{
var results = new List();
@@ -610,39 +626,25 @@ public static class Program
System.Console.WriteLine("✓ All serializers passed round-trip verification.");
- // Per-serializer (warmup → calibrate → measurement) cycle: each serializer warms up IMMEDIATELY
- // before its own bench, then calibrates iter per-function (Ser and Des independently) so each
- // sample lands at ~TargetSampleMs wall-clock. This avoids cache pollution AND equalizes sample
- // window length across cells of vastly different per-op cost.
- System.Console.WriteLine($"Running benchmarks (target ~{TargetSampleMs} ms/sample × {BenchmarkSamples} samples median, per-serializer warmup + adaptive iter)...\n");
+ // Per-serializer, PER-PHASE (warmup → calibrate → measurement) cycle: each serializer's Ser-path and
+ // Des-path get COMPLETELY ISOLATED warmup→measure rounds, with a GC.Collect at every phase boundary.
+ //
+ // Why phase-isolation: a combined warmup (Ser+Des interleaved) leaves the CPU I-cache + branch-predictor
+ // in a "compromise state" — neither Ser nor Des code-set dominates. The first phase to measure pays a
+ // cache-miss penalty as its code-set displaces the leftover-warmup-state. Isolated warmup→measure pairs
+ // keep the I-cache HOT for ONLY the measured path, both in the warmup (priming) and the measurement
+ // (steady-state). Branch-predictor history also stays clean per path.
+ //
+ // GC.Collect at every boundary: removes residual allocation pressure from the previous phase (write-buffer
+ // pool churn from Ser, deserialized object graph from Des) so the next phase starts with a quiescent
+ // heap — GC generation-promotion timing during measurement is then driven only by THAT phase's allocations.
+ //
+ // JitSleep per-phase: tiered JIT background promotion drain after each warmup (mode-aware: 0 ms in AOT).
+ // Each phase's freshly-promoted methods settle before its timing starts.
+ System.Console.WriteLine($"Running benchmarks (target ~{TargetSampleMs} ms/sample × {BenchmarkSamples} samples median, phase-isolated warmup/measure per Ser/Des)...\n");
foreach (var serializer in serializers)
{
- // Warmup THIS serializer right before benching it — keeps its hot code/data in cache.
- serializer.Warmup(WarmupIterations);
-
- // Wait for tiered JIT background compilation to drain (mode-aware: 0ms in AOT).
- // Per-serializer instead of once globally — guarantees this serializer's freshly-promoted
- // methods are settled before timing, regardless of when it appears in the iteration order.
- if (JitSleep > 0) Thread.Sleep(JitSleep);
-
- // Adaptive iter calibration — per Ser/Des/RT function, post-warmup. Each function gets its
- // own iter count tuned to TargetSampleMs (typically 250 ms). The 100-iter calibration cost
- // is amortized over the BenchmarkSamples + 1 (pilot) recorded measurements that follow.
- int serIter = TestIterations, desIter = TestIterations, rtIter = TestIterations;
- if (serializer.IsRoundTripOnly)
- {
- if (mode is "all" or "serialize" or "ser")
- rtIter = CalibrateIterations(() => serializer.Serialize(), TargetSampleMs);
- }
- else
- {
- if (mode is "all" or "serialize" or "ser")
- serIter = CalibrateIterations(() => serializer.Serialize(), TargetSampleMs);
- if (mode is "all" or "deserialize" or "des")
- desIter = CalibrateIterations(() => serializer.Deserialize(), TargetSampleMs);
- }
-
var result = new BenchmarkResult
{
TestDataName = testData.DisplayName, // Use DisplayName for IId% info
@@ -663,26 +665,38 @@ public static class Program
if (serializer.IsRoundTripOnly)
{
- // Round-trip-only benchmarks (NamedPipe etc.): measure the full pipe round-trip directly into the RT
- // columns. Ser ms / SerAlloc / Des ms / DesAlloc stay 0 → display as "N/A". Allocation uses the
- // process-wide measurement so the server-drain-thread allocations (e.g. server-side new byte[len])
- // also show up — otherwise current-thread alloc would only count the client side and look ~halved.
+ // Round-trip-only benchmarks (NamedPipe etc.): single phase — Serialize() performs the full RT,
+ // Deserialize() is a no-op. We use the Ser-phase entry-points (WarmupSerialize) to warm the
+ // entire round-trip path, then record into the RT result columns.
if (mode is "all" or "serialize" or "ser")
{
+ ForceGcCollect();
+ serializer.WarmupSerialize(WarmupIterations);
+ if (JitSleep > 0) Thread.Sleep(JitSleep);
+
+ var rtIter = CalibrateIterations(() => serializer.Serialize(), TargetSampleMs);
var (rtMed, rtMin, rtMax, rtStd) = RunTimed(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT timing]");
result.RoundTripTimeMs = rtMed;
result.RoundTripTimeMinMs = rtMin;
result.RoundTripTimeMaxMs = rtMax;
result.RoundTripTimeStdDevMs = rtStd;
result.RoundTripIterations = rtIter;
+ // Process-wide allocation measurement: server-drain-thread allocations (server-side new byte[len])
+ // also show up — otherwise current-thread alloc would only count the client side and look ~halved.
result.RoundTripAllocBytesPerOp = MeasureAllocationTotal(() => serializer.Serialize(), rtIter, $"{groupLabel} [RT alloc]");
}
// mode == "deserialize" alone is meaningless for a round-trip-only benchmark; skip silently.
}
else
{
+ // ── Ser phase ── isolated warmup → JitSleep → calibrate → time → alloc; preceded by GC.Collect.
if (mode is "all" or "serialize" or "ser")
{
+ ForceGcCollect();
+ serializer.WarmupSerialize(WarmupIterations);
+ if (JitSleep > 0) Thread.Sleep(JitSleep);
+
+ var serIter = CalibrateIterations(() => serializer.Serialize(), TargetSampleMs);
var (serMed, serMin, serMax, serStd) = RunTimed(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser timing]");
result.SerializeTimeMs = serMed;
result.SerializeTimeMinMs = serMin;
@@ -693,8 +707,16 @@ public static class Program
result.SerializeAllocBytesPerOp = MeasureAllocation(() => serializer.Serialize(), serIter, $"{groupLabel} [Ser alloc]");
}
+ // ── Des phase ── isolated warmup → JitSleep → calibrate → time → alloc; preceded by GC.Collect.
+ // The GC.Collect here is critical: it discards the Ser-phase's write-buffer pool churn so the
+ // Des-phase's allocation measurement reflects ONLY Des-side allocations (deserialized object graph).
if (mode is "all" or "deserialize" or "des")
{
+ ForceGcCollect();
+ serializer.WarmupDeserialize(WarmupIterations);
+ if (JitSleep > 0) Thread.Sleep(JitSleep);
+
+ var desIter = CalibrateIterations(() => serializer.Deserialize(), TargetSampleMs);
var (desMed, desMin, desMax, desStd) = RunTimed(() => serializer.Deserialize(), desIter, $"{groupLabel} [Des timing]");
result.DeserializeTimeMs = desMed;
result.DeserializeTimeMinMs = desMin;
@@ -708,10 +730,10 @@ public static class Program
// batch-time addition would be misleading. Instead: compute per-op µs (iter-independent),
// then synthesize RoundTripTimeMs against RoundTripIterations = max(serIter, desIter) so that
// RoundTripTimeMs / RoundTripIterations * 1000 == SerPerOp + DesPerOp.
- var serPerOp = ToPerOpMicros(result.SerializeTimeMs, serIter);
- var desPerOp = ToPerOpMicros(result.DeserializeTimeMs, desIter);
+ var serPerOp = ToPerOpMicros(result.SerializeTimeMs, result.SerializeIterations);
+ var desPerOp = ToPerOpMicros(result.DeserializeTimeMs, result.DeserializeIterations);
var rtPerOp = serPerOp + desPerOp;
- result.RoundTripIterations = Math.Max(serIter, desIter);
+ result.RoundTripIterations = Math.Max(result.SerializeIterations, result.DeserializeIterations);
result.RoundTripTimeMs = rtPerOp / 1000.0 * result.RoundTripIterations;
result.RoundTripAllocBytesPerOp = result.SerializeAllocBytesPerOp + result.DeserializeAllocBytesPerOp;
}
@@ -1469,7 +1491,28 @@ public static class Program
/// rankings (because both metrics are misleading there) — they still participate in "Fastest Round-trip".
/// Default false for in-memory IO modes which measure Ser and Des separately.
bool IsRoundTripOnly => false;
+ /// <summary>Combined warmup (Ser + Deser interleaved). Kept for backward-compat with ProfilerMode
+ /// and other callers that don't need phase-separated warmup. The benchmark loop prefers the split
+ /// <see cref="WarmupSerialize"/> + <see cref="WarmupDeserialize"/> pair for cache-isolated measurements.</summary>
void Warmup(int iterations);
+
+ /// <summary>Warm only the Serialize path. Default body calls <see cref="Serialize"/> <paramref name="iterations"/> times.
+ /// Overrides are only needed when the implementor wants Ser-specific warmup-state (e.g. pre-allocate buffers).
+ /// On <see cref="IsRoundTripOnly"/> benchmarks (NamedPipe-style) <see cref="Serialize"/> performs the full RT,
+ /// so this warms the entire round-trip path.</summary>
+ void WarmupSerialize(int iterations)
+ {
+ for (var i = 0; i < iterations; i++) Serialize();
+ }
+
+ /// <summary>Warm only the Deserialize path. Default body calls <see cref="Deserialize"/> <paramref name="iterations"/> times.
+ /// On <see cref="IsRoundTripOnly"/> benchmarks <see cref="Deserialize"/> is a no-op, so the bench loop
+ /// skips the Des-phase entirely for those cells.</summary>
+ void WarmupDeserialize(int iterations)
+ {
+ for (var i = 0; i < iterations; i++) Deserialize();
+ }
+
void Serialize();
void Deserialize();
/// Round-trip correctness check — called once per cell before warmup. Returns true if Serialize+Deserialize preserves data.
@@ -3274,7 +3317,7 @@ public static class Program
var sb = new StringBuilder();
var testTypeName = testDataSets.FirstOrDefault()?.TypeName ?? "unknown";
sb.AppendLine($"# AcBinary Benchmark {BuildConfiguration} {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
- sb.AppendLine($"Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (target ~{TargetSampleMs} ms/sample) | Warmup: {WarmupIterations} | Samples: {BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | TestType: {testTypeName} | UnstableCV threshold: {UnstableCVThreshold * 100:F0}%");
+ sb.AppendLine($"Charset: {GetCurrentCharsetName()} | Iterations: per-cell adaptive (target ~{TargetSampleMs} ms/sample) | Warmup: {WarmupIterations} per phase (Ser/Des isolated) | Samples: {BenchmarkSamples} (median) + 1 pilot discarded | .NET: {Environment.Version} | TestType: {testTypeName} | UnstableCV threshold: {UnstableCVThreshold * 100:F0}%");
sb.AppendLine($"Baseline: {"MemoryPack (Byte[])"} (SOTA reference) | Verified: round-trip correctness checked once per cell before warmup");
// Options summary