From c722f775f63cce93d77041eabdcc592512363047 Mon Sep 17 00:00:00 2001
From: Loretta <jozsef.b@aycode.com>
Date: Tue, 12 May 2026 13:52:28 +0200
Subject: [PATCH] Refactor: move serializer benchmarks to separate files

Moved all ISerializerBenchmark implementations for AcBinary and MemoryPack from Program.cs into dedicated files under Benchmarks/. Improves code organization and maintainability; no logic changes, only file structure refactor.
---
 .../AcBinaryBufferWriterBenchmark.cs          |   68 ++
 .../AcBinaryFreshBufferWriterBenchmark.cs     |   63 +
 .../AcBinaryInMemoryPipeBenchmark.cs          |  190 ++++
 .../AcBinaryInMemoryRawByteArrayBenchmark.cs  |  168 +++
 .../Benchmarks/AcBinaryNamedPipeBenchmark.cs  |  237 ++++
 .../AcBinaryNamedPipeRawByteArrayBenchmark.cs |  213 ++++
 .../MemoryPackBufferWriterBenchmark.cs        |   63 +
 .../MemoryPackFreshBufferWriterBenchmark.cs   |   54 +
 AyCode.Core.Serializers.Console/Program.cs    | 1009 +----------------
 9 files changed, 1057 insertions(+), 1008 deletions(-)
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
 create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs
new file mode 100644
index 0000000..aacafe1
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs
@@ -0,0 +1,68 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// AcBinary row for the IBufferWriter overload with a single caller-owned ArrayBufferWriter that is
+/// created once in the constructor and rewound (ResetWrittenCount) before every serialize call.
+/// </summary>
+internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly ArrayBufferWriter<byte> _bufferWriter;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly TestOrder _order;
+    private readonly byte[] _serialized;
+
+    public string Engine { get { return Configuration.EngineAcBinary; } }
+    public string IoMode { get { return Configuration.IoBufWrReuse; } }
+    public string DispatchMode { get { return _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; } }
+    public string OptionsPreset { get; }
+    public int SerializedSize { get { return _serialized.Length; } }
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get { return 0; } }
+    public string OptionsDescription { get { return BenchmarkOptions.BuildAcBinary(_options); } }
+
+    public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        OptionsPreset = optionsPreset;
+        _options = options;
+        _order = order;
+        _serialized = AcBinarySerializer.Serialize(order, options);
+
+        // Only the writer construction is counted as serialize-side setup; the reference payload
+        // above is outside the window. Deserialize reads `_serialized` directly, so its setup is 0.
+        GC.Collect();
+        GC.WaitForPendingFinalizers();
+        GC.Collect();
+        var allocBaseline = GC.GetAllocatedBytesForCurrentThread();
+        _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
+        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - allocBaseline;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // Rewind the shared writer (no allocation, no clearing), then write into it again.
+        _bufferWriter.ResetWrittenCount();
+        AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
+    }
+
+    // Input is deliberately wrapped in a ReadOnlySequence<byte> so the call enters through the ROS
+    // overload (the surface SignalR / Pipe consumers use) instead of the byte[] overload; a single-segment
+    // array-backed sequence then hits the fast path at AcBinaryDeserializer.cs:298 (forwards to byte[]).
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        Serialize();
+        var decoded = AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, decoded);
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs
new file mode 100644
index 0000000..2e43f42
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs
@@ -0,0 +1,63 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// AcBinary row for the IBufferWriter overload in the one-shot scenario: every serialize call
+/// builds a brand-new ArrayBufferWriter, as code that never reuses a writer would.
+/// The options passed in are expected to carry BufferWriterChunkSize=4096 (production-realistic,
+/// SignalR-aligned) rather than the 65535 default; with the default, AcBinary would ask GetSpan()
+/// for 64KB upfront and the new writer would allocate 64KB even for small payloads.
+/// </summary>
+internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly AcBinarySerializerOptions _options;
+    private readonly TestOrder _order;
+    private readonly byte[] _serialized;
+
+    public string Engine { get { return Configuration.EngineAcBinary; } }
+    public string IoMode { get { return Configuration.IoBufWrNew; } }
+    public string DispatchMode { get { return _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; } }
+    public string OptionsPreset { get; }
+    public int SerializedSize { get { return _serialized.Length; } }
+    public long SetupSerializeAllocBytes { get { return 0; } }
+    public long SetupDeserializeAllocBytes { get { return 0; } }
+    public string OptionsDescription { get { return BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B"); } }
+
+    public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        // The chunk size is decided by the caller (CreateSerializers and its binaryFastMode4KbChunk
+        // options instance are the single source of truth). The instance is stored as-is and must
+        // NOT be mutated here; change the chunk size in CreateSerializers only.
+        _options = options;
+        OptionsPreset = optionsPreset;
+        _order = order;
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        var writer = new ArrayBufferWriter<byte>();  // new writer per call: allocates, then grows on demand
+        AcBinarySerializer.Serialize(_order, writer, _options);
+    }
+
+    // Input is deliberately wrapped in a ReadOnlySequence<byte> so the call enters through the ROS
+    // overload (the surface SignalR / Pipe consumers use) instead of the byte[] overload; a single-segment
+    // array-backed sequence then hits the fast path at AcBinaryDeserializer.cs:298 (forwards to byte[]).
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        var writer = new ArrayBufferWriter<byte>();
+        AcBinarySerializer.Serialize(_order, writer, _options);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(writer.WrittenMemory), _options));
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs
new file mode 100644
index 0000000..1b09377
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs
@@ -0,0 +1,190 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.Serialization;   // DrainFromAsync extension (test-only, used by benchmark)
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipelines;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
+/// is an in-memory <see cref="System.IO.Pipelines.Pipe"/> instead of a kernel <c>NamedPipe</c>. The Pipe's
+/// <c>Writer</c>/<c>Reader</c> pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
+/// buffer copy, no IRP queueing.
+///
+/// <para><b>Why this benchmark matters</b>: by holding ALL other variables constant (same SerializeChunkedFramed,
+/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
+/// row isolates the <b>kernel-NamedPipe transport overhead</b> from the chunked-streaming framework's pure
+/// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
+/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
+/// should converge dramatically toward <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>.</para>
+///
+/// <para><b>Real-world relevance</b>: in-memory Pipe is the typical primitive used for cross-thread serializer
+/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
+/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
+/// of the NamedPipe benchmark.</para>
+/// </summary>
+internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
+{
+    private readonly TestOrder _order;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly byte[] _serialized; // for SerializedSize reporting only
+
+    // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
+    private readonly Pipe _pipe;
+    private readonly PipeWriter _pipeWriter; // completed in Dispose only; Serialize() writes through _pipe
+    private readonly PipeReader _pipeReader;
+
+    // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
+    // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize<T>(input).
+    private readonly AsyncPipeReaderInput _input;
+    private readonly CancellationTokenSource _cts;
+    private readonly Task _drainTask;
+    private readonly Task _consumerTask;
+    private readonly ManualResetEventSlim _consumeRequest = new(false); // set by Serialize() to start one receive; reset by ConsumeLoop
+    private readonly ManualResetEventSlim _consumeDone = new(false); // set by ConsumeLoop after each Deserialize attempt; awaited by Serialize()
+    private object? _lastResult; // written by ConsumeLoop only while _captureResult is true
+    private bool _captureResult; // true only inside VerifyRoundTrip
+    private bool _disposed; // guards Dispose against running twice
+
+    public string Engine => Configuration.EngineAcBinary;
+    public string IoMode => Configuration.IoInMemoryPipe;
+    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get; }
+    public bool IsRoundTripOnly => true; // the round trip is driven entirely from Serialize(); Deserialize() is a no-op
+    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
+
+    public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        _order = order;
+        _options = options;
+        OptionsPreset = optionsPreset;
+
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+
+        // === SERIALIZE-side setup measurement ===
+        // In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object
+        // and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter).
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+        _pipe = new Pipe();
+        _pipeWriter = _pipe.Writer;
+        var afterSer = GC.GetAllocatedBytesForCurrentThread();
+        SetupSerializeAllocBytes = afterSer - beforeSer;
+
+        // === DESERIALIZE-side setup measurement ===
+        // PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task +
+        // consumer task scaffolding. Identical to the NamedPipe variant on the receive side.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+
+        _pipeReader = _pipe.Reader;
+        _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
+        _cts = new CancellationTokenSource();
+        _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
+        _consumerTask = Task.Run(ConsumeLoop);
+
+        var afterDes = GC.GetAllocatedBytesForCurrentThread(); // NOTE(review): per-thread counter — allocations made by the two tasks on other threads are presumably not included; confirm
+        SetupDeserializeAllocBytes = afterDes - beforeDes;
+    }
+
+    // BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
+    // Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol.
+    private void ConsumeLoop()
+    {
+        var ct = _cts.Token;
+        try
+        {
+            while (true)
+            {
+                _consumeRequest.Wait(ct);
+                if (ct.IsCancellationRequested) return; // Dispose cancels and then pulses _consumeRequest, so re-check before doing work
+                _consumeRequest.Reset();
+
+                try
+                {
+                    var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
+                    if (_captureResult) _lastResult = result;
+                }
+                catch
+                {
+                    // Swallow — see ConsumeLoop in NamedPipe variant for rationale. A failure leaves _lastResult unset, so VerifyRoundTrip reports false.
+                }
+                finally
+                {
+                    _consumeDone.Set(); // always signal, so Serialize() is released even when Deserialize threw
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Cooperative cancel — Dispose path. Swallow.
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe
+        // instead of kernel NamedPipe). Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task
+        // reads from PipeReader → input.Feed → consumer Deserialize<T> consumes byte-by-byte.
+        //
+        // Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is
+        // exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await
+        // FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to
+        // PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk
+        // is functionally equivalent to the PipeWriter-overload (both internally route to
+        // SerializeToPipeWriterCore with FlushPolicy.PerChunk).
+        _consumeDone.Reset(); // cleared BEFORE waking the consumer, so this iteration's completion signal cannot be wiped
+        _consumeRequest.Set(); // consumer starts Deserialize<T>(_input) while the bytes are still being written below
+
+        AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);
+
+        _consumeDone.Wait(); // no timeout: returns only after ConsumeLoop's finally block has run
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        _captureResult = true;
+        try
+        {
+            Serialize();
+            var result = _lastResult as TestOrder;
+            return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); // null means nothing was captured (e.g. Deserialize threw and was swallowed)
+        }
+        finally
+        {
+            _captureResult = false;
+            _lastResult = null;
+        }
+    }
+
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        // Complete writer + reader (in-memory Pipe — no underlying stream to dispose).
+        try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
+        try { _input.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs
new file mode 100644
index 0000000..ccde326
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs
@@ -0,0 +1,168 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Raw <c>byte[]</c> over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
+/// <c>Channel&lt;T&gt;</c>). Calling thread serialises into a fresh <c>byte[]</c>, hands it to a
+/// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
+///
+/// <para><b>Why this benchmark matters</b>: completes the 2x2 transport × wire-format matrix:</para>
+/// <list type="bullet">
+///   <item><description><b>NamedPipe + Chunked</b> = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
+///   <item><description><b>NamedPipe + Raw</b> = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
+///   <item><description><b>In-memory Pipe + Chunked</b> = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
+///   <item><description><b>In-memory + Raw</b> = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
+/// </list>
+/// <para>Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
+/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
+/// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
+/// overhead on the raw-byte[] side.</para>
+/// </summary>
+internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
+{
+    private readonly TestOrder _order;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly byte[] _serialized; // for SerializedSize reporting only
+
+    // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
+    // No transport — just a byte[] slot for handoff between calling thread and consumer task.
+    private readonly CancellationTokenSource _cts;
+    private readonly Task _consumerTask;
+    private readonly ManualResetEventSlim _consumeRequest = new(false); // set by Serialize() once _pendingBytes is stored; reset by ConsumerLoop
+    private readonly ManualResetEventSlim _consumeDone = new(false); // set by ConsumerLoop after each attempt; awaited by Serialize()
+    private byte[]? _pendingBytes;             // calling thread → consumer task handoff slot
+    private object? _lastResult;               // captured during VerifyRoundTrip; null in benchmark iters
+    private bool _captureResult; // true only inside VerifyRoundTrip
+    private bool _disposed; // guards Dispose against running twice
+
+    public string Engine => Configuration.EngineAcBinary;
+    public string IoMode => Configuration.IoInMemoryRaw;
+    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get; }
+    public bool IsRoundTripOnly => true; // the round trip is driven entirely from Serialize(); Deserialize() is a no-op
+    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
+
+    public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        _order = order;
+        _options = options;
+        OptionsPreset = optionsPreset;
+
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+
+        // === SERIALIZE-side setup measurement ===
+        // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
+        SetupSerializeAllocBytes = 0;
+
+        // === DESERIALIZE-side setup measurement ===
+        // Window covers the cancellation source + Task.Run(ConsumerLoop) only; the 2× MRES (request / done) are field initializers and are created before this constructor body runs.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+        _cts = new CancellationTokenSource();
+        _consumerTask = Task.Run(ConsumerLoop);
+        var afterDes = GC.GetAllocatedBytesForCurrentThread(); // NOTE(review): per-thread counter — allocations made on the consumer's thread are presumably not included; confirm
+        SetupDeserializeAllocBytes = afterDes - beforeDes;
+    }
+
+    // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize<T>(bytes),
+    // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
+    // reference itself (zero-copy by reference).
+    private void ConsumerLoop()
+    {
+        var ct = _cts.Token;
+        try
+        {
+            while (true)
+            {
+                _consumeRequest.Wait(ct);
+                if (ct.IsCancellationRequested) return; // Dispose cancels and then pulses _consumeRequest, so re-check before doing work
+                _consumeRequest.Reset();
+
+                try
+                {
+                    var bytes = _pendingBytes;
+                    if (bytes != null)
+                    {
+                        var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
+                        if (_captureResult) _lastResult = result;
+                    }
+                }
+                catch
+                {
+                    // Swallow — see ConsumerLoop in NamedPipe variant for rationale. A failure leaves _lastResult unset, so VerifyRoundTrip reports false.
+                }
+                finally
+                {
+                    _consumeDone.Set(); // always signal, so Serialize() is released even when Deserialize threw
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Cooperative cancel — Dispose path. Swallow.
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // 2-task in-memory pipeline:
+        // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
+        // 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
+        //    picks up the reference (zero-copy) and runs Deserialize<T>(bytes).
+        // 3. Calling thread waits for _consumeDone (consumer task finished Des).
+        //
+        // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
+        // are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
+        // signalling and waiting while consumer thread takes the byte[]).
+        var bytes = AcBinarySerializer.Serialize(_order, _options);
+
+        _pendingBytes = bytes; // stored before the request signal below
+        _consumeDone.Reset(); // cleared BEFORE waking the consumer, so this iteration's completion signal cannot be wiped
+        _consumeRequest.Set();
+
+        _consumeDone.Wait(); // no timeout: returns only after ConsumerLoop's finally block has run
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        _captureResult = true;
+        try
+        {
+            Serialize();
+            var result = _lastResult as TestOrder;
+            return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); // null means nothing was captured (e.g. Deserialize threw and was swallowed)
+        }
+        finally
+        {
+            _captureResult = false;
+            _lastResult = null;
+        }
+    }
+
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs
new file mode 100644
index 0000000..9065cfc
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs
@@ -0,0 +1,237 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.Serialization;   // DrainFromAsync extension (test-only, used by benchmark)
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipelines;
+using System.IO.Pipes;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API
+/// (<see cref="AcBinarySerializer.SerializeChunkedFramed{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
+/// + <see cref="AsyncPipeReaderInput"/> + <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>).
+/// Mirrors what a real consumer (e.g. <c>DeserializeFromPipeReaderAsync</c>) does per message:
+/// long-lived <see cref="AsyncPipeReaderInput"/> with multi-message wire framing on top of a long-lived NamedPipe.
+///
+/// <para><b>Architecture</b>:</para>
+/// <list type="bullet">
+///   <item>Constructor (NOT timed): sets up <see cref="NamedPipeServerStream"/> + <see cref="NamedPipeClientStream"/>,
+///     waits for connection, creates one long-lived <see cref="System.IO.Pipelines.PipeWriter"/> /
+///     <see cref="System.IO.Pipelines.PipeReader"/> pair, ONE long-lived <see cref="AsyncPipeReaderInput"/>
+///     in <c>multiMessage = true</c> mode, ONE drain Task that pumps <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>
+///     forever, and ONE consumer Task that, each time it is signalled, runs <c>AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts)</c>
+///     and reports completion through a <see cref="System.Threading.ManualResetEventSlim"/> (no channel is involved).</item>
+///   <item>Per-iteration <see cref="Serialize"/> (timed): sender writes via
+///     <see cref="AcBinarySerializer.SerializeChunkedFramed{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
+///     — multi-message wire (<c>[201][UINT16][data]...[202]</c>); the <c>[202]</c> end marker arms the input's
+///     <c>_readPos = -1</c> sentinel, so the next message's first <c>AppendToBuffer</c> recycles the buffer to 0.
+///     The calling thread then blocks on <c>_consumeDone</c> until the consumer task has finished deserializing.</item>
+///   <item><see cref="Deserialize"/> is a no-op (full round-trip captured in <see cref="Serialize"/>);
+///     <see cref="IsRoundTripOnly"/>=true → the Ser ms / SerAlloc columns are N/A, RT ms = full round-trip.</item>
+/// </list>
+///
+/// <para><b>Per-iter overhead</b>: 0 new <c>Task.Run</c>, 0 new <c>AsyncPipeReaderInput</c>, 0 new <c>CancellationTokenSource</c>.
+/// Pure cost = <c>SerializeChunkedFramed</c> (CPU + per-chunk flush) + kernel write/read syscalls + 1 sync barrier
+/// (<c>ManualResetEventSlim</c> request/done pair) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: <c>_readPos = -1</c>
+/// sentinel + <c>AppendToBuffer</c> sliding-window cycling).</para>
+///
+/// <para><b>Approximation note</b>: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR
+/// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound.</para>
+/// </summary>
+internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
+{
+    private readonly TestOrder _order;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly byte[] _serialized; // for SerializedSize reporting only
+
+    // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
+    private readonly NamedPipeServerStream _pipeServer;
+    private readonly NamedPipeClientStream _pipeClient;
+    private readonly PipeWriter _pipeWriter;
+    private readonly PipeReader _pipeReader;
+
+    // Long-lived multi-message receive infrastructure (set up once in ctor).
+    private readonly AsyncPipeReaderInput _input;
+    private readonly CancellationTokenSource _cts;
+    private readonly Task _drainTask;       // BG: PipeReader → input.Feed (continuous pump)
+    private readonly Task _consumerTask;    // BG: per-iter Deserialize<T>(input) loop, signaled by calling thread
+    private readonly ManualResetEventSlim _consumeRequest = new(false);
+    private readonly ManualResetEventSlim _consumeDone = new(false);
+    private object? _lastResult;            // captured during VerifyRoundTrip; null in benchmark iters
+    private bool _captureResult;            // toggle: when true, ConsumeLoop stores result; otherwise discards
+    private bool _disposed;
+
+    public string Engine => Configuration.EngineAcBinary;
+    public string IoMode => Configuration.IoNamedPipe;
+    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get; }
+    public bool IsRoundTripOnly => true;
+    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");
+
+    public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        _order = order;
+        // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
+        // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
+        // size in CreateSerializers only.
+        _options = options;
+        OptionsPreset = optionsPreset;
+
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+
+        // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the
+        // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize
+        // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header +
+        // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned,
+        // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source.
+        var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}";
+
+        // === SERIALIZE-side setup measurement ===
+        // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+
+        _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
+            System.IO.Pipes.PipeOptions.Asynchronous,
+            inBufferSize:  _options.BufferWriterChunkSize,
+            outBufferSize: _options.BufferWriterChunkSize);
+
+        _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
+
+        var serverWait = _pipeServer.WaitForConnectionAsync();
+        _pipeClient.Connect();
+        serverWait.GetAwaiter().GetResult();
+
+        _pipeWriter = PipeWriter.Create(_pipeClient);
+        var afterSer = GC.GetAllocatedBytesForCurrentThread();
+        SetupSerializeAllocBytes = afterSer - beforeSer;
+
+        // === DESERIALIZE-side setup measurement ===
+        // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain
+        // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the
+        // kernel pipe into input; consumer drives Deserialize<T>(input) per iter on signal.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+
+        _pipeReader = PipeReader.Create(_pipeServer);
+        _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
+        _cts = new CancellationTokenSource();
+
+        // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for
+        // the full benchmark lifetime — its overhead is amortised across all messages.
+        _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
+
+        // Consumer task: per-iter Deserialize<T>(input) loop. Started here once; signaled per-iter via
+        // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed
+        // while THIS task simultaneously runs Deserialize<T>, both consuming/producing through the
+        // sliding-window buffer pipelined by the drain task.
+        _consumerTask = Task.Run(ConsumeLoop);
+
+        var afterDes = GC.GetAllocatedBytesForCurrentThread();
+        SetupDeserializeAllocBytes = afterDes - beforeDes;
+    }
+
+    // BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
+    // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough
+    // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser)
+    // happens.
+    private void ConsumeLoop()
+    {
+        var ct = _cts.Token;
+        try
+        {
+            while (true)
+            {
+                _consumeRequest.Wait(ct);
+                if (ct.IsCancellationRequested) return; // Dispose() also Sets the request to wake us: exit instead of deserializing
+                _consumeRequest.Reset();
+
+                try
+                {
+                    var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
+                    if (_captureResult) _lastResult = result;
+                }
+                catch
+                {
+                    // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
+                    // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
+                }
+                finally
+                {
+                    _consumeDone.Set();
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Cooperative cancel — Dispose path. Swallow.
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // 2-task streaming pipeline:
+        // 1. Calling thread signals consumer task to begin Deserialize<T>(input). Consumer immediately
+        //    starts; first read blocks on input's MRES because no bytes flowed yet.
+        // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe →
+        //    drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize<T> consumes bytes
+        //    chunk by chunk. Ser↔Des truly overlap here.
+        // 3. Calling thread waits for _consumeDone (signaling Deserialize<T> returned).
+        _consumeDone.Reset();
+        _consumeRequest.Set();
+
+        AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
+
+        _consumeDone.Wait();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
+        _captureResult = true;
+        try
+        {
+            Serialize();
+            var result = _lastResult as TestOrder;
+            return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+        }
+        finally
+        {
+            _captureResult = false;
+            _lastResult = null;
+        }
+    }
+
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        // Complete writer + dispose pipe lifecycle.
+        try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
+        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+        try { _input.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
new file mode 100644
index 0000000..d6b49ec
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
@@ -0,0 +1,213 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipes;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
+/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
+/// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
+/// avoids the kernel-buffer-full deadlock when <c>bytes.Length &gt; inBufferSize</c>.
+///
+/// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
+/// isolates two cost components on the SAME kernel-pipe transport with the SAME <c>inBufferSize</c>:
+/// <list type="bullet">
+///   <item><description><b>This row vs <see cref="AcBinaryBenchmark"/> (Byte[])</b> — pure kernel-NamedPipe
+///     overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
+///   <item><description><b>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed)</b> — pure
+///     AsyncPipe-framework overhead (chunk header writes + sliding-window <c>Feed</c> + MRES wait inside
+///     <c>AsyncPipeReaderInput</c>) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
+///     raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).</description></item>
+/// </list>
+/// Per-iter <c>byte[]</c> allocation from <c>AcBinarySerializer.Serialize</c> is part of the cost (matches
+/// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated per-iter
+/// on the consumer-task (counted via <c>GC.GetTotalAllocatedBytes</c> in <c>BenchmarkLoop.MeasureAllocationTotal</c>).
+/// </summary>
+internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
+{
+    private readonly TestOrder _order;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly byte[] _serialized; // for SerializedSize reporting + receive-side size known upfront
+
+    // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
+    private readonly NamedPipeServerStream _pipeServer;
+    private readonly NamedPipeClientStream _pipeClient;
+
+    // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
+    // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an
+    // intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes
+    // → Deserialize<T>(bytes) → signal done. Calling thread's Ser↔Write overlaps with this BG Read+Des
+    // through kernel-pipe pipelining.
+    private readonly CancellationTokenSource _cts;
+    private readonly Task _consumerTask;
+    private readonly ManualResetEventSlim _consumeRequest = new(false);
+    private readonly ManualResetEventSlim _consumeDone = new(false);
+    private int _pendingReadSize;
+    private object? _lastResult;            // captured during VerifyRoundTrip; null in benchmark iters
+    private bool _captureResult;            // toggle: when true, ConsumerLoop stores result; otherwise discards
+    private bool _disposed;
+
+    public string Engine => Configuration.EngineAcBinary;
+    public string IoMode => Configuration.IoNamedPipeRaw;
+    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get; }
+    public bool IsRoundTripOnly => true;
+    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
+
+    public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        _order = order;
+        // BufferWriterChunkSize comes from the caller — same source-of-truth contract as
+        // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
+        // raw-vs-chunked comparison runs on identical transport conditions.
+        _options = options;
+        OptionsPreset = optionsPreset;
+
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+
+        var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";
+
+        // === SERIALIZE-side setup measurement ===
+        // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
+        // Stream.Write API directly, matching the no-framing semantics of this benchmark.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+        _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
+            System.IO.Pipes.PipeOptions.Asynchronous,
+            inBufferSize:  _options.BufferWriterChunkSize,
+            outBufferSize: _options.BufferWriterChunkSize);
+        _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
+
+        var serverWait = _pipeServer.WaitForConnectionAsync();
+        _pipeClient.Connect();
+        serverWait.GetAwaiter().GetResult();
+        var afterSer = GC.GetAllocatedBytesForCurrentThread();
+        SetupSerializeAllocBytes = afterSer - beforeSer;
+
+        // === DESERIALIZE-side setup measurement ===
+        // 1× background consumer-task + 2× MRES (request / done) + cancellation source. Matches the
+        // chunked benchmark's deserialize-side setup cost shape.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+        _cts = new CancellationTokenSource();
+        _consumerTask = Task.Run(ConsumerLoop);
+        var afterDes = GC.GetAllocatedBytesForCurrentThread();
+        SetupDeserializeAllocBytes = afterDes - beforeDes;
+    }
+
+    // BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize<T>(bytes), signals
+    // _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens
+    // sequentially after Read completes (raw byte[] needs the full message). A short read (EOF) fails the iteration.
+    private void ConsumerLoop()
+    {
+        var ct = _cts.Token;
+        try
+        {
+            while (true)
+            {
+                _consumeRequest.Wait(ct);
+                if (ct.IsCancellationRequested) return;
+                _consumeRequest.Reset();
+
+                try
+                {
+                    var size = _pendingReadSize;
+                    var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
+                    var totalRead = 0;
+                    while (totalRead < size)
+                    {
+                        var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
+                        if (n == 0) throw new System.IO.EndOfStreamException($"Pipe closed after {totalRead} of {size} bytes."); // never deserialize a truncated, zero-padded buffer
+                        totalRead += n;
+                    }
+                    var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
+                    if (_captureResult) _lastResult = result;
+                }
+                catch
+                {
+                    // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
+                    // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
+                }
+                finally
+                {
+                    _consumeDone.Set();
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Cooperative cancel — Dispose path. Swallow.
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // 2-task streaming pipeline:
+        // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
+        // 2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop
+        //    on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap
+        //    through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes).
+        // 3. Calling thread waits for _consumeDone (consumer task finished Read+Des).
+        //
+        // Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before
+        // starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes →
+        // Des the full graph → signal done. This is the architectural difference between raw and chunked.
+        var bytes = AcBinarySerializer.Serialize(_order, _options);
+
+        _pendingReadSize = bytes.Length;
+        _consumeDone.Reset();
+        _consumeRequest.Set();
+
+        _pipeClient.Write(bytes, 0, bytes.Length);
+        _pipeClient.Flush();
+
+        _consumeDone.Wait();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
+        _captureResult = true;
+        try
+        {
+            Serialize();
+            var result = _lastResult as TestOrder;
+            return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+        }
+        finally
+        {
+            _captureResult = false;
+            _lastResult = null;
+        }
+    }
+
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        // Symmetric teardown — close client first (writer side), then server.
+        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
new file mode 100644
index 0000000..0320e6f
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
@@ -0,0 +1,63 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// MemoryPack benchmark over the <c>IBufferWriter&lt;byte&gt;</c> overload, writing into a single
+/// <see cref="ArrayBufferWriter{T}"/> that is created once in the constructor and rewound before each use.
+/// Counterpart to <see cref="AcBinaryBufferWriterBenchmark"/> for a like-for-like comparison.
+/// </summary>
+internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly TestOrder _order;
+    private readonly MemoryPackSerializerOptions _options;
+    private readonly byte[] _serialized;
+    private readonly ArrayBufferWriter<byte> _bufferWriter;
+
+    public string Engine => Configuration.EngineMemoryPack;
+    public string IoMode => Configuration.IoBufWrReuse;
+    public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes => 0;
+    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+    public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
+    {
+        _options = BenchmarkOptions.GetMemPack();
+        _serialized = MemoryPackSerializer.Serialize(order, _options);
+        _order = order;
+        OptionsPreset = optionsPreset;
+
+        // Setup cost on the serialize side = allocating the reusable writer (sized at 2x the reference payload).
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var allocBaseline = GC.GetAllocatedBytesForCurrentThread();
+        _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
+        SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - allocBaseline;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        _bufferWriter.ResetWrittenCount(); // rewind the shared writer so each call writes from offset 0
+        MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
+    }
+
+    // Reads through the ReadOnlySequence<byte> overload, using the payload captured in the constructor.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        Serialize(); // same rewind + write as a timed iteration
+        var written = new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, MemoryPackSerializer.Deserialize<TestOrder>(written, _options));
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
new file mode 100644
index 0000000..032f21a
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
@@ -0,0 +1,54 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// MemoryPack benchmark over the IBufferWriter overload with a brand-new ArrayBufferWriter per call (no reuse); counterpart to <see cref="AcBinaryFreshBufferWriterBenchmark"/>.
+/// </summary>
+internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly TestOrder _order;
+    private readonly MemoryPackSerializerOptions _options;
+    private readonly byte[] _serialized;
+
+    public string Engine => Configuration.EngineMemoryPack;
+    public string IoMode => Configuration.IoBufWrNew;
+    public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes => 0;
+    public long SetupDeserializeAllocBytes => 0;
+    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+    public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
+    {
+        _options = BenchmarkOptions.GetMemPack();
+        _serialized = MemoryPackSerializer.Serialize(order, _options);
+        _order = order;
+        OptionsPreset = optionsPreset;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        var freshWriter = new ArrayBufferWriter<byte>(); // default capacity; a new writer on every call is the point of this row
+        MemoryPackSerializer.Serialize(freshWriter, _order, _options);
+    }
+
+    // Reads through the ReadOnlySequence<byte> overload, using the payload captured in the constructor.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        var freshWriter = new ArrayBufferWriter<byte>();
+        MemoryPackSerializer.Serialize(freshWriter, _order, _options);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(freshWriter.WrittenMemory), _options));
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs
index 19501ed..77d86d6 100644
--- a/AyCode.Core.Serializers.Console/Program.cs
+++ b/AyCode.Core.Serializers.Console/Program.cs
@@ -576,1014 +576,7 @@ private static List<BenchmarkResult> RunBenchmarksForTestData(TestDataSet testDa
 
     #endregion
 
-    #region Serializer Implementations
-
-    /// <summary>
-    /// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
-    /// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup).
-    /// </summary>
-    /// <summary>
-    /// Benchmarks AcBinary via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
-    /// One-shot scenario — represents code that doesn't reuse a writer across calls.
-    /// Uses BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) instead of the 65535 default —
-    /// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB
-    /// regardless of payload size (heavy over-allocation for small payloads).
-    /// </summary>
-    internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoBufWrNew;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes => 0;
-        public long SetupDeserializeAllocBytes => 0;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B");
-
-        public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
-            // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
-            // size in CreateSerializers only.
-            _options = options;
-            OptionsPreset = optionsPreset;
-            _serialized = AcBinarySerializer.Serialize(order, _options);
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            var abw = new ArrayBufferWriter<byte>();  // FRESH every call — alloc + grow as needed
-            AcBinarySerializer.Serialize(_order, abw, _options);
-        }
-
-        // BufWr semantic: read from a ReadOnlySequence<byte> (the ROS overload), NOT from byte[] —
-        // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which
-        // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path
-        // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing
-        // byte[] Deser under the BufWr label.
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize() => AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-        public bool VerifyRoundTrip()
-        {
-            var abw = new ArrayBufferWriter<byte>();
-            AcBinarySerializer.Serialize(_order, abw, _options);
-            var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
-            return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-        }
-    }
-
-    /// <summary>
-    /// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API
-    /// (<see cref="AcBinarySerializer.SerializeChunked{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
-    /// + <see cref="AsyncPipeReaderInput"/> + <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>).
-    /// Mirrors what a real consumer (e.g. <c>DeserializeFromPipeReaderAsync</c>) does per message:
-    /// long-lived <see cref="AsyncPipeReaderInput"/> with multi-message wire framing on top of a long-lived NamedPipe.
-    ///
-    /// <para><b>Architecture</b>:</para>
-    /// <list type="bullet">
-    ///   <item>Constructor (NOT timed): sets up <see cref="NamedPipeServerStream"/> + <see cref="NamedPipeClientStream"/>,
-    ///     waits for connection, creates one long-lived <see cref="System.IO.Pipelines.PipeWriter"/> /
-    ///     <see cref="System.IO.Pipelines.PipeReader"/> pair, ONE long-lived <see cref="AsyncPipeReaderInput"/>
-    ///     in <c>multiMessage = true</c> mode, ONE drain Task that pumps <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/>
-    ///     forever, and ONE deserialize Task that loops <c>AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts)</c>
-    ///     producing into a <see cref="System.Threading.Channels.Channel{T}"/>.</item>
-    ///   <item>Per-iteration <see cref="Serialize"/> (timed): sender writes via
-    ///     <see cref="AcBinarySerializer.SerializeChunkedFramed{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
-    ///     — multi-message wire (<c>[201][UINT16][data]...[202]</c>); the <c>[202]</c> end marker arms the input's
-    ///     <c>_readPos = -1</c> sentinel, so the next message's first <c>AppendToBuffer</c> recycles the buffer to 0.
-    ///     Then receiver awaits the channel for the deserialized result.</item>
-    ///   <item><see cref="Deserialize"/> is a no-op (full round-trip captured in <see cref="Serialize"/>);
-    ///     <see cref="IsRoundTripOnly"/>=true → Ser ms / SerAlloc oszlopok N/A, RT ms = full round-trip.</item>
-    /// </list>
-    ///
-    /// <para><b>Per-iter overhead</b>: 0 new <c>Task.Run</c>, 0 new <c>AsyncPipeReaderInput</c>, 0 new <c>CancellationTokenSource</c>.
-    /// Pure cost = <c>SerializeChunkedFramed</c> (CPU + chunk-onkénti flush) + kernel write/read syscalls + 1 sync barrier
-    /// (channel) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: <c>_readPos = -1</c>
-    /// sentinel + <c>AppendToBuffer</c> sliding-window cycling).</para>
-    ///
-    /// <para><b>Approximation note</b>: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR
-    /// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound.</para>
-    /// </summary>
-    internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized; // for SerializedSize reporting only
-
-        // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
-        private readonly NamedPipeServerStream _pipeServer;
-        private readonly NamedPipeClientStream _pipeClient;
-        private readonly PipeWriter _pipeWriter;
-        private readonly PipeReader _pipeReader;
-
-        // Long-lived multi-message receive infrastructure (set up once in ctor).
-        private readonly AsyncPipeReaderInput _input;
-        private readonly CancellationTokenSource _cts;
-        private readonly Task _drainTask;       // BG: PipeReader → input.Feed (continuous pump)
-        private readonly Task _consumerTask;    // BG: per-iter Deserialize<T>(input) loop, signaled by calling thread
-        private readonly ManualResetEventSlim _consumeRequest = new(false);
-        private readonly ManualResetEventSlim _consumeDone = new(false);
-        private object? _lastResult;            // captured during VerifyRoundTrip; null in benchmark iters
-        private bool _captureResult;            // toggle: when true, ConsumeLoop stores result; otherwise discards
-        private bool _disposed;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoNamedPipe;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes { get; }
-        public bool IsRoundTripOnly => true;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");
-
-        public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
-            // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
-            // size in CreateSerializers only.
-            _options = options;
-            OptionsPreset = optionsPreset;
-
-            _serialized = AcBinarySerializer.Serialize(order, _options);
-
-            // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the
-            // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize
-            // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header +
-            // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned,
-            // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source.
-            var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}";
-
-            // === SERIALIZE-side setup measurement ===
-            // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeSer = GC.GetAllocatedBytesForCurrentThread();
-
-            _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
-                System.IO.Pipes.PipeOptions.Asynchronous,
-                inBufferSize:  _options.BufferWriterChunkSize,
-                outBufferSize: _options.BufferWriterChunkSize);
-
-            _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
-
-            var serverWait = _pipeServer.WaitForConnectionAsync();
-            _pipeClient.Connect();
-            serverWait.GetAwaiter().GetResult();
-
-            _pipeWriter = PipeWriter.Create(_pipeClient);
-            var afterSer = GC.GetAllocatedBytesForCurrentThread();
-            SetupSerializeAllocBytes = afterSer - beforeSer;
-
-            // === DESERIALIZE-side setup measurement ===
-            // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain
-            // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the
-            // kernel pipe into input; consumer drives Deserialize<T>(input) per iter on signal.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-
-            _pipeReader = PipeReader.Create(_pipeServer);
-            _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
-            _cts = new CancellationTokenSource();
-
-            // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for
-            // the full benchmark lifetime — its overhead is amortised across all messages.
-            _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
-
-            // Consumer task: per-iter Deserialize<T>(input) loop. Started here once; signaled per-iter via
-            // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed
-            // while THIS task simultaneously runs Deserialize<T>, both consuming/producing through the
-            // sliding-window buffer pipelined by the drain task.
-            _consumerTask = Task.Run(ConsumeLoop);
-
-            var afterDes = GC.GetAllocatedBytesForCurrentThread();
-            SetupDeserializeAllocBytes = afterDes - beforeDes;
-        }
-
-        // BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
-        // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough
-        // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser)
-        // happens.
-        private void ConsumeLoop()
-        {
-            var ct = _cts.Token;
-            try
-            {
-                while (true)
-                {
-                    _consumeRequest.Wait(ct);
-                    if (ct.IsCancellationRequested) return;
-                    _consumeRequest.Reset();
-
-                    try
-                    {
-                        var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
-                        if (_captureResult) _lastResult = result;
-                    }
-                    catch
-                    {
-                        // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
-                        // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
-                    }
-                    finally
-                    {
-                        _consumeDone.Set();
-                    }
-                }
-            }
-            catch (OperationCanceledException)
-            {
-                // Cooperative cancel — Dispose path. Swallow.
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            // 2-task streaming pipeline:
-            // 1. Calling thread signals consumer task to begin Deserialize<T>(input). Consumer immediately
-            //    starts; first read blocks on input's MRES because no bytes flowed yet.
-            // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe →
-            //    drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize<T> consumes bytes
-            //    chunk by chunk. Ser↔Des truly overlap here.
-            // 3. Calling thread waits for _consumeDone (signaling Deserialize<T> returned).
-            _consumeDone.Reset();
-            _consumeRequest.Set();
-
-            AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
-
-            _consumeDone.Wait();
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize()
-        {
-            // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
-        }
-
-        public bool VerifyRoundTrip()
-        {
-            // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
-            _captureResult = true;
-            try
-            {
-                Serialize();
-                var result = _lastResult as TestOrder;
-                return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
-            }
-            finally
-            {
-                _captureResult = false;
-                _lastResult = null;
-            }
-        }
-
-        public void Dispose()
-        {
-            if (_disposed) return;
-            _disposed = true;
-
-            // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
-            try { _cts.Cancel(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
-            try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-            try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
-            // Complete writer + dispose pipe lifecycle.
-            try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-            try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
-            try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
-            try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
-            try { _input.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
-            try { _cts.Dispose(); } catch { /* swallow on teardown */ }
-        }
-    }
-
-    /// <summary>
-    /// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
-    /// is an in-memory <see cref="System.IO.Pipelines.Pipe"/> instead of a kernel <c>NamedPipe</c>. The Pipe's
-    /// <c>Writer</c>/<c>Reader</c> pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
-    /// buffer copy, no IRP queueing.
-    ///
-    /// <para><b>Why this benchmark matters</b>: by holding ALL other variables constant (same SerializeChunkedFramed,
-    /// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
-    /// row isolates the <b>kernel-NamedPipe transport overhead</b> from the chunked-streaming framework's pure
-    /// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
-    /// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
-    /// should converge dramatically toward <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>.</para>
-    ///
-    /// <para><b>Real-world relevance</b>: in-memory Pipe is the typical primitive used for cross-thread serializer
-    /// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
-    /// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
-    /// of the NamedPipe benchmark.</para>
-    /// </summary>
-    internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized; // for SerializedSize reporting only
-
-        // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
-        private readonly Pipe _pipe;
-        private readonly PipeWriter _pipeWriter;
-        private readonly PipeReader _pipeReader;
-
-        // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
-        // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize<T>(input).
-        private readonly AsyncPipeReaderInput _input;
-        private readonly CancellationTokenSource _cts;
-        private readonly Task _drainTask;
-        private readonly Task _consumerTask;
-        private readonly ManualResetEventSlim _consumeRequest = new(false);
-        private readonly ManualResetEventSlim _consumeDone = new(false);
-        private object? _lastResult;
-        private bool _captureResult;
-        private bool _disposed;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoInMemoryPipe;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes { get; }
-        public bool IsRoundTripOnly => true;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
-
-        public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            _options = options;
-            OptionsPreset = optionsPreset;
-
-            _serialized = AcBinarySerializer.Serialize(order, _options);
-
-            // === SERIALIZE-side setup measurement ===
-            // In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object
-            // and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter).
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeSer = GC.GetAllocatedBytesForCurrentThread();
-            _pipe = new Pipe();
-            _pipeWriter = _pipe.Writer;
-            var afterSer = GC.GetAllocatedBytesForCurrentThread();
-            SetupSerializeAllocBytes = afterSer - beforeSer;
-
-            // === DESERIALIZE-side setup measurement ===
-            // PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task +
-            // consumer task scaffolding. Identical to the NamedPipe variant on the receive side.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-
-            _pipeReader = _pipe.Reader;
-            _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
-            _cts = new CancellationTokenSource();
-            _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
-            _consumerTask = Task.Run(ConsumeLoop);
-
-            var afterDes = GC.GetAllocatedBytesForCurrentThread();
-            SetupDeserializeAllocBytes = afterDes - beforeDes;
-        }
-
-        // BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
-        // Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol.
-        private void ConsumeLoop()
-        {
-            var ct = _cts.Token;
-            try
-            {
-                while (true)
-                {
-                    _consumeRequest.Wait(ct);
-                    if (ct.IsCancellationRequested) return;
-                    _consumeRequest.Reset();
-
-                    try
-                    {
-                        var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
-                        if (_captureResult) _lastResult = result;
-                    }
-                    catch
-                    {
-                        // Swallow — see ConsumeLoop in NamedPipe variant for rationale.
-                    }
-                    finally
-                    {
-                        _consumeDone.Set();
-                    }
-                }
-            }
-            catch (OperationCanceledException)
-            {
-                // Cooperative cancel — Dispose path. Swallow.
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            // Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe
-            // instead of kernel NamedPipe). Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task
-            // reads from PipeReader → input.Feed → consumer Deserialize<T> consumes byte-by-byte.
-            //
-            // Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is
-            // exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await
-            // FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to
-            // PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk
-            // is functionally equivalent to the PipeWriter-overload (both internally route to
-            // SerializeToPipeWriterCore with FlushPolicy.PerChunk).
-            _consumeDone.Reset();
-            _consumeRequest.Set();
-
-            AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);
-
-            _consumeDone.Wait();
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize()
-        {
-            // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
-        }
-
-        public bool VerifyRoundTrip()
-        {
-            _captureResult = true;
-            try
-            {
-                Serialize();
-                var result = _lastResult as TestOrder;
-                return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
-            }
-            finally
-            {
-                _captureResult = false;
-                _lastResult = null;
-            }
-        }
-
-        public void Dispose()
-        {
-            if (_disposed) return;
-            _disposed = true;
-
-            // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
-            try { _cts.Cancel(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
-            try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-            try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
-            // Complete writer + reader (in-memory Pipe — no underlying stream to dispose).
-            try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-            try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
-            try { _input.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
-            try { _cts.Dispose(); } catch { /* swallow on teardown */ }
-        }
-    }
-
-    /// <summary>
-    /// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
-    /// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
-    /// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
-    /// avoids the kernel-buffer-full deadlock when <c>bytes.Length &gt; inBufferSize</c>.
-    ///
-    /// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
-    /// isolates two cost components on the SAME kernel-pipe transport with the SAME <c>inBufferSize</c>:
-    /// <list type="bullet">
-    ///   <item><description><b>This row vs <see cref="AcBinaryBenchmark"/> (Byte[])</b> — pure kernel-NamedPipe
-    ///     overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
-    ///   <item><description><b>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed)</b> — pure
-    ///     AsyncPipe-framework overhead (chunk header writes + sliding-window <c>Feed</c> + MRES wait inside
-    ///     <c>AsyncPipeReaderInput</c>) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
-    ///     raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).</description></item>
-    /// </list>
-    /// Per-iter <c>byte[]</c> allocation from <c>AcBinarySerializer.Serialize</c> is part of the cost (matches
-    /// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated per-iter
-    /// on the consumer-task (counted via <c>GC.GetTotalAllocatedBytes</c> in <c>BenchmarkLoop.MeasureAllocationTotal</c>).
-    /// </summary>
-    internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized; // for SerializedSize reporting + receive-side size known upfront
-
-        // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
-        private readonly NamedPipeServerStream _pipeServer;
-        private readonly NamedPipeClientStream _pipeClient;
-
-        // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
-        // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an
-        // intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes
-        // → Deserialize<T>(bytes) → signal done. Calling thread's Ser↔Write overlaps with this BG Read+Des
-        // through kernel-pipe pipelining.
-        private readonly CancellationTokenSource _cts;
-        private readonly Task _consumerTask;
-        private readonly ManualResetEventSlim _consumeRequest = new(false);
-        private readonly ManualResetEventSlim _consumeDone = new(false);
-        private int _pendingReadSize;
-        private object? _lastResult;            // captured during VerifyRoundTrip; null in benchmark iters
-        private bool _captureResult;            // toggle: when true, ConsumerLoop stores result; otherwise discards
-        private bool _disposed;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoNamedPipeRaw;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes { get; }
-        public bool IsRoundTripOnly => true;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
-
-        public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            // BufferWriterChunkSize comes from the caller — same source-of-truth contract as
-            // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
-            // raw-vs-chunked comparison runs on identical transport conditions.
-            _options = options;
-            OptionsPreset = optionsPreset;
-
-            _serialized = AcBinarySerializer.Serialize(order, _options);
-
-            var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";
-
-            // === SERIALIZE-side setup measurement ===
-            // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
-            // Stream.Write API directly, matching the no-framing semantics of this benchmark.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeSer = GC.GetAllocatedBytesForCurrentThread();
-            _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
-                System.IO.Pipes.PipeOptions.Asynchronous,
-                inBufferSize:  _options.BufferWriterChunkSize,
-                outBufferSize: _options.BufferWriterChunkSize);
-            _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
-
-            var serverWait = _pipeServer.WaitForConnectionAsync();
-            _pipeClient.Connect();
-            serverWait.GetAwaiter().GetResult();
-            var afterSer = GC.GetAllocatedBytesForCurrentThread();
-            SetupSerializeAllocBytes = afterSer - beforeSer;
-
-            // === DESERIALIZE-side setup measurement ===
-            // 1× background consumer-task + 2× MRES (request / done) + cancellation source. Matches the
-            // chunked benchmark's deserialize-side setup cost shape.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-            _cts = new CancellationTokenSource();
-            _consumerTask = Task.Run(ConsumerLoop);
-            var afterDes = GC.GetAllocatedBytesForCurrentThread();
-            SetupDeserializeAllocBytes = afterDes - beforeDes;
-        }
-
-        // BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize<T>(bytes), signals
-        // _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens
-        // sequentially after Read completes (raw byte[] needs the full message to deserialize).
-        private void ConsumerLoop()
-        {
-            var ct = _cts.Token;
-            try
-            {
-                while (true)
-                {
-                    _consumeRequest.Wait(ct);
-                    if (ct.IsCancellationRequested) return;
-                    _consumeRequest.Reset();
-
-                    try
-                    {
-                        var size = _pendingReadSize;
-                        var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
-                        var totalRead = 0;
-                        while (totalRead < size)
-                        {
-                            var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
-                            if (n == 0) break; // pipe closed / EOF — partial read swallowed
-                            totalRead += n;
-                        }
-                        var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
-                        if (_captureResult) _lastResult = result;
-                    }
-                    catch
-                    {
-                        // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
-                        // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
-                    }
-                    finally
-                    {
-                        _consumeDone.Set();
-                    }
-                }
-            }
-            catch (OperationCanceledException)
-            {
-                // Cooperative cancel — Dispose path. Swallow.
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            // 2-task streaming pipeline:
-            // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
-            // 2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop
-            //    on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap
-            //    through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes).
-            // 3. Calling thread waits for _consumeDone (consumer task finished Read+Des).
-            //
-            // Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before
-            // starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes →
-            // Des the full graph → signal done. This is the architectural difference between raw and chunked.
-            var bytes = AcBinarySerializer.Serialize(_order, _options);
-
-            _pendingReadSize = bytes.Length;
-            _consumeDone.Reset();
-            _consumeRequest.Set();
-
-            _pipeClient.Write(bytes, 0, bytes.Length);
-            _pipeClient.Flush();
-
-            _consumeDone.Wait();
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize()
-        {
-            // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
-        }
-
-        public bool VerifyRoundTrip()
-        {
-            // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
-            _captureResult = true;
-            try
-            {
-                Serialize();
-                var result = _lastResult as TestOrder;
-                return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
-            }
-            finally
-            {
-                _captureResult = false;
-                _lastResult = null;
-            }
-        }
-
-        public void Dispose()
-        {
-            if (_disposed) return;
-            _disposed = true;
-
-            // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
-            try { _cts.Cancel(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
-            try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
-            // Symmetric teardown — close client first (writer side), then server.
-            try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
-            try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
-            try { _cts.Dispose(); } catch { /* swallow on teardown */ }
-        }
-    }
-
-    /// <summary>
-    /// Raw <c>byte[]</c> over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
-    /// Channel<see langword="&lt;T&gt;"/>). Calling thread serialises into a fresh <c>byte[]</c>, hands it to a
-    /// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
-    ///
-    /// <para><b>Why this benchmark matters</b>: completes the 2x2 transport × wire-format matrix:</para>
-    /// <list type="bullet">
-    ///   <item><description><b>NamedPipe + Chunked</b> = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
-    ///   <item><description><b>NamedPipe + Raw</b> = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
-    ///   <item><description><b>In-memory Pipe + Chunked</b> = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
-    ///   <item><description><b>In-memory + Raw</b> = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
-    /// </list>
-    /// <para>Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
-    /// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
-    /// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
-    /// overhead on the raw-byte[] side.</para>
-    /// </summary>
-    internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized; // for SerializedSize reporting only
-
-        // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
-        // No transport — just a byte[] slot for handoff between calling thread and consumer task.
-        private readonly CancellationTokenSource _cts;
-        private readonly Task _consumerTask;
-        private readonly ManualResetEventSlim _consumeRequest = new(false);
-        private readonly ManualResetEventSlim _consumeDone = new(false);
-        private byte[]? _pendingBytes;             // calling thread → consumer task handoff slot
-        private object? _lastResult;               // captured during VerifyRoundTrip; null in benchmark iters
-        private bool _captureResult;
-        private bool _disposed;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoInMemoryRaw;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes { get; }
-        public bool IsRoundTripOnly => true;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
-
-        public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            _options = options;
-            OptionsPreset = optionsPreset;
-
-            _serialized = AcBinarySerializer.Serialize(order, _options);
-
-            // === SERIALIZE-side setup measurement ===
-            // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
-            SetupSerializeAllocBytes = 0;
-
-            // === DESERIALIZE-side setup measurement ===
-            // 1× background consumer-task + 2× MRES (request / done) + cancellation source.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-            _cts = new CancellationTokenSource();
-            _consumerTask = Task.Run(ConsumerLoop);
-            var afterDes = GC.GetAllocatedBytesForCurrentThread();
-            SetupDeserializeAllocBytes = afterDes - beforeDes;
-        }
-
-        // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize<T>(bytes),
-        // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
-        // reference itself (zero-copy by reference).
-        private void ConsumerLoop()
-        {
-            var ct = _cts.Token;
-            try
-            {
-                while (true)
-                {
-                    _consumeRequest.Wait(ct);
-                    if (ct.IsCancellationRequested) return;
-                    _consumeRequest.Reset();
-
-                    try
-                    {
-                        var bytes = _pendingBytes;
-                        if (bytes != null)
-                        {
-                            var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
-                            if (_captureResult) _lastResult = result;
-                        }
-                    }
-                    catch
-                    {
-                        // Swallow — see ConsumerLoop in NamedPipe variant for rationale.
-                    }
-                    finally
-                    {
-                        _consumeDone.Set();
-                    }
-                }
-            }
-            catch (OperationCanceledException)
-            {
-                // Cooperative cancel — Dispose path. Swallow.
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            // 2-task in-memory pipeline:
-            // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
-            // 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
-            //    picks up the reference (zero-copy) and runs Deserialize<T>(bytes).
-            // 3. Calling thread waits for _consumeDone (consumer task finished Des).
-            //
-            // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
-            // are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
-            // signalling and waiting while consumer thread takes the byte[]).
-            var bytes = AcBinarySerializer.Serialize(_order, _options);
-
-            _pendingBytes = bytes;
-            _consumeDone.Reset();
-            _consumeRequest.Set();
-
-            _consumeDone.Wait();
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize()
-        {
-            // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
-        }
-
-        public bool VerifyRoundTrip()
-        {
-            _captureResult = true;
-            try
-            {
-                Serialize();
-                var result = _lastResult as TestOrder;
-                return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
-            }
-            finally
-            {
-                _captureResult = false;
-                _lastResult = null;
-            }
-        }
-
-        public void Dispose()
-        {
-            if (_disposed) return;
-            _disposed = true;
-
-            try { _cts.Cancel(); } catch { /* swallow on teardown */ }
-            try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
-            try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
-            try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
-            try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
-            try { _cts.Dispose(); } catch { /* swallow on teardown */ }
-        }
-    }
-
-    /// <summary>
-    /// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
-    /// Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark.
-    /// </summary>
-    internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
-    {
-        private readonly TestOrder _order;
-        private readonly MemoryPackSerializerOptions _options;
-        private readonly byte[] _serialized;
-
-        public string Engine => Configuration.EngineMemoryPack;
-        public string IoMode => Configuration.IoBufWrNew;
-        public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes => 0;
-        public long SetupDeserializeAllocBytes => 0;
-        public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
-
-        public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
-        {
-            _order = order;
-            OptionsPreset = optionsPreset;
-            _options = BenchmarkOptions.GetMemPack();
-            _serialized = MemoryPackSerializer.Serialize(order, _options);
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            var abw = new ArrayBufferWriter<byte>();
-            MemoryPackSerializer.Serialize(abw, _order, _options);
-        }
-
-        // BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
-        // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-        public bool VerifyRoundTrip()
-        {
-            var abw = new ArrayBufferWriter<byte>();
-            MemoryPackSerializer.Serialize(abw, _order, _options);
-            var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
-            return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-        }
-    }
-
-    internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
-    {
-        private readonly TestOrder _order;
-        private readonly AcBinarySerializerOptions _options;
-        private readonly byte[] _serialized;
-        private readonly ArrayBufferWriter<byte> _bufferWriter;
-
-        public string Engine => Configuration.EngineAcBinary;
-        public string IoMode => Configuration.IoBufWrReuse;
-        public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes => 0;
-        public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options);
-
-        public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-        {
-            _order = order;
-            _options = options;
-            OptionsPreset = optionsPreset;
-            _serialized = AcBinarySerializer.Serialize(order, options);
-
-            // Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the
-            // helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no
-            // dedicated setup allocation, hence SetupDeserializeAllocBytes = 0.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
-            _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
-            var afterSetup = GC.GetAllocatedBytesForCurrentThread();
-            SetupSerializeAllocBytes = afterSetup - beforeSetup;
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            _bufferWriter.ResetWrittenCount();  // reuse — no alloc, no zeroing
-            AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
-        }
-
-        // BufWr semantic: read from a ReadOnlySequence<byte> (the ROS overload), NOT from byte[] —
-        // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which
-        // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path
-        // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing
-        // byte[] Deser under the BufWr label.
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize() => AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-        public bool VerifyRoundTrip()
-        {
-            _bufferWriter.ResetWrittenCount();
-            AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
-
-            var roundTripped = AcBinaryDeserializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
-            return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-        }
-    }
-
-    /// <summary>
-    /// Benchmarks MemoryPack via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
-    /// Apples-to-apples counterpart to AcBinaryBufferWriterBenchmark — MemoryPack's IBufferWriter is the path it's designed for.
-    /// </summary>
-    internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
-    {
-        private readonly TestOrder _order;
-        private readonly MemoryPackSerializerOptions _options;
-        private readonly byte[] _serialized;
-        private readonly ArrayBufferWriter<byte> _bufferWriter;
-
-        public string Engine => Configuration.EngineMemoryPack;
-        public string IoMode => Configuration.IoBufWrReuse;
-        public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
-        public string OptionsPreset { get; }
-        public int SerializedSize => _serialized.Length;
-        public long SetupSerializeAllocBytes { get; }
-        public long SetupDeserializeAllocBytes => 0;
-        public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
-
-        public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
-        {
-            _order = order;
-            OptionsPreset = optionsPreset;
-            _options = BenchmarkOptions.GetMemPack();
-            _serialized = MemoryPackSerializer.Serialize(order, _options);
-
-            // Serialize-side setup only — see AcBinaryBufferWriterBenchmark for the full rationale.
-            GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-            var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
-            _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
-            var afterSetup = GC.GetAllocatedBytesForCurrentThread();
-            SetupSerializeAllocBytes = afterSetup - beforeSetup;
-        }
-
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Serialize()
-        {
-            _bufferWriter.ResetWrittenCount();
-            MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
-        }
-
-        // BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
-        // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-        public bool VerifyRoundTrip()
-        {
-            _bufferWriter.ResetWrittenCount();
-            MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
-            var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
-            return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-        }
-    }
-
-#endregion
+    // Serializer implementations (ISerializerBenchmark + 12 concrete benchmark classes) → Benchmarks/
 
     // Results / output formatters → Output.cs
     // BenchmarkResult DTO → BenchmarkResult.cs