diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs
new file mode 100644
index 0000000..aacafe1
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs
@@ -0,0 +1,68 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks AcBinary via the IBufferWriter&lt;byte&gt; overload with a pre-allocated, reused ArrayBufferWriter&lt;byte&gt;.
+/// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup).
+/// </summary>
+internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // reference payload: SerializedSize source and Deserialize() input
+ private readonly ArrayBufferWriter<byte> _bufferWriter; // single writer instance reused by every Serialize() call
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoBufWrReuse;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; } // bytes allocated while constructing _bufferWriter (measured in the ctor)
+ public long SetupDeserializeAllocBytes => 0;
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options);
+
+ public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ _options = options;
+ OptionsPreset = optionsPreset;
+ _serialized = AcBinarySerializer.Serialize(order, options);
+
+ // Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the
+ // helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no
+ // dedicated setup allocation, hence SetupDeserializeAllocBytes = 0.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
+ _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2); // 2x the reference payload size as initial capacity
+ var afterSetup = GC.GetAllocatedBytesForCurrentThread();
+ SetupSerializeAllocBytes = afterSetup - beforeSetup;
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ _bufferWriter.ResetWrittenCount(); // reuse — no alloc, no zeroing
+ AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
+ }
+
+ // BufWr semantic: feed the deserializer a ReadOnlySequence<byte> (the ROS overload) instead of calling the
+ // byte[] overload directly. Per the original author's note, a single-segment array-backed sequence takes the
+ // fast path at AcBinaryDeserializer.cs:298, which redirects to the byte[] overload — so this row measures the
+ // ROS entry point (the production-realistic surface for SignalR / Pipe consumers) plus that redirect.
+ // NOTE(review): if Deserialize is generic-only, an explicit type argument appears to be missing below — confirm.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(_serialized), _options);
+
+ public bool VerifyRoundTrip()
+ {
+ _bufferWriter.ResetWrittenCount();
+ AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
+
+ var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs
new file mode 100644
index 0000000..2e43f42
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs
@@ -0,0 +1,63 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks AcBinary via the IBufferWriter&lt;byte&gt; overload, allocating a FRESH ArrayBufferWriter&lt;byte&gt; on EVERY call.
+/// One-shot scenario — represents code that doesn't reuse a writer across calls.
+/// Intended to run with BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) supplied by the caller instead of the 65535 default —
+/// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB
+/// regardless of payload size (heavy over-allocation for small payloads).
+/// </summary>
+internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // reference payload: SerializedSize source and Deserialize() input
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoBufWrNew;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes => 0; // nothing is pre-allocated: the writer is created inside Serialize()
+ public long SetupDeserializeAllocBytes => 0;
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B");
+
+ public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
+ // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
+ // size in CreateSerializers only.
+ _options = options;
+ OptionsPreset = optionsPreset;
+ _serialized = AcBinarySerializer.Serialize(order, _options);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ var abw = new ArrayBufferWriter<byte>(); // FRESH every call — alloc + grow as needed
+ AcBinarySerializer.Serialize(_order, abw, _options);
+ }
+
+ // BufWr semantic: feed the deserializer a ReadOnlySequence<byte> (the ROS overload) instead of calling the
+ // byte[] overload directly. Per the original author's note, a single-segment array-backed sequence takes the
+ // fast path at AcBinaryDeserializer.cs:298, which redirects to the byte[] overload — so this row measures the
+ // ROS entry point (the production-realistic surface for SignalR / Pipe consumers) plus that redirect.
+ // NOTE(review): if Deserialize is generic-only, an explicit type argument appears to be missing below — confirm.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(_serialized), _options);
+
+ public bool VerifyRoundTrip()
+ {
+ var abw = new ArrayBufferWriter<byte>();
+ AcBinarySerializer.Serialize(_order, abw, _options);
+ var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs
new file mode 100644
index 0000000..1b09377
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs
@@ -0,0 +1,190 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark)
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipelines;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
+/// is an in-memory <see cref="Pipe"/> instead of a kernel NamedPipe. The Pipe's
+/// Writer/Reader pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
+/// buffer copy, no IRP queueing.
+///
+/// Why this benchmark matters: by holding ALL other variables constant (same SerializeChunkedFramed,
+/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
+/// row isolates the kernel-NamedPipe transport overhead from the chunked-streaming framework's pure
+/// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
+/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
+/// should converge dramatically toward the baseline row this sentence originally referenced (type name missing here — TODO restore).
+///
+/// Real-world relevance: in-memory Pipe is the typical primitive used for cross-thread serializer
+/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
+/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
+/// of the NamedPipe benchmark.
+/// </summary>
+internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // for SerializedSize reporting only
+
+ // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
+ private readonly Pipe _pipe;
+ private readonly PipeWriter _pipeWriter;
+ private readonly PipeReader _pipeReader;
+
+ // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
+ // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize(input).
+ private readonly AsyncPipeReaderInput _input;
+ private readonly CancellationTokenSource _cts;
+ private readonly Task _drainTask;
+ private readonly Task _consumerTask;
+ private readonly ManualResetEventSlim _consumeRequest = new(false); // calling thread → consumer: run one Deserialize
+ private readonly ManualResetEventSlim _consumeDone = new(false); // consumer → calling thread: Deserialize returned or threw
+ private object? _lastResult; // written by ConsumeLoop only while _captureResult is true
+ private bool _captureResult; // set by VerifyRoundTrip for the duration of one Serialize() call
+ private bool _disposed; // makes Dispose() idempotent
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoInMemoryPipe;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; }
+ public long SetupDeserializeAllocBytes { get; }
+ public bool IsRoundTripOnly => true; // Serialize() performs the whole round-trip; Deserialize() is a no-op
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
+
+ public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ _options = options;
+ OptionsPreset = optionsPreset;
+
+ _serialized = AcBinarySerializer.Serialize(order, _options);
+
+ // === SERIALIZE-side setup measurement ===
+ // In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object
+ // and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter).
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+ _pipe = new Pipe();
+ _pipeWriter = _pipe.Writer;
+ var afterSer = GC.GetAllocatedBytesForCurrentThread();
+ SetupSerializeAllocBytes = afterSer - beforeSer;
+
+ // === DESERIALIZE-side setup measurement ===
+ // PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task +
+ // consumer task scaffolding. Identical to the NamedPipe variant on the receive side.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+
+ _pipeReader = _pipe.Reader;
+ _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
+ _cts = new CancellationTokenSource();
+ _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
+ _consumerTask = Task.Run(ConsumeLoop);
+
+ var afterDes = GC.GetAllocatedBytesForCurrentThread(); // current-thread counter: work done on the two BG tasks is not included
+ SetupDeserializeAllocBytes = afterDes - beforeDes;
+ }
+
+ // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone.
+ // Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol.
+ private void ConsumeLoop()
+ {
+ var ct = _cts.Token;
+ try
+ {
+ while (true)
+ {
+ _consumeRequest.Wait(ct);
+ if (ct.IsCancellationRequested) return; // Dispose() sets the event after cancelling, so re-check before doing work
+ _consumeRequest.Reset(); // re-arm for the next iteration
+
+ try
+ {
+ var result = AcBinaryDeserializer.Deserialize(_input, _options);
+ if (_captureResult) _lastResult = result;
+ }
+ catch
+ {
+ // Swallow — see ConsumeLoop in NamedPipe variant for rationale.
+ }
+ finally
+ {
+ _consumeDone.Set(); // always release the calling thread, even when Deserialize threw
+ }
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ // Cooperative cancel — Dispose path. Swallow.
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ // Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe
+ // instead of kernel NamedPipe). Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task
+ // reads from PipeReader → input.Feed → consumer Deserialize consumes byte-by-byte.
+ //
+ // Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is
+ // exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await
+ // FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to
+ // PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk is functionally
+ // equivalent to the PipeWriter-overload (both internally route to SerializeToPipeWriterCore with FlushPolicy.PerChunk).
+ // NOTE(review): Coalesced is passed below, so the flush policy differs from the PipeWriter-overload path — confirm intended.
+ _consumeDone.Reset(); // must happen before the request is signaled, otherwise a fast consumer's Set() could be wiped
+ _consumeRequest.Set();
+
+ AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);
+
+ _consumeDone.Wait();
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize()
+ {
+ // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+ }
+
+ public bool VerifyRoundTrip()
+ {
+ _captureResult = true;
+ try
+ {
+ Serialize();
+ var result = _lastResult as TestOrder; // stays null when the consumer's Deserialize threw (exception is swallowed there)
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+ }
+ finally
+ {
+ _captureResult = false;
+ _lastResult = null;
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_disposed) return;
+ _disposed = true;
+
+ // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
+ try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+ try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+ try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+ // Complete writer + reader (in-memory Pipe — no underlying stream to dispose).
+ try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+ try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
+ try { _input.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+ try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs
new file mode 100644
index 0000000..ccde326
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs
@@ -0,0 +1,168 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Raw byte[] over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
+/// Channel). Calling thread serialises into a fresh byte[] (AcBinarySerializer.Serialize), hands it to a
+/// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
+///
+/// Why this benchmark matters: completes the 2x2 transport × wire-format matrix:
+///
+/// - NamedPipe + Chunked = <see cref="AcBinaryNamedPipeBenchmark"/>
+/// - NamedPipe + Raw = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>
+/// - In-memory Pipe + Chunked = <see cref="AcBinaryInMemoryPipeBenchmark"/>
+/// - In-memory + Raw = THIS row — apples-to-apples baseline for the in-memory chunked row
+///
+/// Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
+/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
+/// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
+/// overhead on the raw-byte[] side.
+/// </summary>
+internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // for SerializedSize reporting only
+
+ // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
+ // No transport — just a byte[] slot for handoff between calling thread and consumer task.
+ private readonly CancellationTokenSource _cts;
+ private readonly Task _consumerTask;
+ private readonly ManualResetEventSlim _consumeRequest = new(false); // calling thread → consumer: a payload is ready in _pendingBytes
+ private readonly ManualResetEventSlim _consumeDone = new(false); // consumer → calling thread: Deserialize returned or threw
+ private byte[]? _pendingBytes; // calling thread → consumer task handoff slot
+ private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
+ private bool _captureResult; // set by VerifyRoundTrip for the duration of one Serialize() call
+ private bool _disposed; // makes Dispose() idempotent
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoInMemoryRaw;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; }
+ public long SetupDeserializeAllocBytes { get; }
+ public bool IsRoundTripOnly => true; // Serialize() performs the whole round-trip; Deserialize() is a no-op
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
+
+ public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ _options = options;
+ OptionsPreset = optionsPreset;
+
+ _serialized = AcBinarySerializer.Serialize(order, _options);
+
+ // === SERIALIZE-side setup measurement ===
+ // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
+ SetupSerializeAllocBytes = 0;
+
+ // === DESERIALIZE-side setup measurement ===
+ // 1× background consumer-task + cancellation source (the two MRES are field initializers, created before this window).
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+ _cts = new CancellationTokenSource();
+ _consumerTask = Task.Run(ConsumerLoop);
+ var afterDes = GC.GetAllocatedBytesForCurrentThread();
+ SetupDeserializeAllocBytes = afterDes - beforeDes;
+ }
+
+ // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize(bytes),
+ // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
+ // reference itself (zero-copy by reference).
+ private void ConsumerLoop()
+ {
+ var ct = _cts.Token;
+ try
+ {
+ while (true)
+ {
+ _consumeRequest.Wait(ct);
+ if (ct.IsCancellationRequested) return; // Dispose() sets the event after cancelling, so re-check before doing work
+ _consumeRequest.Reset(); // re-arm for the next iteration
+
+ try
+ {
+ var bytes = _pendingBytes;
+ if (bytes != null)
+ {
+ var result = AcBinaryDeserializer.Deserialize(bytes, _options);
+ if (_captureResult) _lastResult = result;
+ }
+ }
+ catch
+ {
+ // Swallow — see ConsumerLoop in NamedPipe variant for rationale.
+ }
+ finally
+ {
+ _consumeDone.Set(); // always release the calling thread, even when Deserialize threw
+ }
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ // Cooperative cancel — Dispose path. Swallow.
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ // 2-task in-memory pipeline:
+ // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
+ // 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
+ // picks up the reference (zero-copy) and runs Deserialize(bytes).
+ // 3. Calling thread waits for _consumeDone (consumer task finished Des).
+ //
+ // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
+ // are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
+ // signalling and waiting while consumer thread takes the byte[]).
+ var bytes = AcBinarySerializer.Serialize(_order, _options);
+
+ _pendingBytes = bytes; // publish the payload before signalling the consumer
+ _consumeDone.Reset(); // must happen before the request is signaled, otherwise a fast consumer's Set() could be wiped
+ _consumeRequest.Set();
+
+ _consumeDone.Wait();
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize()
+ {
+ // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+ }
+
+ public bool VerifyRoundTrip()
+ {
+ _captureResult = true;
+ try
+ {
+ Serialize();
+ var result = _lastResult as TestOrder; // stays null when the consumer's Deserialize threw (exception is swallowed there)
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+ }
+ finally
+ {
+ _captureResult = false;
+ _lastResult = null;
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_disposed) return;
+ _disposed = true;
+
+ try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+ try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+ try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+ try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs
new file mode 100644
index 0000000..9065cfc
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs
@@ -0,0 +1,237 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark)
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipelines;
+using System.IO.Pipes;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API
+/// (AcBinarySerializer.SerializeChunkedFramed
+/// + AsyncPipeReaderInput + the DrainFromAsync extension).
+/// Mirrors what a real consumer (e.g. DeserializeFromPipeReaderAsync) does per message:
+/// long-lived AsyncPipeReaderInput with multi-message wire framing on top of a long-lived NamedPipe.
+///
+/// Architecture:
+///
+/// - Constructor (NOT timed): sets up NamedPipeServerStream + NamedPipeClientStream,
+/// waits for connection, creates one long-lived PipeWriter /
+/// PipeReader pair, ONE long-lived AsyncPipeReaderInput
+/// in multiMessage = true mode, ONE drain Task that runs DrainFromAsync
+/// until cancelled, and ONE consumer Task that loops AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts),
+/// signaled per iteration through a ManualResetEventSlim request/done pair.
+/// - Per-iteration (timed): sender writes via
+/// AcBinarySerializer.SerializeChunkedFramed
+/// — multi-message wire ([201][UINT16][data]...[202]); the [202] end marker arms the input's
+/// _readPos = -1 sentinel, so the next message's first AppendToBuffer recycles the buffer to 0.
+/// Then the calling thread waits on the consumer's done event until Deserialize has returned.
+/// - Deserialize() is a no-op (full round-trip captured in Serialize());
+/// IsRoundTripOnly=true → Ser ms / SerAlloc columns N/A, RT ms = full round-trip.
+///
+///
+/// Per-iter overhead: 0 new Task.Run, 0 new AsyncPipeReaderInput, 0 new CancellationTokenSource.
+/// Pure cost = SerializeChunkedFramed (CPU + per-chunk flush) + kernel write/read syscalls + 1 sync barrier
+/// (MRES request/done handshake) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: _readPos = -1
+/// sentinel + AppendToBuffer sliding-window cycling).
+///
+/// Approximation note: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR
+/// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound.
+/// </summary>
+internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // for SerializedSize reporting only
+
+ // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
+ private readonly NamedPipeServerStream _pipeServer;
+ private readonly NamedPipeClientStream _pipeClient;
+ private readonly PipeWriter _pipeWriter;
+ private readonly PipeReader _pipeReader;
+
+ // Long-lived multi-message receive infrastructure (set up once in ctor).
+ private readonly AsyncPipeReaderInput _input;
+ private readonly CancellationTokenSource _cts;
+ private readonly Task _drainTask; // BG: PipeReader → input.Feed (continuous pump)
+ private readonly Task _consumerTask; // BG: per-iter Deserialize(input) loop, signaled by calling thread
+ private readonly ManualResetEventSlim _consumeRequest = new(false); // calling thread → consumer: run one Deserialize
+ private readonly ManualResetEventSlim _consumeDone = new(false); // consumer → calling thread: Deserialize returned or threw
+ private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
+ private bool _captureResult; // toggle: when true, ConsumeLoop stores result; otherwise discards
+ private bool _disposed; // makes Dispose() idempotent
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoNamedPipe;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; }
+ public long SetupDeserializeAllocBytes { get; }
+ public bool IsRoundTripOnly => true; // Serialize() performs the whole round-trip; Deserialize() is a no-op
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");
+
+ public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
+ // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
+ // size in CreateSerializers only.
+ _options = options;
+ OptionsPreset = optionsPreset;
+
+ _serialized = AcBinarySerializer.Serialize(order, _options);
+
+ // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the
+ // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize
+ // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header +
+ // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned,
+ // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source.
+ var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}"; // unique per instance so several benchmark objects can coexist
+
+ // === SERIALIZE-side setup measurement ===
+ // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+
+ _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
+ System.IO.Pipes.PipeOptions.Asynchronous,
+ inBufferSize: _options.BufferWriterChunkSize,
+ outBufferSize: _options.BufferWriterChunkSize);
+
+ _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
+
+ var serverWait = _pipeServer.WaitForConnectionAsync(); // start listening before the client's blocking Connect()
+ _pipeClient.Connect();
+ serverWait.GetAwaiter().GetResult();
+
+ _pipeWriter = PipeWriter.Create(_pipeClient);
+ var afterSer = GC.GetAllocatedBytesForCurrentThread();
+ SetupSerializeAllocBytes = afterSer - beforeSer;
+
+ // === DESERIALIZE-side setup measurement ===
+ // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain
+ // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the
+ // kernel pipe into input; consumer drives Deserialize(input) per iter on signal.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+
+ _pipeReader = PipeReader.Create(_pipeServer);
+ _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
+ _cts = new CancellationTokenSource();
+
+ // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for
+ // the full benchmark lifetime — its overhead is amortised across all messages.
+ _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
+
+ // Consumer task: per-iter Deserialize(input) loop. Started here once; signaled per-iter via
+ // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed
+ // while THIS task simultaneously runs Deserialize, both consuming/producing through the
+ // sliding-window buffer pipelined by the drain task.
+ _consumerTask = Task.Run(ConsumeLoop);
+
+ var afterDes = GC.GetAllocatedBytesForCurrentThread(); // current-thread counter: work done on the two BG tasks is not included
+ SetupDeserializeAllocBytes = afterDes - beforeDes;
+ }
+
+ // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone.
+ // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough
+ // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser)
+ // happens.
+ private void ConsumeLoop()
+ {
+ var ct = _cts.Token;
+ try
+ {
+ while (true)
+ {
+ _consumeRequest.Wait(ct);
+ if (ct.IsCancellationRequested) return; // Dispose() sets the event after cancelling, so re-check before doing work
+ _consumeRequest.Reset(); // re-arm for the next iteration
+
+ try
+ {
+ var result = AcBinaryDeserializer.Deserialize(_input, _options);
+ if (_captureResult) _lastResult = result;
+ }
+ catch
+ {
+ // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
+ // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
+ }
+ finally
+ {
+ _consumeDone.Set(); // always release the calling thread, even when Deserialize threw
+ }
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ // Cooperative cancel — Dispose path. Swallow.
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ // 2-task streaming pipeline:
+ // 1. Calling thread signals consumer task to begin Deserialize(input). Consumer immediately
+ // starts; first read blocks on input's MRES because no bytes flowed yet.
+ // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe →
+ // drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize consumes bytes
+ // chunk by chunk. Ser↔Des truly overlap here.
+ // 3. Calling thread waits for _consumeDone (signaling Deserialize returned).
+ _consumeDone.Reset(); // must happen before the request is signaled, otherwise a fast consumer's Set() could be wiped
+ _consumeRequest.Set();
+
+ AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
+
+ _consumeDone.Wait();
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize()
+ {
+ // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+ }
+
+ public bool VerifyRoundTrip()
+ {
+ // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
+ _captureResult = true;
+ try
+ {
+ Serialize();
+ var result = _lastResult as TestOrder; // stays null when the consumer's Deserialize threw (exception is swallowed there)
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+ }
+ finally
+ {
+ _captureResult = false;
+ _lastResult = null;
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_disposed) return;
+ _disposed = true;
+
+ // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
+ try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+ try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+ try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+ // Complete writer + dispose pipe lifecycle.
+ try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+ try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
+ try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+ try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+ try { _input.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+ try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
new file mode 100644
index 0000000..d6b49ec
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
@@ -0,0 +1,213 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipes;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Raw byte[] over a long-lived NamedPipe — NO chunk-framing, NO AsyncPipeReaderInput,
+/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
+/// reads and deserialises. Two-task pattern enables Write↔Read overlap (kernel-pipe-pipelined) AND
+/// avoids the kernel-buffer-full deadlock when bytes.Length > inBufferSize.
+///
+/// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
+/// isolates two cost components on the SAME kernel-pipe transport with the SAME inBufferSize:
+///
+/// - This row vs <see cref="AcBinaryBenchmark"/> (Byte[]) — pure kernel-NamedPipe
+/// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).
+/// - This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed) — pure
+/// AsyncPipe-framework overhead (chunk header writes + sliding-window Feed + MRES wait inside
+/// AsyncPipeReaderInput) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
+/// raw lacks — raw can only overlap Write↔Read, with Des sequential after Read completes).
+///
+/// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches
+/// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated per-iter
+/// on the consumer-task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal).
+/// </summary>
+internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
+{
+ private readonly TestOrder _order;
+ private readonly AcBinarySerializerOptions _options;
+ private readonly byte[] _serialized; // reference payload built once in the ctor; in this class it is only read by SerializedSize
+
+ // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
+ private readonly NamedPipeServerStream _pipeServer;
+ private readonly NamedPipeClientStream _pipeClient;
+
+ // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
+ // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an
+ // intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes
+ // → Deserialize(bytes) → signal done. The calling thread's Write overlaps with the BG Read through
+ // kernel-pipe pipelining; Ser finishes before the consumer is signaled and Des starts after Read completes.
+ private readonly CancellationTokenSource _cts;
+ private readonly Task _consumerTask;
+ private readonly ManualResetEventSlim _consumeRequest = new(false); // set by Serialize()/Dispose() to wake ConsumerLoop
+ private readonly ManualResetEventSlim _consumeDone = new(false); // set by ConsumerLoop when one Read+Des pass has ended
+ private int _pendingReadSize; // byte count the consumer must read for the current iteration; written by Serialize() before it signals
+ private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
+ private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards
+ private bool _disposed;
+
+ public string Engine => Configuration.EngineAcBinary;
+ public string IoMode => Configuration.IoNamedPipeRaw;
+ public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; }
+ public long SetupDeserializeAllocBytes { get; }
+ public bool IsRoundTripOnly => true; // Serialize() performs the whole round-trip; Deserialize() is a no-op
+ public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
+
+ public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+ {
+ _order = order;
+ // BufferWriterChunkSize comes from the caller — same source-of-truth contract as
+ // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
+ // raw-vs-chunked comparison runs on identical transport conditions.
+ _options = options;
+ OptionsPreset = optionsPreset;
+
+ _serialized = AcBinarySerializer.Serialize(order, _options);
+
+ var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}"; // fresh GUID per instance, so each benchmark object gets its own pipe name
+
+ // === SERIALIZE-side setup measurement ===
+ // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
+ // Stream.Write API directly, matching the no-framing semantics of this benchmark.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+ _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
+ System.IO.Pipes.PipeOptions.Asynchronous,
+ inBufferSize: _options.BufferWriterChunkSize,
+ outBufferSize: _options.BufferWriterChunkSize);
+ _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
+
+ var serverWait = _pipeServer.WaitForConnectionAsync(); // start accepting before the blocking client Connect() below
+ _pipeClient.Connect();
+ serverWait.GetAwaiter().GetResult();
+ var afterSer = GC.GetAllocatedBytesForCurrentThread();
+ SetupSerializeAllocBytes = afterSer - beforeSer;
+
+ // === DESERIALIZE-side setup measurement ===
+ // Cancellation source + the Task.Run scaffolding for the 1× background consumer-task, as allocated on THIS thread. The 2× MRES
+ // (request / done) are field initializers that run before this window opens, so they are not part of the delta below.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+ _cts = new CancellationTokenSource();
+ _consumerTask = Task.Run(ConsumerLoop);
+ var afterDes = GC.GetAllocatedBytesForCurrentThread();
+ SetupDeserializeAllocBytes = afterDes - beforeDes;
+ }
+
+ // BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize(bytes), signals
+ // _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens
+ // sequentially after Read completes (raw byte[] needs the full message to deserialize).
+ private void ConsumerLoop()
+ {
+ var ct = _cts.Token;
+ try
+ {
+ while (true)
+ {
+ _consumeRequest.Wait(ct);
+ if (ct.IsCancellationRequested) return; // Dispose() also sets the event, so re-check the token after waking
+ _consumeRequest.Reset(); // re-arm for the next iteration
+
+ try
+ {
+ var size = _pendingReadSize;
+ var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
+ var totalRead = 0;
+ while (totalRead < size)
+ {
+ var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
+ if (n == 0) break; // pipe closed / EOF — NOTE(review): exits with a short read, and the zero-padded buffer is still passed to Deserialize below
+ totalRead += n;
+ }
+ var result = AcBinaryDeserializer.Deserialize(bytes, _options);
+ if (_captureResult) _lastResult = result;
+ }
+ catch
+ {
+ // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
+ // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
+ }
+ finally
+ {
+ _consumeDone.Set(); // always release the caller blocked in Serialize(), on success and on failure
+ }
+ }
+ }
+ catch (OperationCanceledException)
+ {
+ // Cooperative cancel — Dispose path. Swallow.
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ // 2-task streaming pipeline:
+ // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
+ // 2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop
+ // on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap
+ // through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes).
+ // 3. Calling thread waits for _consumeDone (consumer task finished Read+Des).
+ //
+ // Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before
+ // starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes →
+ // Des the full graph → signal done. This is the architectural difference between raw and chunked.
+ var bytes = AcBinarySerializer.Serialize(_order, _options);
+
+ _pendingReadSize = bytes.Length; // published before the Set() below, which is what wakes the consumer
+ _consumeDone.Reset(); // clear the previous iteration's completion signal first
+ _consumeRequest.Set();
+
+ _pipeClient.Write(bytes, 0, bytes.Length);
+ _pipeClient.Flush();
+
+ _consumeDone.Wait(); // returns once ConsumerLoop's finally block has signaled
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize()
+ {
+ // Intentionally empty: the per-iteration round-trip is already captured by Serialize(). See the IsRoundTripOnly contract.
+ }
+
+ public bool VerifyRoundTrip()
+ {
+ // Reuse the benchmark's own Serialize() path, but with capture switched on so ConsumerLoop stores the deserialized graph for comparison.
+ _captureResult = true;
+ try
+ {
+ Serialize();
+ var result = _lastResult as TestOrder; // null when the consumer stored nothing (e.g. it swallowed an exception) or the object is not a TestOrder
+ return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+ }
+ finally
+ {
+ _captureResult = false; // back to discarding results for the timed iterations
+ _lastResult = null; // drop the reference so this instance does not keep the captured graph alive
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_disposed) return; // idempotent: second and later calls do nothing
+ _disposed = true;
+
+ // Cancel the consumer task → a parked ConsumerLoop leaves its Wait via OperationCanceledException. The pipe Read call takes no token, so a consumer inside it only gets the 2 s wait below.
+ try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+ try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+ // Symmetric teardown — close client first (writer side), then server.
+ try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+ try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+ try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+ try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
new file mode 100644
index 0000000..0320e6f
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
@@ -0,0 +1,63 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks MemoryPack via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
+/// Apples-to-apples counterpart to <see cref="AcBinaryBufferWriterBenchmark"/> — MemoryPack's IBufferWriter
+/// is the path it's designed for.
+/// </summary>
+internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
+{
+ private readonly TestOrder _order;
+ private readonly MemoryPackSerializerOptions _options;
+ private readonly byte[] _serialized; // reference payload: backs SerializedSize and is the input for Deserialize()
+ private readonly ArrayBufferWriter _bufferWriter; // single writer instance reused by every Serialize() call
+
+ public string Engine => Configuration.EngineMemoryPack;
+ public string IoMode => Configuration.IoBufWrReuse;
+ public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes { get; }
+ public long SetupDeserializeAllocBytes => 0; // Deserialize() reads the prebuilt _serialized array; nothing extra is set up for it
+ public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+ public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
+ {
+ _order = order;
+ OptionsPreset = optionsPreset;
+ _options = BenchmarkOptions.GetMemPack();
+ _serialized = MemoryPackSerializer.Serialize(order, _options);
+
+ // Serialize-side setup only — see AcBinaryBufferWriterBenchmark for the full rationale. The delta below brackets just the writer construction.
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+ var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
+ _bufferWriter = new ArrayBufferWriter(_serialized.Length * 2); // initial capacity = 2× the reference payload size
+ var afterSetup = GC.GetAllocatedBytesForCurrentThread();
+ SetupSerializeAllocBytes = afterSetup - beforeSetup;
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ _bufferWriter.ResetWrittenCount(); // rewind the reused writer so this call starts writing at offset 0
+ MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
+ }
+
+ // BufWr semantic: read from a ReadOnlySequence overload (apples-to-apples with AcBinary's
+ // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize() => MemoryPackSerializer.Deserialize(new ReadOnlySequence(_serialized), _options);
+
+ public bool VerifyRoundTrip()
+ {
+ _bufferWriter.ResetWrittenCount(); // same reset + serialize sequence as Serialize()
+ MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
+ var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); // reads back what was just written, not _serialized
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
new file mode 100644
index 0000000..032f21a
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
@@ -0,0 +1,54 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
+/// Apples-to-apples counterpart to <see cref="AcBinaryFreshBufferWriterBenchmark"/>.
+/// </summary>
+internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
+{
+ private readonly TestOrder _order;
+ private readonly MemoryPackSerializerOptions _options;
+ private readonly byte[] _serialized; // reference payload: backs SerializedSize and is the input for Deserialize()
+
+ public string Engine => Configuration.EngineMemoryPack;
+ public string IoMode => Configuration.IoBufWrNew;
+ public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+ public string OptionsPreset { get; }
+ public int SerializedSize => _serialized.Length;
+ public long SetupSerializeAllocBytes => 0; // no long-lived writer is built in the ctor; Serialize() allocates its own
+ public long SetupDeserializeAllocBytes => 0; // Deserialize() reads the prebuilt _serialized array; nothing extra is set up for it
+ public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+ public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
+ {
+ _order = order;
+ OptionsPreset = optionsPreset;
+ _options = BenchmarkOptions.GetMemPack();
+ _serialized = MemoryPackSerializer.Serialize(order, _options);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Serialize()
+ {
+ var abw = new ArrayBufferWriter(); // fresh writer on every call, so its allocation and growth happen inside this method
+ MemoryPackSerializer.Serialize(abw, _order, _options);
+ }
+
+ // BufWr semantic: read from a ReadOnlySequence overload (apples-to-apples with AcBinary's
+ // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ public void Deserialize() => MemoryPackSerializer.Deserialize(new ReadOnlySequence(_serialized), _options);
+
+ public bool VerifyRoundTrip()
+ {
+ var abw = new ArrayBufferWriter(); // same fresh-writer pattern as Serialize()
+ MemoryPackSerializer.Serialize(abw, _order, _options);
+ var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); // reads back what was just written, not _serialized
+ return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+ }
+}
diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs
index 19501ed..77d86d6 100644
--- a/AyCode.Core.Serializers.Console/Program.cs
+++ b/AyCode.Core.Serializers.Console/Program.cs
@@ -576,1014 +576,7 @@ private static List RunBenchmarksForTestData(TestDataSet testDa
#endregion
- #region Serializer Implementations
-
- ///
- /// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
- /// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup).
- ///
- ///
- /// Benchmarks AcBinary via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
- /// One-shot scenario — represents code that doesn't reuse a writer across calls.
- /// Uses BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) instead of the 65535 default —
- /// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB
- /// regardless of payload size (heavy over-allocation for small payloads).
- ///
- internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark
- {
- private readonly TestOrder _order;
- private readonly AcBinarySerializerOptions _options;
- private readonly byte[] _serialized;
-
- public string Engine => Configuration.EngineAcBinary;
- public string IoMode => Configuration.IoBufWrNew;
- public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
- public string OptionsPreset { get; }
- public int SerializedSize => _serialized.Length;
- public long SetupSerializeAllocBytes => 0;
- public long SetupDeserializeAllocBytes => 0;
- public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B");
-
- public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
- {
- _order = order;
- // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
- // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
- // size in CreateSerializers only.
- _options = options;
- OptionsPreset = optionsPreset;
- _serialized = AcBinarySerializer.Serialize(order, _options);
- }
-
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Serialize()
- {
- var abw = new ArrayBufferWriter(); // FRESH every call — alloc + grow as needed
- AcBinarySerializer.Serialize(_order, abw, _options);
- }
-
- // BufWr semantic: read from a ReadOnlySequence (the ROS overload), NOT from byte[] —
- // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which
- // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path
- // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing
- // byte[] Deser under the BufWr label.
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_serialized), _options);
-
- public bool VerifyRoundTrip()
- {
- var abw = new ArrayBufferWriter();
- AcBinarySerializer.Serialize(_order, abw, _options);
- var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options);
- return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
- }
- }
-
- ///
- /// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API
- /// (
- /// + + ).
- /// Mirrors what a real consumer (e.g. DeserializeFromPipeReaderAsync) does per message:
- /// long-lived with multi-message wire framing on top of a long-lived NamedPipe.
- ///
- /// Architecture:
- ///
- /// - Constructor (NOT timed): sets up + ,
- /// waits for connection, creates one long-lived /
- /// pair, ONE long-lived
- /// in multiMessage = true mode, ONE drain Task that pumps
- /// forever, and ONE deserialize Task that loops AcBinaryDeserializer.Deserialize<T>(input, opts)
- /// producing into a .
- /// - Per-iteration (timed): sender writes via
- ///
- /// — multi-message wire ([201][UINT16][data]...[202]); the [202] end marker arms the input's
- /// _readPos = -1 sentinel, so the next message's first AppendToBuffer recycles the buffer to 0.
- /// Then receiver awaits the channel for the deserialized result.
- /// - is a no-op (full round-trip captured in );
- /// =true → Ser ms / SerAlloc oszlopok N/A, RT ms = full round-trip.
- ///
- ///
- /// Per-iter overhead: 0 new Task.Run, 0 new AsyncPipeReaderInput, 0 new CancellationTokenSource.
- /// Pure cost = SerializeChunkedFramed (CPU + chunk-onkénti flush) + kernel write/read syscalls + 1 sync barrier
- /// (channel) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: _readPos = -1
- /// sentinel + AppendToBuffer sliding-window cycling).
- ///
- /// Approximation note: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR
- /// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound.
- ///
- internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable
- {
- private readonly TestOrder _order;
- private readonly AcBinarySerializerOptions _options;
- private readonly byte[] _serialized; // for SerializedSize reporting only
-
- // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
- private readonly NamedPipeServerStream _pipeServer;
- private readonly NamedPipeClientStream _pipeClient;
- private readonly PipeWriter _pipeWriter;
- private readonly PipeReader _pipeReader;
-
- // Long-lived multi-message receive infrastructure (set up once in ctor).
- private readonly AsyncPipeReaderInput _input;
- private readonly CancellationTokenSource _cts;
- private readonly Task _drainTask; // BG: PipeReader → input.Feed (continuous pump)
- private readonly Task _consumerTask; // BG: per-iter Deserialize(input) loop, signaled by calling thread
- private readonly ManualResetEventSlim _consumeRequest = new(false);
- private readonly ManualResetEventSlim _consumeDone = new(false);
- private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
- private bool _captureResult; // toggle: when true, ConsumeLoop stores result; otherwise discards
- private bool _disposed;
-
- public string Engine => Configuration.EngineAcBinary;
- public string IoMode => Configuration.IoNamedPipe;
- public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
- public string OptionsPreset { get; }
- public int SerializedSize => _serialized.Length;
- public long SetupSerializeAllocBytes { get; }
- public long SetupDeserializeAllocBytes { get; }
- public bool IsRoundTripOnly => true;
- public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)");
-
- public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
- {
- _order = order;
- // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers
- // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk
- // size in CreateSerializers only.
- _options = options;
- OptionsPreset = optionsPreset;
-
- _serialized = AcBinarySerializer.Serialize(order, _options);
-
- // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the
- // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize
- // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header +
- // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned,
- // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source.
- var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}";
-
- // === SERIALIZE-side setup measurement ===
- // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper.
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
- var beforeSer = GC.GetAllocatedBytesForCurrentThread();
-
- _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
- System.IO.Pipes.PipeOptions.Asynchronous,
- inBufferSize: _options.BufferWriterChunkSize,
- outBufferSize: _options.BufferWriterChunkSize);
-
- _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
-
- var serverWait = _pipeServer.WaitForConnectionAsync();
- _pipeClient.Connect();
- serverWait.GetAwaiter().GetResult();
-
- _pipeWriter = PipeWriter.Create(_pipeClient);
- var afterSer = GC.GetAllocatedBytesForCurrentThread();
- SetupSerializeAllocBytes = afterSer - beforeSer;
-
- // === DESERIALIZE-side setup measurement ===
- // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain
- // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the
- // kernel pipe into input; consumer drives Deserialize(input) per iter on signal.
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
- var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-
- _pipeReader = PipeReader.Create(_pipeServer);
- _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
- _cts = new CancellationTokenSource();
-
- // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for
- // the full benchmark lifetime — its overhead is amortised across all messages.
- _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
-
- // Consumer task: per-iter Deserialize(input) loop. Started here once; signaled per-iter via
- // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed
- // while THIS task simultaneously runs Deserialize, both consuming/producing through the
- // sliding-window buffer pipelined by the drain task.
- _consumerTask = Task.Run(ConsumeLoop);
-
- var afterDes = GC.GetAllocatedBytesForCurrentThread();
- SetupDeserializeAllocBytes = afterDes - beforeDes;
- }
-
- // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone.
- // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough
- // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser)
- // happens.
- private void ConsumeLoop()
- {
- var ct = _cts.Token;
- try
- {
- while (true)
- {
- _consumeRequest.Wait(ct);
- if (ct.IsCancellationRequested) return;
- _consumeRequest.Reset();
-
- try
- {
- var result = AcBinaryDeserializer.Deserialize(_input, _options);
- if (_captureResult) _lastResult = result;
- }
- catch
- {
- // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
- // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
- }
- finally
- {
- _consumeDone.Set();
- }
- }
- }
- catch (OperationCanceledException)
- {
- // Cooperative cancel — Dispose path. Swallow.
- }
- }
-
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Serialize()
- {
- // 2-task streaming pipeline:
- // 1. Calling thread signals consumer task to begin Deserialize(input). Consumer immediately
- // starts; first read blocks on input's MRES because no bytes flowed yet.
- // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe →
- // drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize consumes bytes
- // chunk by chunk. Ser↔Des truly overlap here.
- // 3. Calling thread waits for _consumeDone (signaling Deserialize returned).
- _consumeDone.Reset();
- _consumeRequest.Set();
-
- AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
-
- _consumeDone.Wait();
- }
-
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Deserialize()
- {
- // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
- }
-
- public bool VerifyRoundTrip()
- {
- // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
- _captureResult = true;
- try
- {
- Serialize();
- var result = _lastResult as TestOrder;
- return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
- }
- finally
- {
- _captureResult = false;
- _lastResult = null;
- }
- }
-
- public void Dispose()
- {
- if (_disposed) return;
- _disposed = true;
-
- // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
- try { _cts.Cancel(); } catch { /* swallow on teardown */ }
- try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
- try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
- try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
- // Complete writer + dispose pipe lifecycle.
- try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
- try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
- try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
- try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
- try { _input.Dispose(); } catch { /* swallow on teardown */ }
- try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
- try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
- try { _cts.Dispose(); } catch { /* swallow on teardown */ }
- }
- }
-
- ///
- /// Same chunked-framed AsyncPipe code path as , but the transport
- /// is an in-memory instead of a kernel NamedPipe. The Pipe's
- /// Writer/Reader pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
- /// buffer copy, no IRP queueing.
- ///
- /// Why this benchmark matters: by holding ALL other variables constant (same SerializeChunkedFramed,
- /// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
- /// row isolates the kernel-NamedPipe transport overhead from the chunked-streaming framework's pure
- /// CPU cost. The expected delta vs : per-chunk overhead drops from
- /// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
- /// should converge dramatically toward .
- ///
- /// Real-world relevance: in-memory Pipe is the typical primitive used for cross-thread serializer
- /// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
- /// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
- /// of the NamedPipe benchmark.
- ///
- internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
- {
- private readonly TestOrder _order;
- private readonly AcBinarySerializerOptions _options;
- private readonly byte[] _serialized; // for SerializedSize reporting only
-
- // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
- private readonly Pipe _pipe;
- private readonly PipeWriter _pipeWriter;
- private readonly PipeReader _pipeReader;
-
- // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
- // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize(input).
- private readonly AsyncPipeReaderInput _input;
- private readonly CancellationTokenSource _cts;
- private readonly Task _drainTask;
- private readonly Task _consumerTask;
- private readonly ManualResetEventSlim _consumeRequest = new(false);
- private readonly ManualResetEventSlim _consumeDone = new(false);
- private object? _lastResult;
- private bool _captureResult;
- private bool _disposed;
-
- public string Engine => Configuration.EngineAcBinary;
- public string IoMode => Configuration.IoInMemoryPipe;
- public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
- public string OptionsPreset { get; }
- public int SerializedSize => _serialized.Length;
- public long SetupSerializeAllocBytes { get; }
- public long SetupDeserializeAllocBytes { get; }
- public bool IsRoundTripOnly => true;
- public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
-
- // Builds the long-lived in-memory pipe and the receive-side tasks once, outside the timed path.
- // Each half of the setup is bracketed by per-thread allocation counters so its cost can be reported.
- public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
- {
-     OptionsPreset = optionsPreset;
-     _options = options;
-     _order = order;
-
-     // Reference payload; SerializedSize reports its length.
-     _serialized = AcBinarySerializer.Serialize(order, _options);
-
-     // --- Serialize side: the managed Pipe object plus a reference to its Writer. ---
-     GC.Collect();
-     GC.WaitForPendingFinalizers();
-     GC.Collect();
-     var serializeBaseline = GC.GetAllocatedBytesForCurrentThread();
-     _pipe = new Pipe();
-     _pipeWriter = _pipe.Writer;
-     SetupSerializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - serializeBaseline;
-
-     // --- Deserialize side: Reader reference, AsyncPipeReaderInput, cancellation source and the
-     //     two background tasks (drain + consumer). ---
-     GC.Collect();
-     GC.WaitForPendingFinalizers();
-     GC.Collect();
-     var deserializeBaseline = GC.GetAllocatedBytesForCurrentThread();
-
-     _pipeReader = _pipe.Reader;
-     _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
-     _cts = new CancellationTokenSource();
-     _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
-     _consumerTask = Task.Run(ConsumeLoop);
-
-     SetupDeserializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - deserializeBaseline;
- }
-
- // Background consumer loop. Blocks until _consumeRequest is signalled, performs exactly one
- // Deserialize(_input) per signal, then raises _consumeDone so the caller can continue.
- // Cancelling _cts ends the loop: the pending Wait throws OperationCanceledException.
- private void ConsumeLoop()
- {
-     var token = _cts.Token;
-     try
-     {
-         for (;;)
-         {
-             _consumeRequest.Wait(token);
-             if (token.IsCancellationRequested)
-             {
-                 return;
-             }
-
-             _consumeRequest.Reset();
-
-             try
-             {
-                 var message = AcBinaryDeserializer.Deserialize(_input, _options);
-                 if (_captureResult)
-                 {
-                     _lastResult = message;
-                 }
-             }
-             catch
-             {
-                 // Deliberately ignored. The finally below still releases the caller, and a failed
-                 // round trip surfaces as a missing _lastResult in VerifyRoundTrip.
-             }
-             finally
-             {
-                 _consumeDone.Set();
-             }
-         }
-     }
-     catch (OperationCanceledException)
-     {
-         // Expected when Dispose cancels the token while the loop is parked.
-     }
- }
-
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Serialize()
- {
- // One timed round trip: arm the consumer, stream the order through the in-memory Pipe, then block
- // until the consumer reports completion. Same 2-task streaming pipeline as the NamedPipe variant —
- // only the transport differs: SerializeChunkedFramed → PipeWriter → drain task reads the PipeReader
- // and feeds _input → consumer task runs Deserialize(_input).
- //
- // The Pipe overload is used (rather than the PipeWriter overload) because it exposes the FlushPolicy
- // parameter. Per the original author's notes: FlushPolicy.PerChunk awaits FlushAsync per chunk
- // (bounded peak memory) and matches the PipeWriter overload; FlushPolicy.Coalesced fires per-chunk
- // flushes without awaiting and lets the pipe coalesce them (up to PauseWriterThreshold, ~64 KB).
- // Swap the argument below to A/B-test the two policies.
- _consumeDone.Reset();
- _consumeRequest.Set();
-
- AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced);
-
- _consumeDone.Wait();
- }
-
- [MethodImpl(MethodImplOptions.NoInlining)]
- public void Deserialize()
- {
- // Intentionally empty: the whole round trip runs inside Serialize() (IsRoundTripOnly is true).
- }
-
- // Runs one real round trip with result capture switched on and compares the deserialized graph
- // with the source order. Capture state is cleared again afterwards, even if Serialize throws.
- public bool VerifyRoundTrip()
- {
-     _captureResult = true;
-     try
-     {
-         Serialize();
-         return _lastResult is TestOrder roundTripped
-             && BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-     }
-     finally
-     {
-         _lastResult = null;
-         _captureResult = false;
-     }
- }
-
- // Idempotent, best-effort teardown: every step runs even if an earlier one throws.
- public void Dispose()
- {
-     if (_disposed)
-     {
-         return;
-     }
-
-     _disposed = true;
-
-     // Stop both background tasks. The extra Set() nudges a consumer parked on _consumeRequest.
-     Quietly(() => _cts.Cancel());
-     Quietly(() => _consumeRequest.Set());
-     Quietly(() => _drainTask.Wait(TimeSpan.FromSeconds(2)));
-     Quietly(() => _consumerTask.Wait(TimeSpan.FromSeconds(2)));
-
-     // Complete writer and reader (in-memory Pipe — there is no underlying stream to dispose).
-     Quietly(() => _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)));
-     Quietly(() => _pipeReader.Complete());
-     Quietly(() => _input.Dispose());
-     Quietly(() => _consumeRequest.Dispose());
-     Quietly(() => _consumeDone.Dispose());
-     Quietly(() => _cts.Dispose());
-
-     // Runs one teardown step and discards any exception it raises.
-     static void Quietly(Action step)
-     {
-         try
-         {
-             step();
-         }
-         catch
-         {
-             /* swallow on teardown */
-         }
-     }
- }
- }
-
- /// <summary>
- /// Raw byte[] over a long-lived NamedPipe — NO chunk-framing, NO AsyncPipeReaderInput,
- /// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
- /// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
- /// avoids the kernel-buffer-full deadlock when bytes.Length > inBufferSize.
- /// <para>
- /// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
- /// isolates two cost components on the SAME kernel-pipe transport with the SAME inBufferSize:
- /// </para>
- /// <list type="bullet">
- /// <item><description>This row vs <see cref="AcBinaryBenchmark"/> (Byte[]) — pure kernel-NamedPipe
- /// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
- /// <item><description>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed) — pure
- /// AsyncPipe-framework overhead (chunk header writes + sliding-window Feed + MRES wait inside
- /// AsyncPipeReaderInput) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
- /// raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).</description></item>
- /// </list>
- /// <para>
- /// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches
- /// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated
- /// per-iter on the consumer task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal).
- /// </para>
- /// </summary>
- internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
- {
-     private readonly TestOrder _order;
-     private readonly AcBinarySerializerOptions _options;
-     private readonly byte[] _serialized; // reference payload; its length backs SerializedSize
-
-     // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
-     private readonly NamedPipeServerStream _pipeServer;
-     private readonly NamedPipeClientStream _pipeClient;
-
-     // Long-lived consumer-task infrastructure (Read + Deserialize on a background thread, signalled
-     // per iteration). Raw byte[] has no intermediate sliding-window buffer, so the single background
-     // task does: read N bytes → Deserialize(bytes) → signal done.
-     private readonly CancellationTokenSource _cts;
-     private readonly Task _consumerTask;
-     private readonly ManualResetEventSlim _consumeRequest = new(false);
-     private readonly ManualResetEventSlim _consumeDone = new(false);
-     private int _pendingReadSize;   // byte count the consumer must read for the current iteration
-     private object? _lastResult;    // captured during VerifyRoundTrip; null in benchmark iterations
-     private bool _captureResult;    // when true, ConsumerLoop stores the result; otherwise discards it
-     private bool _disposed;
-
-     public string Engine => Configuration.EngineAcBinary;
-     public string IoMode => Configuration.IoNamedPipeRaw;
-     public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-     public string OptionsPreset { get; }
-     public int SerializedSize => _serialized.Length;
-     public long SetupSerializeAllocBytes { get; }
-     public long SetupDeserializeAllocBytes { get; }
-     public bool IsRoundTripOnly => true;
-     public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
-
-     // Creates and connects the pipe pair, then starts the background consumer. Both setup halves
-     // are bracketed by per-thread allocation counters. If the connect handshake fails, the pipe
-     // handles are released before the exception propagates (no caller could Dispose them otherwise).
-     public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-     {
-         _order = order;
-         // BufferWriterChunkSize comes from the caller. The kernel pipe buffer sizes below are wired
-         // to it so the raw-vs-chunked comparison runs on identical transport conditions.
-         _options = options;
-         OptionsPreset = optionsPreset;
-
-         _serialized = AcBinarySerializer.Serialize(order, _options);
-
-         var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";
-
-         // === SERIALIZE-side setup measurement ===
-         // Pipe pair (server + client) + connect handshake. No PipeWriter wrapper — the raw
-         // Stream.Write API is used directly, matching the no-framing semantics of this benchmark.
-         GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-         var beforeSer = GC.GetAllocatedBytesForCurrentThread();
-         _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
-             System.IO.Pipes.PipeOptions.Asynchronous,
-             inBufferSize: _options.BufferWriterChunkSize,
-             outBufferSize: _options.BufferWriterChunkSize);
-         _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
-
-         try
-         {
-             var serverWait = _pipeServer.WaitForConnectionAsync();
-             _pipeClient.Connect();
-             serverWait.GetAwaiter().GetResult();
-         }
-         catch
-         {
-             // The instance never reaches the caller, so release the OS handles here.
-             _pipeClient.Dispose();
-             _pipeServer.Dispose();
-             throw;
-         }
-
-         var afterSer = GC.GetAllocatedBytesForCurrentThread();
-         SetupSerializeAllocBytes = afterSer - beforeSer;
-
-         // === DESERIALIZE-side setup measurement ===
-         // Cancellation source + background consumer task.
-         GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-         var beforeDes = GC.GetAllocatedBytesForCurrentThread();
-         _cts = new CancellationTokenSource();
-         _consumerTask = Task.Run(ConsumerLoop);
-         var afterDes = GC.GetAllocatedBytesForCurrentThread();
-         SetupDeserializeAllocBytes = afterDes - beforeDes;
-     }
-
-     // Background consumer: parks on _consumeRequest, reads _pendingReadSize bytes from the pipe,
-     // runs Deserialize(bytes), signals _consumeDone. Deserialization starts only after the full
-     // message has been read.
-     private void ConsumerLoop()
-     {
-         var ct = _cts.Token;
-         try
-         {
-             while (true)
-             {
-                 _consumeRequest.Wait(ct);
-                 if (ct.IsCancellationRequested) return;
-                 _consumeRequest.Reset();
-
-                 try
-                 {
-                     var size = _pendingReadSize;
-                     var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
-                     var totalRead = 0;
-                     while (totalRead < size)
-                     {
-                         var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
-                         if (n == 0)
-                         {
-                             // Pipe closed before the full message arrived. Never hand a truncated,
-                             // zero-padded buffer to the deserializer; the catch below absorbs this.
-                             throw new System.IO.EndOfStreamException($"Pipe closed after {totalRead} of {size} bytes.");
-                         }
-                         totalRead += n;
-                     }
-                     var result = AcBinaryDeserializer.Deserialize(bytes, _options);
-                     if (_captureResult) _lastResult = result;
-                 }
-                 catch
-                 {
-                     // Swallow — the calling thread sees the failure through a missing _lastResult in
-                     // VerifyRoundTrip; during benchmark iterations the loop simply continues.
-                 }
-                 finally
-                 {
-                     _consumeDone.Set();
-                 }
-             }
-         }
-         catch (OperationCanceledException)
-         {
-             // Cooperative cancel — Dispose path. Swallow.
-         }
-     }
-
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Serialize()
-     {
-         // 2-task pipeline:
-         //   1. Calling thread serialises into a fresh byte[] (per-iter alloc).
-         //   2. Calling thread publishes the expected size and signals the consumer, then writes the
-         //      bytes; the consumer's Read loop runs concurrently on the other end of the pipe.
-         //   3. Calling thread waits for _consumeDone (consumer finished Read + Deserialize).
-         //
-         // Unlike the chunked variant, deserialization cannot overlap serialization here: the
-         // consumer needs the full message before it starts. Only Write↔Read overlaps.
-         var bytes = AcBinarySerializer.Serialize(_order, _options);
-
-         _pendingReadSize = bytes.Length;
-         _consumeDone.Reset();
-         _consumeRequest.Set();
-
-         _pipeClient.Write(bytes, 0, bytes.Length);
-         _pipeClient.Flush();
-
-         _consumeDone.Wait();
-     }
-
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Deserialize()
-     {
-         // No-op: the per-iteration round trip runs inside Serialize() (IsRoundTripOnly is true).
-     }
-
-     // Runs the same 2-task path as the benchmark with result capture enabled, then compares the
-     // deserialized graph with the source order. Capture state is always reset afterwards.
-     public bool VerifyRoundTrip()
-     {
-         _captureResult = true;
-         try
-         {
-             Serialize();
-             return _lastResult is TestOrder result && BenchmarkLoop.DeepEqualsViaJson(_order, result);
-         }
-         finally
-         {
-             _captureResult = false;
-             _lastResult = null;
-         }
-     }
-
-     // Idempotent, best-effort teardown; each step is isolated so one failure does not skip the rest.
-     public void Dispose()
-     {
-         if (_disposed) return;
-         _disposed = true;
-
-         // Cancel the consumer task → ConsumerLoop leaves its Wait via OperationCanceledException.
-         try { _cts.Cancel(); } catch { /* swallow on teardown */ }
-         try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
-         try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
-
-         // Close the client (writer side) first, then the server.
-         try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
-         try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
-         try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
-         try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
-         try { _cts.Dispose(); } catch { /* swallow on teardown */ }
-     }
- }
-
- /// <summary>
- /// Raw byte[] over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
- /// Channel). Calling thread serialises into a fresh byte[], hands it to a
- /// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
- /// <para>Why this benchmark matters: completes the 2x2 transport × wire-format matrix:</para>
- /// <list type="bullet">
- /// <item><description>NamedPipe + Chunked = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
- /// <item><description>NamedPipe + Raw = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
- /// <item><description>In-memory Pipe + Chunked = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
- /// <item><description>In-memory + Raw = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
- /// </list>
- /// <para>
- /// Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
- /// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
- /// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
- /// overhead on the raw-byte[] side.
- /// </para>
- /// </summary>
- internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
- {
-     private readonly TestOrder _order;
-     private readonly AcBinarySerializerOptions _options;
-     private readonly byte[] _serialized; // reference payload; only its length is reported
-
-     // Receive side: one long-lived background task that deserialises on demand. There is no
-     // transport — _pendingBytes is the single hand-off slot between caller and consumer.
-     private readonly CancellationTokenSource _cts;
-     private readonly Task _consumerTask;
-     private readonly ManualResetEventSlim _consumeRequest = new(false);
-     private readonly ManualResetEventSlim _consumeDone = new(false);
-     private byte[]? _pendingBytes; // written by the caller, read by the consumer
-     private object? _lastResult;   // only populated while VerifyRoundTrip is running
-     private bool _captureResult;
-     private bool _disposed;
-
-     public string Engine => Configuration.EngineAcBinary;
-     public string IoMode => Configuration.IoInMemoryRaw;
-     public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-     public string OptionsPreset { get; }
-     public int SerializedSize => _serialized.Length;
-     public long SetupSerializeAllocBytes { get; }
-     public long SetupDeserializeAllocBytes { get; }
-     public bool IsRoundTripOnly => true;
-     public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
-
-     // Captures the inputs, produces the reference payload and starts the background consumer.
-     // Only the deserialize-side setup (cancellation source + Task.Run) is allocation-measured.
-     public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-     {
-         OptionsPreset = optionsPreset;
-         _options = options;
-         _order = order;
-
-         _serialized = AcBinarySerializer.Serialize(order, _options);
-
-         // Serialize side needs no infrastructure: each iteration allocates its own byte[].
-         SetupSerializeAllocBytes = 0;
-
-         GC.Collect();
-         GC.WaitForPendingFinalizers();
-         GC.Collect();
-         var baseline = GC.GetAllocatedBytesForCurrentThread();
-         _cts = new CancellationTokenSource();
-         _consumerTask = Task.Run(ConsumerLoop);
-         SetupDeserializeAllocBytes = GC.GetAllocatedBytesForCurrentThread() - baseline;
-     }
-
-     // Background consumer: waits for _consumeRequest, deserialises whatever array sits in
-     // _pendingBytes (the reference is handed over, the bytes are not copied), then raises _consumeDone.
-     private void ConsumerLoop()
-     {
-         var token = _cts.Token;
-         try
-         {
-             for (;;)
-             {
-                 _consumeRequest.Wait(token);
-                 if (token.IsCancellationRequested)
-                 {
-                     return;
-                 }
-
-                 _consumeRequest.Reset();
-
-                 try
-                 {
-                     if (_pendingBytes is { } payload)
-                     {
-                         var message = AcBinaryDeserializer.Deserialize(payload, _options);
-                         if (_captureResult)
-                         {
-                             _lastResult = message;
-                         }
-                     }
-                 }
-                 catch
-                 {
-                     // Deliberately ignored; the finally below still releases the caller, and
-                     // VerifyRoundTrip reports the failure through a missing _lastResult.
-                 }
-                 finally
-                 {
-                     _consumeDone.Set();
-                 }
-             }
-         }
-         catch (OperationCanceledException)
-         {
-             // Expected when Dispose cancels the token while the loop is parked.
-         }
-     }
-
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Serialize()
-     {
-         // One timed round trip:
-         //   1. serialise into a fresh byte[] on the calling thread,
-         //   2. publish the array in _pendingBytes and wake the consumer,
-         //   3. block until the consumer has finished deserialising.
-         // The consumer cannot start before the full array exists, so the two phases do not overlap.
-         _pendingBytes = AcBinarySerializer.Serialize(_order, _options);
-
-         _consumeDone.Reset();
-         _consumeRequest.Set();
-         _consumeDone.Wait();
-     }
-
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Deserialize()
-     {
-         // Intentionally empty: the round trip runs inside Serialize() (IsRoundTripOnly is true).
-     }
-
-     // Runs one real round trip with result capture enabled and compares the deserialized graph
-     // with the source order. Capture state is cleared again afterwards.
-     public bool VerifyRoundTrip()
-     {
-         _captureResult = true;
-         try
-         {
-             Serialize();
-             return _lastResult is TestOrder roundTripped
-                 && BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-         }
-         finally
-         {
-             _lastResult = null;
-             _captureResult = false;
-         }
-     }
-
-     // Idempotent, best-effort teardown: every step runs even if an earlier one throws.
-     public void Dispose()
-     {
-         if (_disposed)
-         {
-             return;
-         }
-
-         _disposed = true;
-
-         // Stop the consumer; the extra Set() nudges it in case it is parked on _consumeRequest.
-         Quietly(() => _cts.Cancel());
-         Quietly(() => _consumeRequest.Set());
-         Quietly(() => _consumerTask.Wait(TimeSpan.FromSeconds(2)));
-
-         Quietly(() => _consumeRequest.Dispose());
-         Quietly(() => _consumeDone.Dispose());
-         Quietly(() => _cts.Dispose());
-
-         // Runs one teardown step and discards any exception it raises.
-         static void Quietly(Action step)
-         {
-             try
-             {
-                 step();
-             }
-             catch
-             {
-                 /* swallow on teardown */
-             }
-         }
-     }
- }
-
- /// <summary>
- /// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
- /// Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark.
- /// </summary>
- internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
- {
-     private readonly TestOrder _order;
-     private readonly MemoryPackSerializerOptions _options;
-     private readonly byte[] _serialized; // reference payload: SerializedSize + input for Deserialize()
-
-     public string Engine => Configuration.EngineMemoryPack;
-     public string IoMode => Configuration.IoBufWrNew;
-     public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
-     public string OptionsPreset { get; }
-     public int SerializedSize => _serialized.Length;
-     public long SetupSerializeAllocBytes => 0;
-     public long SetupDeserializeAllocBytes => 0;
-     public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
-
-     // Captures the order and preset, and pre-serialises the reference payload once.
-     public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
-     {
-         _order = order;
-         OptionsPreset = optionsPreset;
-         _options = BenchmarkOptions.GetMemPack();
-         _serialized = MemoryPackSerializer.Serialize(order, _options);
-     }
-
-     // Timed serialize path: a brand-new writer per call, so the writer allocation is part of the cost.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Serialize()
-     {
-         var writer = new ArrayBufferWriter<byte>();
-         MemoryPackSerializer.Serialize(writer, _order, _options);
-     }
-
-     // Timed deserialize path: reads through the ReadOnlySequence<byte> overload so the input
-     // surface matches the AcBinary buffer-writer benchmarks.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-     // Serialises into a fresh writer, deserialises from the written bytes and compares the graphs.
-     public bool VerifyRoundTrip()
-     {
-         var writer = new ArrayBufferWriter<byte>();
-         MemoryPackSerializer.Serialize(writer, _order, _options);
-         var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(writer.WrittenMemory), _options);
-         return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-     }
- }
-
- /// <summary>
- /// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter:
- /// the writer is created once in the constructor and only its written count is reset per call.
- /// </summary>
- internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark
- {
-     private readonly TestOrder _order;
-     private readonly AcBinarySerializerOptions _options;
-     private readonly byte[] _serialized; // reference payload: SerializedSize + input for Deserialize()
-     private readonly ArrayBufferWriter<byte> _bufferWriter;
-
-     public string Engine => Configuration.EngineAcBinary;
-     public string IoMode => Configuration.IoBufWrReuse;
-     public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
-     public string OptionsPreset { get; }
-     public int SerializedSize => _serialized.Length;
-     public long SetupSerializeAllocBytes { get; }
-     public long SetupDeserializeAllocBytes => 0;
-     public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options);
-
-     // Pre-serialises the reference payload, then allocates the reusable writer (sized at twice the
-     // payload) inside an allocation-measured window.
-     public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
-     {
-         _order = order;
-         _options = options;
-         OptionsPreset = optionsPreset;
-         _serialized = AcBinarySerializer.Serialize(order, options);
-
-         // Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the
-         // helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no
-         // dedicated setup allocation, hence SetupDeserializeAllocBytes = 0.
-         GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-         var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
-         _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
-         var afterSetup = GC.GetAllocatedBytesForCurrentThread();
-         SetupSerializeAllocBytes = afterSetup - beforeSetup;
-     }
-
-     // Timed serialize path: rewinds the shared writer and serialises into it.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Serialize()
-     {
-         _bufferWriter.ResetWrittenCount(); // reuse the existing buffer instead of allocating a new writer
-         AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
-     }
-
-     // Timed deserialize path: the input is wrapped in a ReadOnlySequence<byte> so the sequence
-     // overload is the entry point rather than the byte[] overload. The original author notes that a
-     // single-segment, array-backed sequence takes a fast path inside AcBinaryDeserializer that
-     // redirects to the byte[] overload.
-     // NOTE(review): this call carries no explicit type argument here — confirm the intended
-     // overload; a type argument may have been lost.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(_serialized), _options);
-
-     // Serialises into the shared writer, deserialises from the written bytes and compares the graphs.
-     public bool VerifyRoundTrip()
-     {
-         _bufferWriter.ResetWrittenCount();
-         AcBinarySerializer.Serialize(_order, _bufferWriter, _options);
-
-         var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
-         return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-     }
- }
-
- /// <summary>
- /// Benchmarks MemoryPack via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter.
- /// Apples-to-apples counterpart to AcBinaryBufferWriterBenchmark — MemoryPack's IBufferWriter is the path it's designed for.
- /// </summary>
- internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
- {
-     private readonly TestOrder _order;
-     private readonly MemoryPackSerializerOptions _options;
-     private readonly byte[] _serialized; // reference payload: SerializedSize + input for Deserialize()
-     private readonly ArrayBufferWriter<byte> _bufferWriter;
-
-     public string Engine => Configuration.EngineMemoryPack;
-     public string IoMode => Configuration.IoBufWrReuse;
-     public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
-     public string OptionsPreset { get; }
-     public int SerializedSize => _serialized.Length;
-     public long SetupSerializeAllocBytes { get; }
-     public long SetupDeserializeAllocBytes => 0;
-     public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
-
-     // Pre-serialises the reference payload, then allocates the reusable writer (sized at twice the
-     // payload) inside an allocation-measured window. The deserialize side needs no setup.
-     public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
-     {
-         _order = order;
-         OptionsPreset = optionsPreset;
-         _options = BenchmarkOptions.GetMemPack();
-         _serialized = MemoryPackSerializer.Serialize(order, _options);
-
-         // Only the writer allocation falls inside the measured window.
-         GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
-         var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
-         _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
-         var afterSetup = GC.GetAllocatedBytesForCurrentThread();
-         SetupSerializeAllocBytes = afterSetup - beforeSetup;
-     }
-
-     // Timed serialize path: rewinds the shared writer and serialises into it.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Serialize()
-     {
-         _bufferWriter.ResetWrittenCount();
-         MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
-     }
-
-     // Timed deserialize path: reads through the ReadOnlySequence<byte> overload so the input
-     // surface matches the AcBinary buffer-writer benchmark.
-     [MethodImpl(MethodImplOptions.NoInlining)]
-     public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
-
-     // Serialises into the shared writer, deserialises from the written bytes and compares the graphs.
-     public bool VerifyRoundTrip()
-     {
-         _bufferWriter.ResetWrittenCount();
-         MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
-         var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
-         return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
-     }
- }
-
-#endregion
+ // Serializer implementations (ISerializerBenchmark + 12 concrete benchmark classes) → Benchmarks/
// Results / output formatters → Output.cs
// BenchmarkResult DTO → BenchmarkResult.cs