From c722f775f63cce93d77041eabdcc592512363047 Mon Sep 17 00:00:00 2001 From: Loretta Date: Tue, 12 May 2026 13:52:28 +0200 Subject: [PATCH] Refactor: move serializer benchmarks to separate files Moved all ISerializerBenchmark implementations for AcBinary and MemoryPack from Program.cs into dedicated files under Benchmarks/. Improves code organization and maintainability; no logic changes, only file structure refactor. --- .../AcBinaryBufferWriterBenchmark.cs | 68 ++ .../AcBinaryFreshBufferWriterBenchmark.cs | 63 + .../AcBinaryInMemoryPipeBenchmark.cs | 190 ++++ .../AcBinaryInMemoryRawByteArrayBenchmark.cs | 168 +++ .../Benchmarks/AcBinaryNamedPipeBenchmark.cs | 237 ++++ .../AcBinaryNamedPipeRawByteArrayBenchmark.cs | 213 ++++ .../MemoryPackBufferWriterBenchmark.cs | 63 + .../MemoryPackFreshBufferWriterBenchmark.cs | 54 + AyCode.Core.Serializers.Console/Program.cs | 1009 +---------------- 9 files changed, 1057 insertions(+), 1008 deletions(-) create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs create mode 100644 AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs new file mode 100644 index 0000000..aacafe1 --- /dev/null +++ 
b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryBufferWriterBenchmark.cs @@ -0,0 +1,68 @@ +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.TestModels; +using System.Buffers; +using System.Runtime.CompilerServices; + +namespace AyCode.Core.Serializers.Console.Benchmarks; + +/// +/// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter. +/// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup). +/// +internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark +{ + private readonly TestOrder _order; + private readonly AcBinarySerializerOptions _options; + private readonly byte[] _serialized; + private readonly ArrayBufferWriter _bufferWriter; + + public string Engine => Configuration.EngineAcBinary; + public string IoMode => Configuration.IoBufWrReuse; + public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; + public string OptionsPreset { get; } + public int SerializedSize => _serialized.Length; + public long SetupSerializeAllocBytes { get; } + public long SetupDeserializeAllocBytes => 0; + public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options); + + public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) + { + _order = order; + _options = options; + OptionsPreset = optionsPreset; + _serialized = AcBinarySerializer.Serialize(order, options); + + // Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the + // helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no + // dedicated setup allocation, hence SetupDeserializeAllocBytes = 0. 
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeSetup = GC.GetAllocatedBytesForCurrentThread(); + _bufferWriter = new ArrayBufferWriter(_serialized.Length * 2); + var afterSetup = GC.GetAllocatedBytesForCurrentThread(); + SetupSerializeAllocBytes = afterSetup - beforeSetup; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Serialize() + { + _bufferWriter.ResetWrittenCount(); // reuse — no alloc, no zeroing + AcBinarySerializer.Serialize(_order, _bufferWriter, _options); + } + + // BufWr semantic: read from a ReadOnlySequence (the ROS overload), NOT from byte[] — + // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which + // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path + // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing + // byte[] Deser under the BufWr label. + [MethodImpl(MethodImplOptions.NoInlining)] + public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_serialized), _options); + + public bool VerifyRoundTrip() + { + _bufferWriter.ResetWrittenCount(); + AcBinarySerializer.Serialize(_order, _bufferWriter, _options); + + var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); + } +} diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs new file mode 100644 index 0000000..2e43f42 --- /dev/null +++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryFreshBufferWriterBenchmark.cs @@ -0,0 +1,63 @@ +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.TestModels; +using System.Buffers; +using System.Runtime.CompilerServices; + +namespace AyCode.Core.Serializers.Console.Benchmarks; + +/// +/// Benchmarks 
AcBinary via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call. +/// One-shot scenario — represents code that doesn't reuse a writer across calls. +/// Uses BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) instead of the 65535 default — +/// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB +/// regardless of payload size (heavy over-allocation for small payloads). +/// +internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark +{ + private readonly TestOrder _order; + private readonly AcBinarySerializerOptions _options; + private readonly byte[] _serialized; + + public string Engine => Configuration.EngineAcBinary; + public string IoMode => Configuration.IoBufWrNew; + public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; + public string OptionsPreset { get; } + public int SerializedSize => _serialized.Length; + public long SetupSerializeAllocBytes => 0; + public long SetupDeserializeAllocBytes => 0; + public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B"); + + public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) + { + _order = order; + // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers + // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk + // size in CreateSerializers only. 
+ _options = options; + OptionsPreset = optionsPreset; + _serialized = AcBinarySerializer.Serialize(order, _options); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Serialize() + { + var abw = new ArrayBufferWriter(); // FRESH every call — alloc + grow as needed + AcBinarySerializer.Serialize(_order, abw, _options); + } + + // BufWr semantic: read from a ReadOnlySequence (the ROS overload), NOT from byte[] — + // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which + // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path + // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing + // byte[] Deser under the BufWr label. + [MethodImpl(MethodImplOptions.NoInlining)] + public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_serialized), _options); + + public bool VerifyRoundTrip() + { + var abw = new ArrayBufferWriter(); + AcBinarySerializer.Serialize(_order, abw, _options); + var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); + return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); + } +} diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs new file mode 100644 index 0000000..1b09377 --- /dev/null +++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryPipeBenchmark.cs @@ -0,0 +1,190 @@ +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark) +using AyCode.Core.Tests.TestModels; +using System.IO.Pipelines; +using System.Runtime.CompilerServices; + +namespace AyCode.Core.Serializers.Console.Benchmarks; + +/// +/// Same chunked-framed AsyncPipe code path as , but the transport +/// is an in-memory instead of a kernel NamedPipe. 
The Pipe's +/// Writer/Reader pair is a managed-only zero-copy slab handoff — no syscalls, no kernel +/// buffer copy, no IRP queueing. +/// +/// Why this benchmark matters: by holding ALL other variables constant (same SerializeChunkedFramed, +/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this +/// row isolates the kernel-NamedPipe transport overhead from the chunked-streaming framework's pure +/// CPU cost. The expected delta vs : per-chunk overhead drops from +/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows +/// should converge dramatically toward . +/// +/// Real-world relevance: in-memory Pipe is the typical primitive used for cross-thread serializer +/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals, +/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback +/// of the NamedPipe benchmark. +/// +internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable +{ + private readonly TestOrder _order; + private readonly AcBinarySerializerOptions _options; + private readonly byte[] _serialized; // for SerializedSize reporting only + + // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed). + private readonly Pipe _pipe; + private readonly PipeWriter _pipeWriter; + private readonly PipeReader _pipeReader; + + // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe + // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize(input). 
+ private readonly AsyncPipeReaderInput _input; + private readonly CancellationTokenSource _cts; + private readonly Task _drainTask; + private readonly Task _consumerTask; + private readonly ManualResetEventSlim _consumeRequest = new(false); + private readonly ManualResetEventSlim _consumeDone = new(false); + private object? _lastResult; + private bool _captureResult; + private bool _disposed; + + public string Engine => Configuration.EngineAcBinary; + public string IoMode => Configuration.IoInMemoryPipe; + public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; + public string OptionsPreset { get; } + public int SerializedSize => _serialized.Length; + public long SetupSerializeAllocBytes { get; } + public long SetupDeserializeAllocBytes { get; } + public bool IsRoundTripOnly => true; + public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)"); + + public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) + { + _order = order; + _options = options; + OptionsPreset = optionsPreset; + + _serialized = AcBinarySerializer.Serialize(order, _options); + + // === SERIALIZE-side setup measurement === + // In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object + // and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter). 
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeSer = GC.GetAllocatedBytesForCurrentThread(); + _pipe = new Pipe(); + _pipeWriter = _pipe.Writer; + var afterSer = GC.GetAllocatedBytesForCurrentThread(); + SetupSerializeAllocBytes = afterSer - beforeSer; + + // === DESERIALIZE-side setup measurement === + // PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task + + // consumer task scaffolding. Identical to the NamedPipe variant on the receive side. + GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeDes = GC.GetAllocatedBytesForCurrentThread(); + + _pipeReader = _pipe.Reader; + _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true); + _cts = new CancellationTokenSource(); + _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token)); + _consumerTask = Task.Run(ConsumeLoop); + + var afterDes = GC.GetAllocatedBytesForCurrentThread(); + SetupDeserializeAllocBytes = afterDes - beforeDes; + } + + // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone. + // Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol. + private void ConsumeLoop() + { + var ct = _cts.Token; + try + { + while (true) + { + _consumeRequest.Wait(ct); + if (ct.IsCancellationRequested) return; + _consumeRequest.Reset(); + + try + { + var result = AcBinaryDeserializer.Deserialize(_input, _options); + if (_captureResult) _lastResult = result; + } + catch + { + // Swallow — see ConsumeLoop in NamedPipe variant for rationale. + } + finally + { + _consumeDone.Set(); + } + } + } + catch (OperationCanceledException) + { + // Cooperative cancel — Dispose path. Swallow. + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Serialize() + { + // Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe + // instead of kernel NamedPipe). 
Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task + // reads from PipeReader → input.Feed → consumer Deserialize consumes byte-by-byte. + // + // Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is + // exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await + // FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to + // PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk + // is functionally equivalent to the PipeWriter-overload (both internally route to + // SerializeToPipeWriterCore with FlushPolicy.PerChunk). + _consumeDone.Reset(); + _consumeRequest.Set(); + + AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced); + + _consumeDone.Wait(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Deserialize() + { + // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. + } + + public bool VerifyRoundTrip() + { + _captureResult = true; + try + { + Serialize(); + var result = _lastResult as TestOrder; + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); + } + finally + { + _captureResult = false; + _lastResult = null; + } + } + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + + // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked. + try { _cts.Cancel(); } catch { /* swallow on teardown */ } + try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } + try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } + try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } + + // Complete writer + reader (in-memory Pipe — no underlying stream to dispose). 
+ try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } + try { _pipeReader.Complete(); } catch { /* swallow on teardown */ } + try { _input.Dispose(); } catch { /* swallow on teardown */ } + try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } + try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } + try { _cts.Dispose(); } catch { /* swallow on teardown */ } + } +} diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs new file mode 100644 index 0000000..ccde326 --- /dev/null +++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryInMemoryRawByteArrayBenchmark.cs @@ -0,0 +1,168 @@ +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.TestModels; +using System.Runtime.CompilerServices; + +namespace AyCode.Core.Serializers.Console.Benchmarks; + +/// +/// Raw byte[] over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no +/// Channel). Calling thread serialises into a fresh byte[], hands it to a +/// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done. +/// +/// Why this benchmark matters: completes the 2x2 transport × wire-format matrix: +/// +/// NamedPipe + Chunked = +/// NamedPipe + Raw = +/// In-memory Pipe + Chunked = +/// In-memory + Raw = THIS row — apples-to-apples baseline for the in-memory chunked row +/// +/// Side-by-side with this isolates the chunked-streaming +/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides. +/// Side-by-side with this isolates the kernel-NamedPipe +/// overhead on the raw-byte[] side. 
+/// +internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable +{ + private readonly TestOrder _order; + private readonly AcBinarySerializerOptions _options; + private readonly byte[] _serialized; // for SerializedSize reporting only + + // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter). + // No transport — just a byte[] slot for handoff between calling thread and consumer task. + private readonly CancellationTokenSource _cts; + private readonly Task _consumerTask; + private readonly ManualResetEventSlim _consumeRequest = new(false); + private readonly ManualResetEventSlim _consumeDone = new(false); + private byte[]? _pendingBytes; // calling thread → consumer task handoff slot + private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters + private bool _captureResult; + private bool _disposed; + + public string Engine => Configuration.EngineAcBinary; + public string IoMode => Configuration.IoInMemoryRaw; + public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; + public string OptionsPreset { get; } + public int SerializedSize => _serialized.Length; + public long SetupSerializeAllocBytes { get; } + public long SetupDeserializeAllocBytes { get; } + public bool IsRoundTripOnly => true; + public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)"); + + public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) + { + _order = order; + _options = options; + OptionsPreset = optionsPreset; + + _serialized = AcBinarySerializer.Serialize(order, _options); + + // === SERIALIZE-side setup measurement === + // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize. 
+ SetupSerializeAllocBytes = 0; + + // === DESERIALIZE-side setup measurement === + // 1× background consumer-task + 2× MRES (request / done) + cancellation source. + GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeDes = GC.GetAllocatedBytesForCurrentThread(); + _cts = new CancellationTokenSource(); + _consumerTask = Task.Run(ConsumerLoop); + var afterDes = GC.GetAllocatedBytesForCurrentThread(); + SetupDeserializeAllocBytes = afterDes - beforeDes; + } + + // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize(bytes), + // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[] + // reference itself (zero-copy by reference). + private void ConsumerLoop() + { + var ct = _cts.Token; + try + { + while (true) + { + _consumeRequest.Wait(ct); + if (ct.IsCancellationRequested) return; + _consumeRequest.Reset(); + + try + { + var bytes = _pendingBytes; + if (bytes != null) + { + var result = AcBinaryDeserializer.Deserialize(bytes, _options); + if (_captureResult) _lastResult = result; + } + } + catch + { + // Swallow — see ConsumerLoop in NamedPipe variant for rationale. + } + finally + { + _consumeDone.Set(); + } + } + } + catch (OperationCanceledException) + { + // Cooperative cancel — Dispose path. Swallow. + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Serialize() + { + // 2-task in-memory pipeline: + // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract). + // 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task + // picks up the reference (zero-copy) and runs Deserialize(bytes). + // 3. Calling thread waits for _consumeDone (consumer task finished Des). + // + // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes + // are available. 
Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts + // signalling and waiting while consumer thread takes the byte[]). + var bytes = AcBinarySerializer.Serialize(_order, _options); + + _pendingBytes = bytes; + _consumeDone.Reset(); + _consumeRequest.Set(); + + _consumeDone.Wait(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Deserialize() + { + // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. + } + + public bool VerifyRoundTrip() + { + _captureResult = true; + try + { + Serialize(); + var result = _lastResult as TestOrder; + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); + } + finally + { + _captureResult = false; + _lastResult = null; + } + } + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + + try { _cts.Cancel(); } catch { /* swallow on teardown */ } + try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } + try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } + + try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } + try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } + try { _cts.Dispose(); } catch { /* swallow on teardown */ } + } +} diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs new file mode 100644 index 0000000..9065cfc --- /dev/null +++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeBenchmark.cs @@ -0,0 +1,237 @@ +using AyCode.Core.Serializers.Binaries; +using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark) +using AyCode.Core.Tests.TestModels; +using System.IO.Pipelines; +using System.IO.Pipes; +using System.Runtime.CompilerServices; + +namespace AyCode.Core.Serializers.Console.Benchmarks; + +/// +/// Benchmarks AcBinary over a long-lived 
NamedPipe IPC connection using the AcBinary native streaming API +/// ( +/// + + ). +/// Mirrors what a real consumer (e.g. DeserializeFromPipeReaderAsync) does per message: +/// long-lived with multi-message wire framing on top of a long-lived NamedPipe. +/// +/// Architecture: +/// +/// Constructor (NOT timed): sets up + , +/// waits for connection, creates one long-lived / +/// pair, ONE long-lived +/// in multiMessage = true mode, ONE drain Task that pumps +/// forever, and ONE deserialize Task that loops AcBinaryDeserializer.Deserialize&lt;T&gt;(input, opts) +/// producing into a . +/// Per-iteration (timed): sender writes via +/// +/// — multi-message wire ([201][UINT16][data]...[202]); the [202] end marker arms the input's +/// _readPos = -1 sentinel, so the next message's first AppendToBuffer recycles the buffer to 0. +/// Then receiver awaits the channel for the deserialized result. +/// is a no-op (full round-trip captured in ); +/// =true → Ser ms / SerAlloc columns are N/A, RT ms = full round-trip. +/// +/// +/// Per-iter overhead: 0 new Task.Run, 0 new AsyncPipeReaderInput, 0 new CancellationTokenSource. +/// Pure cost = SerializeChunkedFramed (CPU + per-chunk flush) + kernel write/read syscalls + 1 sync barrier +/// (channel) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: _readPos = -1 +/// sentinel + AppendToBuffer sliding-window cycling). +/// +/// Approximation note: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR +/// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound. +/// +internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable +{ + private readonly TestOrder _order; + private readonly AcBinarySerializerOptions _options; + private readonly byte[] _serialized; // for SerializedSize reporting only + + // Long-lived pipe lifecycle (set up once in ctor — NOT timed). 
+ private readonly NamedPipeServerStream _pipeServer; + private readonly NamedPipeClientStream _pipeClient; + private readonly PipeWriter _pipeWriter; + private readonly PipeReader _pipeReader; + + // Long-lived multi-message receive infrastructure (set up once in ctor). + private readonly AsyncPipeReaderInput _input; + private readonly CancellationTokenSource _cts; + private readonly Task _drainTask; // BG: PipeReader → input.Feed (continuous pump) + private readonly Task _consumerTask; // BG: per-iter Deserialize(input) loop, signaled by calling thread + private readonly ManualResetEventSlim _consumeRequest = new(false); + private readonly ManualResetEventSlim _consumeDone = new(false); + private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters + private bool _captureResult; // toggle: when true, ConsumeLoop stores result; otherwise discards + private bool _disposed; + + public string Engine => Configuration.EngineAcBinary; + public string IoMode => Configuration.IoNamedPipe; + public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; + public string OptionsPreset { get; } + public int SerializedSize => _serialized.Length; + public long SetupSerializeAllocBytes { get; } + public long SetupDeserializeAllocBytes { get; } + public bool IsRoundTripOnly => true; + public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)"); + + public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) + { + _order = order; + // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers + // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk + // size in CreateSerializers only. 
+ _options = options; + OptionsPreset = optionsPreset; + + _serialized = AcBinarySerializer.Serialize(order, _options); + + // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the + // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize + // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header + + // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned, + // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source. + var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}"; + + // === SERIALIZE-side setup measurement === + // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper. + GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeSer = GC.GetAllocatedBytesForCurrentThread(); + + _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte, + System.IO.Pipes.PipeOptions.Asynchronous, + inBufferSize: _options.BufferWriterChunkSize, + outBufferSize: _options.BufferWriterChunkSize); + + _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous); + + var serverWait = _pipeServer.WaitForConnectionAsync(); + _pipeClient.Connect(); + serverWait.GetAwaiter().GetResult(); + + _pipeWriter = PipeWriter.Create(_pipeClient); + var afterSer = GC.GetAllocatedBytesForCurrentThread(); + SetupSerializeAllocBytes = afterSer - beforeSer; + + // === DESERIALIZE-side setup measurement === + // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain + // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the + // kernel pipe into input; consumer drives Deserialize(input) per iter on signal. 
+ GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); + var beforeDes = GC.GetAllocatedBytesForCurrentThread(); + + _pipeReader = PipeReader.Create(_pipeServer); + _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true); + _cts = new CancellationTokenSource(); + + // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for + // the full benchmark lifetime — its overhead is amortised across all messages. + _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token)); + + // Consumer task: per-iter Deserialize(input) loop. Started here once; signaled per-iter via + // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed + // while THIS task simultaneously runs Deserialize, both consuming/producing through the + // sliding-window buffer pipelined by the drain task. + _consumerTask = Task.Run(ConsumeLoop); + + var afterDes = GC.GetAllocatedBytesForCurrentThread(); + SetupDeserializeAllocBytes = afterDes - beforeDes; + } + + // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone. + // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough + // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser) + // happens. + private void ConsumeLoop() + { + var ct = _cts.Token; + try + { + while (true) + { + _consumeRequest.Wait(ct); + if (ct.IsCancellationRequested) return; + _consumeRequest.Reset(); + + try + { + var result = AcBinaryDeserializer.Deserialize(_input, _options); + if (_captureResult) _lastResult = result; + } + catch + { + // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip, + // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose. 
+ } + finally + { + _consumeDone.Set(); + } + } + } + catch (OperationCanceledException) + { + // Cooperative cancel — Dispose path. Swallow. + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Serialize() + { + // 2-task streaming pipeline: + // 1. Calling thread signals consumer task to begin Deserialize(input). Consumer immediately + // starts; first read blocks on input's MRES because no bytes flowed yet. + // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe → + // drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize consumes bytes + // chunk by chunk. Ser↔Des truly overlap here. + // 3. Calling thread waits for _consumeDone (signaling Deserialize returned). + _consumeDone.Reset(); + _consumeRequest.Set(); + + AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options); + + _consumeDone.Wait(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public void Deserialize() + { + // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. + } + + public bool VerifyRoundTrip() + { + // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality. + _captureResult = true; + try + { + Serialize(); + var result = _lastResult as TestOrder; + return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); + } + finally + { + _captureResult = false; + _lastResult = null; + } + } + + public void Dispose() + { + if (_disposed) return; + _disposed = true; + + // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked. 
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        // Complete writer + dispose pipe lifecycle.
+        try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+        try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
+        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+        try { _input.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
new file mode 100644
index 0000000..d6b49ec
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/AcBinaryNamedPipeRawByteArrayBenchmark.cs
@@ -0,0 +1,213 @@
+using AyCode.Core.Serializers.Binaries;
+using AyCode.Core.Tests.TestModels;
+using System.IO.Pipes;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Raw byte[] over a long-lived NamedPipe — NO chunk-framing, NO AsyncPipeReaderInput,
+/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
+/// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
+/// avoids the kernel-buffer-full deadlock when bytes.Length > inBufferSize.
+///
+/// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
+/// isolates two cost components on the SAME kernel-pipe transport with the SAME inBufferSize:
+///
+/// This row vs the Byte[] benchmark row (Byte[]) — pure kernel-NamedPipe
+/// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).
+/// This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed) — pure
+/// AsyncPipe-framework overhead (chunk header writes + sliding-window Feed + MRES wait inside
+/// AsyncPipeReaderInput) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
+/// raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).
+///
+/// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches
+/// AcBinaryBenchmark's API contract); the receive-side scratch buffer is also allocated per-iter
+/// on the consumer-task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal).
+/// </summary>
+internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
+{
+    private readonly TestOrder _order;
+    private readonly AcBinarySerializerOptions _options;
+    private readonly byte[] _serialized; // for SerializedSize reporting + receive-side size known upfront
+
+    // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
+    private readonly NamedPipeServerStream _pipeServer;
+    private readonly NamedPipeClientStream _pipeClient;
+
+    // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
+    // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an
+    // intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes
+    // → Deserialize(bytes) → signal done. Calling thread's Ser↔Write overlaps with this BG Read+Des
+    // through kernel-pipe pipelining.
+    private readonly CancellationTokenSource _cts;
+    private readonly Task _consumerTask;
+    private readonly ManualResetEventSlim _consumeRequest = new(false);
+    private readonly ManualResetEventSlim _consumeDone = new(false);
+    private int _pendingReadSize;
+    private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
+    private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards
+    private bool _disposed;
+
+    public string Engine => Configuration.EngineAcBinary;
+    public string IoMode => Configuration.IoNamedPipeRaw;
+    public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime;
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes { get; }
+    public bool IsRoundTripOnly => true;
+    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");
+
+    public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
+    {
+        _order = order;
+        // BufferWriterChunkSize comes from the caller — same source-of-truth contract as
+        // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the
+        // raw-vs-chunked comparison runs on identical transport conditions.
+        _options = options;
+        OptionsPreset = optionsPreset;
+
+        _serialized = AcBinarySerializer.Serialize(order, _options);
+
+        var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";
+
+        // === SERIALIZE-side setup measurement ===
+        // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
+        // Stream.Write API directly, matching the no-framing semantics of this benchmark.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
+        _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
+            System.IO.Pipes.PipeOptions.Asynchronous,
+            inBufferSize: _options.BufferWriterChunkSize,
+            outBufferSize: _options.BufferWriterChunkSize);
+        _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
+
+        var serverWait = _pipeServer.WaitForConnectionAsync();
+        _pipeClient.Connect();
+        serverWait.GetAwaiter().GetResult();
+        var afterSer = GC.GetAllocatedBytesForCurrentThread();
+        SetupSerializeAllocBytes = afterSer - beforeSer;
+
+        // === DESERIALIZE-side setup measurement ===
+        // 1× background consumer-task + 2× MRES (request / done) + cancellation source. Matches the
+        // chunked benchmark's deserialize-side setup cost shape.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
+        _cts = new CancellationTokenSource();
+        _consumerTask = Task.Run(ConsumerLoop);
+        var afterDes = GC.GetAllocatedBytesForCurrentThread();
+        SetupDeserializeAllocBytes = afterDes - beforeDes;
+    }
+
+    // BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize(bytes), signals
+    // _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens
+    // sequentially after Read completes (raw byte[] needs the full message to deserialize).
+    private void ConsumerLoop()
+    {
+        var ct = _cts.Token;
+        try
+        {
+            while (true)
+            {
+                _consumeRequest.Wait(ct);
+                if (ct.IsCancellationRequested) return;
+                _consumeRequest.Reset();
+
+                try
+                {
+                    var size = _pendingReadSize;
+                    var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
+                    var totalRead = 0;
+                    while (totalRead < size)
+                    {
+                        var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
+                        if (n == 0) throw new System.IO.EndOfStreamException("Pipe closed before the full message was read."); // pipe closed / EOF — fail this iteration rather than deserialise a zero-padded partial buffer
+                        totalRead += n;
+                    }
+                    var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
+                    if (_captureResult) _lastResult = result;
+                }
+                catch
+                {
+                    // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip,
+                    // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose.
+                }
+                finally
+                {
+                    _consumeDone.Set();
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Cooperative cancel — Dispose path. Swallow.
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        // 2-task streaming pipeline:
+        //   1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
+        //   2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop
+        //      on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap
+        //      through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes).
+        //   3. Calling thread waits for _consumeDone (consumer task finished Read+Des).
+        //
+        // Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before
+        // starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes →
+        // Des the full graph → signal done. This is the architectural difference between raw and chunked.
+        var bytes = AcBinarySerializer.Serialize(_order, _options);
+
+        _pendingReadSize = bytes.Length;
+        _consumeDone.Reset();
+        _consumeRequest.Set();
+
+        _pipeClient.Write(bytes, 0, bytes.Length);
+        _pipeClient.Flush();
+
+        _consumeDone.Wait();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize()
+    {
+        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
+    }
+
+    public bool VerifyRoundTrip()
+    {
+        // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality.
+        _captureResult = true;
+        try
+        {
+            Serialize();
+            var result = _lastResult as TestOrder;
+            return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result);
+        }
+        finally
+        {
+            _captureResult = false;
+            _lastResult = null;
+        }
+    }
+
+    public void Dispose()
+    {
+        if (_disposed) return;
+        _disposed = true;
+
+        // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
+        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
+        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
+
+        // Symmetric teardown — close client first (writer side), then server.
+        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }
+        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
+        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
+        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
new file mode 100644
index 0000000..0320e6f
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackBufferWriterBenchmark.cs
@@ -0,0 +1,63 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks MemoryPack via the IBufferWriter&lt;byte&gt; overload with a pre-allocated, reused ArrayBufferWriter&lt;byte&gt;.
+/// Apples-to-apples counterpart to <see cref="AcBinaryBufferWriterBenchmark"/> — MemoryPack's IBufferWriter&lt;byte&gt;
+/// is the path it's designed for.
+/// </summary>
+internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly TestOrder _order;
+    private readonly MemoryPackSerializerOptions _options;
+    private readonly byte[] _serialized;
+    private readonly ArrayBufferWriter<byte> _bufferWriter;
+
+    public string Engine => Configuration.EngineMemoryPack;
+    public string IoMode => Configuration.IoBufWrReuse;
+    public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes { get; }
+    public long SetupDeserializeAllocBytes => 0;
+    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+    public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset)
+    {
+        _order = order;
+        OptionsPreset = optionsPreset;
+        _options = BenchmarkOptions.GetMemPack();
+        _serialized = MemoryPackSerializer.Serialize(order, _options);
+
+        // Serialize-side setup only — see AcBinaryBufferWriterBenchmark for the full rationale.
+        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
+        var beforeSetup = GC.GetAllocatedBytesForCurrentThread();
+        _bufferWriter = new ArrayBufferWriter<byte>(_serialized.Length * 2);
+        var afterSetup = GC.GetAllocatedBytesForCurrentThread();
+        SetupSerializeAllocBytes = afterSetup - beforeSetup;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        _bufferWriter.ResetWrittenCount();
+        MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
+    }
+
+    // BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
+    // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+
+    public bool VerifyRoundTrip()
+    {
+        _bufferWriter.ResetWrittenCount();
+        MemoryPackSerializer.Serialize(_bufferWriter, _order, _options);
+        var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_bufferWriter.WrittenMemory), _options);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
new file mode 100644
index 0000000..032f21a
--- /dev/null
+++ b/AyCode.Core.Serializers.Console/Benchmarks/MemoryPackFreshBufferWriterBenchmark.cs
@@ -0,0 +1,54 @@
+using AyCode.Core.Tests.TestModels;
+using MemoryPack;
+using System.Buffers;
+using System.Runtime.CompilerServices;
+
+namespace AyCode.Core.Serializers.Console.Benchmarks;
+
+/// <summary>
+/// Benchmarks MemoryPack via the IBufferWriter&lt;byte&gt; overload, allocating a FRESH ArrayBufferWriter&lt;byte&gt; on EVERY call.
+/// Apples-to-apples counterpart to <see cref="AcBinaryFreshBufferWriterBenchmark"/>.
+/// </summary>
+internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark
+{
+    private readonly TestOrder _order;
+    private readonly MemoryPackSerializerOptions _options;
+    private readonly byte[] _serialized;
+
+    public string Engine => Configuration.EngineMemoryPack;
+    public string IoMode => Configuration.IoBufWrNew;
+    public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters
+    public string OptionsPreset { get; }
+    public int SerializedSize => _serialized.Length;
+    public long SetupSerializeAllocBytes => 0;
+    public long SetupDeserializeAllocBytes => 0;
+    public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}";
+
+    public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset)
+    {
+        _order = order;
+        OptionsPreset = optionsPreset;
+        _options = BenchmarkOptions.GetMemPack();
+        _serialized = MemoryPackSerializer.Serialize(order, _options);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Serialize()
+    {
+        var abw = new ArrayBufferWriter<byte>();
+        MemoryPackSerializer.Serialize(abw, _order, _options);
+    }
+
+    // BufWr semantic: read from a ReadOnlySequence<byte> overload (apples-to-apples with AcBinary's
+    // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    public void Deserialize() => MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(_serialized), _options);
+
+    public bool VerifyRoundTrip()
+    {
+        var abw = new ArrayBufferWriter<byte>();
+        MemoryPackSerializer.Serialize(abw, _order, _options);
+        var roundTripped = MemoryPackSerializer.Deserialize<TestOrder>(new ReadOnlySequence<byte>(abw.WrittenMemory), _options);
+        return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
+    }
+}
diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs
index 19501ed..77d86d6 100644
--- a/AyCode.Core.Serializers.Console/Program.cs
+++ b/AyCode.Core.Serializers.Console/Program.cs
@@ -576,1014 +576,7 @@ private static List RunBenchmarksForTestData(TestDataSet testDa #endregion - #region Serializer Implementations - - /// - /// Benchmarks AcBinary via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter. - /// Realistic IBufferWriter usage pattern: caller owns + reuses the writer (zero alloc per call after warmup). - /// - /// - /// Benchmarks AcBinary via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call. - /// One-shot scenario — represents code that doesn't reuse a writer across calls.
- /// Uses BufferWriterChunkSize=4096 (production-realistic, SignalR-aligned) instead of the 65535 default — - /// otherwise AcBinary would request 64KB upfront via GetSpan(), forcing the fresh ABW to allocate 64KB - /// regardless of payload size (heavy over-allocation for small payloads). - /// - internal sealed class AcBinaryFreshBufferWriterBenchmark : ISerializerBenchmark - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoBufWrNew; - public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes => 0; - public long SetupDeserializeAllocBytes => 0; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B"); - - public AcBinaryFreshBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers - // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk - // size in CreateSerializers only. - _options = options; - OptionsPreset = optionsPreset; - _serialized = AcBinarySerializer.Serialize(order, _options); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - var abw = new ArrayBufferWriter(); // FRESH every call — alloc + grow as needed - AcBinarySerializer.Serialize(_order, abw, _options); - } - - // BufWr semantic: read from a ReadOnlySequence (the ROS overload), NOT from byte[] — - // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which - // redirects to the byte[] overload. 
This means the bench actually exercises the ROS-input path - // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing - // byte[] Deser under the BufWr label. - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_serialized), _options); - - public bool VerifyRoundTrip() - { - var abw = new ArrayBufferWriter(); - AcBinarySerializer.Serialize(_order, abw, _options); - var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); - return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); - } - } - - /// - /// Benchmarks AcBinary over a long-lived NamedPipe IPC connection using the AcBinary native streaming API - /// ( - /// + + ). - /// Mirrors what a real consumer (e.g. DeserializeFromPipeReaderAsync) does per message: - /// long-lived with multi-message wire framing on top of a long-lived NamedPipe. - /// - /// Architecture: - /// - /// Constructor (NOT timed): sets up + , - /// waits for connection, creates one long-lived / - /// pair, ONE long-lived - /// in multiMessage = true mode, ONE drain Task that pumps - /// forever, and ONE deserialize Task that loops AcBinaryDeserializer.Deserialize<T>(input, opts) - /// producing into a . - /// Per-iteration (timed): sender writes via - /// - /// — multi-message wire ([201][UINT16][data]...[202]); the [202] end marker arms the input's - /// _readPos = -1 sentinel, so the next message's first AppendToBuffer recycles the buffer to 0. - /// Then receiver awaits the channel for the deserialized result. - /// is a no-op (full round-trip captured in ); - /// =true → Ser ms / SerAlloc oszlopok N/A, RT ms = full round-trip. - /// - /// - /// Per-iter overhead: 0 new Task.Run, 0 new AsyncPipeReaderInput, 0 new CancellationTokenSource. 
- /// Pure cost = SerializeChunkedFramed (CPU + chunk-onkénti flush) + kernel write/read syscalls + 1 sync barrier - /// (channel) + deserialized graph alloc. The "multi-message reuse" pattern enabled by Q4T8 fix (R5K2 minimum: _readPos = -1 - /// sentinel + AppendToBuffer sliding-window cycling). - /// - /// Approximation note: single-process loopback NamedPipe. Real cross-process / cross-machine SignalR - /// adds further transport latency (TCP, WebSocket framing) on top. The benchmark gives a lower bound. - /// - internal sealed class AcBinaryNamedPipeBenchmark : ISerializerBenchmark, IDisposable - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; // for SerializedSize reporting only - - // Long-lived pipe lifecycle (set up once in ctor — NOT timed). - private readonly NamedPipeServerStream _pipeServer; - private readonly NamedPipeClientStream _pipeClient; - private readonly PipeWriter _pipeWriter; - private readonly PipeReader _pipeReader; - - // Long-lived multi-message receive infrastructure (set up once in ctor). - private readonly AsyncPipeReaderInput _input; - private readonly CancellationTokenSource _cts; - private readonly Task _drainTask; // BG: PipeReader → input.Feed (continuous pump) - private readonly Task _consumerTask; // BG: per-iter Deserialize(input) loop, signaled by calling thread - private readonly ManualResetEventSlim _consumeRequest = new(false); - private readonly ManualResetEventSlim _consumeDone = new(false); - private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters - private bool _captureResult; // toggle: when true, ConsumeLoop stores result; otherwise discards - private bool _disposed; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoNamedPipe; - public string DispatchMode => _options.UseGeneratedCode ? 
Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes { get; } - public bool IsRoundTripOnly => true; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(long-lived,multiMessage,2-task)"); - - public AcBinaryNamedPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - // BufferWriterChunkSize comes from the caller (central source of truth in CreateSerializers - // — the binaryFastMode4KbChunk options instance). Do NOT mutate _options here; tune the chunk - // size in CreateSerializers only. - _options = options; - OptionsPreset = optionsPreset; - - _serialized = AcBinarySerializer.Serialize(order, _options); - - // 1× pipe setup. Kernel-side pipe buffer (inBufferSize / outBufferSize on the server ctor — the - // client inherits the server-defined buffer size at connect time) matches BufferWriterChunkSize - // exactly: AsyncPipeWriterOutput now treats chunkSize as the chunk-on-wire total size (header + - // data), so one WriteFile(chunkSize) syscall lands in exactly one kernel-page slot — page-aligned, - // no fragmentation, no IRP reordering. _options.BufferWriterChunkSize is the single tunable source. - var pipeName = $"AcBinaryBench-{Guid.NewGuid():N}"; - - // === SERIALIZE-side setup measurement === - // pipe-pair (server + client) + connect handshake + writer-side PipeWriter wrapper. 
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeSer = GC.GetAllocatedBytesForCurrentThread(); - - _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte, - System.IO.Pipes.PipeOptions.Asynchronous, - inBufferSize: _options.BufferWriterChunkSize, - outBufferSize: _options.BufferWriterChunkSize); - - _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous); - - var serverWait = _pipeServer.WaitForConnectionAsync(); - _pipeClient.Connect(); - serverWait.GetAwaiter().GetResult(); - - _pipeWriter = PipeWriter.Create(_pipeClient); - var afterSer = GC.GetAllocatedBytesForCurrentThread(); - SetupSerializeAllocBytes = afterSer - beforeSer; - - // === DESERIALIZE-side setup measurement === - // PipeReader wrapper + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain - // task + consumer task scaffolding. Two long-lived BG tasks total: drain pumps bytes from the - // kernel pipe into input; consumer drives Deserialize(input) per iter on signal. - GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeDes = GC.GetAllocatedBytesForCurrentThread(); - - _pipeReader = PipeReader.Create(_pipeServer); - _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true); - _cts = new CancellationTokenSource(); - - // Drain task: pumps PipeReader → input.Feed forever (or until cancel). Single Task.Run for - // the full benchmark lifetime — its overhead is amortised across all messages. - _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token)); - - // Consumer task: per-iter Deserialize(input) loop. Started here once; signaled per-iter via - // _consumeRequest. Enables Ser↔Des streaming overlap — calling thread runs SerializeChunkedFramed - // while THIS task simultaneously runs Deserialize, both consuming/producing through the - // sliding-window buffer pipelined by the drain task. 
- _consumerTask = Task.Run(ConsumeLoop); - - var afterDes = GC.GetAllocatedBytesForCurrentThread(); - SetupDeserializeAllocBytes = afterDes - beforeDes; - } - - // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone. - // The Deserialize call internally blocks on the input's MRES whenever the drain hasn't yet fed enough - // bytes for the next read — that's where the streaming-pipeline overlap with the calling thread (Ser) - // happens. - private void ConsumeLoop() - { - var ct = _cts.Token; - try - { - while (true) - { - _consumeRequest.Wait(ct); - if (ct.IsCancellationRequested) return; - _consumeRequest.Reset(); - - try - { - var result = AcBinaryDeserializer.Deserialize(_input, _options); - if (_captureResult) _lastResult = result; - } - catch - { - // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip, - // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose. - } - finally - { - _consumeDone.Set(); - } - } - } - catch (OperationCanceledException) - { - // Cooperative cancel — Dispose path. Swallow. - } - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - // 2-task streaming pipeline: - // 1. Calling thread signals consumer task to begin Deserialize(input). Consumer immediately - // starts; first read blocks on input's MRES because no bytes flowed yet. - // 2. Calling thread starts SerializeChunkedFramed → chunks flow through PipeWriter → kernel pipe → - // drain task (BG) feeds input.Feed → MRES pulses → consumer's Deserialize consumes bytes - // chunk by chunk. Ser↔Des truly overlap here. - // 3. Calling thread waits for _consumeDone (signaling Deserialize returned). 
- _consumeDone.Reset(); - _consumeRequest.Set(); - - AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options); - - _consumeDone.Wait(); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() - { - // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. - } - - public bool VerifyRoundTrip() - { - // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality. - _captureResult = true; - try - { - Serialize(); - var result = _lastResult as TestOrder; - return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); - } - finally - { - _captureResult = false; - _lastResult = null; - } - } - - public void Dispose() - { - if (_disposed) return; - _disposed = true; - - // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked. - try { _cts.Cancel(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } - try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - - // Complete writer + dispose pipe lifecycle. - try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - try { _pipeReader.Complete(); } catch { /* swallow on teardown */ } - try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ } - try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ } - try { _input.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } - try { _cts.Dispose(); } catch { /* swallow on teardown */ } - } - } - - /// - /// Same chunked-framed AsyncPipe code path as , but the transport - /// is an in-memory instead of a kernel NamedPipe. 
The Pipe's - /// Writer/Reader pair is a managed-only zero-copy slab handoff — no syscalls, no kernel - /// buffer copy, no IRP queueing. - /// - /// Why this benchmark matters: by holding ALL other variables constant (same SerializeChunkedFramed, - /// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this - /// row isolates the kernel-NamedPipe transport overhead from the chunked-streaming framework's pure - /// CPU cost. The expected delta vs : per-chunk overhead drops from - /// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows - /// should converge dramatically toward . - /// - /// Real-world relevance: in-memory Pipe is the typical primitive used for cross-thread serializer - /// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals, - /// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback - /// of the NamedPipe benchmark. - /// - internal sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; // for SerializedSize reporting only - - // Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed). - private readonly Pipe _pipe; - private readonly PipeWriter _pipeWriter; - private readonly PipeReader _pipeReader; - - // Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe - // variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize(input). 
- private readonly AsyncPipeReaderInput _input; - private readonly CancellationTokenSource _cts; - private readonly Task _drainTask; - private readonly Task _consumerTask; - private readonly ManualResetEventSlim _consumeRequest = new(false); - private readonly ManualResetEventSlim _consumeDone = new(false); - private object? _lastResult; - private bool _captureResult; - private bool _disposed; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoInMemoryPipe; - public string DispatchMode => _options.UseGeneratedCode ? Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes { get; } - public bool IsRoundTripOnly => true; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)"); - - public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - _options = options; - OptionsPreset = optionsPreset; - - _serialized = AcBinarySerializer.Serialize(order, _options); - - // === SERIALIZE-side setup measurement === - // In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object - // and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter). 
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeSer = GC.GetAllocatedBytesForCurrentThread(); - _pipe = new Pipe(); - _pipeWriter = _pipe.Writer; - var afterSer = GC.GetAllocatedBytesForCurrentThread(); - SetupSerializeAllocBytes = afterSer - beforeSer; - - // === DESERIALIZE-side setup measurement === - // PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task + - // consumer task scaffolding. Identical to the NamedPipe variant on the receive side. - GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeDes = GC.GetAllocatedBytesForCurrentThread(); - - _pipeReader = _pipe.Reader; - _input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true); - _cts = new CancellationTokenSource(); - _drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token)); - _consumerTask = Task.Run(ConsumeLoop); - - var afterDes = GC.GetAllocatedBytesForCurrentThread(); - SetupDeserializeAllocBytes = afterDes - beforeDes; - } - - // BG consumer: parks on _consumeRequest, runs Deserialize(_input) when signaled, signals _consumeDone. - // Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol. - private void ConsumeLoop() - { - var ct = _cts.Token; - try - { - while (true) - { - _consumeRequest.Wait(ct); - if (ct.IsCancellationRequested) return; - _consumeRequest.Reset(); - - try - { - var result = AcBinaryDeserializer.Deserialize(_input, _options); - if (_captureResult) _lastResult = result; - } - catch - { - // Swallow — see ConsumeLoop in NamedPipe variant for rationale. - } - finally - { - _consumeDone.Set(); - } - } - } - catch (OperationCanceledException) - { - // Cooperative cancel — Dispose path. Swallow. - } - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - // Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe - // instead of kernel NamedPipe). 
Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task - // reads from PipeReader → input.Feed → consumer Deserialize consumes byte-by-byte. - // - // Uses the Pipe-overload (instead of the PipeWriter-overload) so the FlushPolicy parameter is - // exposed for tuning. Toggle between FlushPolicy.PerChunk (bounded peak memory, per-chunk await - // FlushAsync) and FlushPolicy.Coalesced (fire-and-forget per chunk, pipe-coalesced flushes up to - // PauseWriterThreshold ~64 KB) to A/B-test the streaming-pipeline overhead. FlushPolicy.PerChunk - // is functionally equivalent to the PipeWriter-overload (both internally route to - // SerializeToPipeWriterCore with FlushPolicy.PerChunk). - _consumeDone.Reset(); - _consumeRequest.Set(); - - AcBinarySerializer.SerializeChunkedFramed(_order, _pipe, _options, FlushPolicy.Coalesced); - - _consumeDone.Wait(); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() - { - // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. - } - - public bool VerifyRoundTrip() - { - _captureResult = true; - try - { - Serialize(); - var result = _lastResult as TestOrder; - return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); - } - finally - { - _captureResult = false; - _lastResult = null; - } - } - - public void Dispose() - { - if (_disposed) return; - _disposed = true; - - // Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked. - try { _cts.Cancel(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } - try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - - // Complete writer + reader (in-memory Pipe — no underlying stream to dispose). 
- try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - try { _pipeReader.Complete(); } catch { /* swallow on teardown */ } - try { _input.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } - try { _cts.Dispose(); } catch { /* swallow on teardown */ } - } - } - - /// - /// Raw byte[] over a long-lived NamedPipe — NO chunk-framing, NO AsyncPipeReaderInput, - /// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task - /// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND - /// avoids the kernel-buffer-full deadlock when bytes.Length > inBufferSize. - /// - /// Side-by-side with (chunked-framed AsyncPipe stack) this - /// isolates two cost components on the SAME kernel-pipe transport with the SAME inBufferSize: - /// - /// This row vs (Byte[]) — pure kernel-NamedPipe - /// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff). - /// This row vs (chunked-framed) — pure - /// AsyncPipe-framework overhead (chunk header writes + sliding-window Feed + MRES wait inside - /// AsyncPipeReaderInput) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which - /// raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes). - /// - /// Per-iter byte[] allocation from AcBinarySerializer.Serialize is part of the cost (matches - /// 's API contract); the receive-side scratch buffer is also allocated per-iter - /// on the consumer-task (counted via GC.GetTotalAllocatedBytes in BenchmarkLoop.MeasureAllocationTotal). 
- /// - internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark : ISerializerBenchmark, IDisposable - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; // for SerializedSize reporting + receive-side size known upfront - - // Long-lived pipe lifecycle (set up once in ctor — NOT timed). - private readonly NamedPipeServerStream _pipeServer; - private readonly NamedPipeClientStream _pipeClient; - - // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter). - // Mirrors AcBinaryNamedPipeBenchmark's drain+consumer pair, but raw byte[] doesn't have an - // intermediate sliding-window buffer, so Read+Des happen sequentially in one BG task: Read N bytes - // → Deserialize(bytes) → signal done. Calling thread's Ser↔Write overlaps with this BG Read+Des - // through kernel-pipe pipelining. - private readonly CancellationTokenSource _cts; - private readonly Task _consumerTask; - private readonly ManualResetEventSlim _consumeRequest = new(false); - private readonly ManualResetEventSlim _consumeDone = new(false); - private int _pendingReadSize; - private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters - private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards - private bool _disposed; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoNamedPipeRaw; - public string DispatchMode => _options.UseGeneratedCode ? 
Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes { get; } - public bool IsRoundTripOnly => true; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)"); - - public AcBinaryNamedPipeRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - // BufferWriterChunkSize comes from the caller — same source-of-truth contract as - // AcBinaryNamedPipeBenchmark. The kernel pipe-buffer (inBufferSize) is wired to it so the - // raw-vs-chunked comparison runs on identical transport conditions. - _options = options; - OptionsPreset = optionsPreset; - - _serialized = AcBinarySerializer.Serialize(order, _options); - - var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}"; - - // === SERIALIZE-side setup measurement === - // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw - // Stream.Write API directly, matching the no-framing semantics of this benchmark. 
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeSer = GC.GetAllocatedBytesForCurrentThread(); - _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte, - System.IO.Pipes.PipeOptions.Asynchronous, - inBufferSize: _options.BufferWriterChunkSize, - outBufferSize: _options.BufferWriterChunkSize); - _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous); - - var serverWait = _pipeServer.WaitForConnectionAsync(); - _pipeClient.Connect(); - serverWait.GetAwaiter().GetResult(); - var afterSer = GC.GetAllocatedBytesForCurrentThread(); - SetupSerializeAllocBytes = afterSer - beforeSer; - - // === DESERIALIZE-side setup measurement === - // 1× background consumer-task + 2× MRES (request / done) + cancellation source. Matches the - // chunked benchmark's deserialize-side setup cost shape. - GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeDes = GC.GetAllocatedBytesForCurrentThread(); - _cts = new CancellationTokenSource(); - _consumerTask = Task.Run(ConsumerLoop); - var afterDes = GC.GetAllocatedBytesForCurrentThread(); - SetupDeserializeAllocBytes = afterDes - beforeDes; - } - - // BG consumer: parks on _consumeRequest, reads N bytes from pipe, runs Deserialize(bytes), signals - // _consumeDone. The Read overlaps with the calling thread's Write through the kernel-pipe; Des happens - // sequentially after Read completes (raw byte[] needs the full message to deserialize). 
- private void ConsumerLoop() - { - var ct = _cts.Token; - try - { - while (true) - { - _consumeRequest.Wait(ct); - if (ct.IsCancellationRequested) return; - _consumeRequest.Reset(); - - try - { - var size = _pendingReadSize; - var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal - var totalRead = 0; - while (totalRead < size) - { - var n = _pipeServer.Read(bytes, totalRead, size - totalRead); - if (n == 0) break; // pipe closed / EOF — partial read swallowed - totalRead += n; - } - var result = AcBinaryDeserializer.Deserialize(bytes, _options); - if (_captureResult) _lastResult = result; - } - catch - { - // Swallow — calling thread sees the failure via missing/incorrect _lastResult during VerifyRoundTrip, - // or the benchmark loop just continues (timing impacted). Production teardown handled in Dispose. - } - finally - { - _consumeDone.Set(); - } - } - } - catch (OperationCanceledException) - { - // Cooperative cancel — Dispose path. Swallow. - } - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - // 2-task streaming pipeline: - // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract). - // 2. Calling thread hands off expected size + signals consumer task. Consumer task starts Read loop - // on the pipe (BG thread). Calling thread proceeds to Write the bytes — Read and Write overlap - // through the kernel-pipe (kernel buffer fills, drains as consumer reads, sender resumes). - // 3. Calling thread waits for _consumeDone (consumer task finished Read+Des). - // - // Note: unlike chunked, raw byte[] cannot do Ser↔Des overlap (Des needs the full bytes before - // starting). Only Write↔Read overlaps here. The Des sequence on BG thread is: Read full bytes → - // Des the full graph → signal done. This is the architectural difference between raw and chunked. 
- var bytes = AcBinarySerializer.Serialize(_order, _options); - - _pendingReadSize = bytes.Length; - _consumeDone.Reset(); - _consumeRequest.Set(); - - _pipeClient.Write(bytes, 0, bytes.Length); - _pipeClient.Flush(); - - _consumeDone.Wait(); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() - { - // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. - } - - public bool VerifyRoundTrip() - { - // Use the same 2-task streaming path as the benchmark, but capture the result for graph-equality. - _captureResult = true; - try - { - Serialize(); - var result = _lastResult as TestOrder; - return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); - } - finally - { - _captureResult = false; - _lastResult = null; - } - } - - public void Dispose() - { - if (_disposed) return; - _disposed = true; - - // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException. - try { _cts.Cancel(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } - try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - - // Symmetric teardown — close client first (writer side), then server. - try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ } - try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } - try { _cts.Dispose(); } catch { /* swallow on teardown */ } - } - } - - /// - /// Raw byte[] over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no - /// Channel). Calling thread serialises into a fresh byte[], hands it to a - /// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done. 
- /// - /// Why this benchmark matters: completes the 2x2 transport × wire-format matrix: - /// - /// NamedPipe + Chunked = - /// NamedPipe + Raw = - /// In-memory Pipe + Chunked = - /// In-memory + Raw = THIS row — apples-to-apples baseline for the in-memory chunked row - /// - /// Side-by-side with this isolates the chunked-streaming - /// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides. - /// Side-by-side with this isolates the kernel-NamedPipe - /// overhead on the raw-byte[] side. - /// - internal sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; // for SerializedSize reporting only - - // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter). - // No transport — just a byte[] slot for handoff between calling thread and consumer task. - private readonly CancellationTokenSource _cts; - private readonly Task _consumerTask; - private readonly ManualResetEventSlim _consumeRequest = new(false); - private readonly ManualResetEventSlim _consumeDone = new(false); - private byte[]? _pendingBytes; // calling thread → consumer task handoff slot - private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters - private bool _captureResult; - private bool _disposed; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoInMemoryRaw; - public string DispatchMode => _options.UseGeneratedCode ? 
Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes { get; } - public bool IsRoundTripOnly => true; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)"); - - public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - _options = options; - OptionsPreset = optionsPreset; - - _serialized = AcBinarySerializer.Serialize(order, _options); - - // === SERIALIZE-side setup measurement === - // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize. - SetupSerializeAllocBytes = 0; - - // === DESERIALIZE-side setup measurement === - // 1× background consumer-task + 2× MRES (request / done) + cancellation source. - GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeDes = GC.GetAllocatedBytesForCurrentThread(); - _cts = new CancellationTokenSource(); - _consumerTask = Task.Run(ConsumerLoop); - var afterDes = GC.GetAllocatedBytesForCurrentThread(); - SetupDeserializeAllocBytes = afterDes - beforeDes; - } - - // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize(bytes), - // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[] - // reference itself (zero-copy by reference). 
- private void ConsumerLoop() - { - var ct = _cts.Token; - try - { - while (true) - { - _consumeRequest.Wait(ct); - if (ct.IsCancellationRequested) return; - _consumeRequest.Reset(); - - try - { - var bytes = _pendingBytes; - if (bytes != null) - { - var result = AcBinaryDeserializer.Deserialize(bytes, _options); - if (_captureResult) _lastResult = result; - } - } - catch - { - // Swallow — see ConsumerLoop in NamedPipe variant for rationale. - } - finally - { - _consumeDone.Set(); - } - } - } - catch (OperationCanceledException) - { - // Cooperative cancel — Dispose path. Swallow. - } - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - // 2-task in-memory pipeline: - // 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract). - // 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task - // picks up the reference (zero-copy) and runs Deserialize(bytes). - // 3. Calling thread waits for _consumeDone (consumer task finished Des). - // - // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes - // are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts - // signalling and waiting while consumer thread takes the byte[]). - var bytes = AcBinarySerializer.Serialize(_order, _options); - - _pendingBytes = bytes; - _consumeDone.Reset(); - _consumeRequest.Set(); - - _consumeDone.Wait(); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() - { - // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract. 
- } - - public bool VerifyRoundTrip() - { - _captureResult = true; - try - { - Serialize(); - var result = _lastResult as TestOrder; - return result != null && BenchmarkLoop.DeepEqualsViaJson(_order, result); - } - finally - { - _captureResult = false; - _lastResult = null; - } - } - - public void Dispose() - { - if (_disposed) return; - _disposed = true; - - try { _cts.Cancel(); } catch { /* swallow on teardown */ } - try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ } - try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ } - - try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ } - try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ } - try { _cts.Dispose(); } catch { /* swallow on teardown */ } - } - } - - /// - /// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call. - /// Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark. - /// - internal sealed class MemoryPackFreshBufferWriterBenchmark : ISerializerBenchmark - { - private readonly TestOrder _order; - private readonly MemoryPackSerializerOptions _options; - private readonly byte[] _serialized; - - public string Engine => Configuration.EngineMemoryPack; - public string IoMode => Configuration.IoBufWrNew; - public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes => 0; - public long SetupDeserializeAllocBytes => 0; - public string? 
OptionsDescription => $"StringEncoding={_options.StringEncoding}"; - - public MemoryPackFreshBufferWriterBenchmark(TestOrder order, string optionsPreset) - { - _order = order; - OptionsPreset = optionsPreset; - _options = BenchmarkOptions.GetMemPack(); - _serialized = MemoryPackSerializer.Serialize(order, _options); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - var abw = new ArrayBufferWriter(); - MemoryPackSerializer.Serialize(abw, _order, _options); - } - - // BufWr semantic: read from a ReadOnlySequence overload (apples-to-apples with AcBinary's - // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally. - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() => MemoryPackSerializer.Deserialize(new ReadOnlySequence(_serialized), _options); - - public bool VerifyRoundTrip() - { - var abw = new ArrayBufferWriter(); - MemoryPackSerializer.Serialize(abw, _order, _options); - var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(abw.WrittenMemory), _options); - return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); - } - } - - internal sealed class AcBinaryBufferWriterBenchmark : ISerializerBenchmark - { - private readonly TestOrder _order; - private readonly AcBinarySerializerOptions _options; - private readonly byte[] _serialized; - private readonly ArrayBufferWriter _bufferWriter; - - public string Engine => Configuration.EngineAcBinary; - public string IoMode => Configuration.IoBufWrReuse; - public string DispatchMode => _options.UseGeneratedCode ? 
Configuration.ModeSGen : Configuration.ModeRuntime; - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes => 0; - public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options); - - public AcBinaryBufferWriterBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset) - { - _order = order; - _options = options; - OptionsPreset = optionsPreset; - _serialized = AcBinarySerializer.Serialize(order, options); - - // Measure ONLY the BufferWriter infrastructure setup on the serialize side (excluding the - // helper Serialize above). Deserialize side reads directly from `_serialized` byte[] — no - // dedicated setup allocation, hence SetupDeserializeAllocBytes = 0. - GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeSetup = GC.GetAllocatedBytesForCurrentThread(); - _bufferWriter = new ArrayBufferWriter(_serialized.Length * 2); - var afterSetup = GC.GetAllocatedBytesForCurrentThread(); - SetupSerializeAllocBytes = afterSetup - beforeSetup; - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - _bufferWriter.ResetWrittenCount(); // reuse — no alloc, no zeroing - AcBinarySerializer.Serialize(_order, _bufferWriter, _options); - } - - // BufWr semantic: read from a ReadOnlySequence (the ROS overload), NOT from byte[] — - // single-segment array-backed sequence triggers the fast-path in AcBinaryDeserializer.cs:298 which - // redirects to the byte[] overload. This means the bench actually exercises the ROS-input path - // (the production-realistic surface for SignalR / Pipe consumers) rather than secretly testing - // byte[] Deser under the BufWr label. 
- [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() => AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_serialized), _options); - - public bool VerifyRoundTrip() - { - _bufferWriter.ResetWrittenCount(); - AcBinarySerializer.Serialize(_order, _bufferWriter, _options); - - var roundTripped = AcBinaryDeserializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); - return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); - } - } - - /// - /// Benchmarks MemoryPack via the IBufferWriter overload with a pre-allocated, reused ArrayBufferWriter. - /// Apples-to-apples counterpart to AcBinaryBufferWriterBenchmark — MemoryPack's IBufferWriter is the path it's designed for. - /// - internal sealed class MemoryPackBufferWriterBenchmark : ISerializerBenchmark - { - private readonly TestOrder _order; - private readonly MemoryPackSerializerOptions _options; - private readonly byte[] _serialized; - private readonly ArrayBufferWriter _bufferWriter; - - public string Engine => Configuration.EngineMemoryPack; - public string IoMode => Configuration.IoBufWrReuse; - public string DispatchMode => Configuration.ModeSGen; // MemoryPack always uses [MemoryPackable] source-generated formatters - public string OptionsPreset { get; } - public int SerializedSize => _serialized.Length; - public long SetupSerializeAllocBytes { get; } - public long SetupDeserializeAllocBytes => 0; - public string? OptionsDescription => $"StringEncoding={_options.StringEncoding}"; - - public MemoryPackBufferWriterBenchmark(TestOrder order, string optionsPreset) - { - _order = order; - OptionsPreset = optionsPreset; - _options = BenchmarkOptions.GetMemPack(); - _serialized = MemoryPackSerializer.Serialize(order, _options); - - // Serialize-side setup only — see AcBinaryBufferWriterBenchmark for the full rationale. 
- GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect(); - var beforeSetup = GC.GetAllocatedBytesForCurrentThread(); - _bufferWriter = new ArrayBufferWriter(_serialized.Length * 2); - var afterSetup = GC.GetAllocatedBytesForCurrentThread(); - SetupSerializeAllocBytes = afterSetup - beforeSetup; - } - - [MethodImpl(MethodImplOptions.NoInlining)] - public void Serialize() - { - _bufferWriter.ResetWrittenCount(); - MemoryPackSerializer.Serialize(_bufferWriter, _order, _options); - } - - // BufWr semantic: read from a ReadOnlySequence overload (apples-to-apples with AcBinary's - // BufWr Deser path). MemoryPack's ROS overload also single-segment-fast-paths internally. - [MethodImpl(MethodImplOptions.NoInlining)] - public void Deserialize() => MemoryPackSerializer.Deserialize(new ReadOnlySequence(_serialized), _options); - - public bool VerifyRoundTrip() - { - _bufferWriter.ResetWrittenCount(); - MemoryPackSerializer.Serialize(_bufferWriter, _order, _options); - var roundTripped = MemoryPackSerializer.Deserialize(new ReadOnlySequence(_bufferWriter.WrittenMemory), _options); - return BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped); - } - } - -#endregion + // Serializer implementations (ISerializerBenchmark + 12 concrete benchmark classes) → Benchmarks/ // Results / output formatters → Output.cs // BenchmarkResult DTO → BenchmarkResult.cs