AyCode.Core/AyCode.Core.Serializers.Con.../Benchmarks/AcBinaryNamedPipeRawByteArr...

215 lines
11 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using AyCode.Core.Serializers.Binaries;
using AyCode.Core.Tests.TestModels;
using System.IO.Pipes;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Serializers.Console.Benchmarks;
/// <summary>
/// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
/// reads and deserialises. Two-task pattern enables Ser↔Read overlap (kernel-pipe-pipelined) AND
/// avoids the kernel-buffer-full deadlock when <c>bytes.Length &gt; inBufferSize</c>.
///
/// Side-by-side with <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed AsyncPipe stack) this
/// isolates two cost components on the SAME kernel-pipe transport with the SAME <c>inBufferSize</c>:
/// <list type="bullet">
/// <item><description><b>This row vs <see cref="AcBinaryBenchmark"/> (Byte[])</b> — pure kernel-NamedPipe
/// overhead (WriteFile / ReadFile syscalls + IRP queueing + buffer-copy + thread-handoff).</description></item>
/// <item><description><b>This row vs <see cref="AcBinaryNamedPipeBenchmark"/> (chunked-framed)</b> — pure
/// AsyncPipe-framework overhead (chunk header writes + sliding-window <c>Feed</c> + MRES wait inside
/// <c>AsyncPipeReaderInput</c>) AND the streaming-pipeline benefit of intra-message Ser↔Des overlap (which
/// raw lacks — raw can only Ser↔Read overlap, with Des sequential after Read completes).</description></item>
/// </list>
/// Per-iter <c>byte[]</c> allocation from <c>AcBinarySerializer.Serialize</c> is part of the cost (matches
/// <see cref="AcBinaryBenchmark"/>'s API contract); the receive-side scratch buffer is also allocated per-iter
/// on the consumer-task (counted via <c>GC.GetTotalAllocatedBytes</c> in <c>BenchmarkLoop.MeasureAllocationTotal</c>).
/// </summary>
/// <typeparam name="T">Reference type of the object graph that is serialised and round-tripped.</typeparam>
internal sealed class AcBinaryNamedPipeRawByteArrayBenchmark<T> : ISerializerBenchmark, IDisposable where T : class
{
    private readonly T _order;
    private readonly AcBinarySerializerOptions _options;

    // Serialised once in the ctor; only its length is consumed afterwards (SerializedSize).
    private readonly byte[] _serialized;

    // Long-lived pipe lifecycle (set up once in ctor — NOT timed).
    private readonly NamedPipeServerStream _pipeServer;
    private readonly NamedPipeClientStream _pipeClient;

    // Long-lived consumer-task infrastructure (Read + Deserialize on BG thread, signaled per iter).
    // Raw byte[] has no intermediate buffer, so Read+Des happen sequentially in one BG task:
    // Read N bytes → Deserialize<T>(bytes) → signal done.
    private readonly CancellationTokenSource _cts;
    private readonly Task _consumerTask;
    private readonly ManualResetEventSlim _consumeRequest = new(false);
    private readonly ManualResetEventSlim _consumeDone = new(false);

    // Hand-off slots between the calling thread and the consumer. Each is written before the
    // corresponding event is signalled and read only after that event has been observed.
    private int _pendingReadSize;
    private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
    private bool _captureResult; // toggle: when true, ConsumerLoop stores result; otherwise discards
    private bool _disposed;

    /// <summary>Serializer engine identifier reported for this benchmark row.</summary>
    public BenchmarkEngine Engine => BenchmarkEngine.AcBinary;

    /// <summary>I/O mode identifier reported for this benchmark row.</summary>
    public BenchmarkIoMode IoMode => BenchmarkIoMode.NamedPipeRaw;

    /// <summary><c>SGen</c> when the options request generated code, otherwise <c>Runtime</c>.</summary>
    public BenchmarkDispatchMode DispatchMode => _options.UseGeneratedCode ? BenchmarkDispatchMode.SGen : BenchmarkDispatchMode.Runtime;

    /// <summary>The generic payload type <typeparamref name="T"/>.</summary>
    public Type OrderType => typeof(T);

    /// <summary>Name of the options preset, as passed to the constructor.</summary>
    public string OptionsPreset { get; }

    /// <summary>Length in bytes of the payload serialised once during construction.</summary>
    public int SerializedSize => _serialized.Length;

    /// <summary>Bytes allocated on the constructing thread while creating and connecting the pipe pair.</summary>
    public long SetupSerializeAllocBytes { get; }

    /// <summary>Bytes allocated on the constructing thread while creating the cancellation source and starting the consumer task.</summary>
    public long SetupDeserializeAllocBytes { get; }

    /// <summary>Always <c>true</c>: the whole round trip runs inside <see cref="Serialize"/>; <see cref="Deserialize"/> is a no-op.</summary>
    public bool IsRoundTripOnly => true;

    /// <summary>Human-readable options description, including the buffer size and transport tag.</summary>
    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=NamedPipe(raw,2-task)");

    /// <summary>
    /// Serialises <paramref name="order"/> once for size reporting, creates and connects a private
    /// named-pipe pair, and starts the background consumer task. None of this is timed.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">Serializer options; <c>BufferWriterChunkSize</c> also sizes the pipe buffers.</param>
    /// <param name="optionsPreset">Preset name exposed through <see cref="OptionsPreset"/>.</param>
    public AcBinaryNamedPipeRawByteArrayBenchmark(T order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        // BufferWriterChunkSize comes from the caller. The kernel pipe-buffer (inBufferSize) is wired
        // to it so the raw-vs-chunked comparison runs on identical transport conditions.
        _options = options;
        OptionsPreset = optionsPreset;
        _serialized = AcBinarySerializer.Serialize(order, _options);

        // Unique name per instance so several benchmark instances never share a pipe.
        var pipeName = $"AcBinaryBenchRaw-{Guid.NewGuid():N}";

        // === SERIALIZE-side setup measurement ===
        // pipe-pair (server + client) + connect handshake. NO PipeWriter wrapper — we use the raw
        // Stream.Write API directly, matching the no-framing semantics of this benchmark.
        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
        var beforeSer = GC.GetAllocatedBytesForCurrentThread();
        _pipeServer = new NamedPipeServerStream(pipeName, PipeDirection.In, 1, PipeTransmissionMode.Byte,
            System.IO.Pipes.PipeOptions.Asynchronous,
            inBufferSize: _options.BufferWriterChunkSize,
            outBufferSize: _options.BufferWriterChunkSize);
        try
        {
            _pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);
            var serverWait = _pipeServer.WaitForConnectionAsync();
            _pipeClient.Connect();
            serverWait.GetAwaiter().GetResult();
        }
        catch
        {
            // Dispose() is never reachable for a half-constructed instance, so release the pipe
            // handles here before letting the failure propagate.
            _pipeClient?.Dispose();
            _pipeServer.Dispose();
            throw;
        }
        var afterSer = GC.GetAllocatedBytesForCurrentThread();
        SetupSerializeAllocBytes = afterSer - beforeSer;

        // === DESERIALIZE-side setup measurement ===
        // Cancellation source + background consumer task. The two MRES fields are created by their
        // field initializers, which run before this constructor body, so they are NOT part of this delta.
        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
        _cts = new CancellationTokenSource();
        _consumerTask = Task.Run(ConsumerLoop);
        var afterDes = GC.GetAllocatedBytesForCurrentThread();
        SetupDeserializeAllocBytes = afterDes - beforeDes;
    }

    /// <summary>
    /// Background consumer: parks on <c>_consumeRequest</c>, reads <c>_pendingReadSize</c> bytes from the
    /// server end of the pipe, deserialises them, then signals <c>_consumeDone</c>. Deserialisation starts
    /// only after the full message has been read. The loop ends when the cancellation token fires.
    /// </summary>
    private void ConsumerLoop()
    {
        var ct = _cts.Token;
        try
        {
            while (true)
            {
                _consumeRequest.Wait(ct);
                // Dispose() both cancels and sets the event; exit if the wake-up was the teardown nudge.
                if (ct.IsCancellationRequested) return;
                _consumeRequest.Reset();
                try
                {
                    var size = _pendingReadSize;
                    var bytes = new byte[size]; // per-iter alloc — counted by BenchmarkLoop.MeasureAllocationTotal
                    var totalRead = 0;
                    while (totalRead < size)
                    {
                        var n = _pipeServer.Read(bytes, totalRead, size - totalRead);
                        if (n == 0)
                        {
                            // Writer end closed before the whole message arrived. Never hand a partly
                            // zero-filled buffer to the deserializer: fail this iteration instead.
                            throw new System.IO.EndOfStreamException(
                                $"Pipe closed after {totalRead} of {size} bytes.");
                        }
                        totalRead += n;
                    }
                    var result = AcBinaryDeserializer.Deserialize<T>(bytes, _options);
                    if (_captureResult) _lastResult = result;
                }
                catch
                {
                    // Deliberately best-effort: the failure shows up as a null _lastResult during
                    // VerifyRoundTrip; in a benchmark iteration the loop simply continues.
                }
                finally
                {
                    // Always release the calling thread, success or failure, so Serialize() cannot hang here.
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Cooperative cancel — Dispose path. Swallow.
        }
    }

    /// <summary>
    /// Runs one full round trip: serialise on the calling thread, hand the expected size to the consumer,
    /// write the bytes to the client end of the pipe, then block until the consumer signals completion.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // 1. Calling thread serialises → fresh byte[] (per-iter alloc).
        // 2. Expected size is published, then the consumer is signalled and starts its Read loop while
        //    this thread writes — Read and Write overlap through the kernel pipe.
        // 3. Calling thread waits for _consumeDone (consumer finished Read+Des, or failed).
        //
        // Unlike a chunked transport, raw byte[] cannot overlap Ser with Des: Des needs the full bytes.
        var bytes = AcBinarySerializer.Serialize(_order, _options);
        _pendingReadSize = bytes.Length;
        _consumeDone.Reset();   // must be cleared BEFORE the request is raised, or a fast consumer's Set could be lost
        _consumeRequest.Set();
        _pipeClient.Write(bytes, 0, bytes.Length);
        _pipeClient.Flush();
        _consumeDone.Wait();
    }

    /// <summary>No-op: the per-iteration round trip is performed entirely inside <see cref="Serialize"/>.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // Intentionally empty. See IsRoundTripOnly.
    }

    /// <summary>
    /// Executes one round trip through the same pipe path as the benchmark, capturing the deserialised
    /// graph and comparing it with the original via <c>BenchmarkLoop.DeepEqualsViaJson</c>.
    /// </summary>
    /// <returns><c>true</c> when a result was captured and it compares equal to the original; otherwise <c>false</c>.</returns>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            // A null or wrongly-typed slot means the consumer failed and swallowed the error.
            return _lastResult is T roundTripped && BenchmarkLoop.DeepEqualsViaJson(_order, roundTripped);
        }
        finally
        {
            // Restore benchmark mode and drop the captured graph.
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Stops the consumer task and releases the pipes and synchronisation primitives. Safe to call
    /// more than once; never throws.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        // Cancel the consumer task → ConsumerLoop exits its Wait via OperationCanceledException.
        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }

        // Close the writer end BEFORE waiting: a consumer still blocked in Read then observes
        // end-of-stream and can leave, instead of this thread always burning the full timeout.
        try { _pipeClient.Dispose(); } catch { /* swallow on teardown */ }

        var consumerExited = false;
        try { consumerExited = _consumerTask.Wait(TimeSpan.FromSeconds(2)); }
        catch { consumerExited = _consumerTask.IsCompleted; /* Wait throws for a faulted task */ }

        try { _pipeServer.Dispose(); } catch { /* swallow on teardown */ }

        // Only tear down the primitives once the consumer has really stopped; disposing them under a
        // still-running consumer would make its final _consumeDone.Set() throw.
        if (!consumerExited) return;

        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
    }
}