[LOADED_DOCS: 3 files, no new loads]

Move DrainFromAsync to tests, add in-memory benchmarks

- Moved AsyncPipeReaderInputExtensions.DrainFromAsync from the main framework to test-only assembly; no longer public API.
- Removed AcBinaryDeserializer.DeserializeFromPipeReaderAsync<T> from public API; tests now inline drain+deserialize logic.
- Added AcBinaryInMemoryPipeBenchmark and AcBinaryInMemoryRawByteArrayBenchmark to complete 2x2 transport × wire-format benchmark matrix.
- Refactored benchmark runner for interactive menu, settings, and CLI parsing.
- Expanded XML docs for AsyncPipeReaderInput and AsyncPipeWriterOutput to clarify push-pattern and real-world usage.
- Updated BINARY_ASYNCPIPE_TODO.md and related docs to reflect these changes.
This commit is contained in:
Loretta 2026-05-02 15:51:07 +02:00
parent 05f90a5639
commit 67589f6b6f
8 changed files with 672 additions and 169 deletions

View File

@ -1,6 +1,7 @@
using AyCode.Core.Compression;
using AyCode.Core.Serializers.Attributes;
using AyCode.Core.Serializers.Binaries;
using AyCode.Core.Tests.Serialization; // DrainFromAsync extension (test-only, used by benchmark)
using AyCode.Core.Tests.TestModels;
using MemoryPack;
using MessagePack;
@ -42,8 +43,8 @@ public static class Program
private static int TestIterations = 1;
private static int BenchmarkSamples = 1; // Debug: single sample, fast iteration
#else
private static int WarmupIterations = 5000; //5000
private static int TestIterations = 1000; //1000
private static int WarmupIterations = 100; //5000
private static int TestIterations = 10; //1000
private static int BenchmarkSamples = 3;
#endif
@ -61,6 +62,15 @@ public static class Program
private const string IoString = "String";
private const string IoNamedPipe = "NamedPipe";
private const string IoNamedPipeRaw = "NamedPipe";
private const string IoInMemoryPipe = "Pipe(in-mem)";
private const string IoInMemoryRaw = "Bytes(in-mem)";
// Single source of truth for the chunk size used by ALL pipe-related benchmarks (NamedPipe PipeChunk,
// NamedPipe PipeRaw, in-memory Pipe, in-memory RawMem) AND the NamedPipe server's inBufferSize/outBufferSize.
// Same value across both layers ensures apples-to-apples comparison: chunked-streaming chunk-on-wire size
// matches the kernel pipe-buffer slot exactly. Tweak HERE when experimenting; do NOT scatter chunkSize
// overrides across individual benchmark rows.
private const int PipeChunkSize = 4096;
// Dispatch mode identifiers — describes how property access / type dispatch happens for a given run.
// SGen = compile-time source generator path (Unsafe.As<T> direct fields, slot-array wrapper lookup).
@ -152,68 +162,97 @@ public static class Program
// Done early so user is told immediately, not after warmup.
ValidateMemoryPackSetup();
// Determine layer (which test data to run), opMode (ser/des/all), and serializerMode (standard/asyncpipe).
// CLI args take precedence; if no args, show interactive menu.
// serializerMode: "standard" = all serializers EXCEPT AsyncPipe; "asyncpipe" = ONLY the AsyncPipe streaming benchmark.
// The two are mutually exclusive — AsyncPipe never runs alongside the standard set, so its long-lived pipe
// setup / kernel-buffer overhead does not skew the steady-state Byte[] / IBufferWriter measurements.
string layer;
var opMode = "all";
var serializerMode = "standard";
// CLI mode (args provided): run once, parse args, exit. Backward-compatible behaviour.
if (args.Length > 0)
{
if (!TryParseCliArgs(args, out var layer, out var opMode, out var serializerMode))
return; // profiler mode (already ran) or invalid args
RunBenchmark(layer, opMode, serializerMode);
return;
}
if (args.Length == 0)
// Interactive mode (no args): loop the menu so the user doesn't have to restart between runs.
// Q exits the menu (and the application).
while (true)
{
var selection = ShowInteractiveMenu();
if (selection == null) return; // user pressed Q
layer = selection.Value.layer;
serializerMode = selection.Value.serializerMode;
RunBenchmark(selection.Value.layer, "all", selection.Value.serializerMode);
System.Console.WriteLine();
System.Console.WriteLine("─────────────────────────────────────────────────────────────────────");
System.Console.WriteLine("Returning to menu — press any key to continue, or Q to quit...");
var key = System.Console.ReadKey(intercept: true);
if (key.Key == ConsoleKey.Q) return;
System.Console.WriteLine();
}
}
/// <summary>
/// Parses the first CLI argument into (layer, opMode, serializerMode).
/// </summary>
/// <param name="args">Raw command-line arguments; only <c>args[0]</c> is inspected.</param>
/// <param name="layer">Test-data layer keyword (<c>core</c> / <c>comprehensive</c> / <c>edge</c> / <c>all</c>); defaults to <c>all</c>.</param>
/// <param name="opMode"><c>all</c>, <c>serialize</c> or <c>deserialize</c>; defaults to <c>all</c>.</param>
/// <param name="serializerMode"><c>standard</c> or <c>asyncpipe</c>; defaults to <c>standard</c>.</param>
/// <returns>
/// <c>false</c> when the argument selected a special mode that has already been executed here
/// (currently only <c>profiler</c>) — the caller should exit without running the standard benchmark.
/// <c>true</c> otherwise. An unrecognised keyword is NOT rejected: it is forwarded as the layer
/// keyword for backwards compatibility.
/// </returns>
private static bool TryParseCliArgs(string[] args, out string layer, out string opMode, out string serializerMode)
{
    layer = "all";
    opMode = "all";
    serializerMode = "standard";

    // Callers check args.Length first, but an empty array must fall back to the defaults
    // instead of throwing on args[0].
    if (args.Length == 0)
    {
        return true;
    }

    // Invariant lowering: keyword matching must not depend on the machine's culture.
    // Under tr-TR, ToLower() maps 'I' to dotless 'ı', so "PIPE" or "SERIALIZE" would not match.
    var arg = args[0].ToLowerInvariant();

    switch (arg)
    {
        case "profiler":
            // Profiler mode: warmup only, then exit (for memory profiler analysis).
            RunProfilerMode();
            return false;

        case "quick":
            // Quick mode: short warmup, few iterations, small sample count.
            WarmupIterations = 5;
            TestIterations = 100;
            BenchmarkSamples = 3;
            break;

        case "core":
        case "comprehensive":
        case "edge":
        case "all":
            layer = arg;
            break;

        case "asyncpipe":
        case "pipe":
            // AsyncPipe-only mode: streaming I/O isolation across all test data.
            serializerMode = "asyncpipe";
            break;

        case "ser":
        case "serialize":
            opMode = "serialize";
            break;

        case "des":
        case "deserialize":
            opMode = "deserialize";
            break;

        default:
            // Backwards compat: unknown arg → treat as layer keyword.
            layer = arg;
            break;
    }

    return true;
}
/// <summary>
/// Runs the benchmark suite end-to-end for the given configuration: pre-warmup → per-cell warmup
/// + measurement → grouped results print → save to disk. Used by both the CLI and interactive
/// menu paths; the interactive loop calls this repeatedly without restarting the process.
/// </summary>
private static void RunBenchmark(string layer, string opMode, string serializerMode)
{
System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════╗");
System.Console.WriteLine("║ COMPREHENSIVE SERIALIZER BENCHMARK SUITE ║");
System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════╝");
@ -449,7 +488,7 @@ public static class Program
// fits blocking-free in one kernel pipe-buffer slot. Single source of truth for both app-level
// wire chunk AND kernel transfer unit; change ONLY this line when tuning.
var binaryFastModePipeChunkOnly = AcBinarySerializerOptions.FastMode;
binaryFastModePipeChunkOnly.BufferWriterChunkSize = 4096; //AsyncPipeWriterOutput.MaxChunkSize;
binaryFastModePipeChunkOnly.BufferWriterChunkSize = PipeChunkSize;
return new List<ISerializerBenchmark>
{
@ -463,6 +502,19 @@ public static class Program
// the chunked-row above this isolates AsyncPipe-framework-overhead (Δ vs raw) from
// kernel-transport-overhead (raw vs in-process Byte[]).
new AcBinaryNamedPipeRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeRaw)"),
// Chunked-framed AsyncPipe over an IN-MEMORY System.IO.Pipelines.Pipe (NO NamedPipe, NO kernel).
// Same chunked-streaming code path (SerializeChunkedFramed → AsyncPipeReaderInput) but with the
// kernel-pipe replaced by a managed-only Pipe. Eliminates per-chunk syscall overhead (~30 µs/chunk
// on NamedPipe → ~1-2 µs/chunk on in-memory Pipe). Side-by-side with the NamedPipe row above this
// isolates pure CPU cost of the chunked-streaming framework (vs kernel-pipe transport cost) — the
// in-memory Pipe row should be much closer to the raw-byte[] row, validating that NamedPipe loopback
// is the worst-case benchmark scenario for chunked-streaming and not representative of real network
// / file / cross-thread Pipe scenarios.
new AcBinaryInMemoryPipeBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeChunk)"),
// Raw byte[] over IN-MEMORY direct cross-thread handoff (no transport at all). Apples-to-apples
// baseline for the in-memory chunked row above: same in-memory transport (zero kernel), but raw
// byte[] vs chunked-streaming wire format. Completes the 2x2 matrix [chunked,raw] × [kernel,memory].
new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkOnly, "FastMode (PipeRaw)"),
};
}
@ -484,7 +536,13 @@ public static class Program
// allocates a fresh ABW. Independent of the AsyncPipe profile (different mechanism: alloc overhead
// vs syscall count).
var binaryFastModeBufWrChunk = AcBinarySerializerOptions.FastMode;
binaryFastModeBufWrChunk.BufferWriterChunkSize = 4096;
binaryFastModeBufWrChunk.BufferWriterChunkSize = PipeChunkSize;
// In-memory Pipe variant — same 4 KB chunkSize as the AsyncPipe mode, no kernel-pipe alignment
// concern (managed slabs are not page-aligned anyway). Drives SerializeChunkedFramed via the in-memory
// System.IO.Pipelines.Pipe (zero-copy slab handoff between producer and drain task).
var binaryFastModePipeChunkInMem = AcBinarySerializerOptions.FastMode;
binaryFastModePipeChunkInMem.BufferWriterChunkSize = PipeChunkSize;
var defaultOptions = AcBinarySerializerOptions.Default;
defaultOptions.UseStringInterning = StringInterningMode.None;
@ -517,8 +575,20 @@ public static class Program
// allocation. Optimum for this scenario.
new AcBinaryFreshBufferWriterBenchmark(testData.Order, binaryFastModeBufWrChunk, "FastMode (4KB)"),
// AsyncPipe streaming (AcBinaryNamedPipeBenchmark) is intentionally OMITTED here — run it via
// the dedicated AsyncPipe menu / CLI mode for isolated streaming-I/O measurements.
// AcBinary chunked-streaming over an IN-MEMORY Pipe (no kernel transport). Side-by-side with the
// Byte[] / IBufferWriter rows above this shows the chunked-streaming framework's pure CPU cost
// (no NamedPipe loopback noise) vs the simpler in-process serialize-then-deserialize patterns.
// The IO column shows "Pipe(in-mem)" — distinct from the NamedPipe AsyncPipe rows in [P] mode.
new AcBinaryInMemoryPipeBenchmark(testData.Order, binaryFastModePipeChunkInMem, "FastMode (PipeChunk)"),
// Raw byte[] over IN-MEMORY direct cross-thread handoff (no transport, no kernel, no Pipe). Apples-to-
// apples baseline for the in-memory chunked row above: same in-memory pattern, but raw byte[] vs
// chunked-streaming wire format. The IO column shows "Bytes(in-mem)".
new AcBinaryInMemoryRawByteArrayBenchmark(testData.Order, binaryFastModePipeChunkInMem, "FastMode (PipeRaw)"),
// AsyncPipe streaming over kernel NamedPipe (AcBinaryNamedPipeBenchmark) is intentionally OMITTED
// here — run it via the dedicated AsyncPipe menu [P] / CLI mode for isolated kernel-transport
// measurements.
// ============================================================
// MemoryPack — three I/O modes for apples-to-apples comparison
@ -724,37 +794,80 @@ public static class Program
/// <summary>
/// Interactive menu shown when no CLI args are given. Loops on settings changes ([S]) — the user
/// is returned to this menu after modifying the iteration counts.
/// </summary>
/// <returns>
/// The selected <c>(layer, serializerMode)</c> pair, or <c>null</c> when the user chose [Q] Quit.
/// Any unrecognised key falls back to <c>("all", "standard")</c>.
/// </returns>
private static (string layer, string serializerMode)? ShowInteractiveMenu()
{
    while (true)
    {
        System.Console.WriteLine();
        System.Console.WriteLine("╔══════════════════════════════════════════════════════════╗");
        System.Console.WriteLine("║ AcBinary Benchmark Suite ║");
        System.Console.WriteLine("╚══════════════════════════════════════════════════════════╝");
        System.Console.WriteLine();
        System.Console.WriteLine("Select benchmark layer:");
        System.Console.WriteLine();
        System.Console.WriteLine(" [1] Core — daily iteration");
        System.Console.WriteLine(" [2] Comprehensive — release validation");
        System.Console.WriteLine(" [3] Edge cases — refactor verification");
        System.Console.WriteLine(" [A] All layers");
        System.Console.WriteLine(" [P] AsyncPipe — streaming I/O isolation (only AsyncPipe, all test data)");
        // The settings line echoes the live values so the user sees what the next run will use.
        System.Console.WriteLine($" [S] Settings — modify Warmup ({WarmupIterations}) / Iterations ({TestIterations}) / Samples ({BenchmarkSamples})");
        System.Console.WriteLine(" [Q] Quit");
        System.Console.Write("\nSelection: ");

        var key = System.Console.ReadKey(intercept: false).KeyChar;
        System.Console.WriteLine();

        // Invariant lowering keeps the key mapping independent of the current culture.
        switch (char.ToLowerInvariant(key))
        {
            case '1': return ("core", "standard");
            case '2': return ("comprehensive", "standard");
            case '3': return ("edge", "standard");
            case 'a': return ("all", "standard");
            case 'p': return ("all", "asyncpipe");
            case 's':
                ShowSettingsMenu();
                continue; // re-display the main menu after settings update
            case 'q': return null;
            default: return ("all", "standard");
        }
    }
}
/// <summary>
/// Settings sub-menu. Asks, in order, for WarmupIterations (minimum 0), TestIterations (minimum 1)
/// and BenchmarkSamples (minimum 1), storing each answer back into the matching static field,
/// then prints a one-line summary of the resulting values. Each prompt keeps the current value
/// when the answer is empty or rejected (see <c>PromptInt</c>).
/// </summary>
private static void ShowSettingsMenu()
{
    const string rule = "─────────────────────────────────────────────";

    // Header block.
    System.Console.WriteLine();
    System.Console.WriteLine(rule);
    System.Console.WriteLine("Settings — press Enter to keep current value");
    System.Console.WriteLine(rule);
    System.Console.WriteLine();

    // One prompt per tunable; the current value doubles as the default.
    var warmup = PromptInt("WarmupIterations", WarmupIterations, min: 0);
    WarmupIterations = warmup;

    var iterations = PromptInt("TestIterations ", TestIterations, min: 1);
    TestIterations = iterations;

    var samples = PromptInt("BenchmarkSamples", BenchmarkSamples, min: 1);
    BenchmarkSamples = samples;

    // Summary is printed unconditionally, whether or not anything changed.
    System.Console.WriteLine();
    System.Console.WriteLine($"✓ Settings updated: Warmup={WarmupIterations} | Iterations={TestIterations} | Samples={BenchmarkSamples}");
}
/// <summary>
/// Writes a <c>name [current]:</c> prompt, reads one console line and interprets it as an integer.
/// </summary>
/// <param name="name">Label shown in the prompt.</param>
/// <param name="currentValue">Value shown as the default and returned whenever the input is not accepted.</param>
/// <param name="min">Smallest acceptable value (inclusive).</param>
/// <returns>
/// The parsed integer when the trimmed input parses and is at least <paramref name="min"/>;
/// otherwise <paramref name="currentValue"/>. Empty input (or end of input) returns the current
/// value silently; any other rejected input also prints a one-line notice.
/// </returns>
private static int PromptInt(string name, int currentValue, int min)
{
    System.Console.Write($" {name} [{currentValue}]: ");

    var raw = System.Console.ReadLine();
    var text = raw is null ? "" : raw.Trim();

    // Nothing typed: keep the existing setting without complaint.
    if (text.Length == 0)
    {
        return currentValue;
    }

    var accepted = int.TryParse(text, out var parsed) && parsed >= min;
    if (!accepted)
    {
        System.Console.WriteLine($" ! Invalid value (need int ≥ {min}) — keeping {currentValue}");
        return currentValue;
    }

    return parsed;
}
/// <summary>
@ -1264,6 +1377,185 @@ public static class Program
}
}
/// <summary>
/// Same chunked-framed AsyncPipe code path as <see cref="AcBinaryNamedPipeBenchmark"/>, but the transport
/// is an in-memory <see cref="System.IO.Pipelines.Pipe"/> instead of a kernel <c>NamedPipe</c>. The Pipe's
/// <c>Writer</c>/<c>Reader</c> pair is a managed-only zero-copy slab handoff — no syscalls, no kernel
/// buffer copy, no IRP queueing.
///
/// <para><b>Why this benchmark matters</b>: by holding ALL other variables constant (same SerializeChunkedFramed,
/// same AsyncPipeReaderInput, same drain task, same consumer task, same multi-message wire format), this
/// row isolates the <b>kernel-NamedPipe transport overhead</b> from the chunked-streaming framework's pure
/// CPU cost. The expected delta vs <see cref="AcBinaryNamedPipeBenchmark"/>: per-chunk overhead drops from
/// ~25-30 µs (kernel-syscall pair + IRP) to ~1-2 µs (managed slab handoff). Multi-chunk Large-message rows
/// should converge dramatically toward <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/>.</para>
///
/// <para><b>Real-world relevance</b>: in-memory Pipe is the typical primitive used for cross-thread serializer
/// pipelines inside a single process (e.g. SignalR's Kestrel transport adapter, gRPC framework internals,
/// custom message brokers). The numbers from this row reflect that scenario, NOT the kernel-pipe loopback
/// of the NamedPipe benchmark.</para>
/// </summary>
private sealed class AcBinaryInMemoryPipeBenchmark : ISerializerBenchmark, IDisposable
{
// Payload written to the pipe on every Serialize() call.
private readonly TestOrder _order;
// Options used for the chunked write and for the consumer-side Deserialize call.
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting only
// Long-lived in-memory pipe lifecycle (set up once in ctor — NOT timed).
private readonly Pipe _pipe;
private readonly PipeWriter _pipeWriter;
private readonly PipeReader _pipeReader;
// Long-lived multi-message receive infrastructure (set up once in ctor) — same pattern as the NamedPipe
// variant: drain pumps reader into AsyncPipeReaderInput, consumer task drives Deserialize<T>(input).
private readonly AsyncPipeReaderInput _input;
// Single cancellation source shared by the drain call and the consumer loop; cancelled in Dispose.
private readonly CancellationTokenSource _cts;
private readonly Task _drainTask;
private readonly Task _consumerTask;
// Handshake pair: Serialize() sets _consumeRequest to start one deserialize; the consumer sets
// _consumeDone (in a finally block) when that deserialize has finished or failed.
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
// Result capture is switched on only inside VerifyRoundTrip; regular iterations discard the result.
private object? _lastResult;
private bool _captureResult;
// Makes Dispose idempotent.
private bool _disposed;
public string Engine => EngineAcBinary;
public string IoMode => IoInMemoryPipe;
public string DispatchMode => _options.UseGeneratedCode ? ModeSGen : ModeRuntime;
public string OptionsPreset { get; }
// Size of a plain (non-chunked) serialization of the payload, computed once in the ctor.
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
// Serialize() performs the whole write + read + deserialize cycle; Deserialize() is a no-op.
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=Pipe(in-memory,multiMessage,2-task)");
/// <summary>
/// Builds the pipe, the reader-side input and both background tasks, recording the bytes allocated
/// by each half of the setup in <see cref="SetupSerializeAllocBytes"/> and
/// <see cref="SetupDeserializeAllocBytes"/>.
/// </summary>
public AcBinaryInMemoryPipeBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
// === SERIALIZE-side setup measurement ===
// In-memory Pipe construction. NO kernel-pipe pair, NO Connect handshake — just a managed Pipe object
// and a reference to its Writer side. PipeWriterImpl (parallel-flush capable, NOT StreamPipeWriter).
// Full collection before each snapshot so the before/after difference covers only the statements
// in between. GetAllocatedBytesForCurrentThread counts allocations of the constructing thread only.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeSer = GC.GetAllocatedBytesForCurrentThread();
_pipe = new Pipe();
_pipeWriter = _pipe.Writer;
var afterSer = GC.GetAllocatedBytesForCurrentThread();
SetupSerializeAllocBytes = afterSer - beforeSer;
// === DESERIALIZE-side setup measurement ===
// PipeReader reference + AsyncPipeReaderInput (ArrayPool rent + ManualResetEventSlim) + drain task +
// consumer task scaffolding. Identical to the NamedPipe variant on the receive side.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_pipeReader = _pipe.Reader;
// Initial capacity is two chunks' worth of bytes; multiMessage keeps the input usable across iterations.
_input = new AsyncPipeReaderInput(_options.BufferWriterChunkSize * 2, multiMessage: true);
_cts = new CancellationTokenSource();
// NOTE(review): the bodies of these two tasks presumably run on pool threads, so whatever they
// allocate is not part of the per-thread counter read below — confirm if that matters.
_drainTask = Task.Run(() => _input.DrainFromAsync(_pipeReader, _cts.Token));
_consumerTask = Task.Run(ConsumeLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, runs Deserialize<T>(_input) when signaled, signals _consumeDone.
// Mirror of AcBinaryNamedPipeBenchmark.ConsumeLoop — same pattern, same MRES protocol.
private void ConsumeLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
// Wait(ct) throws OperationCanceledException on cancellation (handled by the outer catch).
_consumeRequest.Wait(ct);
// Dispose sets the event right after cancelling, so the wait can also return normally
// with the token already cancelled — exit in that case too.
if (ct.IsCancellationRequested) return;
// Re-arm for the next iteration before doing the work.
_consumeRequest.Reset();
try
{
var result = AcBinaryDeserializer.Deserialize<TestOrder>(_input, _options);
if (_captureResult) _lastResult = result;
}
catch
{
// Swallow — see ConsumeLoop in NamedPipe variant for rationale.
}
finally
{
// Always release the caller blocked in Serialize(), even when deserialization threw.
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
/// <summary>Runs <paramref name="iterations"/> full round-trips via <see cref="Serialize"/>.</summary>
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++) Serialize();
}
/// <summary>
/// One full round-trip: wakes the consumer, writes the payload to the pipe in chunked-framed form,
/// then blocks until the consumer reports completion.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// Same 2-task streaming pipeline as NamedPipe variant — only the transport differs (in-memory Pipe
// instead of kernel NamedPipe). Per-chunk SerializeChunkedFramed → PipeWriter slab → drain task
// reads from PipeReader → input.Feed → consumer Deserialize<T> consumes byte-by-byte.
// Clear the done-signal BEFORE waking the consumer so a signal left over from the previous
// iteration cannot satisfy the Wait below.
_consumeDone.Reset();
// Consumer is woken before the write starts, so deserialization can overlap with serialization.
_consumeRequest.Set();
AcBinarySerializer.SerializeChunkedFramed(_order, _pipeWriter, _options);
// No timeout: this blocks until the consumer's finally block runs.
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
/// <summary>
/// Runs one round-trip with result capture enabled and compares the deserialized object with the
/// original via <c>DeepEqualsViaJson</c>. Returns <c>false</c> when no result was captured
/// (for example because the consumer swallowed an exception).
/// </summary>
public bool VerifyRoundTrip()
{
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
// Restore benchmark mode and drop the captured object so it is not kept alive.
_captureResult = false;
_lastResult = null;
}
}
/// <summary>
/// Best-effort teardown: stops both background tasks (waiting at most two seconds for each),
/// completes both pipe ends, then disposes the input, the events and the cancellation source.
/// Every step swallows its own exception so one failure does not skip the remaining steps.
/// </summary>
public void Dispose()
{
if (_disposed) return;
_disposed = true;
// Cancel drain + consumer tasks → both exit. Pulse _consumeRequest in case consumer is parked.
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _drainTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
// Complete writer + reader (in-memory Pipe — no underlying stream to dispose).
try { _pipeWriter.CompleteAsync().AsTask().Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _pipeReader.Complete(); } catch { /* swallow on teardown */ }
// Disposed only after the tasks that use them have been asked to stop and waited for.
try { _input.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Raw <c>byte[]</c> over a long-lived NamedPipe — NO chunk-framing, NO <c>AsyncPipeReaderInput</c>,
/// NO sliding-window buffer. Calling thread serialises + writes; a long-lived background consumer task
@ -1479,6 +1771,174 @@ public static class Program
}
}
/// <summary>
/// Raw <c>byte[]</c> over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
/// <c>Channel&lt;T&gt;</c>). Calling thread serialises into a fresh <c>byte[]</c>, hands it to a
/// background consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
///
/// <para><b>Why this benchmark matters</b>: completes the 2x2 transport × wire-format matrix:</para>
/// <list type="bullet">
/// <item><description><b>NamedPipe + Chunked</b> = <see cref="AcBinaryNamedPipeBenchmark"/></description></item>
/// <item><description><b>NamedPipe + Raw</b> = <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/></description></item>
/// <item><description><b>In-memory Pipe + Chunked</b> = <see cref="AcBinaryInMemoryPipeBenchmark"/></description></item>
/// <item><description><b>In-memory + Raw</b> = THIS row — apples-to-apples baseline for the in-memory chunked row</description></item>
/// </list>
/// <para>Side-by-side with <see cref="AcBinaryInMemoryPipeBenchmark"/> this isolates the chunked-streaming
/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
/// Side-by-side with <see cref="AcBinaryNamedPipeRawByteArrayBenchmark"/> this isolates the kernel-NamedPipe
/// overhead on the raw-byte[] side.</para>
/// </summary>
private sealed class AcBinaryInMemoryRawByteArrayBenchmark : ISerializerBenchmark, IDisposable
{
// Payload serialised on every Serialize() call.
private readonly TestOrder _order;
// Options used for both the per-iteration Serialize and the consumer-side Deserialize.
private readonly AcBinarySerializerOptions _options;
private readonly byte[] _serialized; // for SerializedSize reporting only
// Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
// No transport — just a byte[] slot for handoff between calling thread and consumer task.
private readonly CancellationTokenSource _cts;
private readonly Task _consumerTask;
// Handshake pair: Serialize() sets _consumeRequest after publishing _pendingBytes; the consumer
// sets _consumeDone (in a finally block) once it has finished with that buffer.
private readonly ManualResetEventSlim _consumeRequest = new(false);
private readonly ManualResetEventSlim _consumeDone = new(false);
private byte[]? _pendingBytes; // calling thread → consumer task handoff slot
private object? _lastResult; // captured during VerifyRoundTrip; null in benchmark iters
private bool _captureResult;
// Makes Dispose idempotent.
private bool _disposed;
public string Engine => EngineAcBinary;
public string IoMode => IoInMemoryRaw;
public string DispatchMode => _options.UseGeneratedCode ? ModeSGen : ModeRuntime;
public string OptionsPreset { get; }
// Size of the payload's serialized form, computed once in the ctor.
public int SerializedSize => _serialized.Length;
public long SetupSerializeAllocBytes { get; }
public long SetupDeserializeAllocBytes { get; }
// Serialize() performs the whole serialize + handoff + deserialize cycle; Deserialize() is a no-op.
public bool IsRoundTripOnly => true;
public string OptionsDescription => BuildAcBinaryOptionsDescription(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");
/// <summary>
/// Starts the background consumer task and records the bytes allocated by that setup in
/// <see cref="SetupDeserializeAllocBytes"/>. The serialize side has no setup, so
/// <see cref="SetupSerializeAllocBytes"/> is fixed at zero.
/// </summary>
public AcBinaryInMemoryRawByteArrayBenchmark(TestOrder order, AcBinarySerializerOptions options, string optionsPreset)
{
_order = order;
_options = options;
OptionsPreset = optionsPreset;
_serialized = AcBinarySerializer.Serialize(order, _options);
// === SERIALIZE-side setup measurement ===
// Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
SetupSerializeAllocBytes = 0;
// === DESERIALIZE-side setup measurement ===
// 1× background consumer-task + 2× MRES (request / done) + cancellation source.
// NOTE(review): the two events are created by field initializers, i.e. before this measured
// window opens, so their allocation is presumably not part of the figure below — confirm.
GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
var beforeDes = GC.GetAllocatedBytesForCurrentThread();
_cts = new CancellationTokenSource();
_consumerTask = Task.Run(ConsumerLoop);
var afterDes = GC.GetAllocatedBytesForCurrentThread();
SetupDeserializeAllocBytes = afterDes - beforeDes;
}
// BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize<T>(bytes),
// signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
// reference itself (zero-copy by reference).
private void ConsumerLoop()
{
var ct = _cts.Token;
try
{
while (true)
{
// Wait(ct) throws OperationCanceledException on cancellation (handled by the outer catch).
_consumeRequest.Wait(ct);
// Dispose sets the event right after cancelling, so the wait can also return normally
// with the token already cancelled — exit in that case too.
if (ct.IsCancellationRequested) return;
// Re-arm for the next iteration before doing the work.
_consumeRequest.Reset();
try
{
// The slot is read but not cleared; it is overwritten by the next Serialize() call.
var bytes = _pendingBytes;
if (bytes != null)
{
var result = AcBinaryDeserializer.Deserialize<TestOrder>(bytes, _options);
if (_captureResult) _lastResult = result;
}
}
catch
{
// Swallow — see ConsumerLoop in NamedPipe variant for rationale.
}
finally
{
// Always release the caller blocked in Serialize(), even when deserialization threw.
_consumeDone.Set();
}
}
}
catch (OperationCanceledException)
{
// Cooperative cancel — Dispose path. Swallow.
}
}
/// <summary>Runs <paramref name="iterations"/> full round-trips via <see cref="Serialize"/>.</summary>
public void Warmup(int iterations)
{
for (var i = 0; i < iterations; i++) Serialize();
}
/// <summary>
/// One full round-trip: serialises the payload into a new <c>byte[]</c>, publishes it to the
/// consumer task and blocks until the consumer reports completion.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
public void Serialize()
{
// 2-task in-memory pipeline:
// 1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
// 2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
// picks up the reference (zero-copy) and runs Deserialize<T>(bytes).
// 3. Calling thread waits for _consumeDone (consumer task finished Des).
//
// Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
// are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
// signalling and waiting while consumer thread takes the byte[]).
var bytes = AcBinarySerializer.Serialize(_order, _options);
// Publish the buffer before signalling so the consumer sees it when it wakes.
_pendingBytes = bytes;
// Clear the done-signal BEFORE waking the consumer so a signal left over from the previous
// iteration cannot satisfy the Wait below.
_consumeDone.Reset();
_consumeRequest.Set();
// No timeout: this blocks until the consumer's finally block runs.
_consumeDone.Wait();
}
[MethodImpl(MethodImplOptions.NoInlining)]
public void Deserialize()
{
// No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
}
/// <summary>
/// Runs one round-trip with result capture enabled and compares the deserialized object with the
/// original via <c>DeepEqualsViaJson</c>. Returns <c>false</c> when no result was captured
/// (for example because the consumer swallowed an exception).
/// </summary>
public bool VerifyRoundTrip()
{
_captureResult = true;
try
{
Serialize();
var result = _lastResult as TestOrder;
return result != null && DeepEqualsViaJson(_order, result);
}
finally
{
// Restore benchmark mode and drop the captured object so it is not kept alive.
_captureResult = false;
_lastResult = null;
}
}
/// <summary>
/// Best-effort teardown: cancels and wakes the consumer, waits at most two seconds for it to exit,
/// then disposes the events and the cancellation source. Every step swallows its own exception so
/// one failure does not skip the remaining steps.
/// </summary>
public void Dispose()
{
if (_disposed) return;
_disposed = true;
try { _cts.Cancel(); } catch { /* swallow on teardown */ }
try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
try { _cts.Dispose(); } catch { /* swallow on teardown */ }
}
}
/// <summary>
/// Benchmarks MemoryPack via the IBufferWriter overload, allocating a FRESH ArrayBufferWriter on EVERY call.
/// Apples-to-apples counterpart to AcBinaryFreshBufferWriterBenchmark.

View File

@ -14,10 +14,11 @@ namespace AyCode.Core.Tests.Serialization;
/// the tests own the <see cref="NamedPipeServerStream"/> / <see cref="NamedPipeClientStream"/>
/// lifecycle directly and call the generic
/// <see cref="AcBinarySerializer.SerializeChunked{T}(T, PipeWriter, AcBinarySerializerOptions)"/> +
/// <see cref="AcBinaryDeserializer.DeserializeFromPipeReaderAsync{T}"/> primitives. This proves
/// the streaming framework works on arbitrary <c>PipeWriter</c>/<c>PipeReader</c> sources
/// (NamedPipe, FileStream, NetworkStream, custom transports) without per-transport adapters in
/// the framework.</para>
/// <see cref="AcBinaryDeserializer.Deserialize{T}(AsyncPipeReaderInput, AcBinarySerializerOptions)"/>
/// primitives, with the receive-side drain implemented via the test-only
/// <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/> extension. This proves the streaming
/// framework works on arbitrary <c>PipeWriter</c>/<c>PipeReader</c> sources (NamedPipe, FileStream,
/// NetworkStream, custom transports) without per-transport adapters in the framework.</para>
///
/// <para>With <c>BufferWriterChunkSize = 256</c>, even small test payloads cross multiple chunk
/// boundaries on the wire — exercises the real chunking + sliding-window cycling behavior.</para>
@ -104,8 +105,10 @@ public class AcBinarySerializerNamedPipeTests
/// <summary>
/// Owns the full NamedPipe lifecycle: binds server, accepts connect, drives the generic
/// <see cref="AcBinarySerializer.SerializeChunked{T}(T, PipeWriter, AcBinarySerializerOptions)"/> on
/// the client side and <see cref="AcBinaryDeserializer.DeserializeFromPipeReaderAsync{T}"/>
/// on the server side. The framework helpers know nothing about NamedPipe — only PipeWriter /
/// the client side, and on the server side runs the canonical drain+deserialize pair
/// (test-only <see cref="AsyncPipeReaderInputExtensions.DrainFromAsync"/> on the calling thread,
/// <see cref="AcBinaryDeserializer.Deserialize{T}(AsyncPipeReaderInput, AcBinarySerializerOptions)"/>
/// on a Task.Run BG thread). The framework helpers know nothing about NamedPipe — only PipeWriter /
/// PipeReader.
/// </summary>
private static async Task<T?> RunNamedPipeRoundTripAsync<T>(string pipeName, T original, AcBinarySerializerOptions opts)
@ -119,7 +122,12 @@ public class AcBinarySerializerNamedPipeTests
await pipeServer.WaitForConnectionAsync().ConfigureAwait(false);
var pipeReader = PipeReader.Create(pipeServer);
return await AcBinaryDeserializer.DeserializeFromPipeReaderAsync<T>(pipeReader, opts).ConfigureAwait(false);
// Inlined version of what the removed DeserializeFromPipeReaderAsync used to do:
// single-message mode + drain on calling thread + deserialize on Task.Run BG.
using var input = new AsyncPipeReaderInput(initialCapacity: opts.BufferWriterChunkSize * 2, multiMessage: false);
var deserTask = Task.Run(() => AcBinaryDeserializer.Deserialize<T>(input, opts));
await input.DrainFromAsync(pipeReader).ConfigureAwait(false);
return await deserTask.ConfigureAwait(false);
});
await using var pipeClient = new NamedPipeClientStream(".", pipeName, PipeDirection.Out, System.IO.Pipes.PipeOptions.Asynchronous);

View File

@ -1,18 +1,22 @@
using AyCode.Core.Serializers.Binaries;
using System;
using System.IO.Pipelines;
using System.Threading;
using System.Threading.Tasks;
namespace AyCode.Core.Serializers.Binaries;
namespace AyCode.Core.Tests.Serialization;
/// <summary>
/// Extension methods for populating <see cref="AsyncPipeReaderInput"/> from
/// <see cref="System.IO.Pipelines.PipeReader"/>-backed transports (NamedPipe, FileStream,
/// Test/benchmark-only extension methods for populating <see cref="AsyncPipeReaderInput"/>
/// from <see cref="System.IO.Pipelines.PipeReader"/>-backed transports (NamedPipe, FileStream,
/// custom pipe sources).
///
/// Lives in a separate file from the core class so <see cref="AsyncPipeReaderInput"/> does not
/// import <c>System.IO.Pipelines</c> in its primary surface — the optional pull-mode is visible
/// at use-sites (per ADR-0003 Decision §3 at <c>docs/adr/0003-acbinary-streaming-receive-architecture.md</c>).
/// <para><b>Why test-only:</b> in real production, the consuming application already has its own
/// reader-task that reads from the pipe and pushes bytes via <c>AsyncPipeReaderInput.Feed</c>
/// — providing this drain extension publicly would duplicate that responsibility and confuse
/// the canonical push-pattern. The extension is kept here for unit-test scaffolding and the
/// streaming benchmark; production NuGet consumers should write their own drain logic in their
/// own reader-task following the application's threading model.</para>
/// </summary>
public static class AsyncPipeReaderInputExtensions
{
@ -21,9 +25,9 @@ public static class AsyncPipeReaderInputExtensions
/// calls <see cref="AsyncPipeReaderInput.Feed"/> on each segment and
/// <see cref="AsyncPipeReaderInput.Complete"/> when the pipe completes.
///
/// <para>Typical usage: NamedPipe IPC and FileStream-via-PipeReader transports schedule this
/// on a background task while the deserialization context reads from the same input on
/// another thread.</para>
/// <para>Typical usage (test-only): NamedPipe IPC and FileStream-via-PipeReader transports
/// schedule this on a background task while the deserialization context reads from the same
/// input on another thread.</para>
///
/// <para><see cref="AsyncPipeReaderInput.Complete"/> is invoked in a <c>finally</c> block —
/// ensures the consumer always wakes up even if the pipe read throws or the operation is

View File

@ -337,52 +337,6 @@ public static partial class AcBinaryDeserializer
}
}
/// <summary>
/// Deserialize from a <see cref="System.IO.Pipelines.PipeReader"/> with full streaming pipeline
/// parallelism — drains the reader on the calling thread, while a background <c>Task.Run</c>
/// deserializes incrementally from the same shared <see cref="AsyncPipeReaderInput"/>.
///
/// <para>Transport-agnostic: works with any <c>PipeReader</c> source — NamedPipe IPC
/// (<c>PipeReader.Create(namedPipeServerStream)</c>), file-stream
/// (<c>PipeReader.Create(fileStream)</c>), TCP (<c>PipeReader.Create(networkStream)</c>),
/// or custom <c>PipeReader</c> implementations. Reads <b>raw AcBinary bytes</b> verbatim from
/// the pipe — no wire-format unwrapping. Pair with the producer-side
/// <see cref="AcBinarySerializer.SerializeChunked{T}(T, System.IO.Pipelines.PipeWriter, AcBinarySerializerOptions)"/>
/// (or its <see cref="System.IO.Pipelines.Pipe"/> overload), which writes the same raw byte
/// stream as <see cref="AcBinarySerializer.Serialize{T}(T, AcBinarySerializerOptions)"/>'s
/// <c>byte[]</c> output.</para>
///
/// <para>Receive buffer initial capacity is derived from <c>options.BufferWriterChunkSize × 2</c>
/// — two-chunks-worth of headroom plus reset-to-0 cycling reuses the same buffer for the
/// message's lifetime regardless of total payload size.</para>
///
/// <para><b>For the multiplexed wire format</b> (per-chunk <c>[201][UINT16][data]</c> headers,
/// produced by <c>SerializeChunkedFramed</c> or SignalR's AsyncSegment mode): the parser
/// strips framing on its own (e.g. <c>AcBinaryHubProtocol.TryParseChunkData</c>) and feeds
/// only the data bytes here.</para>
/// </summary>
/// <param name="reader">Source pipe reader. Caller owns lifecycle (creation + completion).</param>
/// <param name="options">Serializer options. Defaults to <see cref="AcBinarySerializerOptions.Default"/>.
/// <c>BufferWriterChunkSize</c> controls the receive-side initial buffer (× 2 headroom).</param>
/// <param name="ct">Cancellation token. For connect-timeout, pass the token of a
/// <c>new CancellationTokenSource(timeout)</c>.</param>
public static async Task<T?> DeserializeFromPipeReaderAsync<T>(System.IO.Pipelines.PipeReader reader, AcBinarySerializerOptions? options = null, CancellationToken ct = default)
{
    if (reader is null) throw new ArgumentNullException(nameof(reader));

    var opts = options ?? AcBinarySerializerOptions.Default;

    // Single-message mode (multiMessage: false) — bytes drained from the PipeReader are forwarded
    // verbatim to the deserialization buffer. Pair with AcBinarySerializer.SerializeChunked
    // (raw byte stream) on the producer side; for multi-message framed wire formats the parser
    // strips framing upstream and feeds only data bytes here.
    using var input = new AsyncPipeReaderInput(initialCapacity: opts.BufferWriterChunkSize * 2, multiMessage: false);

    // Consumer side: deserializes on a thread-pool thread while this method drains the reader.
    var deserTask = Task.Run(() => Deserialize<T>(input, opts), ct);

    try
    {
        // Producer side: forwards pipe segments into the shared input on the calling flow.
        await input.DrainFromAsync(reader, ct).ConfigureAwait(false);
    }
    catch
    {
        // The drain failed or was cancelled. Leaving the method now would dispose `input`
        // while the background deserializer may still be reading from it, so wait for that
        // task to finish first. Task.WhenAny never throws for a faulted inner task.
        // NOTE(review): relies on DrainFromAsync completing the input in its finally block
        // (as its documentation states) so the deserializer wakes up — confirm.
        await Task.WhenAny(deserTask).ConfigureAwait(false);

        // Reading Exception marks a secondary deserializer fault as observed; the drain
        // failure is the primary error and is the one rethrown to the caller.
        _ = deserTask.Exception;
        throw;
    }

    return await deserTask.ConfigureAwait(false);
}
/// <summary>
/// Internal: Deserialize with any TInput (multi-segment or other future input types).
/// </summary>

View File

@ -426,9 +426,10 @@ public static partial class AcBinarySerializer
/// Serialize to a <see cref="System.IO.Pipelines.Pipe"/> as a chunked stream — pure AcBinary
/// bytes are written via <see cref="AsyncPipeWriterOutput"/> in raw mode (no per-chunk header).
/// The output is byte-compatible with <see cref="Serialize{T}(T, AcBinarySerializerOptions)"/>'s
/// <c>byte[]</c> result; a consumer can drain <c>pipe.Reader</c> and feed the bytes directly to
/// <see cref="AcBinaryDeserializer"/> (or pipe them through <c>DeserializeFromPipeReaderAsync</c>)
/// with no extra parser.
/// <c>byte[]</c> result; a consumer drains <c>pipe.Reader</c> in its own reader-task and pushes
/// bytes via <see cref="AsyncPipeReaderInput.Feed"/>, then calls
/// <see cref="AcBinaryDeserializer.Deserialize{T}(AsyncPipeReaderInput, AcBinarySerializerOptions)"/>
/// — no extra parser, no special transport adapter.
///
/// <para><b>Why <see cref="System.IO.Pipelines.Pipe"/> instead of <see cref="System.IO.Pipelines.PipeWriter"/>?</b>
/// <c>Pipe.Writer</c> is always the BCL <c>PipeWriterImpl</c>, which is parallel-capable

View File

@ -24,15 +24,49 @@ namespace AyCode.Core.Serializers.Binaries;
/// verbatim (matches <c>AcBinarySerializer.SerializeChunked</c> raw output drained from a
/// <see cref="System.IO.Pipelines.PipeReader"/>); single-message scenario, no auto-reset.</para>
///
/// <para>Usage modes:</para>
/// <para>Usage: <b>push pattern only</b>. The consumer's reader-task reads bytes from any
/// underlying transport (the framework knows nothing about which) and pushes them via
/// <see cref="Feed"/>; a separate deserializing thread (or task) calls
/// <see cref="AcBinaryDeserializer.Deserialize{T}(AsyncPipeReaderInput, AcBinarySerializerOptions)"/>.
/// The framework does NOT own the transport — the consumer's reader-task does, following the
/// application's threading model.</para>
///
/// <para><b>When chunked-streaming is the right fit</b> (vs raw <c>byte[]</c> /
/// <see cref="AcBinaryDeserializer.Deserialize{T}(byte[], AcBinarySerializerOptions)"/>):</para>
/// <list type="bullet">
/// <item><b>Push (Feed-API)</b>: producer thread calls <see cref="Feed"/> with chunk bytes
/// (typical for SignalR <c>TryParseChunkData</c>).</item>
/// <item><b>Pull (DrainFromAsync extension)</b>: helper drains a
/// <see cref="System.IO.Pipelines.PipeReader"/> into the input via repeated
/// <see cref="Feed"/> calls (typical for NamedPipe / FileStream / NetworkStream).</item>
/// <item><b>Network transports</b> — TCP / UDP / WebSocket / SSE / HTTP/2 streams. Per-chunk
/// CPU overhead (~30 µs / chunk) is invisible next to ms-scale RTT; the streaming
/// pipeline lets sender, transport, and receiver work in parallel on different parts of
/// the message.</item>
/// <item><b>Multi-connection servers</b> — Kestrel-style (SignalR), gRPC servers, custom RPC
/// hosts. Per-connection peak memory bounded by buffer-size (e.g. 32 KB), not by max
/// message size — 1000 concurrent connections × 1 MB messages = 32 MB peak (vs 1 GB
/// with raw <c>byte[]</c>). LOH allocation pressure (≥ 85 KB messages) is also avoided.</item>
/// <item><b>Message brokers / queues</b> — Kafka / Redis Streams / Azure Service Bus clients
/// that expose <see cref="System.Buffers.IBufferWriter{T}"/> sinks. Streaming serialize
/// writes directly into the transport buffer — no intermediate <c>byte[]</c> allocation.</item>
/// <item><b>File streaming</b> — <c>FileStream</c> behind a
/// <see cref="System.IO.Pipelines.PipeReader"/>. 100 MB+ payloads from disk with constant
/// 32 KB peak memory.</item>
/// <item><b>In-memory <see cref="System.IO.Pipelines.Pipe"/> cross-thread handoff</b> —
/// producer + consumer threads coordinate over a shared <c>Pipe</c>; zero-copy slab handoff.</item>
/// <item><b>Custom transport adapters</b> — anything where the consumer wants to push bytes
/// from a transport-specific reader-task.</item>
/// </list>
///
/// <para><b>When raw <c>byte[]</c> is the right fit</b>: same-process loopback IPC where transport
/// latency is near zero, single-producer/single-consumer batch operations where peak memory is
/// not a constraint, sub-LOH messages (&lt; 85 KB) with no GC-pressure concerns. The chunked-streaming
/// per-chunk CPU overhead is fully visible in these scenarios — raw is faster end-to-end.</para>
///
/// <para><b>Performance characteristic</b>: per-chunk overhead is roughly constant (~25-30 µs —
/// FlushAsync syscall + ReadAsync syscall + framing-parse + sliding-window bookkeeping). Total
/// chunk-overhead = <c>(messageSize / chunkSize) × ~30 µs</c>. The streaming benefit is pipeline
/// parallelism + bounded peak memory — both of which require a non-trivial transport stage to
/// surface (network, file, cross-thread queue). On same-process loopback NamedPipe (the worst-case
/// benchmark scenario), the per-chunk cost dominates and chunked measures slower than raw — this
/// reflects the near-zero-latency benchmark transport, not the characteristic of network or file transports.</para>
///
/// Backed by a single contiguous <c>byte[]</c> from <see cref="ArrayPool{T}"/>. Positions reset
/// to 0 when the consumer catches up (sliding-window cycling — peak buffer memory bounded by
/// chunk size, NOT message size). Grow is the absolute last resort and practically never fires
@ -312,8 +346,8 @@ public sealed class AsyncPipeReaderInput : IBinaryInputBase, IDisposable
}
/// <summary>
/// Whether <see cref="Complete"/> has been called (or <see cref="DrainFromAsync"/>'s underlying
/// stream signalled EOF and the finally block closed the input). Once <c>true</c>, the session
/// Whether <see cref="Complete"/> has been called (typically by the consumer's reader-task
/// finally-block after the underlying transport signals EOF). Once <c>true</c>, the session
/// has ended — any pending <see cref="AcBinaryDeserializer.Deserialize{T}(AsyncPipeReaderInput, AcBinarySerializerOptions)"/>
/// call returns whatever partial buffer is left, and subsequent calls return immediately.
/// </summary>

View File

@ -53,7 +53,33 @@ namespace AyCode.Core.Serializers.Binaries;
/// <see cref="AcBinaryHubProtocol"/> passes 10 s from its options). A <see cref="TimeoutException"/>
/// propagates to the caller, allowing the connection to abort instead of blocking forever.</para>
///
/// Maximum chunk data size (in framed mode): 65535 bytes (UINT16 max).
/// <para>Maximum chunk data size (in framed mode): 65535 bytes (UINT16 max).</para>
///
/// <para><b>When chunked-streaming is the right fit</b> (vs raw <c>byte[]</c> output via
/// <see cref="AcBinarySerializer.Serialize{T}(T, AcBinarySerializerOptions)"/>):</para>
/// <list type="bullet">
/// <item><b>Network transports</b> — TCP / UDP / WebSocket / SSE / HTTP/2 streams. Per-chunk
/// CPU overhead is invisible next to ms-scale RTT; pipeline parallelism lets sender,
/// transport, and receiver work in parallel on different parts of the message.</item>
/// <item><b>Multi-connection servers</b> — Kestrel-style (SignalR), gRPC / proprietary RPC.
/// Per-connection peak memory bounded by chunk-size; LOH allocation pressure (≥ 85 KB
/// messages) is avoided.</item>
/// <item><b>Message brokers / queues</b> — Kafka / Redis Streams / Service Bus clients exposing
/// <see cref="System.Buffers.IBufferWriter{T}"/> or <c>PipeWriter</c> sinks. Streaming
/// serialize writes directly into the transport buffer.</item>
/// <item><b>File streaming</b> — <c>FileStream</c>-backed <see cref="System.IO.Pipelines.PipeWriter"/>.
/// 100 MB+ payloads to disk with constant peak memory.</item>
/// <item><b>In-memory cross-thread <see cref="System.IO.Pipelines.Pipe"/></b> — producer thread
/// serialises while consumer thread deserialises in parallel.</item>
/// <item><b>Custom transport adapters</b> — anything where the application owns a
/// <c>PipeWriter</c> or <c>IBufferWriter</c> sink and wants incremental output.</item>
/// </list>
///
/// <para><b>When raw <c>byte[]</c> output is the right fit</b>: same-process loopback IPC where
/// transport latency is near zero, single batch-style operations where peak memory is not a
/// constraint, sub-LOH messages (&lt; 85 KB) with no GC-pressure concerns. The chunked-streaming
/// per-chunk CPU overhead is fully visible in these scenarios — raw is faster end-to-end. Pick the
/// chunked path only when the transport stage is non-trivial (network, file, cross-thread queue).</para>
/// </summary>
public struct AsyncPipeWriterOutput : IBinaryOutputBase
{

File diff suppressed because one or more lines are too long