using AyCode.Core.Serializers.Binaries;
using AyCode.Core.Tests.TestModels;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Benchmarks.Workloads.Scenarios;
/// <summary>
/// Raw byte[] over an in-memory cross-thread handoff — NO transport (no NamedPipe, no Pipe, no
/// Channel). The calling thread serialises into a fresh byte[] and hands it to a background
/// consumer task via a single byte[] slot + MRES pair; the consumer deserialises and signals done.
/// </summary>
/// <remarks>
/// Why this benchmark matters: it completes the 2x2 transport × wire-format matrix:
///
/// - NamedPipe + Chunked
/// - NamedPipe + Raw
/// - In-memory Pipe + Chunked
/// - In-memory + Raw = THIS row — apples-to-apples baseline for the in-memory chunked row
///
/// Side-by-side with the in-memory Pipe + Chunked row, this isolates the chunked-streaming
/// framework's pure CPU cost, with the same in-memory transport (zero kernel involvement) on both sides.
/// Side-by-side with the NamedPipe + Raw row, this isolates the kernel-NamedPipe
/// overhead on the raw-byte[] side.
/// </remarks>
public sealed class AcBinaryInMemoryRawByteArrayBenchmark<T> : ISerializerBenchmark, IDisposable where T : class
{
    private readonly T _order;
    private readonly AcBinarySerializerOptions _options;
    private readonly byte[] _serialized; // for SerializedSize reporting only

    // Long-lived consumer-task infrastructure (Deserialize on BG thread, signaled per iter).
    // No transport — just a byte[] slot for handoff between calling thread and consumer task.
    // The two events are created inside the constructor's measured window (not as field
    // initializers) so that their allocation is part of SetupDeserializeAllocBytes.
    private readonly CancellationTokenSource _cts;
    private readonly Task _consumerTask;
    private readonly ManualResetEventSlim _consumeRequest;
    private readonly ManualResetEventSlim _consumeDone;

    private byte[]? _pendingBytes;  // calling thread → consumer task handoff slot
    private object? _lastResult;    // captured during VerifyRoundTrip; null in benchmark iters
    private bool _captureResult;
    private bool _disposed;

    public BenchmarkEngine Engine => BenchmarkEngine.AcBinary;
    public BenchmarkIoMode IoMode => BenchmarkIoMode.InMemoryRaw;
    public BenchmarkDispatchMode DispatchMode => _options.UseGeneratedCode ? BenchmarkDispatchMode.SGen : BenchmarkDispatchMode.Runtime;
    public Type OrderType => typeof(T);
    public string OptionsPreset { get; }

    /// <summary>Length of the payload serialised once in the constructor.</summary>
    public int SerializedSize => _serialized.Length;

    /// <summary>Always 0: the serialize side has no long-lived setup in this scenario.</summary>
    public long SetupSerializeAllocBytes { get; }

    /// <summary>
    /// Bytes allocated on the constructing thread while creating the cancellation source,
    /// the two handoff events and the consumer task.
    /// </summary>
    public long SetupDeserializeAllocBytes { get; }

    /// <summary>
    /// True: the whole round trip runs inside <see cref="Serialize"/>; <see cref="Deserialize"/> is a no-op.
    /// </summary>
    public bool IsRoundTripOnly => true;

    public string OptionsDescription => BenchmarkOptions.BuildAcBinary(_options, $", BufferSize={_options.BufferWriterChunkSize}B, Transport=in-memory(raw,2-task)");

    /// <summary>
    /// Serialises <paramref name="order"/> once for size reporting, then starts the background
    /// consumer task and records what that setup allocated on the current thread.
    /// </summary>
    /// <param name="order">Object graph that every iteration serialises.</param>
    /// <param name="options">Serializer options used for both serialisation and deserialisation.</param>
    /// <param name="optionsPreset">Preset label exposed through <see cref="OptionsPreset"/>.</param>
    public AcBinaryInMemoryRawByteArrayBenchmark(T order, AcBinarySerializerOptions options, string optionsPreset)
    {
        _order = order;
        _options = options;
        OptionsPreset = optionsPreset;
        _serialized = AcBinarySerializer.Serialize(order, _options);

        // === SERIALIZE-side setup measurement ===
        // Nothing to set up — calling thread allocates byte[] per iter via AcBinarySerializer.Serialize.
        SetupSerializeAllocBytes = 0;

        // === DESERIALIZE-side setup measurement ===
        // 1× background consumer-task + 2× MRES (request / done) + cancellation source.
        GC.Collect(); GC.WaitForPendingFinalizers(); GC.Collect();
        var beforeDes = GC.GetAllocatedBytesForCurrentThread();
        _cts = new CancellationTokenSource();
        _consumeRequest = new ManualResetEventSlim(false);
        _consumeDone = new ManualResetEventSlim(false);
        // Started last: the loop touches _cts and both events as soon as it runs.
        _consumerTask = Task.Run(ConsumerLoop);
        var afterDes = GC.GetAllocatedBytesForCurrentThread();
        SetupDeserializeAllocBytes = afterDes - beforeDes;
    }

    // BG consumer: parks on _consumeRequest, picks up the byte[] from _pendingBytes, runs Deserialize<T>(bytes),
    // signals _consumeDone. Direct in-process handoff — no transport syscall, no buffer copy beyond the byte[]
    // reference itself (zero-copy by reference).
    private void ConsumerLoop()
    {
        var ct = _cts.Token;
        try
        {
            while (true)
            {
                _consumeRequest.Wait(ct);
                // Dispose both cancels and sets the request event; leave without touching the slot.
                if (ct.IsCancellationRequested) return;
                _consumeRequest.Reset();
                try
                {
                    var bytes = _pendingBytes;
                    if (bytes != null)
                    {
                        var result = AcBinaryDeserializer.Deserialize<T>(bytes, _options);
                        if (_captureResult) _lastResult = result;
                    }
                }
                catch
                {
                    // Swallow — see ConsumerLoop in NamedPipe variant for rationale.
                    // A failed deserialize leaves _lastResult unset, so VerifyRoundTrip reports false.
                }
                finally
                {
                    // Always release the caller, even when deserialisation threw.
                    _consumeDone.Set();
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Cooperative cancel — Dispose path. Swallow.
        }
    }

    /// <summary>
    /// Runs one full round trip: serialise on the calling thread, hand the buffer to the consumer
    /// task, and block until the consumer signals that it has finished.
    /// </summary>
    /// <remarks>
    /// There is a single handoff slot, so this assumes one caller at a time.
    /// </remarks>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Serialize()
    {
        // 2-task in-memory pipeline:
        //   1. Calling thread serialises → fresh byte[] (per-iter alloc, matches AcBinaryBenchmark contract).
        //   2. Calling thread parks the byte[] into _pendingBytes and signals consumer task. Consumer task
        //      picks up the reference (zero-copy) and runs Deserialize<T>(bytes).
        //   3. Calling thread waits for _consumeDone (consumer task finished Des).
        //
        // Same architectural limitation as the NamedPipe-raw variant: Des cannot start until full bytes
        // are available. Only the per-iter Ser↔Des thread-handoff overlaps slightly (calling thread starts
        // signalling and waiting while consumer thread takes the byte[]).
        var bytes = AcBinarySerializer.Serialize(_order, _options);
        _pendingBytes = bytes;
        // Reset "done" before raising "request" so a completion from this iteration cannot be lost.
        _consumeDone.Reset();
        _consumeRequest.Set();
        _consumeDone.Wait();
    }

    /// <summary>No-op; the deserialise half of the round trip happens inside <see cref="Serialize"/>.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void Deserialize()
    {
        // No-op: per-iter round-trip is captured in Serialize(). See IsRoundTripOnly contract.
    }

    /// <summary>
    /// Runs one round trip with result capture enabled and compares the deserialised object
    /// with the original via <c>RoundTripValidator.DeepEqualsViaJson</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the consumer produced a non-null <typeparamref name="T"/> that the validator
    /// accepts; otherwise <c>false</c>.
    /// </returns>
    public bool VerifyRoundTrip()
    {
        _captureResult = true;
        try
        {
            Serialize();
            return _lastResult is T result && RoundTripValidator.DeepEqualsViaJson(_order, result);
        }
        finally
        {
            // Restore benchmark mode and drop the captured object.
            _captureResult = false;
            _lastResult = null;
        }
    }

    /// <summary>
    /// Stops the consumer task (waiting up to two seconds for it to exit) and releases the events
    /// and cancellation source. Every step is best-effort; repeated calls are ignored.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;
        try { _cts.Cancel(); } catch { /* swallow on teardown */ }
        try { _consumeRequest.Set(); } catch { /* nudge in case consumer Wait is parked */ }
        try { _consumerTask.Wait(TimeSpan.FromSeconds(2)); } catch { /* swallow on teardown */ }
        try { _consumeRequest.Dispose(); } catch { /* swallow on teardown */ }
        try { _consumeDone.Dispose(); } catch { /* swallow on teardown */ }
        try { _cts.Dispose(); } catch { /* swallow on teardown */ }
    }
}