diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs index 20f870c..19c7f83 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs @@ -1930,35 +1930,47 @@ public class AcBinarySourceGenerator : IIncrementalGenerator } else { - // Ref tracking possible — Object/ObjectRefFirst/ObjectRef/FixObj dispatch - // Inline: parent creates instance + handles cache registration - sb.AppendLine($"{i}if ({tc} == BinaryTypeCode.Object)"); + // Ref tracking possible — switch on tc (Object / ObjectRefFirst / [Null] / ObjectRef / = context._nextRuntimeSlot) context._nextRuntimeSlot = {tc} + 1;"); - sb.AppendLine($"{i} var rc_{p.Name} = new {p.TypeNameForTypeof}();"); - sb.AppendLine($"{i} {reader}.Instance.ReadProperties(rc_{p.Name}, context, {nd});"); - sb.AppendLine($"{i} {a} = rc_{p.Name};"); + sb.AppendLine($"{i} default:"); + sb.AppendLine($"{i} if ({tc} < BinaryTypeCode.Object)"); + sb.AppendLine($"{i} {{"); + sb.AppendLine($"{i} context.GetWrapper(typeof({p.TypeNameForTypeof}), {tc});"); + sb.AppendLine($"{i} if ({tc} >= context._nextRuntimeSlot) context._nextRuntimeSlot = {tc} + 1;"); + sb.AppendLine($"{i} var rc_{p.Name} = new {p.TypeNameForTypeof}();"); + sb.AppendLine($"{i} {reader}.Instance.ReadProperties(rc_{p.Name}, context, {nd});"); + sb.AppendLine($"{i} {a} = rc_{p.Name};"); + sb.AppendLine($"{i} }}"); + sb.AppendLine($"{i} break;"); sb.AppendLine($"{i}}}"); } } @@ -2286,36 +2298,48 @@ public class AcBinarySourceGenerator : IIncrementalGenerator } else { - // Object hot path first, then ref markers, then FixObj — inline ReadProperties - sb.AppendLine($"{i}if ({etc} == BinaryTypeCode.Object)"); + // Switch on etc (Object / ObjectRefFirst / Null / ObjectRef / = context._nextRuntimeSlot) context._nextRuntimeSlot = {etc} + 1;"); - sb.AppendLine($"{i} var re_{propSuffix} = new {elemTypeName}();"); - sb.AppendLine($"{i} {reader}.Instance.ReadProperties(re_{propSuffix}, context, nd_{propSuffix});"); - sb.AppendLine($"{i} {assignExpr}"); + sb.AppendLine($"{i} default:"); + sb.AppendLine($"{i} if ({etc} < BinaryTypeCode.Object)"); + sb.AppendLine($"{i} {{"); + sb.AppendLine($"{i} context.GetWrapper(typeof({elemTypeName}), {etc});"); + sb.AppendLine($"{i} if ({etc} >= context._nextRuntimeSlot) context._nextRuntimeSlot = {etc} + 1;"); + sb.AppendLine($"{i} var re_{propSuffix} = new {elemTypeName}();"); + sb.AppendLine($"{i} {reader}.Instance.ReadProperties(re_{propSuffix}, context, nd_{propSuffix});"); + sb.AppendLine($"{i} {assignExpr}"); + sb.AppendLine($"{i} }}"); + sb.AppendLine($"{i} break;"); sb.AppendLine($"{i}}}"); } } diff --git a/AyCode.Core/docs/BINARY/BINARY_TODO.md b/AyCode.Core/docs/BINARY/BINARY_TODO.md index 0dd9852..274be49 100644 --- a/AyCode.Core/docs/BINARY/BINARY_TODO.md +++ b/AyCode.Core/docs/BINARY/BINARY_TODO.md @@ -148,3 +148,284 @@ Pick one before touching code. Option 2 is the most correct but adds API surface - Wire format unchanged. Default values unchanged (65535 / equivalent). - Migration note in CHANGELOG / release notes since this is a breaking change to `AcBinarySerializerOptions`. +## ACCORE-BIN-T-M4D2: Add `ReadOnlyMemory` / `Memory` deserialize overloads +**Priority:** P3 · **Type:** Feature + +The public `AcBinaryDeserializer.Deserialize` surface accepts `byte[]` (with optional offset/length) and `ReadOnlySequence`, but **not** `ReadOnlyMemory` / `Memory`. Consumers that hold a `ReadOnlyMemory` (cached payloads, message-broker frames, in-memory pipe slices) must call `.ToArray()` to round-trip through `byte[]` — unnecessary copy + GC alloc. + +**Implementation:** +- `Deserialize(ReadOnlyMemory data, AcBinarySerializerOptions options)` and the non-generic `Type`-based variant. +- Body: `MemoryMarshal.TryGetArray(data, out var seg)` → array-backed path delegates to `Deserialize(seg.Array!, seg.Offset, seg.Count, options)` (zero-copy). Non-array-backed fallback (rare — custom `MemoryManager` with native memory) copies into a pooled `byte[]`. +- `Memory` overload trivially delegates to the `ReadOnlyMemory` one (`Memory` is implicitly convertible). +- No new input-strategy struct needed — reuses existing `ArrayBinaryInput`. + +**Acceptance:** +- Both overloads compile and pass round-trip tests against `byte[]`-equivalent input. +- Array-backed path measurably zero-alloc (BenchmarkDotNet allocation diagnoser). +- Non-array-backed path documented as fallback (separate `using var pooled = MemoryPool.Shared.Rent(...)` style copy). +- API doc-strings cross-reference the existing `byte[]` and `ReadOnlySequence` overloads. + +## ACCORE-BIN-T-S7X3: Add `ReadOnlySpan` deserialize overload +**Priority:** P2 · **Type:** Feature · **Related:** `ACCORE-BIN-T-M4D2` + +The MemoryPack-style `Deserialize(ReadOnlySpan)` API enables direct deserialization from stack-allocated buffers (`stackalloc byte[256]`), pinned native memory (`fixed` blocks), and `ReadOnlyMemory.Span` slices without round-tripping through a heap-allocated `byte[]`. The current AcBinary surface lacks this entry point. + +**Design tension:** the existing `IBinaryInputBase.Initialize(out byte[] buffer, ...)` contract returns a `byte[]` — a `ReadOnlySpan` cannot be stored in a regular struct field, only in a `ref struct` field. Two implementation paths to evaluate: + +1. **`ref struct SpanBinaryInput`** + interface bump to support `ref byte buffer` / `int length` fields. **Pure** zero-copy from any span. Cost: `BinaryDeserializationContext` and `IBinaryInputBase` need a parallel ref-struct-friendly track (the existing pooled context cannot hold a `ref struct`). Major surgery on the deser core. +2. **`MemoryMarshal.CreateReadOnlySpanFromNullTerminated`-style hack** — accept `ReadOnlySpan`, use `Unsafe.AsRef`/`MemoryMarshal.GetReference` to obtain a `ref byte`, then **copy into a pooled `byte[]`** before deserialization. **Not** zero-copy, defeats the purpose. Reject. +3. **Pinned-buffer trampoline** — accept `ReadOnlySpan`, allocate a `Memory` view via a `MemoryManager`-like wrapper, delegate to `ReadOnlyMemory` overload. Awkward, allocations per call. Reject. + +**Recommendation:** option (1) is the only correct path, but it's a substantial refactor — measure first whether real consumer demand justifies the surgery. The current `byte[]`-based pool-pattern outperforms MemoryPack on the dominant use-cases per existing benchmarks; this overload addresses an API-surface gap, not a perf gap. + +**Acceptance:** +- `Deserialize(ReadOnlySpan data, AcBinarySerializerOptions options)` compiles and round-trips against `byte[]`-equivalent input. +- Zero-alloc path verified for `stackalloc`-source spans (BenchmarkDotNet allocation diagnoser). +- `IBinaryInputBase` (or successor interface) refactor preserves backward compatibility for existing `ArrayBinaryInput` / `SequenceBinaryInput` / `AsyncPipeReaderInputAdapter` consumers. +- Doc-strings cross-reference the `byte[]` / `ReadOnlyMemory` (`ACCORE-BIN-T-M4D2`) / `ReadOnlySequence` overloads with use-case guidance. + +## ACCORE-BIN-T-T8K3: Add `SerializeAsync(Stream, T)` async overloads with mode-driven output strategy +**Priority:** P1 · **Type:** Feature · **Related:** `ACCORE-BIN-T-N9G6` (Type-based coordination) + +The mainstream serializer ecosystem (System.Text.Json, MessagePack, Newtonsoft.Json, MemoryPack) all expose `SerializeAsync(Stream, T)` as a primary entry point — async file I/O, network response body, log streaming. **AcBinary's public API surface MUST include this overload** regardless of what we do internally; consumers expect a `Stream` parameter and don't navigate `PipeWriter.Create(stream)` workarounds. Market-entry-blocking otherwise. + +### Mode-driven output strategy — three lanes for three workload shapes + +AcBinary already models the three output strategies in `BinaryProtocolMode` (`AyCode.Services/SignalRs/BinaryProtocolMode.cs`) for the SignalR side. The same three-lane shape applies to the public `SerializeAsync(Stream)` API. Promote the concept to AcBinary core scope (e.g. `AcBinaryOutputMode` in `AyCode.Core/Serializers/Binaries/`) and let the SignalR `BinaryProtocolMode` either alias it or migrate to it. Migration timing: **the existing `BinaryProtocolMode` keeps shipping until the new public API is stabilized**; both names live for one major version, then `BinaryProtocolMode` becomes a `using`-alias. + +| Mode | Output strategy | Peak memory | Pipeline parallelism | Use when | +|---|---|---|---|---| +| **`Bytes`** (default) | `Serialize(T) → byte[]` + `stream.WriteAsync(bytes)` | Full payload in `byte[]` (pooled) | No | Typical payloads (<10 MB), throughput-focus | +| **`Segment`** | `BufferWriterBinaryOutput` → `PipeWriter`, single closing flush | PipeWriter pause-threshold-bounded (~64 KB Kestrel default) | No | Mid-size payloads, zero-copy desired | +| **`AsyncSegment`** | `SerializeChunked(PipeWriter)`, per-chunk async flush | Chunk-size-bounded (~8 KB at default `BufferWriterChunkSize`) | Yes (on parallel-capable PipeWriter — Kestrel / `Pipe`) | Very large payloads (>10 MB), memory-tight hosts, parallel-capable transport | + +### Honest performance positioning vs. MemoryPack — three real axes + +MemoryPack's `SerializeAsync(Stream)` is **pseudo-streaming** — serializes the entire payload into a pool-allocated linked-list buffer first (`ReusableLinkedArrayBufferWriter`), then writes the completed buffer to the stream in a single closing fence. Peak memory ≈ payload size; no pipeline parallelism. AcBinary's `Bytes` mode is **architecturally similar** (single pooled contiguous `byte[]` vs. MemoryPack's linked-list) — comparable peak-memory cost, often faster on the wire due to one contiguous `WriteAsync` call. + +AcBinary's `AsyncSegment` mode is **architecturally different** in three real ways MemoryPack cannot match: + +| Axis | `Bytes` mode (default) | `AsyncSegment` mode | MemoryPack `SerializeAsync` | +|---|---|---|---| +| **Heap allocation per call** | Pooled `byte[]` rent (peak ≈ payload size) | **Truly zero** — `ArrayPool` + pooled context + `MemoryMarshal.TryGetArray` direct-buffer-write into the transport's own `byte[]` | Pool-allocated linked-list buffer per call (peak ≈ payload size) | +| **Peak managed memory** | ≈ payload size | ≈ chunk size (`BufferWriterChunkSize`, e.g. 4-8 KB) | ≈ payload size | +| **GC pressure** | Touches GC pool on every call | **Never touches GC** for the serialize itself | Touches GC pool on every call | +| **Pipeline parallelism** | No | Yes on parallel-capable PipeWriter (Kestrel transport, `new Pipe()`) | No | +| **GB-scale payload** | OOM risk on memory-tight hosts | Works | OOM risk | + +The `AsyncSegment` zero-alloc claim is **literal**, not "almost zero": `AsyncPipeWriterOutput.AcquireChunk` calls `_pipeWriter.GetMemory(chunkSize)` and uses `MemoryMarshal.TryGetArray(memory, out segment)` to obtain the **transport's own internal `byte[]`** — the serializer writes directly into it. With `chunkSize` aligned to the transport's internal buffer (e.g. NamedPipe-server pipe-buffer-size), one chunk is one kernel-level transfer; no managed-side double-fragmentation. + +### Throughput nuance — `AsyncSegment` cost on Stream-backed transports + +`AsyncSegment` IS slightly slower than `Bytes` on `StreamPipeWriter`-backed transports (NamedPipe / FileStream / NetworkStream), but **not for the reason that initially seems obvious**: + +- The cost is NOT "managed-side double-fragmentation on top of OS-level fragmentation" — that's not what happens. `MemoryMarshal.TryGetArray` zero-copy direct-buffer-writes mean the managed chunking is the **same** chunking the kernel does anyway, not redundant. +- The cost IS the per-chunk async-await round-trip (`SyncAwaitFlush(_lastFlush)` blocks until the kernel acknowledges the write), forced sequential by the `StreamPipeWriter._tailMemory` reset race (`ACCORE-BIN-I-...`). N async cycles vs 1 in `Bytes` mode. +- Empirically the gap is roughly **1.2-1.5x** on NamedPipe — not 2-5x. **The dominant cost on these transports is the transport itself** (Windows IRP / Linux FIFO syscall overhead), independent of the serializer mode. + +When `AsyncSegment` wins outright: +- **GC-sensitive hot-paths** (server hubs, real-time game tick loops, mobile UI thread, embedded targets): zero-alloc + zero-GC-pressure beats a 1.2x throughput edge every time. +- **Memory-tight hosts** (mobile, WASM, container-trimmed, embedded): chunk-bounded peak memory is the only option. +- **GB-scale payloads**: `Bytes` OOMs; `AsyncSegment` works. +- **Kestrel transport / parallel-capable `Pipe`**: pipeline parallelism makes `AsyncSegment` **faster** than `Bytes` for medium-to-large payloads. + +When `Bytes` wins outright: +- **Tipikus NuGet workload** (small-to-medium payload, throughput priority, GC-tolerant): one async cycle vs N is the simpler, faster path. +- **`MemoryStream`** (in-memory): one large `byte[]` copy decisively beats N managed chunks. + +### Marketing claim — three-way honest comparison + +> *"AcBinary offers a real choice. `Bytes` mode for typical throughput-priority workloads (matches MemoryPack's pseudo-streaming, often faster on the wire). `AsyncSegment` mode for the workloads MemoryPack cannot serve: zero-alloc serialize for GC-sensitive hot-paths, chunk-bounded peak memory for tight-budget hosts, GB-scale payloads, and pipeline parallelism on parallel-capable transports. You pick the mode; MemoryPack picks for you."* + +This is honest — does not overclaim universal speed, does not hide the small `AsyncSegment` cost on Stream-backed transports, AND clearly surfaces the three differentiator axes (alloc / memory / parallelism) where AcBinary architecturally beats MemoryPack. + +**Implementation outline:** + +- New enum `AcBinaryOutputMode { Bytes = 0, Segment = 1, AsyncSegment = 2 }` in `AyCode.Core/Serializers/Binaries/`. Default `Bytes`. +- New mode field on `AcBinarySerializerOptions`: `AcBinaryOutputMode OutputMode { get; set; } = AcBinaryOutputMode.Bytes;`. (Note: subject to `ACCORE-BIN-I-L8N5` thread-safety treatment — defensive copy / immutable refactor coordination.) +- `public static ValueTask SerializeAsync(T value, Stream stream, AcBinarySerializerOptions? options = null, bool leaveOpen = false, CancellationToken ct = default)`: + - Switch on `options.OutputMode`: + - `Bytes` → `var bytes = Serialize(value, options); await stream.WriteAsync(bytes, ct); ArrayPool.Return(bytes);` + - `Segment` → `var pw = PipeWriter.Create(stream, new(leaveOpen: leaveOpen)); Serialize(value, pw, options); await pw.CompleteAsync();` + - `AsyncSegment` → `var pw = PipeWriter.Create(stream, new(leaveOpen: leaveOpen)); SerializeChunked(value, pw, options); await pw.CompleteAsync();` +- `public static ValueTask SerializeAsync(object? value, Type type, Stream stream, ...)` — non-generic, same dispatch (coordinated with `ACCORE-BIN-T-N9G6`). +- `leaveOpen` parameter standard for stream-async serializers (System.Text.Json, MessagePack convention). +- The `Bytes` mode uses a pooled `byte[]` from `ArrayBinaryOutput` to keep alloc cost amortized. + +**SignalR migration coordination:** the existing `BinaryProtocolMode` enum (in `AyCode.Services`) keeps shipping unchanged until the new public API is stabilized. After stabilization, `BinaryProtocolMode` becomes a deprecated alias of `AcBinaryOutputMode`, eventually removed in a major-bump. No SignalR-side churn during this TODO's implementation. + +**Acceptance:** + +- `SerializeAsync` round-trips against `Deserialize(byte[])` via `MemoryStream` in all three modes. +- Cancellation propagates correctly (`OperationCanceledException` on cancelled token mid-stream). +- **Throughput matrix benchmark**: 4 transports (`MemoryStream`, `FileStream`, `NamedPipeStream`, `NetworkStream`) × 3 modes × 3 payload sizes (small ~1 KB / medium ~100 KB / large ~10 MB). Results documented in `Test_Benchmark_Results/Benchmark/SerializeAsync_Stream_Modes.LLM` (or similar) and surfaced as a doc-string table for consumer guidance. +- **Memory-bounded benchmark**: 100 MB payload to `FileStream` in `AsyncSegment` mode → peak managed-heap delta ≤ 1 MB throughout. Same payload in `Bytes` mode → peak ~100 MB (expected, documented). +- API doc-string contains a "When to use which mode?" decision matrix; explicitly compares with MemoryPack's pseudo-streaming. +- `leaveOpen` parameter behaves per the System.Text.Json / MessagePack convention across all three modes. + +## ACCORE-BIN-T-N9G6: Add non-generic `Type`-based `Serialize(object, Type, ...)` overloads +**Priority:** P2 · **Type:** Feature · **Related:** `ACCORE-BIN-T-T8K3` + +Plugin frameworks, ASP.NET ModelBinding, DI middleware, and DataContractSerializer-style "generic-API container" use-cases need to serialize an `object` whose type is known only at runtime. Current AcBinary surface forces a reflection trampoline through the generic `Serialize`: + +```csharp +// Today's workaround (slow + noisy): +typeof(AcBinarySerializer).GetMethod("Serialize", new[] { type, typeof(AcBinarySerializerOptions) }) + .MakeGenericMethod(type).Invoke(null, new[] { value, options }); +``` + +**Implementation outline:** +- `public static byte[] Serialize(object? value, Type type, AcBinarySerializerOptions? options = null)` +- `public static int Serialize(object? value, Type type, IBufferWriter writer, AcBinarySerializerOptions? options = null)` +- `public static int SerializeChunked(object? value, Type type, PipeWriter writer, AcBinarySerializerOptions? options = null)` and `Pipe` overload +- `public static int SerializeChunkedFramed(object? value, Type type, PipeWriter writer, AcBinarySerializerOptions? options = null)` and `Pipe` overload +- `public static ValueTask SerializeAsync(object? value, Type type, Stream stream, ...)` — coordinated with `ACCORE-BIN-T-T8K3` +- Internal dispatch: `value.GetType()` is the runtime type; the `Type type` parameter constrains the **declared** type for polymorphism handling (`ObjectWithTypeName` write decision). + +**Acceptance:** +- All non-generic overloads round-trip via the generic deserializer's `Deserialize(byte[], Type)` overload. +- Plugin-style scenario: serialize `IList` of mixed-type elements → all elements correctly typed in the wire output. +- API doc-strings call out the performance characteristics (slightly slower than generic due to runtime `Type` lookup but **without** the reflection trampoline cost). + +## ACCORE-BIN-T-R4P2: Expose low-level `ref Writer`-style API for custom formatters +**Priority:** P3 · **Type:** Feature + +The MemoryPack-style `Serialize(ref MemoryPackWriter writer, in T value)` low-level API enables: +- Custom formatters that compose write primitives without the full Serialize entry-point overhead. +- Nested-into-existing-stream scenarios where the caller already owns a writer-style cursor. +- Test harnesses that exercise specific wire-format paths in isolation. + +Today's `BufferWriterBinaryOutput` standalone-mode partly fills this gap — exposing `WriteByte`, `WriteVarUInt`, `WriteStringUtf8`, etc. — but it is **not** a `ref struct`, **not** a documented low-level public API for external custom formatters, and the relationship with `BinarySerializationContext` is unclear from the consumer's perspective. + +**Design tension** (decide before implementing): + +1. **Promote `BufferWriterBinaryOutput` to documented public surface** — add doc, examples, supported usage patterns. Cheapest, but the standalone-mode is currently a side-feature, not a primary API; documenting it commits to its current shape. +2. **New `ref struct AcBinaryWriter` wrapper** around `BufferWriterBinaryOutput` (or a dedicated impl) — explicit "this is the low-level writer" signal. More API surface but clearer mental model. Aesthetic alignment with MemoryPack. +3. **Skip entirely** — the `IBufferWriter` overload is already lower-level than most consumers need; custom formatters can write to an `ArrayBufferWriter` and use `IBufferWriter`-style primitives. This is what `BufferWriterBinaryOutput` already does internally. + +**Recommendation:** option 3 is honest — the existing `IBufferWriter` overload covers the use case, and adding a `ref struct AcBinaryWriter` is mostly aesthetic alignment with MemoryPack. Re-evaluate when there's a concrete custom-formatter request that the current API can't accommodate. + +**Acceptance (if implemented):** +- `AcBinaryWriter ref struct` (or equivalent) compiles, supports the same write primitives as `BufferWriterBinaryOutput` standalone-mode. +- At least one example custom formatter ships in tests (e.g., a `Vector3` struct formatter). +- Doc-string clearly distinguishes when to use the low-level writer vs. the high-level `Serialize` entry-point. + +## ACCORE-BIN-T-U6Y8: Attribute-driven polymorphism via `[AcBinaryUnion]` + SGen (opt-in, AOT-friendly) +**Priority:** P1 (if AOT target required) / P2 (non-AOT only) · **Type:** Feature + +**Design philosophy alignment:** AcBinary's market positioning is "JSON-style flexibility with MessagePack-class speed" — attributes are **opt-in optimization**, never required. The runtime polymorphism path (AQN-based, today's default) **stays the default** and continues to work for arbitrary unattributed types. This TODO adds a fast/AOT path **alongside** it, never replaces it. + +AcBinary today handles polymorphism at runtime: the wire writes `ObjectWithTypeName(72)` + AQN string, and the deserializer calls `Type.GetType(aqn)` to resolve. This is **flexible** (no upfront declaration), but has three significant drawbacks for some consumers: + +- **AOT-incompatible** — `Type.GetType(AQN)` requires reflection metadata that the Native AOT trimmer strips by default. The runtime polymorphism path **does not work at all** under Native AOT. Hard blocker for AOT-targeting consumers (Blazor WASM, MAUI mobile, container-trimmed deployments). +- **Slower** — AQN string parse + reflection lookup vs. a closed `switch (tag)` in code-gen. +- **Larger wire format** — full AQN string (often 100+ bytes) vs. a single-byte `tag`. + +**Design — three coordinated pieces:** + +### 1. New 5th bool parameter on `[AcBinarySerializable]`: `EnablePolymorphismFeature` + +Mirrors the existing `EnableMetadataFeature` / `EnableIdTrackingFeature` / `EnableRefHandlingFeature` / `EnableInternStringFeature` pattern. Per-type opt-out / opt-in via attribute parameter. + +```csharp +public AcBinarySerializableAttribute( + bool enableMetadataFeature, + bool enableIdTrackingFeature, + bool enableRefHandlingFeature, + bool enableInternStringFeature, + bool enablePolymorphismFeature) // ← ÚJ, default: true +``` + +Three behavior modes per type: +- `EnablePolymorphismFeature = false` → **disabled**. SGen never emits polymorphism dispatch for this type; runtime path also short-circuits — runtime type ≠ declared type is silently treated as declared (or throws, decision TBD). Use for hot-path closed types where polymorphism is impossible-by-design and the perf/AOT cost is unwanted. +- `EnablePolymorphismFeature = true` (default), no `[AcBinaryUnion]` → **runtime options control**. Behaves per `AcBinarySerializerOptions.PolymorphismMode` (Runtime/AQN today). This preserves the JSON-style flexibility for unattributed bases. +- `EnablePolymorphismFeature = true` + `[AcBinaryUnion(...)]` declared → **union-switch dispatch**. SGen emits a closed `switch (tag)` dispatch using the declared subtype set. Fast + AOT-friendly. Overrides the options-level default for this type. + +### 2. New `[AcBinaryUnion(byte tag, Type subtype)]` attribute + +Multiple instances per base class / interface declare the closed polymorphism set: + +```csharp +[AcBinarySerializable] // EnablePolymorphismFeature defaults to true +[AcBinaryUnion(0, typeof(Cat))] +[AcBinaryUnion(1, typeof(Dog))] +public abstract partial class Animal { ... } +``` + +SGen detects `[AcBinaryUnion]` on abstract / base type → emits the switch-based write/read dispatch instead of falling through to runtime AQN. + +### 3. New `PolymorphismMode` enum on `AcBinarySerializerOptions` + +Options-level default for unattributed polymorphism (i.e. the case where `EnablePolymorphismFeature = true` but no `[AcBinaryUnion]` is declared): + +- `Runtime` (today's default) — AQN-based. Flexible, AOT-incompatible. +- `Throw` — fail fast on any polymorphic write that lacks a `[AcBinaryUnion]` attribute. AOT-friendly diagnostic mode for migration scenarios. + +Note: there is **no `UnionAttribute`-only mode** — declaration is per-type via the attribute, not options-global. The options-level mode only governs the **fallback** when no `[AcBinaryUnion]` is present. + +**Wire-format addition:** + +New marker (e.g. `UnionTagBase = `) + `[byte tag][inner Object]`, parallel to existing `ObjectWithTypeName(72)`. Slot number to be assigned avoiding clashes with existing 64–134 / 192–255 ranges. + +**Implementation outline:** +- `AcBinarySerializableAttribute` — new ctor parameter `enablePolymorphismFeature`, all existing ctors default it to `true` (backward compatible). +- `AcBinaryUnionAttribute` — new attribute, `AttributeUsage(AttributeTargets.Class | Interface, AllowMultiple = true)`. +- Source generator — emit `WriteUnion(value, ctx, depth)` and `ReadUnion(ctx, depth)` static methods on the union-base type's generated writer/reader. Skipped entirely when `EnablePolymorphismFeature = false`. +- Wire-format new marker + `[byte tag][inner Object]` body. +- Runtime path: `WriteValueNonPrimitive` checks the wrapper's `PolymorphismFeatureEnabled` flag; when `false`, skips the `value.GetType() != declaredType` polymorphism branch entirely. + +**Acceptance:** +- `EnablePolymorphismFeature = false`: SGen-emitted dispatch contains zero `is`-typeof / GetType branches; runtime path also short-circuits. Verify in JIT disassembly. +- `EnablePolymorphismFeature = true`, no union: runtime AQN polymorphism works as today (full backward compat); preserved JSON-style flexibility for unattributed bases. +- `EnablePolymorphismFeature = true` + `[AcBinaryUnion]`: AOT-test (Native AOT publish) compiles and round-trips a polymorphic graph — `Type.GetType()` is never invoked on this path. +- Benchmark: union-switch polymorphism measurably faster than AQN polymorphism on deser side (typed switch vs. reflection lookup). +- Wire format documented in `BINARY_FORMAT.md`; `BINARY_FEATURES.md` cross-references the attribute pattern; `BINARY_OPTIONS.md` documents `PolymorphismMode`. `AcBinarySerializableAttribute` doc-string explains all three behavior modes. + +## ACCORE-BIN-T-B7H4: Implement `AcBinarySerializerOptions` thread-safety fix +**Priority:** P2 · **Type:** Refactor · **Related:** `BINARY_ISSUES.md#accore-bin-i-l8n5` (canonical issue) + +The latent thread-safety problem documented in `ACCORE-BIN-I-L8N5` — mutable `set;` properties on `AcBinarySerializerOptions` shared across concurrent serialize/deserialize calls — needs a fix before AcBinary ships as a NuGet package. The package cannot constrain how consumers scope their options instances; defensive contract is needed in the serializer itself. + +**Three candidate fix directions** (decide before implementing): + +1. **Defensive copy on ingress** — add `AcBinarySerializerOptions Clone()` method (member-wise copy). Every API entry point that retains an options instance clones it on entry. External mutation to the original becomes invisible to the holder. + - **Pro**: non-breaking. Existing consumer code unchanged. No major version bump required. + - **Pro**: API surface change limited to one new `Clone()` method. + - **Con**: per-call clone overhead (small, but non-zero). Cache keyed on options-identity becomes invalid for downstream code using reference equality. + - **Con**: doesn't fix the **underlying** mutability — internal code can still race-mutate the cloned snapshot if a method retains both the snapshot and modifies it concurrently. + +2. **Immutable record refactor** — `set;` → `init;` on all configuration properties. Mutation requires `with`-expression which produces a new instance. + - **Pro**: type-system-strong guarantee. Race becomes a compile error, not a runtime corruption risk. + - **Pro**: zero runtime overhead (init-only is compile-time check; record class semantics are unchanged at runtime). + - **Con**: **breaking change** for any consumer doing `opts.UseGeneratedCode = false` after construction. Major version bump. + - **Con**: source-generator coordination needed if SGen emits options-builder code that mutates properties. + +3. **Read-only flag pattern** (à la `JsonSerializerOptions.MakeReadOnly()`) — mutable by default, holder calls `MakeReadOnly()` on entry; subsequent property setters throw `InvalidOperationException`. + - **Pro**: BCL-precedent — Microsoft adopted it for `JsonSerializerOptions` in .NET 7 (`dotnet/runtime#74431`) for exactly this problem. Familiar pattern for consumers. + - **Pro**: minimal API surface change (one new method + `IsReadOnly` flag property). + - **Pro**: per-call overhead = single bool check per setter call. Negligible. + - **Con**: opt-in by the holder — if a custom consumer-side wrapper forgets to call `MakeReadOnly()`, the safety hole stays open for that wrapper's clients. Documentation-driven safety, not type-system-driven. + - **Con**: bypasses static-analysis tooling (the setter signature stays public; the throw is runtime). IDE doesn't surface "this property is currently read-only" in autocomplete. + +**Recommendation:** **Option 3 (`MakeReadOnly` pattern)** is the BCL-precedent, lowest-friction migration path. Microsoft adopted it for `JsonSerializerOptions` in .NET 7 to solve the same problem; AcBinary should follow the same pattern for consistency with consumers' mental model and zero migration cost. + +**Coordination with the existing `AcBinaryHubProtocol` setter side-effect** (the second risk surface in `ACCORE-BIN-I-L8N5`): the protocol ctor currently mutates the caller-provided options reference (`_options.BufferWriterChunkSize = options.BufferSize`). After the fix: +- **Option 1 (Clone)**: ctor mutates the **cloned** snapshot → no side-channel to the caller. Fix transparent. +- **Option 2 (Immutable)**: ctor cannot mutate; needs to construct a new options via `with`-expression. Breaking change in the ctor's options-handling. +- **Option 3 (MakeReadOnly)**: ctor mutates **before** calling `MakeReadOnly()` — same as today, but explicit "frozen" point afterwards. Caller-side mutation post-ctor is now a runtime throw. + +**Implementation outline (Option 3 path):** + +1. `AcBinarySerializerOptions.IsReadOnly { get; }` — public bool property. +2. `AcBinarySerializerOptions.MakeReadOnly()` — sets the flag; idempotent (no-op if already set). +3. All `set;` accessors guard: `if (IsReadOnly) throw new InvalidOperationException("AcBinarySerializerOptions has been made read-only and can no longer be mutated. Construct a new options instance instead.");`. +4. `AcBinarySerializer.Serialize` entry (and all sibling entries — `Deserialize`, `SerializeChunked`, etc.): `options.MakeReadOnly()` before any property read. +5. `AcBinaryHubProtocol` ctor: complete the `BufferWriterChunkSize` mutation **before** calling `options.MakeReadOnly()`. After ctor returns, the options instance is frozen for that protocol's lifetime. +6. Doc-string update on `AcBinarySerializerOptions` class header: explicit "thread-safety contract" section explaining the freeze-on-first-use semantics. + +**Acceptance:** +- Concurrent stress test (16 threads × 1000 iterations) on a shared `AcBinarySerializerOptions` instance with property-mutation-attempts mid-iteration — all mutations after `MakeReadOnly()` throw `InvalidOperationException`; no silent corruption observed. +- Existing tests pass unchanged (the `MakeReadOnly` is opt-in for the serializer entries; tests that build options + use them once continue to work transparently). +- `BINARY_ISSUES.md#accore-bin-i-l8n5` Status updated to `Closed (YYYY-MM-DD)` with a `### Resolution` sub-section pointing to this TODO + the implementing commit. +- Doc-string on `AcBinarySerializerOptions` documents the freeze-on-first-use contract; `BINARY_FEATURES.md` or `BINARY_OPTIONS.md` cross-references the BCL-precedent (`JsonSerializerOptions.MakeReadOnly`). +