diff --git a/.refactor-snapshot/.gitignore b/.refactor-snapshot/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/.refactor-snapshot/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/AyCode.Core/Extensions/SerializeObjectExtensions.cs b/AyCode.Core/Extensions/SerializeObjectExtensions.cs index b990eb6..b6648a7 100644 --- a/AyCode.Core/Extensions/SerializeObjectExtensions.cs +++ b/AyCode.Core/Extensions/SerializeObjectExtensions.cs @@ -1,5 +1,6 @@ using System.Buffers; using System.Collections.Concurrent; +using System.Diagnostics.CodeAnalysis; using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -572,6 +573,37 @@ public static class SerializeObjectExtensions public static void ToBinary(this T source, IBufferWriter writer, AcBinarySerializerOptions options) => AcBinarySerializer.Serialize(source, writer, options); + /// + /// Serialize object to binary byte array with explicit runtime type. Use this overload at + /// heterogeneous object? call sites where the generic + /// would infer T = object and emit an object-typed wire payload instead of the concrete + /// runtime type's encoding. Typical use: value.ToBinary(value.GetType()) or + /// value.ToBinary(paramInfo.ParameterType). + /// + public static byte[] ToBinary(this object? source, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type) + => AcBinarySerializer.Serialize(source, type, AcBinarySerializerOptions.Default); + + /// + /// Serialize object to binary byte array with explicit runtime type and specified options. + /// See for use-case rationale. + /// + public static byte[] ToBinary(this object? source, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, AcBinarySerializerOptions options) + => AcBinarySerializer.Serialize(source, type, options); + + /// + /// Serialize object directly to an IBufferWriter with explicit runtime type. 
Zero-copy variant. + /// See for use-case rationale. + /// + public static void ToBinary(this object? source, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, IBufferWriter writer) + => AcBinarySerializer.Serialize(source, type, writer, AcBinarySerializerOptions.Default); + + /// + /// Serialize object directly to an IBufferWriter with explicit runtime type and specified options. + /// See for use-case rationale. + /// + public static void ToBinary(this object? source, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, IBufferWriter writer, AcBinarySerializerOptions options) + => AcBinarySerializer.Serialize(source, type, writer, options); + /// /// Get the serialized binary size without allocating the final array. /// diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 19e62ae..a945664 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -629,23 +629,44 @@ public static partial class AcBinarySerializer [MethodImpl(MethodImplOptions.NoInlining)] private void WriteVarUIntMultiByteUnsafe(uint value) { - if (value < 0x10000000) - { - // 2/3/4-byte tier: tier ∈ {2,3,4}, shift ∈ {8,16,24}, prefix ∈ {0x80,0xC0,0xE0}. - // Packed uint32: byte0 = prefix | (value >> shift); bytes 1..3 = value LE (low 24 bits). - var tier = (BitOperations.Log2(value) / 7) + 1; - var shift = (tier - 1) << 3; - var prefix = (0xFF00u >> (tier - 1)) & 0xFFu; - var packed = (prefix | (value >> shift)) | (value << 8); + // Writes EXACTLY `tier` bytes per call — does NOT overrun into following buffer space. 
+ // (The earlier Unsafe.WriteUnaligned compact path wrote 4 bytes on 2/3-byte tiers + // expecting the trailing 1-2 bytes to be overwritten by the next encoded element. That + // assumption breaks for callers using a savedPos-rewind-then-prefix pattern — e.g. + // WriteStringUtf8 where the rewinded position sits right before already-emitted UTF-8 + // bytes; a 4-byte uint store overwrites the first 1-2 bytes of the UTF-8 body with zero + // padding, corrupting the string on the wire.) - Unsafe.WriteUnaligned(ref BufferAt(_position), packed); - _position += tier; + BufferAt(_position + 1) = (byte)value; + if (value < 0x4000) + { + // 2-byte tier: 10xxxxxx (high 6 bits of value) + low 8 bits. + BufferAt(_position) = (byte)(0x80 | (value >> 8)); + _position += 2; return; } - // 5-byte tier: 0xF0 marker + full uint32 LE (prefix nibble unused). + BufferAt(_position + 2) = (byte)(value >> 8); + if (value < 0x200000) + { + // 3-byte tier: 110xxxxx (high 5 bits) + 2 bytes LE (low 16 bits). + BufferAt(_position) = (byte)(0xC0 | (value >> 16)); + _position += 3; + return; + } + + BufferAt(_position + 3) = (byte)(value >> 16); + if (value < 0x10000000) + { + // 4-byte tier: 1110xxxx (high 4 bits) + 3 bytes LE (low 24 bits). + BufferAt(_position) = (byte)(0xE0 | (value >> 24)); + + _position += 4; + return; + } + // 5-byte tier: 1111xxxx (low nibble unused) + 4 bytes LE (full uint32). 
BufferAt(_position) = 0xF0; - Unsafe.WriteUnaligned(ref BufferAt(_position + 1), value); + BufferAt(_position + 4) = (byte)(value >> 24); _position += 5; } diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs index a69cca3..385311f 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs @@ -331,7 +331,7 @@ public static partial class AcBinarySerializer try { // Full path: IQueryable/Expression conversion, primitive/collection dispatch - var actualValue = value; //ConvertExpressionValue(value, ref runtimeType); + var actualValue = ConvertExpressionValue(value, ref type); var wrapper = context.GetWrapper(type); ScanForDuplicates(actualValue, type, context, wrapper); @@ -634,6 +634,16 @@ public static partial class AcBinarySerializer internal static int Serialize(T value, System.IO.Pipelines.PipeWriter pipeWriter, AcBinarySerializerOptions options, FlushPolicy flushPolicy, TimeSpan? flushTimeout) => SerializeToPipeWriterCore(value, typeof(T), pipeWriter, options, flushPolicy, flushTimeout, multiMessage: true); + /// + /// Non-generic + /// overload with an explicit argument — used at heterogeneous object? + /// call sites (e.g. the SignalR hub protocol's streamed-arg path) where the generic overload + /// would infer T = object and emit an object-typed wire payload instead of the concrete + /// runtime type. Caller passes value.GetType() (or a project-known concrete type). + /// + internal static int Serialize(object? value, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, System.IO.Pipelines.PipeWriter pipeWriter, AcBinarySerializerOptions options, FlushPolicy flushPolicy, TimeSpan? flushTimeout) + => SerializeToPipeWriterCore(value, type, pipeWriter, options, flushPolicy, flushTimeout, multiMessage: true); + /// /// Common pipe-output serialization core. 
Same loop for both raw () /// and framed () modes — the only difference flows through diff --git a/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs b/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs index 830efd8..113a5b2 100644 --- a/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs +++ b/AyCode.Core/Serializers/Binaries/BufferWriterBinaryOutput.cs @@ -233,20 +233,41 @@ public struct BufferWriterBinaryOutput : IBinaryOutputBase [MethodImpl(MethodImplOptions.NoInlining)] private void WriteVarUIntMultiByteUnsafe(uint value) { - if (value < 0x10000000) + // Writes EXACTLY `tier` bytes — no overrun. See the context-side + // WriteVarUIntMultiByteUnsafe for the rationale (Unsafe.WriteUnaligned 4-byte + // store would corrupt callers that use savedPos-rewind-then-prefix patterns like + // WriteStringUtf8). + + _buffer[_position + 1] = (byte)value; + if (value < 0x4000) { - // 2/3/4-byte tier: tier ∈ {2,3,4}, shift ∈ {8,16,24}, prefix ∈ {0x80,0xC0,0xE0}. - var tier = (BitOperations.Log2(value) / 7) + 1; - var shift = (tier - 1) << 3; - var prefix = (0xFF00u >> (tier - 1)) & 0xFFu; - var packed = (prefix | (value >> shift)) | (value << 8); - Unsafe.WriteUnaligned(ref _buffer[_position], packed); - _position += tier; + // 2-byte tier + _buffer[_position] = (byte)(0x80 | (value >> 8)); + _position += 2; return; } - // 5-byte tier: 0xF0 marker + full uint32 LE (prefix nibble unused). 
+ + _buffer[_position + 2] = (byte)(value >> 8); + if (value < 0x200000) + { + // 3-byte tier + _buffer[_position] = (byte)(0xC0 | (value >> 16)); + _position += 3; + return; + } + + _buffer[_position + 3] = (byte)(value >> 16); + if (value < 0x10000000) + { + // 4-byte tier + _buffer[_position] = (byte)(0xE0 | (value >> 24)); + _position += 4; + return; + } + + // 5-byte tier: 1111xxxx (low nibble unused) + 4B LE (full uint32) _buffer[_position] = 0xF0; - Unsafe.WriteUnaligned(ref _buffer[_position + 1], value); + _buffer[_position + 4] = (byte)(value >> 24); _position += 5; } diff --git a/AyCode.Services.Server/SignalRs/AcSignalRDataSource.cs b/AyCode.Services.Server/SignalRs/AcSignalRDataSource.cs index c7720ba..008d2be 100644 --- a/AyCode.Services.Server/SignalRs/AcSignalRDataSource.cs +++ b/AyCode.Services.Server/SignalRs/AcSignalRDataSource.cs @@ -409,8 +409,9 @@ namespace AyCode.Services.Server.SignalRs else { // Fallback: incompatible collection type (e.g., List in test scenarios without protocol). - // Re-serialize to byte[] then process inline. - var reBytes = AcBinarySerializer.Serialize(responseData); + // Re-serialize to byte[] then process inline. Pass explicit runtime type — the + // generic ToBinary() overload would infer T = object here. + var reBytes = responseData.ToBinary(responseData.GetType()); if (!setSourceToWorkingReferenceList) { if (InnerList is IAcObservableCollection observable2) diff --git a/AyCode.Services.Server/SignalRs/AcWebSignalRHubBase.cs b/AyCode.Services.Server/SignalRs/AcWebSignalRHubBase.cs index 2d732e7..e422012 100644 --- a/AyCode.Services.Server/SignalRs/AcWebSignalRHubBase.cs +++ b/AyCode.Services.Server/SignalRs/AcWebSignalRHubBase.cs @@ -315,8 +315,11 @@ public abstract class AcWebSignalRHubBase(IConfiguration // Single serialize (here) → single deserialize (consumer). No double ser/deser. 
if (isRawBytes && responseData != null && responseData is not byte[]) { + // Pass explicit runtime type — the generic ToBinary() overload would infer T = object + // here (since `responseData` is statically `object?`), emitting an object-typed wire payload + // instead of the concrete type. See ACCORE-BIN bug fix 2026-05-26. responseData = SerializerOptions.SerializerType == AcSerializerType.Binary - ? AcBinarySerializer.Serialize(responseData) + ? responseData.ToBinary(responseData.GetType()) : AyCode.Core.Compression.GzipHelper.Compress(responseData.ToJson()); } diff --git a/AyCode.Services/SignalRs/AcBinaryHubProtocol.cs b/AyCode.Services/SignalRs/AcBinaryHubProtocol.cs index 633e8d1..6f24439 100644 --- a/AyCode.Services/SignalRs/AcBinaryHubProtocol.cs +++ b/AyCode.Services/SignalRs/AcBinaryHubProtocol.cs @@ -5,6 +5,7 @@ using System.IO.Pipelines; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; +using AyCode.Core.Extensions; using AyCode.Core.Serializers.Binaries; using Microsoft.AspNetCore.Connections; using Microsoft.AspNetCore.SignalR; @@ -557,7 +558,11 @@ public class AcBinaryHubProtocol : IHubProtocol // BINARY_ASYNCPIPE_TODO. try { - dataBytes = AcBinarySerializer.Serialize(streamedArg, pipeWriter, _options, _flushPolicy, _flushTimeout); + // Heterogeneous `object?` arg — pass the runtime type explicitly so the wire payload + // carries the concrete type's encoding (the generic overload would infer T = object + // and emit an object-typed body — the bug behind the 320 SignalR test regressions). + var streamedRuntimeType = streamedArg?.GetType() ?? 
typeof(object); + dataBytes = AcBinarySerializer.Serialize(streamedArg, streamedRuntimeType, pipeWriter, _options, _flushPolicy, _flushTimeout); _logger?.LogDebug("WriteMessageChunked CHUNK_DATA + CHUNK_END emitted via AsyncPipeWriterOutput dataBytes={DataBytes}", dataBytes); } catch (Exception serializeEx) @@ -1324,13 +1329,20 @@ public class AcBinaryHubProtocol : IHubProtocol return; } + // Runtime type for the heterogeneous `object?` arg — preserves polymorphism on the wire + // (the generic ToBinary() overload would infer T = object, losing the concrete type). + // Null-safe fallback to typeof(object); the underlying Serialize early-returns the Null + // marker for null values. + var runtimeType = value?.GetType() ?? typeof(object); + // Bytes mode: serialize to byte[], write through BWO (no FlushAndReset needed) if (_protocolMode == BinaryProtocolMode.Bytes) { - var serialized = AcBinarySerializer.Serialize(value, _options); + var serialized = value.ToBinary(runtimeType, _options); bw.WriteRaw(serialized.Length); bw.WriteBytes(serialized); + DebugLogArgument(runtimeType, serialized.Length, value); return; } @@ -1342,10 +1354,29 @@ public class AcBinaryHubProtocol : IHubProtocol var argLenSpan = output.GetSpan(LengthPrefixSize); output.Advance(LengthPrefixSize); - var argBytes = AcBinarySerializer.Serialize(value, output, _options); + // ToBinary(Type, IBufferWriter, options) doesn't return the byte count — call the + // type-explicit serializer overload directly to capture argBytes for the length prefix. + var argBytes = AcBinarySerializer.Serialize(value, runtimeType, output, _options); Unsafe.WriteUnaligned(ref argLenSpan[0], argBytes); externalBytes += LengthPrefixSize + argBytes; + + DebugLogArgument(runtimeType, argBytes, value); + } + + [Conditional("DEBUG")] + protected void DebugLogArgument(Type runtimeType, int argBytes, object? 
value) + { + var kind = value switch + { + null => "null", + System.Collections.IDictionary => "dictionary", + System.Collections.IEnumerable when value is not string => "collection", + _ => "scalar" + }; + + _logger?.LogDebug("WriteArgument runtimeType={RuntimeType} argBytes={ArgBytes} valueIsNull={ValueIsNull} valueTypeKind={Kind}", runtimeType.FullName, argBytes, value == null, kind); + Console.WriteLine($"[DEBUG] WriteArgument runtimeType={runtimeType.FullName} argBytes={argBytes} valueIsNull={value == null} kind={kind}"); } private object?[] ReadArguments(ref SequenceReader r, IReadOnlyList paramTypes, object? headerContext) @@ -1534,19 +1565,35 @@ public class AcBinaryHubProtocol : IHubProtocol return v; } + /// + /// Prefix-tier VarUInt decode (UTF-8-style). MUST stay symmetric with the write-side + /// and + /// . The previous LEB128 implementation + /// became wire-format-mismatched after the V3P9 prefix-tier VarUInt rewrite — root cause + /// of the SignalR test regressions. + /// First-byte prefix → total size: 0xxxxxxx (1B) | 10xxxxxx (2B) | 110xxxxx (3B) | 1110xxxx (4B) | 1111xxxx (5B). 
+ /// [MethodImpl(MethodImplOptions.AggressiveInlining)] protected static uint ReadVarUInt(ref SequenceReader r) { - uint value = 0; - var shift = 0; - while (r.TryRead(out var b)) - { - value |= (uint)(b & 0x7F) << shift; - if ((b & 0x80) == 0) - return value; - shift += 7; - } - return value; + if (!r.TryRead(out var b0)) return 0; + if (b0 < 0x80) return b0; + + // 2-byte tier + if (!r.TryRead(out var b1)) return 0; + if (b0 < 0xC0) return ((uint)(b0 & 0x3F) << 8) | b1; + + // 3-byte tier + if (!r.TryRead(out var b2)) return 0; + if (b0 < 0xE0) return ((uint)(b0 & 0x1F) << 16) | ((uint)b2 << 8) | b1; + + // 4-byte tier + if (!r.TryRead(out var b3)) return 0; + if (b0 < 0xF0) return ((uint)(b0 & 0x0F) << 24) | ((uint)b3 << 16) | ((uint)b2 << 8) | b1; + + // 5-byte tier (prefix nibble unused) + if (!r.TryRead(out var b4)) return 0; + return ((uint)b4 << 24) | ((uint)b3 << 16) | ((uint)b2 << 8) | b1; } protected static string ReadString(ref SequenceReader r) diff --git a/AyCode.Services/SignalRs/AcSignalRClientBase.cs b/AyCode.Services/SignalRs/AcSignalRClientBase.cs index 06c9022..9680a5f 100644 --- a/AyCode.Services/SignalRs/AcSignalRClientBase.cs +++ b/AyCode.Services/SignalRs/AcSignalRClientBase.cs @@ -194,9 +194,11 @@ namespace AyCode.Services.SignalRs yield break; } + // Heterogeneous `object?` projection — each p must be serialized with its runtime type, + // otherwise the generic SerializeToBinary(T) would infer T = object on every element. var msgBytes = contextParams is { Length: > 0 } ? SignalRSerializationHelper.SerializeToBinary( - contextParams.Select(p => SignalRSerializationHelper.SerializeToBinary(p)).ToArray()) + contextParams.Select(p => SignalRSerializationHelper.SerializeToBinary(p, p?.GetType() ?? 
typeof(object))).ToArray()) : null; var stream = HubConnection.StreamAsync( diff --git a/AyCode.Services/SignalRs/AyCodeBinaryHubProtocol.cs b/AyCode.Services/SignalRs/AyCodeBinaryHubProtocol.cs index 804ec75..56fe1c2 100644 --- a/AyCode.Services/SignalRs/AyCodeBinaryHubProtocol.cs +++ b/AyCode.Services/SignalRs/AyCodeBinaryHubProtocol.cs @@ -170,6 +170,8 @@ public class AyCodeBinaryHubProtocol : AcBinaryHubProtocol protected override object? ReadSingleArgument(ref SequenceReader r, Type targetType, object? headerContext) { r.TryReadLittleEndian(out int argLength); + DebugLogArgument(targetType, argLength, r.Remaining); + if (argLength == 0) return null; diff --git a/AyCode.Services/SignalRs/ISignalParams.cs b/AyCode.Services/SignalRs/ISignalParams.cs index 384c1e9..1a80670 100644 --- a/AyCode.Services/SignalRs/ISignalParams.cs +++ b/AyCode.Services/SignalRs/ISignalParams.cs @@ -2,6 +2,7 @@ using System.Reflection; using AyCode.Core.Extensions; using AyCode.Core.Serializers; using AyCode.Core.Serializers.Attributes; +using AyCode.Core.Serializers.Binaries; namespace AyCode.Services.SignalRs; @@ -60,7 +61,13 @@ public class SignalParams : ISignalParams // N× pool roundtrip — see PERF note in summary var paramBytes = new byte[parameters.Length][]; for (var i = 0; i < parameters.Length; i++) - paramBytes[i] = parameters[i].ToBinary(); + { + // Pass explicit runtime type — parameters[i] is statically object, so the generic + // ToBinary() overload would infer T = object and emit an object-typed wire payload + // (instead of the concrete int/string/DTO type). The server's GetParameterValues then + // deserializes to default(targetType) → 0 for int, null for reference types, etc. + paramBytes[i] = parameters[i].ToBinary(parameters[i]?.GetType() ?? 
typeof(object)); + } _parameterValues = paramBytes; Parameters = paramBytes.ToBinary(); diff --git a/AyCode.Services/SignalRs/SignalRSerializationHelper.cs b/AyCode.Services/SignalRs/SignalRSerializationHelper.cs index acc38a4..7fd8866 100644 --- a/AyCode.Services/SignalRs/SignalRSerializationHelper.cs +++ b/AyCode.Services/SignalRs/SignalRSerializationHelper.cs @@ -1,4 +1,5 @@ using System.Buffers; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using AyCode.Core.Compression; using AyCode.Core.Extensions; @@ -80,6 +81,20 @@ public static class SignalRSerializationHelper value.ToBinary(writer, options ?? AcBinarySerializerOptions.Default); } + /// + /// Serialize object to binary with explicit runtime type. Use this overload at heterogeneous + /// object? call sites where the generic + /// would infer T = object and emit an object-typed wire payload instead of the concrete + /// runtime type's encoding. Typical use: SerializeToBinary(value, value.GetType()). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static byte[] SerializeToBinary(object? value, [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type type, AcBinarySerializerOptions? options = null) + { + var writer = new ArrayBufferWriter(256); + value.ToBinary(type, writer, options ?? AcBinarySerializerOptions.Default); + return writer.WrittenSpan.ToArray(); + } + /// /// Deserialize binary data to object. /// @@ -160,7 +175,9 @@ public static class SignalRSerializationHelper return byteData; var binaryOptions = serializerOptions as AcBinarySerializerOptions ?? AcBinarySerializerOptions.Default; - return SerializeToBinary(responseData, binaryOptions); + // Explicit runtime type — responseData is statically object?, so the generic + // SerializeToBinary(T) overload would infer T = object and emit object-typed bytes. 
+ return SerializeToBinary(responseData, responseData.GetType(), binaryOptions); } // JSON mode with Brotli compression diff --git a/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_VARUINT.md b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_VARUINT.md new file mode 100644 index 0000000..5b3c6f4 --- /dev/null +++ b/AyCode.Services/docs/SIGNALR_BINARY_PROTOCOL/SIGNALR_BINARY_PROTOCOL_VARUINT.md @@ -0,0 +1,223 @@ +# SignalR Binary Protocol — VarUInt Frame Marker + +Implementation plan for replacing the fixed-width `[201][UINT16 size][data]` chunk-data +frame with a unified VarUInt-based marker scheme. Single-grammar dispatch: the first +VarUInt encodes either a sentinel marker (1-4) or the chunk-data body length (≥5). + +> Related: `SIGNALR_BINARY_PROTOCOL_ISSUES.md` (B7K9 cap, Z2X9 cancel-recovery), +> `../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_ASYNCPIPE_ISSUES.md` +> (streaming I/O layer issues). + +## Motivation + +Three coordinated wins from a single wire-format change: + +1. **Wire-size reduction across all typical chunk-size tiers** — `-1 byte / chunk` + on the default 4 KB config, `-2 byte / chunk` on small (<128 B) chunks. +2. **65 KB single-value cap removed** (`ACCORE-BIN-I-B7K9`) — the UINT16 wire-field + constraint is gone; large string/byte[] property values fit a single chunk via + wider VarUInt encoding (up to ~4 GB / chunk). +3. **CHUNK_ABORT becomes a real wire marker** (`ACCORE-BIN-I-Z2X9`) — the + currently transport-layer-only abort signal moves to the wire format, enabling + in-band cancel/timeout recovery without TCP-disconnect ceremony. + +The hot-path frame decode collapses to a single VarUInt-read plus one branch +(`first >= 5 → CHUNK_DATA, body length = first`), the dominant case on any +typical workload. 
+ +## Wire-format scheme + +``` +VarUInt(1) → CHUNK_START (followed by the standard SignalR length-prefixed message body) +VarUInt(2) → CHUNK_END (no body — end-of-message) +VarUInt(3) → CHUNK_ABORT (no body — sender-side cancel; receiver discards in-progress message) +VarUInt(4) → CHUNK_DATA (followed by a separate VarUInt body length, 0..4 byte body) +VarUInt(N≥5) → CHUNK_DATA (body length = N, body bytes follow) +``` + +Notes: +- `VarUInt(0)` is **never valid on the wire**. A decoded 0 signals stream corruption + → defensive `throw InvalidDataException`. One `cmp` instruction in the reader. +- `VarUInt(4)` is the explicit small-body CHUNK_DATA marker — covers `body length ∈ [0..4]` + which would otherwise collide with the sentinel range. Adding 1 byte for marker + but enabling 0..4-byte bodies (keepalive / flush / tiny payload) is still a net + size win vs the legacy 3-byte header. +- The marker space (1-4) and the body-length space (≥5) share the same VarUInt + grammar — the reader does not need to branch between "marker-byte then length" + vs "length-byte"; one read, one switch. + +### CHUNK_START semantics (unchanged from current) + +After `VarUInt(1)`, the bytes that follow are the standard SignalR length-prefixed +message envelope: `[INT32 LE payloadLength][payload bytes]`. The payload itself is +the InvocationMessage / CompletionMessage / etc. encoded per the existing rules +(streamed-arg-placeholder marker `INT32 -1`, args, headers). No change to that +inner structure. + +### Outer SignalR transport prefix (unchanged) + +The transport-level `[INT32 LE payloadLength]` that Kestrel / WebSocket pipes +expect at the head of every hub-protocol message **stays** — this is a SignalR +protocol contract, not part of the AcBinary chunk framing. The VarUInt scheme +described here lives **inside** that payload. 
+ +## Size matrix — old vs new + +| Body length | Legacy (`[201][UINT16 size]`) | New | Δ | +|---|---|---|---| +| 0..4 | 3 byte | 2 byte (marker `4` + VarUInt 0..4) | **−1** | +| 5..127 | 3 byte | 1 byte (VarUInt 1-byte tier) | **−2** | +| 128..16 383 | 3 byte | 2 byte (VarUInt 2-byte tier) | **−1** | +| 16 384..65 535 | 3 byte | 3 byte (VarUInt 3-byte tier) | same | +| 65 536..2 097 151 | **not representable** | 3 byte | new capability | +| 2 097 152..2³² − 1 | **not representable** | 4-5 byte | new capability | + +Realistic chunk-size configurations: + +| Chunk-size | Body length tier | New header bytes | Δ vs legacy 3-byte | +|---|---|---|---| +| Tiny 64 B | 1-byte VarUInt | 1 | **−2** | +| Default 4 KB | 2-byte VarUInt | 2 | **−1** | +| Large 32 KB | 3-byte VarUInt | 3 | same | +| Maximum 64 KB (legacy cap) | 3-byte VarUInt | 3 | same | +| > 64 KB (new tier) | 3-5 byte VarUInt | 3-5 | enables large single-chunk values | + +The default 4 KB config — the typical SignalR deployment — saves 1 byte per chunk. +A 275 KB payload at 4 KB chunks ≈ 68 chunks → **−68 byte / message** wire overhead +reduction. Per-message marginal, but cumulative on high-throughput streams. + +## Reader hot path + +```csharp +var first = context.ReadVarUInt(); + +if (first >= 5) +{ + // CHUNK_DATA fast path — body length = first + ProcessChunkData(bodyLength: (int)first); + return; +} + +// Rare branches — sentinel markers + small-body CHUNK_DATA +switch (first) +{ + case 1: HandleChunkStart(); break; + case 2: HandleChunkEnd(); break; + case 3: HandleChunkAbort(); break; + case 4: + var length = context.ReadVarUInt(); + ProcessChunkData(bodyLength: (int)length); + break; + default: + // first == 0 — corruption signal + throw new InvalidDataException("Invalid AcBinary chunk frame marker (0)."); +} +``` + +The `first >= 5` branch is the dominant hot path (>99% on typical workloads, +because chunk sizes are configured to ≥ 64 B and almost always exceed 5 bytes +of body content). 
One branch + one VarUInt read — minimal frame-decode cost. + +VarUInt-decode cost vs the legacy `[201][UINT16]` fixed read: marginally higher +on small-tier (1-2 byte VarUInts), comparable on 2-3 byte VarUInts. The wire-size +savings on the network compensate for any single-digit-nanosecond decode-time +difference. Benchmark validation recommended at impl time but not expected to +gate the change. + +## Cross-cutting issue resolutions + +The wire-format change closes two pre-existing open issues without separate fixes. + +### `ACCORE-BIN-I-B7K9` — 65 KB single-value cap removed + +The legacy `[201][UINT16 size]` field caps any single chunk's data payload at +65 535 bytes. The issue documents the workaround direction (producer-side +"transparent split-on-commit" with `ArrayPool`-rented owned buffer) as a complex +refactor. The new VarUInt scheme makes that **unnecessary** — a single chunk +can carry up to ~4 GB of body, so any practical large string / byte[] property +fits without splitting. + +The proposed B7K9 producer-side refactor is therefore **scope-eliminated** by +this change. The current temporary sanity guard +(`BinarySerializationContext.EnsureCapacity` → `GrowAndValidate` → +`ThrowGrowFailedToSatisfy`) can stay as a defensive check, but the underlying +cap it surfaces no longer exists. + +### `ACCORE-BIN-I-Z2X9` — Built-in cancel/timeout recovery + +The current wire-format has no marker for "abort current message, restart fresh". +The issue documents that: +> If a sender starts writing a multi-chunk message and then aborts (no [202] +> follows), the receiver's framing state machine is stuck in `AwaitingData` ... +> When the next message's [201] header byte arrives, the state machine +> interprets it as data — silent corruption follows. 
+ +The new `VarUInt(3) → CHUNK_ABORT` marker provides the in-band abort signal: + +- Sender mid-message → emits `VarUInt(3)` → receiver resets its framing state to + `AwaitingHeader`, discards the in-progress message buffer, fires the abort + callback (`AyCodeBinaryHubProtocol.OnChunkAbort` already exists for this). +- No transport-disconnect needed. The connection stays open for the next message. + +This resolves Z2X9 without a separate fix and removes the +"transport-layer-recovery is the contract" workaround from the receiver. + +## Implementation scope + +| File | Change | +|---|---| +| `AyCode.Services/SignalRs/AcBinaryHubProtocol.cs` | Constants: replace `MsgAsyncChunkStart=200` / `MsgAsyncChunkEnd=202` byte constants with VarUInt-encoded sentinels (1, 2). Add `MsgAsyncChunkAbort=3`. Add `MsgAsyncChunkDataSmall=4`. `WriteMessageChunked` — emit start/end via VarUInt. `TryParseChunkData` / `TryParseMessage` — VarUInt-based dispatch. | +| `AyCode.Core/Serializers/Binaries/AsyncPipeWriterOutput.cs` | CHUNK_DATA frame emit: replace `WriteByte(201) + WriteUInt16LittleEndian(size)` with `WriteVarUInt(size)` for the `≥5` fast path, `WriteVarUInt(4) + WriteVarUInt(size)` for the small-body path. `[202]` end-marker → `WriteVarUInt(2)`. Add `WriteVarUInt(3)` ABORT-emit path. | +| `AyCode.Core/Serializers/Binaries/AsyncPipeReaderInput.cs` | `Feed` framing state machine: replace `[201/202]` byte-marker dispatch with VarUInt-decode + sentinel/length branch. Defensive `VarUInt(0) → throw`. ABORT-marker (`VarUInt(3)`) → reset state + fire abort callback. | +| `SIGNALR_BINARY_PROTOCOL/README.md` | Wire-format spec section update — replace `[201][UINT16][data]...[202]` documentation with the new VarUInt scheme. Update the "BinaryProtocolMode" / "AsyncSegment" section. | +| `BINARY_FORMAT.md` | **Not affected** — this is SignalR-protocol-layer framing, not AcBinary inner format. 
| +| Tests | Round-trip tests across all body-length tiers (0..4, 5..127, 128..16383, 16384..65535, 65536+ new tier). Corruption-detection test (wire-byte 0 → throw). ABORT-flow integration test (sender abort → receiver state reset). | + +Estimated effort: ~1-2 days for the wire-format change + state machine update, +plus test coverage. Low blast radius — all changes localized to three files + +wire-format spec doc. + +## Defensive validation + +### VarUInt(0) on the wire — corruption + +VarUInt encoding never produces a 0-valued first byte for a non-zero number, +and the protocol never legitimately emits 0 as the leading frame VarUInt (no sentinel maps to it, +and zero-length bodies are sent behind the `VarUInt(4)` small-body marker). A decoded 0 indicates either a corrupted +stream or a sender-side encoder bug. Reader response: + +```csharp +if (first == 0) + throw new InvalidDataException( + "AcBinary SignalR frame: invalid VarUInt(0) marker — stream corruption suspected."); +``` + +One `cmp` in the hot path's else-branch (the rare-marker switch's `default`). + +### Robustness vs corruption mid-stream + +Same property as the legacy `[201/202]` byte-marker format: once the framing +state machine loses sync, it cannot self-recover from a corrupted byte position +without external resynchronization (e.g. transport disconnect + reconnect). The +new scheme does not regress this — both formats rely on the same +"transport-layer-recovery is the contract" semantics, except the ABORT-marker +now provides an explicit in-band fast-path for known-sender-side aborts (no +disconnect needed for that case). + +## Open considerations + +- **VarUInt encoding choice** — the existing AcBinary `WriteVarUInt` / + `ReadVarUInt` implementations are used. No new encoding helpers. +- **Performance regression check** — BDN benchmark comparing legacy-byte-marker + vs VarUInt-marker decode cost on a representative chunk-stream. Expected: ≤2% + decode-time regression, offset by wire-size savings on real network. 
+- **ABORT-callback semantics** — `AyCodeBinaryHubProtocol.OnChunkAbort` already + exists for the transport-disconnect path; the in-band marker reuses the same + callback, no consumer-side change needed. + +## References + +- `SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-bin-i-b7k9` — 65 KB cap (resolved by this) +- `../../../../AyCode.Core/AyCode.Core/docs/BINARY/BINARY_ASYNCPIPE_ISSUES.md#accore-bin-i-z2x9` — cancel recovery (resolved by this) +- `SIGNALR_BINARY_PROTOCOL_ISSUES.md#accore-bin-i-m9p3` — chunk multiplexing (separate concern, not addressed here) +- `README.md` — current wire-format spec (to be updated on landing)