From 81bc41c1188ddd9a944b926a5d5017365e4265a5 Mon Sep 17 00:00:00 2001 From: Loretta Date: Sun, 10 May 2026 15:59:31 +0200 Subject: [PATCH] [LOADED_DOCS: 2 files, no new loads] FastWire: Add markerless string encoding/decoding Introduced a markerless FastWire path for string properties and collection elements in AcBinary serialization. Strings are now encoded with a 4-byte int32 sentinel header (-1=null, 0=empty, N>0=content) and UTF-16 bytes, eliminating the type code marker in FastWire mode. Updated code generation, runtime, and documentation to support this, while preserving Compact mode behavior and cross-mode compatibility. --- .../AcBinarySourceGenerator.cs | 52 ++++++++++++++++--- ...lizer.BinaryDeserializationContext.Read.cs | 52 ++++++++++++++----- .../Binaries/AcBinaryDeserializer.Populate.cs | 13 +++++ .../Binaries/AcBinaryDeserializer.cs | 17 +++++- ...rySerializer.BinarySerializationContext.cs | 42 ++++++++++----- .../Binaries/AcBinarySerializer.cs | 20 ++++++- 6 files changed, 159 insertions(+), 37 deletions(-) diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs index 09b6611..7be815a 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.cs @@ -1782,6 +1782,26 @@ public class AcBinarySourceGenerator : IIncrementalGenerator return; } + // String FastWire markerless fast-path: int32 sentinel header (-1 = null, 0 = empty, N > 0 = content). + // Wire-symmetric with `WriteStringGenerated` (SGen) and `WriteStringUtf16Markerless` (Runtime). + // Skips the typeCode-read entirely in FastWire mode; falls through to markered dispatch in Compact. 
+ if (p.TypeKind == PropertyTypeKind.String) + { + sb.AppendLine($"{i}if (context.FastWire)"); + sb.AppendLine($"{i}{{"); + sb.AppendLine($"{i} {a} = context.ReadStringUtf16Markerless()!;"); + sb.AppendLine($"{i}}}"); + sb.AppendLine($"{i}else"); + sb.AppendLine($"{i}{{"); + sb.AppendLine($"{i} var tc_{p.Name} = context.ReadByte();"); + sb.AppendLine($"{i} if (tc_{p.Name} != BinaryTypeCode.PropertySkip)"); + sb.AppendLine($"{i} {{"); + EmitReadString(sb, a, $"tc_{p.Name}", i + " "); + sb.AppendLine($"{i} }}"); + sb.AppendLine($"{i}}}"); + return; + } + // Markered types: read type code, then dispatch var tc = $"tc_{p.Name}"; sb.AppendLine($"{i}var {tc} = context.ReadByte();"); @@ -1888,8 +1908,11 @@ public class AcBinarySourceGenerator : IIncrementalGenerator sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} if (context.FastWire)"); sb.AppendLine($"{i} {{"); + sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes."); + sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles"); + sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)"); sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();"); - sb.AppendLine($"{i} {a} = fwlen == 0 ? string.Empty : context.ReadStringUtf8(fwlen);"); + sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);"); sb.AppendLine($"{i} }}"); sb.AppendLine($"{i} else"); sb.AppendLine($"{i} {{"); @@ -2442,25 +2465,38 @@ public class AcBinarySourceGenerator : IIncrementalGenerator /// private static void EmitReadNonComplexCollectionElement(StringBuilder sb, PropInfo p, string indexVar, string propSuffix, string i, bool isArray, string? addMethod) { - var etc = $"etc_{propSuffix}"; - sb.AppendLine($"{i}var {etc} = context.ReadByte();"); - var addCall = addMethod ?? 
"Add"; var elemType = p.ElementFullTypeName!; var colRef = $"col_{propSuffix}"; + // String element FastWire markerless fast-path — same wire as property-level (int32 sentinel header). + // All FastWire string writes funnel through `WriteStringWithDispatch.FastWire = WriteStringUtf16Markerless`, + // so collection elements use the same markerless format. Skips the etc-read entirely in FastWire mode. if (p.ElementKind == PropertyTypeKind.String) { - // String element: FixStr / String / StringInternFirst / StringInterned / Null / StringEmpty var tempVar = $"sv_{propSuffix}"; - sb.AppendLine($"{i}string? {tempVar} = null;"); - EmitReadString(sb, tempVar, etc, i); + sb.AppendLine($"{i}string? {tempVar};"); + sb.AppendLine($"{i}if (context.FastWire)"); + sb.AppendLine($"{i}{{"); + sb.AppendLine($"{i} {tempVar} = context.ReadStringUtf16Markerless();"); + sb.AppendLine($"{i}}}"); + sb.AppendLine($"{i}else"); + sb.AppendLine($"{i}{{"); + sb.AppendLine($"{i} var etc_{propSuffix} = context.ReadByte();"); + sb.AppendLine($"{i} {tempVar} = null;"); + EmitReadString(sb, tempVar, $"etc_{propSuffix}", i + " "); + sb.AppendLine($"{i}}}"); if (isArray) sb.AppendLine($"{i}{colRef}[{indexVar}] = {tempVar}!;"); else sb.AppendLine($"{i}{colRef}.{addCall}({tempVar}!);"); + return; } - else if (p.ElementKind == PropertyTypeKind.Enum) + + var etc = $"etc_{propSuffix}"; + sb.AppendLine($"{i}var {etc} = context.ReadByte();"); + + if (p.ElementKind == PropertyTypeKind.Enum) { // Enum element: Enum marker or TinyInt var tempVar = $"ev_{propSuffix}"; diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index 832c91f..529f924 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -405,6 +405,45 @@ public 
static partial class AcBinaryDeserializer return result; } + /// <summary> + /// Reads a UTF-16 raw string of <paramref name="charLength"/> chars (FastWire mode body). + /// Wire body is charLength * 2 raw bytes (LE on Intel/AMD, native-endian elsewhere) — zero-decode + /// memcpy via <c>MemoryMarshal.Cast</c>. + /// Caller MUST be on the FastWire path. The companion <see cref="ReadStringUtf8"/> is + /// for Compact/UTF-8 wire only — the two paths are statically separate (no FastWire-runtime-check + /// inside this method). + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public string ReadStringUtf16(int charLength) + { + if (charLength == 0) return string.Empty; + + var byteLen = charLength * 2; + EnsureAvailable(byteLen); + + var chars = MemoryMarshal.Cast(_buffer.AsSpan(_position, byteLen)); + var value = new string(chars); + + _position += byteLen; + return value; + } + + /// <summary> + /// FastWire markerless string read — int32 sentinel header. Self-contained: handles all three + /// states (null / empty / content) via int32 dispatch. -1 = null, 0 = empty, + /// N > 0 = content (followed by N×2 UTF-16 raw bytes). + /// Hot-path-first: positive length (content) is the common case, branch-prediction-favored. + /// Companion writer is <c>WriteStringUtf16Markerless</c>. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public string? 
ReadStringUtf16Markerless() + { + var len = ReadInt32Unsafe(); + if (len > 0) return ReadStringUtf16(len); + if (len == 0) return string.Empty; + return null; // len < 0 (sentinel -1) + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public string ReadStringUtf8(int length) { @@ -413,19 +452,6 @@ public static partial class AcBinaryDeserializer return string.Empty; } - // FastWire: length is char count, data is UTF-16 (2 bytes per char) - if (FastWire) - { - var byteLen = length * 2; - EnsureAvailable(byteLen); - - var chars = MemoryMarshal.Cast(_buffer.AsSpan(_position, byteLen)); - var value = new string(chars); - - _position += byteLen; - return value; - } - EnsureAvailable(length); // WASM optimization: cache short strings to reduce allocations diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Populate.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Populate.cs index 28a22b6..32c3de6 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Populate.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Populate.cs @@ -179,6 +179,19 @@ public static partial class AcBinaryDeserializer return; } + // FastWire markerless string-property fast-path — int32 sentinel header (-1 null / 0 empty / N>0 + // content). Wire-symmetric with `WriteStringGenerated` / `WritePropertyOrSkip` String case via + // `WriteStringUtf16Markerless`. Skips the typeCode-read entirely; reader-writer pair eliminates + // 1 byte per content string in FastWire mode. Condition order: bool field-load (`FastWire`) + // first → cheap short-circuit in Compact mode (most-common case in many deployments) and + // branch-predictor-stable in FastWire mode (constant for the entire Deserialize). The + // `AccessorType == String` enum-compare (2 instructions: load + cmp) only runs when needed. 
+ if (context.FastWire && propInfo.AccessorType == PropertyAccessorType.String) + { + propInfo.SetValue(target, context.ReadStringUtf16Markerless()); + return; + } + // Read marker once — eliminates redundant PeekByte + ReadByte boundary checks. // All branches below receive the already-consumed typeCode. var typeCode = context.ReadByte(); diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index fe20de4..2c2d28b 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -1098,6 +1098,16 @@ public static partial class AcBinaryDeserializer { if (context.IsAtEnd) return null; + // FastWire markerless string fast-path — when the dispatch target is a string, the wire is + // int32 sentinel (no marker byte). Skips the typeCode-read; companion to `WriteStringWithDispatch` + // FastWire branch (which writes via `WriteStringUtf16Markerless`). Condition order: bool field-load + // (`FastWire`) first — branch-predictor-stable; `targetType == typeof(string)` ref-equality check + // (load + cmp) only runs when FastWire is true. + if (context.FastWire && targetType == typeof(string)) + { + return context.ReadStringUtf16Markerless(); + } + var typeCode = context.ReadByte(); // Handle tiny int first (most common case for small integers, >= 192) @@ -1157,10 +1167,13 @@ public static partial class AcBinaryDeserializer { if (context.FastWire) { - // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes] + // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]. // Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. + // Path used by collection/dictionary element string reads (markered) and runtime path. 
+ // SGen property-level strings take the markerless EmitReadProp path which calls + // `ReadStringUtf16Markerless` (and through it `ReadStringUtf16`), bypassing the `ReadStringSmall` marker dispatch. var charLenF = context.ReadInt32Unsafe(); - return context.ReadStringUtf8(charLenF); + return context.ReadStringUtf16(charLenF); } // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 5072623..c7368e3 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -651,6 +651,29 @@ public static partial class AcBinarySerializer #region String Writes — inline + /// <summary> + /// FastWire markerless string write — int32 sentinel header. Self-contained: handles all three + /// states (null / empty / content) via int32 dispatch. -1 = null, 0 = empty, + /// N > 0 = content (followed by N×2 UTF-16 raw bytes). Saves 1 byte per content string vs + /// the markered StringSmall scheme; null/empty pay +3 bytes + /// (4-byte int32 vs 1-byte marker), but null/empty are rare in typical workloads → net wire-size win. + /// Companion reader is <c>ReadStringUtf16Markerless</c>. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void WriteStringUtf16Markerless(string? 
value) + { + if (value == null) { WriteRaw(-1); return; } + var charLength = value.Length; + if (charLength == 0) { WriteRaw(0); return; } + + var byteLenF = charLength * 2; + EnsureCapacity(4 + byteLenF); + Unsafe.WriteUnaligned(ref _buffer[_position], charLength); + _position += 4; + MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF)); + _position += byteLenF; + } + public void WriteStringUtf8(string value) { if (FastWire) @@ -753,18 +776,13 @@ public static partial class AcBinarySerializer if (FastWire) { - // FastWire: [StringSmall marker:1][charLen:int32 LE][UTF-16 raw bytes] - // Fix-int header (no tier-dispatch, no VarUInt branch loop) — matches MemPack `WriteUtf16` - // shape (which emits a fix `int` length). Single Unsafe.WriteUnaligned store on the - // writer; symmetric ReadInt32Unsafe on the reader. - var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow - EnsureCapacity(7 + byteLenF); - var fwPos = _position; - var packed = (ulong)BinaryTypeCode.StringSmall | ((ulong)(uint)charLength << 8); - Unsafe.WriteUnaligned(ref _buffer[fwPos], packed); - _position = fwPos + 5; - MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF)); - _position += byteLenF; + // FastWire markerless: int32 sentinel (-1 = null, 0 = empty, N > 0 = content + N*2 UTF-16 bytes). + // All FastWire string writes funnel through here (WriteStringGenerated → WriteString → + // WriteStringWithDispatch + WritePropertyOrSkip String case + TryWritePrimitive String case), + // so a single change here propagates markerless wire to property + collection + dictionary + + // runtime paths. Caller (WriteString) guarantees value is non-empty content; null/empty + // sentinel encoding lives inside `WriteStringUtf16Markerless` for direct callers. 
+ WriteStringUtf16Markerless(value); return; } diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs index c99a48d..f635814 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs @@ -899,12 +899,21 @@ public static partial class AcBinarySerializer /// /// Bridge for generated writers to call the runtime WriteString. - /// Matches WritePropertyOrSkip String case exactly: null → PropertySkip, empty → StringEmpty. + /// FastWire mode: markerless wire — delegates to + /// which handles all three states (null / empty / content) via int32 sentinel header. + /// Compact mode: existing markerful path — null → PropertySkip, empty → StringEmpty, + /// content → with marker dispatch. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void WriteStringGenerated(string? value, BinarySerializationContext context) where TOutput : struct, IBinaryOutputBase { + if (context.FastWire) + { + context.WriteStringUtf16Markerless(value); + return; + } + if (string.IsNullOrEmpty(value)) { context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty); @@ -1951,8 +1960,15 @@ public static partial class AcBinarySerializer return; case PropertyAccessorType.String: { - // Fast path: typed getter, no boxing, no Type.GetTypeCode() call + // Fast path: typed getter, no boxing, no Type.GetTypeCode() call. + // FastWire: markerless int32 sentinel via `WriteStringUtf16Markerless` — wire-symmetric + // with `WriteStringGenerated` (SGen) so cross-mode interop holds. Compact: existing markered. string? value = prop.GetString(obj); + if (context.FastWire) + { + context.WriteStringUtf16Markerless(value); + return; + } if (string.IsNullOrEmpty(value)) { context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty);