[LOADED_DOCS: 2 files, no new loads]

FastWire: Add markerless string encoding/decoding

Introduced a markerless FastWire path for string properties and collection elements in AcBinary serialization. Strings are now encoded with a 4-byte int32 sentinel header (-1=null, 0=empty, N>0=content) and UTF-16 bytes, eliminating the type code marker in FastWire mode. Updated code generation, runtime, and documentation to support this, while preserving Compact mode behavior and cross-mode compatibility.
This commit is contained in:
Loretta 2026-05-10 15:59:31 +02:00
parent 3f20948cde
commit 81bc41c118
6 changed files with 159 additions and 37 deletions

View File

@ -1782,6 +1782,26 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
return;
}
// String FastWire markerless fast-path: int32 sentinel header (-1 = null, 0 = empty, N > 0 = content).
// Wire-symmetric with `WriteStringGenerated` (SGen) and `WriteStringUtf16Markerless` (Runtime).
// Skips the typeCode-read entirely in FastWire mode; falls through to markered dispatch in Compact.
if (p.TypeKind == PropertyTypeKind.String)
{
sb.AppendLine($"{i}if (context.FastWire)");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} {a} = context.ReadStringUtf16Markerless()!;");
sb.AppendLine($"{i}}}");
sb.AppendLine($"{i}else");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} var tc_{p.Name} = context.ReadByte();");
sb.AppendLine($"{i} if (tc_{p.Name} != BinaryTypeCode.PropertySkip)");
sb.AppendLine($"{i} {{");
EmitReadString(sb, a, $"tc_{p.Name}", i + " ");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i}}}");
return;
}
// Markered types: read type code, then dispatch
var tc = $"tc_{p.Name}";
sb.AppendLine($"{i}var {tc} = context.ReadByte();");
@ -1888,8 +1908,11 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} if (context.FastWire)");
sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes.");
sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles");
sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)");
sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();");
sb.AppendLine($"{i} {a} = fwlen == 0 ? string.Empty : context.ReadStringUtf8(fwlen);");
sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} else");
sb.AppendLine($"{i} {{");
@ -2442,25 +2465,38 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
/// </summary>
private static void EmitReadNonComplexCollectionElement(StringBuilder sb, PropInfo p, string indexVar, string propSuffix, string i, bool isArray, string? addMethod)
{
var etc = $"etc_{propSuffix}";
sb.AppendLine($"{i}var {etc} = context.ReadByte();");
var addCall = addMethod ?? "Add";
var elemType = p.ElementFullTypeName!;
var colRef = $"col_{propSuffix}";
// String element FastWire markerless fast-path — same wire as property-level (int32 sentinel header).
// All FastWire string writes funnel through `WriteStringWithDispatch.FastWire = WriteStringUtf16Markerless`,
// so collection elements use the same markerless format. Skips the etc-read entirely in FastWire mode.
if (p.ElementKind == PropertyTypeKind.String)
{
// String element: FixStr / String / StringInternFirst / StringInterned / Null / StringEmpty
var tempVar = $"sv_{propSuffix}";
sb.AppendLine($"{i}string? {tempVar} = null;");
EmitReadString(sb, tempVar, etc, i);
sb.AppendLine($"{i}string? {tempVar};");
sb.AppendLine($"{i}if (context.FastWire)");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} {tempVar} = context.ReadStringUtf16Markerless();");
sb.AppendLine($"{i}}}");
sb.AppendLine($"{i}else");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} var etc_{propSuffix} = context.ReadByte();");
sb.AppendLine($"{i} {tempVar} = null;");
EmitReadString(sb, tempVar, $"etc_{propSuffix}", i + " ");
sb.AppendLine($"{i}}}");
if (isArray)
sb.AppendLine($"{i}{colRef}[{indexVar}] = {tempVar}!;");
else
sb.AppendLine($"{i}{colRef}.{addCall}({tempVar}!);");
return;
}
else if (p.ElementKind == PropertyTypeKind.Enum)
var etc = $"etc_{propSuffix}";
sb.AppendLine($"{i}var {etc} = context.ReadByte();");
if (p.ElementKind == PropertyTypeKind.Enum)
{
// Enum element: Enum marker or TinyInt
var tempVar = $"ev_{propSuffix}";

View File

@ -405,18 +405,20 @@ public static partial class AcBinaryDeserializer
return result;
}
/// <summary>
/// Reads a UTF-16 raw string of <paramref name="charLength"/> chars (FastWire mode body).
/// Wire body is <c>charLength * 2</c> raw bytes (LE on Intel/AMD, native-endian elsewhere) — zero-decode
/// memcpy via <see cref="MemoryMarshal.Cast{TFrom, TTo}(System.Span{TFrom})"/>.
/// <para>Caller MUST be on the FastWire path. The companion <see cref="ReadStringUtf8"/> is
/// for Compact/UTF-8 wire only — the two paths are statically separate (no FastWire-runtime-check
/// inside this method).</para>
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8(int length)
public string ReadStringUtf16(int charLength)
{
if (length == 0)
{
return string.Empty;
}
if (charLength == 0) return string.Empty;
// FastWire: length is char count, data is UTF-16 (2 bytes per char)
if (FastWire)
{
var byteLen = length * 2;
var byteLen = charLength * 2;
EnsureAvailable(byteLen);
var chars = MemoryMarshal.Cast<byte, char>(_buffer.AsSpan(_position, byteLen));
@ -426,6 +428,30 @@ public static partial class AcBinaryDeserializer
return value;
}
/// <summary>
/// Reads one FastWire string that carries no type-code marker. The wire starts with an int32
/// header that doubles as a state flag and a length:
/// <list type="bullet">
/// <item><description>header &gt; 0 — character count; the UTF-16 payload follows and is decoded by <see cref="ReadStringUtf16"/>.</description></item>
/// <item><description>header == 0 — empty string, no payload.</description></item>
/// <item><description>header &lt; 0 — null (the writer emits <c>-1</c>; every negative value is read as null).</description></item>
/// </list>
/// <para>The content case is tested first, before the empty and null cases.
/// Companion writer is <see cref="BinarySerializationContext{TOutput}.WriteStringUtf16Markerless"/>.</para>
/// </summary>
/// <returns>The decoded string, <see cref="string.Empty"/>, or <c>null</c>, depending on the header.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string? ReadStringUtf16Markerless()
{
    var header = ReadInt32Unsafe();

    // Same evaluation order as a guard chain: content first, then empty, then the null sentinel.
    return header > 0
        ? ReadStringUtf16(header)
        : header == 0
            ? string.Empty
            : null;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8(int length)
{
if (length == 0)
{
return string.Empty;
}
EnsureAvailable(length);
// WASM optimization: cache short strings to reduce allocations

View File

@ -179,6 +179,19 @@ public static partial class AcBinaryDeserializer
return;
}
// FastWire markerless string-property fast-path — int32 sentinel header (-1 null / 0 empty / N>0
// content). Wire-symmetric with `WriteStringGenerated` / `WritePropertyOrSkip` String case via
// `WriteStringUtf16Markerless`. Skips the typeCode-read entirely; reader-writer pair eliminates
// 1 byte per content string in FastWire mode. Condition order: bool field-load (`FastWire`)
// first → cheap short-circuit in Compact mode (most-common case in many deployments) and
// branch-predictor-stable in FastWire mode (constant for the entire Deserialize). The
// `AccessorType == String` enum-compare (2 instructions: load + cmp) only runs when needed.
if (context.FastWire && propInfo.AccessorType == PropertyAccessorType.String)
{
propInfo.SetValue(target, context.ReadStringUtf16Markerless());
return;
}
// Read marker once — eliminates redundant PeekByte + ReadByte boundary checks.
// All branches below receive the already-consumed typeCode.
var typeCode = context.ReadByte();

View File

@ -1098,6 +1098,16 @@ public static partial class AcBinaryDeserializer
{
if (context.IsAtEnd) return null;
// FastWire markerless string fast-path — when the dispatch target is a string, the wire is
// int32 sentinel (no marker byte). Skips the typeCode-read; companion to `WriteStringWithDispatch`
// FastWire branch (which writes via `WriteStringUtf16Markerless`). Condition order: bool field-load
// (`FastWire`) first — branch-predictor-stable; `targetType == typeof(string)` ref-equality check
// (load + cmp) only runs when FastWire is true.
if (context.FastWire && targetType == typeof(string))
{
return context.ReadStringUtf16Markerless();
}
var typeCode = context.ReadByte();
// Handle tiny int first (most common case for small integers, >= 192)
@ -1157,10 +1167,13 @@ public static partial class AcBinaryDeserializer
{
if (context.FastWire)
{
// Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes]
// Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes].
// Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop.
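// Path used by markered FastWire string reads (collection/dictionary element and runtime paths that
// have already consumed a StringSmall marker). NOTE(review): FastWire string writes now funnel through
// `WriteStringUtf16Markerless`, so confirm which producers still emit this markered form.
// SGen property-level strings take the markerless EmitReadProp path, which calls
// `ReadStringUtf16Markerless` (int32 sentinel header), bypassing the `ReadStringSmall` marker dispatch.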
var charLenF = context.ReadInt32Unsafe();
return context.ReadStringUtf8(charLenF);
return context.ReadStringUtf16(charLenF);
}
// Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes]

View File

@ -651,6 +651,29 @@ public static partial class AcBinarySerializer
#region String Writes inline
/// <summary>
/// Writes one FastWire string without a type-code marker. An int32 header encodes both state and
/// length: <c>-1</c> for null, <c>0</c> for empty, otherwise the character count followed by
/// <c>count × 2</c> raw UTF-16 bytes copied straight from the string's memory.
/// <para>Compared with the markered <see cref="WriteStringWithDispatch"/> StringSmall layout
/// (1-byte marker + int32 length), a content string is 1 byte smaller, while null and empty
/// strings cost 4 bytes instead of a single marker byte.</para>
/// <para>Companion reader is <see cref="BinaryDeserializationContext{TInput}.ReadStringUtf16Markerless"/>.</para>
/// </summary>
/// <param name="value">The string to encode; may be <c>null</c> or empty.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void WriteStringUtf16Markerless(string? value)
{
    // Null sentinel: header only, no payload.
    if (value == null)
    {
        WriteRaw(-1);
        return;
    }

    // Empty string: zero-length header only.
    if (value.Length == 0)
    {
        WriteRaw(0);
        return;
    }

    // View the string's chars as bytes; the span length is the UTF-16 byte count (chars × 2).
    var payload = MemoryMarshal.AsBytes(value.AsSpan());

    // Reserve header + payload in one step so the raw stores below cannot run past the buffer.
    EnsureCapacity(sizeof(int) + payload.Length);

    // Header: character count, stored unaligned in the machine's native byte order.
    Unsafe.WriteUnaligned<int>(ref _buffer[_position], value.Length);
    _position += sizeof(int);

    // Body: raw UTF-16 bytes, no transcoding.
    payload.CopyTo(_buffer.AsSpan(_position, payload.Length));
    _position += payload.Length;
}
public void WriteStringUtf8(string value)
{
if (FastWire)
@ -753,18 +776,13 @@ public static partial class AcBinarySerializer
if (FastWire)
{
// FastWire: [StringSmall marker:1][charLen:int32 LE][UTF-16 raw bytes]
// Fix-int header (no tier-dispatch, no VarUInt branch loop) — matches MemPack `WriteUtf16`
// shape (which emits a fix `int` length). Single Unsafe.WriteUnaligned<int> store on the
// writer; symmetric ReadInt32Unsafe on the reader.
var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow
EnsureCapacity(7 + byteLenF);
var fwPos = _position;
var packed = (ulong)BinaryTypeCode.StringSmall | ((ulong)(uint)charLength << 8);
Unsafe.WriteUnaligned<ulong>(ref _buffer[fwPos], packed);
_position = fwPos + 5;
MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF));
_position += byteLenF;
// FastWire markerless: int32 sentinel (-1 = null, 0 = empty, N > 0 = content + N*2 UTF-16 bytes).
// All FastWire string writes funnel through here (WriteStringGenerated → WriteString →
// WriteStringWithDispatch + WritePropertyOrSkip String case + TryWritePrimitive String case),
// so a single change here propagates markerless wire to property + collection + dictionary +
// runtime paths. Caller (WriteString) guarantees value is non-empty content; null/empty
// sentinel encoding lives inside `WriteStringUtf16Markerless` for direct callers.
WriteStringUtf16Markerless(value);
return;
}

View File

@ -899,12 +899,21 @@ public static partial class AcBinarySerializer
/// <summary>
/// Bridge for generated writers to call the runtime WriteString.
/// Matches WritePropertyOrSkip String case exactly: null → PropertySkip, empty → StringEmpty.
/// <para>FastWire mode: markerless wire — delegates to <see cref="BinarySerializationContext{TOutput}.WriteStringUtf16Markerless"/>
/// which handles all three states (null / empty / content) via int32 sentinel header.</para>
/// <para>Compact mode: existing markerful path — null → <c>PropertySkip</c>, empty → <c>StringEmpty</c>,
/// content → <see cref="WriteString{TOutput}"/> with marker dispatch.</para>
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void WriteStringGenerated<TOutput>(string? value, BinarySerializationContext<TOutput> context)
where TOutput : struct, IBinaryOutputBase
{
if (context.FastWire)
{
context.WriteStringUtf16Markerless(value);
return;
}
if (string.IsNullOrEmpty(value))
{
context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty);
@ -1951,8 +1960,15 @@ public static partial class AcBinarySerializer
return;
case PropertyAccessorType.String:
{
// Fast path: typed getter, no boxing, no Type.GetTypeCode() call
// Fast path: typed getter, no boxing, no Type.GetTypeCode() call.
// FastWire: markerless int32 sentinel via `WriteStringUtf16Markerless` — wire-symmetric
// with `WriteStringGenerated` (SGen) so cross-mode interop holds. Compact: existing markered.
string? value = prop.GetString(obj);
if (context.FastWire)
{
context.WriteStringUtf16Markerless(value);
return;
}
if (string.IsNullOrEmpty(value))
{
context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty);