[LOADED_DOCS: 2 files, no new loads]

FastWire: Add markerless string encoding/decoding

Introduced a markerless FastWire path for string properties and collection elements in AcBinary serialization. Strings are now encoded with a 4-byte int32 sentinel header (-1=null, 0=empty, N>0=content) and UTF-16 bytes, eliminating the type code marker in FastWire mode. Updated code generation, runtime, and documentation to support this, while preserving Compact mode behavior and cross-mode compatibility.
This commit is contained in:
Loretta 2026-05-10 15:59:31 +02:00
parent 3f20948cde
commit 81bc41c118
6 changed files with 159 additions and 37 deletions

View File

@ -1782,6 +1782,26 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
return; return;
} }
// String FastWire markerless fast-path: int32 sentinel header (-1 = null, 0 = empty, N > 0 = content).
// Wire-symmetric with `WriteStringGenerated` (SGen) and `WriteStringUtf16Markerless` (Runtime).
// Skips the typeCode-read entirely in FastWire mode; falls through to markered dispatch in Compact.
if (p.TypeKind == PropertyTypeKind.String)
{
sb.AppendLine($"{i}if (context.FastWire)");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} {a} = context.ReadStringUtf16Markerless()!;");
sb.AppendLine($"{i}}}");
sb.AppendLine($"{i}else");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} var tc_{p.Name} = context.ReadByte();");
sb.AppendLine($"{i} if (tc_{p.Name} != BinaryTypeCode.PropertySkip)");
sb.AppendLine($"{i} {{");
EmitReadString(sb, a, $"tc_{p.Name}", i + " ");
sb.AppendLine($"{i} }}");
sb.AppendLine($"{i}}}");
return;
}
// Markered types: read type code, then dispatch // Markered types: read type code, then dispatch
var tc = $"tc_{p.Name}"; var tc = $"tc_{p.Name}";
sb.AppendLine($"{i}var {tc} = context.ReadByte();"); sb.AppendLine($"{i}var {tc} = context.ReadByte();");
@ -1888,8 +1908,11 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} if (context.FastWire)"); sb.AppendLine($"{i} if (context.FastWire)");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {{");
sb.AppendLine($"{i} // Collection/dictionary element strings: markered FastWire body — int32 charLen + UTF-16 bytes.");
sb.AppendLine($"{i} // (Property-level strings take a separate markerless path in EmitReadProp; this case handles");
sb.AppendLine($"{i} // the markered StringSmall variant emitted by WriteStringWithDispatch from collection/runtime paths.)");
sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();"); sb.AppendLine($"{i} var fwlen = context.ReadInt32Unsafe();");
sb.AppendLine($"{i} {a} = fwlen == 0 ? string.Empty : context.ReadStringUtf8(fwlen);"); sb.AppendLine($"{i} {a} = context.ReadStringUtf16(fwlen);");
sb.AppendLine($"{i} }}"); sb.AppendLine($"{i} }}");
sb.AppendLine($"{i} else"); sb.AppendLine($"{i} else");
sb.AppendLine($"{i} {{"); sb.AppendLine($"{i} {{");
@ -2442,25 +2465,38 @@ public class AcBinarySourceGenerator : IIncrementalGenerator
/// </summary> /// </summary>
private static void EmitReadNonComplexCollectionElement(StringBuilder sb, PropInfo p, string indexVar, string propSuffix, string i, bool isArray, string? addMethod) private static void EmitReadNonComplexCollectionElement(StringBuilder sb, PropInfo p, string indexVar, string propSuffix, string i, bool isArray, string? addMethod)
{ {
var etc = $"etc_{propSuffix}";
sb.AppendLine($"{i}var {etc} = context.ReadByte();");
var addCall = addMethod ?? "Add"; var addCall = addMethod ?? "Add";
var elemType = p.ElementFullTypeName!; var elemType = p.ElementFullTypeName!;
var colRef = $"col_{propSuffix}"; var colRef = $"col_{propSuffix}";
// String element FastWire markerless fast-path — same wire as property-level (int32 sentinel header).
// All FastWire string writes funnel through `WriteStringWithDispatch.FastWire = WriteStringUtf16Markerless`,
// so collection elements use the same markerless format. Skips the etc-read entirely in FastWire mode.
if (p.ElementKind == PropertyTypeKind.String) if (p.ElementKind == PropertyTypeKind.String)
{ {
// String element: FixStr / String / StringInternFirst / StringInterned / Null / StringEmpty
var tempVar = $"sv_{propSuffix}"; var tempVar = $"sv_{propSuffix}";
sb.AppendLine($"{i}string? {tempVar} = null;"); sb.AppendLine($"{i}string? {tempVar};");
EmitReadString(sb, tempVar, etc, i); sb.AppendLine($"{i}if (context.FastWire)");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} {tempVar} = context.ReadStringUtf16Markerless();");
sb.AppendLine($"{i}}}");
sb.AppendLine($"{i}else");
sb.AppendLine($"{i}{{");
sb.AppendLine($"{i} var etc_{propSuffix} = context.ReadByte();");
sb.AppendLine($"{i} {tempVar} = null;");
EmitReadString(sb, tempVar, $"etc_{propSuffix}", i + " ");
sb.AppendLine($"{i}}}");
if (isArray) if (isArray)
sb.AppendLine($"{i}{colRef}[{indexVar}] = {tempVar}!;"); sb.AppendLine($"{i}{colRef}[{indexVar}] = {tempVar}!;");
else else
sb.AppendLine($"{i}{colRef}.{addCall}({tempVar}!);"); sb.AppendLine($"{i}{colRef}.{addCall}({tempVar}!);");
return;
} }
else if (p.ElementKind == PropertyTypeKind.Enum)
var etc = $"etc_{propSuffix}";
sb.AppendLine($"{i}var {etc} = context.ReadByte();");
if (p.ElementKind == PropertyTypeKind.Enum)
{ {
// Enum element: Enum marker or TinyInt // Enum element: Enum marker or TinyInt
var tempVar = $"ev_{propSuffix}"; var tempVar = $"ev_{propSuffix}";

View File

@ -405,6 +405,45 @@ public static partial class AcBinaryDeserializer
return result; return result;
} }
/// <summary>
/// Reads a UTF-16 raw string of <paramref name="charLength"/> chars (FastWire mode body).
/// Wire body is <c>charLength * 2</c> raw bytes (LE on Intel/AMD, native-endian elsewhere) — zero-decode
/// memcpy via <see cref="MemoryMarshal.Cast{TFrom, TTo}(System.Span{TFrom})"/>.
/// <para>Caller MUST be on the FastWire path. The companion <see cref="ReadStringUtf8"/> is
/// for Compact/UTF-8 wire only — the two paths are statically separate (no FastWire-runtime-check
/// inside this method).</para>
/// <para>The length is validated before it is doubled: a negative or overly large value read from a
/// corrupt/hostile payload would otherwise wrap around in unchecked <c>int</c> arithmetic and could
/// yield a small non-negative byte count, silently decoding the wrong bytes.</para>
/// </summary>
/// <param name="charLength">Number of UTF-16 code units to read. Must be in the range
/// <c>0 .. int.MaxValue / 2</c> so that the byte count fits in an <c>int</c>.</param>
/// <returns><see cref="string.Empty"/> when <paramref name="charLength"/> is 0; otherwise a new string
/// built from the next <c>charLength * 2</c> bytes of the buffer.</returns>
/// <exception cref="System.IO.InvalidDataException"><paramref name="charLength"/> is negative or its
/// byte count would overflow <c>int</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf16(int charLength)
{
    if (charLength == 0) return string.Empty;

    // One unsigned compare rejects both negative lengths and lengths whose doubled byte count
    // would overflow int (e.g. int.MinValue + 1 would wrap to a byte length of 2).
    if ((uint)charLength > int.MaxValue / 2)
    {
        throw new System.IO.InvalidDataException($"Invalid UTF-16 string length on wire: {charLength}.");
    }

    var byteLen = charLength * 2; // safe: charLength <= int.MaxValue / 2
    EnsureAvailable(byteLen);
    var chars = MemoryMarshal.Cast<byte, char>(_buffer.AsSpan(_position, byteLen));
    var value = new string(chars);
    _position += byteLen; // advance only after the string has been materialized
    return value;
}
/// <summary>
/// Markerless FastWire string read. The wire starts with an int32 header that encodes the state:
/// a positive value <c>N</c> means content (N×2 raw UTF-16 bytes follow), <c>0</c> means the empty
/// string, and a negative value means <c>null</c> (the writer emits <c>-1</c>; this reader treats
/// every negative header the same way).
/// <para>Companion writer is
/// <see cref="BinarySerializationContext{TOutput}.WriteStringUtf16Markerless"/>.</para>
/// </summary>
/// <returns>The decoded string, <see cref="string.Empty"/> for a zero header, or <c>null</c> for a
/// negative header.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string? ReadStringUtf16Markerless()
{
    var header = ReadInt32Unsafe();

    if (header <= 0)
    {
        // No payload follows a non-positive header: zero → empty, negative → null.
        return header == 0 ? string.Empty : null;
    }

    // Positive header is the char count of the UTF-16 body that follows.
    return ReadStringUtf16(header);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8(int length) public string ReadStringUtf8(int length)
{ {
@ -413,19 +452,6 @@ public static partial class AcBinaryDeserializer
return string.Empty; return string.Empty;
} }
// FastWire: length is char count, data is UTF-16 (2 bytes per char)
if (FastWire)
{
var byteLen = length * 2;
EnsureAvailable(byteLen);
var chars = MemoryMarshal.Cast<byte, char>(_buffer.AsSpan(_position, byteLen));
var value = new string(chars);
_position += byteLen;
return value;
}
EnsureAvailable(length); EnsureAvailable(length);
// WASM optimization: cache short strings to reduce allocations // WASM optimization: cache short strings to reduce allocations

View File

@ -179,6 +179,19 @@ public static partial class AcBinaryDeserializer
return; return;
} }
// FastWire markerless string-property fast-path — int32 sentinel header (-1 null / 0 empty / N>0
// content). Wire-symmetric with `WriteStringGenerated` / `WritePropertyOrSkip` String case via
// `WriteStringUtf16Markerless`. Skips the typeCode-read entirely; reader-writer pair eliminates
// 1 byte per content string in FastWire mode. Condition order: bool field-load (`FastWire`)
// first → cheap short-circuit in Compact mode (most-common case in many deployments) and
// branch-predictor-stable in FastWire mode (constant for the entire Deserialize). The
// `AccessorType == String` enum-compare (2 instructions: load + cmp) only runs when needed.
if (context.FastWire && propInfo.AccessorType == PropertyAccessorType.String)
{
propInfo.SetValue(target, context.ReadStringUtf16Markerless());
return;
}
// Read marker once — eliminates redundant PeekByte + ReadByte boundary checks. // Read marker once — eliminates redundant PeekByte + ReadByte boundary checks.
// All branches below receive the already-consumed typeCode. // All branches below receive the already-consumed typeCode.
var typeCode = context.ReadByte(); var typeCode = context.ReadByte();

View File

@ -1098,6 +1098,16 @@ public static partial class AcBinaryDeserializer
{ {
if (context.IsAtEnd) return null; if (context.IsAtEnd) return null;
// FastWire markerless string fast-path — when the dispatch target is a string, the wire is
// int32 sentinel (no marker byte). Skips the typeCode-read; companion to `WriteStringWithDispatch`
// FastWire branch (which writes via `WriteStringUtf16Markerless`). Condition order: bool field-load
// (`FastWire`) first — branch-predictor-stable; `targetType == typeof(string)` ref-equality check
// (load + cmp) only runs when FastWire is true.
if (context.FastWire && targetType == typeof(string))
{
return context.ReadStringUtf16Markerless();
}
var typeCode = context.ReadByte(); var typeCode = context.ReadByte();
// Handle tiny int first (most common case for small integers, >= 192) // Handle tiny int first (most common case for small integers, >= 192)
@ -1157,10 +1167,13 @@ public static partial class AcBinaryDeserializer
{ {
if (context.FastWire) if (context.FastWire)
{ {
// Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes] // Mode-shared marker: FastWire payload is [charLen:int32 LE][UTF-16 raw bytes].
// Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop. // Fix-int charLen (matches MemPack WriteUtf16 shape) — single 4-byte read, no VarUInt loop.
// Reached only when a StringSmall marker byte was actually read while in FastWire mode.
// SGen property-level and collection-element strings, and the runtime string fast-paths, read the
// markerless wire via `ReadStringUtf16Markerless` and never enter this `ReadStringSmall` marker dispatch.
var charLenF = context.ReadInt32Unsafe(); var charLenF = context.ReadInt32Unsafe();
return context.ReadStringUtf8(charLenF); return context.ReadStringUtf16(charLenF);
} }
// Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes] // Compact mode — H2Q6 StringSmall: [charLen:8][utf8Len:8][bytes]

View File

@ -651,6 +651,29 @@ public static partial class AcBinarySerializer
#region String Writes inline #region String Writes inline
/// <summary>
/// Markerless FastWire string write. Emits an int32 header that encodes the state, with no type-code
/// byte in front: <c>-1</c> for <c>null</c>, <c>0</c> for the empty string, and the char count
/// <c>N &gt; 0</c> for content, followed by the N×2 raw UTF-16 bytes of the string.
/// <para>Compared with the markered <see cref="WriteStringWithDispatch"/> StringSmall scheme this saves
/// 1 byte per content string; null/empty cost 3 bytes more (4-byte int32 instead of a 1-byte marker).</para>
/// <para>Companion reader is
/// <see cref="BinaryDeserializationContext{TInput}.ReadStringUtf16Markerless"/>.</para>
/// </summary>
/// <param name="value">String to write; <c>null</c> and empty are encoded by the header alone.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void WriteStringUtf16Markerless(string? value)
{
    if (value is null)
    {
        WriteRaw(-1);
        return;
    }

    if (value.Length == 0)
    {
        WriteRaw(0);
        return;
    }

    // View the string's chars as raw bytes once; its length is the UTF-16 payload size.
    var payload = MemoryMarshal.AsBytes(value.AsSpan());

    // Reserve header + payload up front so both writes below hit the same buffer.
    EnsureCapacity(4 + payload.Length);

    Unsafe.WriteUnaligned<int>(ref _buffer[_position], value.Length);
    _position += 4;

    payload.CopyTo(_buffer.AsSpan(_position, payload.Length));
    _position += payload.Length;
}
public void WriteStringUtf8(string value) public void WriteStringUtf8(string value)
{ {
if (FastWire) if (FastWire)
@ -753,18 +776,13 @@ public static partial class AcBinarySerializer
if (FastWire) if (FastWire)
{ {
// FastWire: [StringSmall marker:1][charLen:int32 LE][UTF-16 raw bytes] // FastWire markerless: int32 sentinel (-1 = null, 0 = empty, N > 0 = content + N*2 UTF-16 bytes).
// Fix-int header (no tier-dispatch, no VarUInt branch loop) — matches MemPack `WriteUtf16` // All FastWire string writes funnel through here (WriteStringGenerated → WriteString →
// shape (which emits a fix `int` length). Single Unsafe.WriteUnaligned<int> store on the // WriteStringWithDispatch + WritePropertyOrSkip String case + TryWritePrimitive String case),
// writer; symmetric ReadInt32Unsafe on the reader. // so a single change here propagates markerless wire to property + collection + dictionary +
var byteLenF = charLength * 2; // safe: charLength ≤ 0x1FFFFFFF guarantees no overflow // runtime paths. Caller (WriteString) guarantees value is non-empty content; null/empty
EnsureCapacity(7 + byteLenF); // sentinel encoding lives inside `WriteStringUtf16Markerless` for direct callers.
var fwPos = _position; WriteStringUtf16Markerless(value);
var packed = (ulong)BinaryTypeCode.StringSmall | ((ulong)(uint)charLength << 8);
Unsafe.WriteUnaligned<ulong>(ref _buffer[fwPos], packed);
_position = fwPos + 5;
MemoryMarshal.AsBytes(value.AsSpan()).CopyTo(_buffer.AsSpan(_position, byteLenF));
_position += byteLenF;
return; return;
} }

View File

@ -899,12 +899,21 @@ public static partial class AcBinarySerializer
/// <summary> /// <summary>
/// Bridge for generated writers to call the runtime WriteString. /// Bridge for generated writers to call the runtime WriteString.
/// Matches WritePropertyOrSkip String case exactly: null → PropertySkip, empty → StringEmpty. /// <para>FastWire mode: markerless wire — delegates to <see cref="BinarySerializationContext{TOutput}.WriteStringUtf16Markerless"/>
/// which handles all three states (null / empty / content) via int32 sentinel header.</para>
/// <para>Compact mode: existing markerful path — null → <c>PropertySkip</c>, empty → <c>StringEmpty</c>,
/// content → <see cref="WriteString{TOutput}"/> with marker dispatch.</para>
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void WriteStringGenerated<TOutput>(string? value, BinarySerializationContext<TOutput> context) internal static void WriteStringGenerated<TOutput>(string? value, BinarySerializationContext<TOutput> context)
where TOutput : struct, IBinaryOutputBase where TOutput : struct, IBinaryOutputBase
{ {
if (context.FastWire)
{
context.WriteStringUtf16Markerless(value);
return;
}
if (string.IsNullOrEmpty(value)) if (string.IsNullOrEmpty(value))
{ {
context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty); context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty);
@ -1951,8 +1960,15 @@ public static partial class AcBinarySerializer
return; return;
case PropertyAccessorType.String: case PropertyAccessorType.String:
{ {
// Fast path: typed getter, no boxing, no Type.GetTypeCode() call // Fast path: typed getter, no boxing, no Type.GetTypeCode() call.
// FastWire: markerless int32 sentinel via `WriteStringUtf16Markerless` — wire-symmetric
// with `WriteStringGenerated` (SGen) so cross-mode interop holds. Compact: existing markered.
string? value = prop.GetString(obj); string? value = prop.GetString(obj);
if (context.FastWire)
{
context.WriteStringUtf16Markerless(value);
return;
}
if (string.IsNullOrEmpty(value)) if (string.IsNullOrEmpty(value))
{ {
context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty); context.WriteByte(value == null ? BinaryTypeCode.PropertySkip : BinaryTypeCode.StringEmpty);