[LOADED_DOCS: 3 files, no new loads]
SIMD Utf8Transcoder.GetUtf8ByteCount + test suite. Introduced SIMD-accelerated Utf8Transcoder.GetUtf8ByteCount for efficient UTF-8 byte counting, replacing all writer-side Encoding.UTF8.GetByteCount usages. Added 29 unit tests for correctness across ASCII, Hungarian, CJK, emoji, and boundary cases. Updated benchmarks to ensure FixStr is bypassed and wire mode is selectable. Documented implementation and dead-code review in BINARY_TODO.md. No public API changes.
This commit is contained in:
parent
304a4a7bdb
commit
abee22b31a
|
|
@ -166,5 +166,4 @@ Full doctrine: `../docs/ARCHITECTURE.md#framework-vs-consumer-boundary`
|
|||
19. **Documentation layering** — write `.md` documentation at the **defining layer** (where the code lives). Higher-layer `.md` files reference the base docs (e.g. `see AyCode.Services/docs/SIGNALR/README.md`) and document only project-specific overrides or extensions. Never duplicate base-layer descriptions in consumer-level docs.
|
||||
20. **Do not re-read .md files** already in your context window. They only change if you modify them yourself (new content is already in context) or if the developer tells you they changed — in that case re-read them once.
|
||||
21. **Folder navigation** — start from the root `README.md` for solution-level navigation. When you need to understand a folder's contents or find a type/class, read the `README.md` in that folder first — it indexes the local files and sub-folders. Follow this before grepping or reading source files.
|
||||
|
||||
22. **Language Preference**: Communicate in Hungarian as requested by the user.
|
||||
|
|
|
|||
|
|
@ -47,9 +47,13 @@ public static class Program
|
|||
#else
|
||||
private static int WarmupIterations = 10000; //5000
|
||||
private static int TestIterations = 1000; //1000
|
||||
private static int BenchmarkSamples = 3;
|
||||
private static int BenchmarkSamples = 5;
|
||||
#endif
|
||||
|
||||
// Interactive settings: selected AcBinary wire mode for benchmark runs.
|
||||
// 1 = Compact, 2 = Fast
|
||||
private static WireMode SelectedWireMode = WireMode.Compact;
|
||||
|
||||
// Serializer name constants
|
||||
// Engine identifiers (used in Engine column + comparison logic)
|
||||
private const string EngineAcBinary = "AcBinary";
|
||||
|
|
@ -480,21 +484,22 @@ public static class Program
|
|||
private static List<ISerializerBenchmark> CreateSerializers(TestDataSet testData, string serializerMode)
|
||||
{
|
||||
// FastestByte mode — focused 1:1 comparison on the "fastest Byte[]" path.
|
||||
// THREE benchmarks: AcBinary FastMode Byte[] (Compact UTF-8) + AcBinary FastMode Byte[]
|
||||
// (WireMode.Fast = UTF-16 raw memcpy) + MemoryPack Byte[]. Shows BOTH sides of AcBinary's
|
||||
// positioning vs MemPack:
|
||||
// - Compact: smallest wire, UTF-8 encode/decode CPU cost
|
||||
// - Fast (UTF-16 raw): comparable wire to MemPack, no encoding cost
|
||||
// TWO benchmarks: AcBinary FastMode Byte[] (wire mode = SelectedWireMode; Compact UTF-8 by default) + MemoryPack Byte[].
|
||||
// - Compact: smallest wire, UTF-8 encode/decode CPU cost vs MemPack head-to-head.
|
||||
// Tight optimization-iteration loop: ~30-45 sec vs full 2-3 min.
|
||||
//
|
||||
// FastWire row (UTF-16 raw memcpy) commented out for the current optimization sprint —
|
||||
// we are tuning Compact mode against MemPack directly; FastWire was used as a noise-floor
|
||||
// reference earlier. Re-enable when revisiting Fast wire-mode performance.
|
||||
if (serializerMode == "fastestbyte")
|
||||
{
|
||||
var fastWireOptions = AcBinarySerializerOptions.FastMode;
|
||||
fastWireOptions.WireMode = WireMode.Fast;
|
||||
var fastestByteOptions = AcBinarySerializerOptions.FastMode;
|
||||
fastestByteOptions.WireMode = SelectedWireMode;
|
||||
|
||||
return new List<ISerializerBenchmark>
|
||||
{
|
||||
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, "FastMode"),
|
||||
new AcBinaryBenchmark(testData.Order, fastWireOptions, "FastMode (FastWire)"),
|
||||
new AcBinaryBenchmark(testData.Order, fastestByteOptions, "FastMode"),
|
||||
//new AcBinaryBenchmark(testData.Order, fastWireOptions, "FastMode (FastWire)"),
|
||||
new MemoryPackBenchmark(testData.Order, "Default"),
|
||||
};
|
||||
}
|
||||
|
|
@ -513,6 +518,7 @@ public static class Program
|
|||
// wire chunk AND kernel transfer unit; change ONLY this line when tuning.
|
||||
var binaryFastModePipeChunkOnly = AcBinarySerializerOptions.FastMode;
|
||||
binaryFastModePipeChunkOnly.BufferWriterChunkSize = PipeChunkSize;
|
||||
binaryFastModePipeChunkOnly.WireMode = SelectedWireMode;
|
||||
|
||||
return new List<ISerializerBenchmark>
|
||||
{
|
||||
|
|
@ -547,12 +553,18 @@ public static class Program
|
|||
|
||||
var binaryNoInternOption = AcBinarySerializerOptions.Default;
|
||||
binaryNoInternOption.UseStringInterning = StringInterningMode.None;
|
||||
binaryNoInternOption.WireMode = SelectedWireMode;
|
||||
|
||||
var binaryDefaultNoSgenOption = AcBinarySerializerOptions.Default;
|
||||
binaryDefaultNoSgenOption.UseGeneratedCode = false;
|
||||
binaryDefaultNoSgenOption.WireMode = SelectedWireMode;
|
||||
|
||||
var binaryFastModeNoSgenOption = AcBinarySerializerOptions.FastMode;
|
||||
binaryFastModeNoSgenOption.UseGeneratedCode = false;
|
||||
binaryFastModeNoSgenOption.WireMode = SelectedWireMode;
|
||||
|
||||
var binaryFastModeOption = AcBinarySerializerOptions.FastMode;
|
||||
binaryFastModeOption.WireMode = SelectedWireMode;
|
||||
|
||||
// BufWr new — 4 KB chunk size for the FRESH ArrayBufferWriter scenario. The chunkSize here drives
|
||||
// the serializer's GetSpan(N) request → the ArrayBufferWriter's internal allocation per call.
|
||||
|
|
@ -561,16 +573,19 @@ public static class Program
|
|||
// vs syscall count).
|
||||
var binaryFastModeBufWrChunk = AcBinarySerializerOptions.FastMode;
|
||||
binaryFastModeBufWrChunk.BufferWriterChunkSize = PipeChunkSize;
|
||||
binaryFastModeBufWrChunk.WireMode = SelectedWireMode;
|
||||
|
||||
// In-memory Pipe variant — same 4 KB chunkSize as the AsyncPipe mode, no kernel-pipe alignment
|
||||
// concern (managed slabs are not page-aligned anyway). Drives SerializeChunkedFramed via the in-memory
|
||||
// System.IO.Pipelines.Pipe (zero-copy slab handoff between producer and drain task).
|
||||
var binaryFastModePipeChunkInMem = AcBinarySerializerOptions.FastMode;
|
||||
binaryFastModePipeChunkInMem.BufferWriterChunkSize = PipeChunkSize;
|
||||
binaryFastModePipeChunkInMem.WireMode = SelectedWireMode;
|
||||
|
||||
var defaultOptions = AcBinarySerializerOptions.Default;
|
||||
defaultOptions.UseStringInterning = StringInterningMode.None;
|
||||
defaultOptions.ReferenceHandling = ReferenceHandlingMode.OnlyId;
|
||||
defaultOptions.WireMode = SelectedWireMode;
|
||||
|
||||
return new List<ISerializerBenchmark>
|
||||
{
|
||||
|
|
@ -578,7 +593,7 @@ public static class Program
|
|||
// AcBinary — Byte[] API (uncomment to compare option presets side-by-side)
|
||||
// ============================================================
|
||||
// Fastest Byte[] — SGen path (UseGeneratedCode=true, default).
|
||||
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, "FastMode"),
|
||||
new AcBinaryBenchmark(testData.Order, binaryFastModeOption, "FastMode"),
|
||||
// Fastest Byte[] — Runtime path (UseGeneratedCode=false). Same wire/options, no source-generated dispatch.
|
||||
// Always paired with the SGen variant so every layer can compare the SGen speed-up apples-to-apples.
|
||||
// NativeAOT-safe: AcSerializerCommon.Create*Getter/Setter falls back to reflection-based delegates
|
||||
|
|
@ -594,7 +609,7 @@ public static class Program
|
|||
//new AcBinaryBenchmark(testData.Order, binaryNoInternOption, "NoIntern"),
|
||||
|
||||
// AcBinary via IBufferWriter (reused ArrayBufferWriter — long-running service / batch scenario)
|
||||
new AcBinaryBufferWriterBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, "FastMode"),
|
||||
new AcBinaryBufferWriterBenchmark(testData.Order, binaryFastModeOption, "FastMode"),
|
||||
|
||||
// AcBinary via IBufferWriter (FRESH ArrayBufferWriter per call — one-shot scenario).
|
||||
// 4 KB chunk size from binaryFastModeBufWrChunk — minimises the per-call ArrayBufferWriter
|
||||
|
|
@ -859,7 +874,7 @@ public static class Program
|
|||
System.Console.WriteLine(" [A] All layers");
|
||||
System.Console.WriteLine(" [F] FastestByte — AcBinary FastMode Byte[] vs MemoryPack Byte[] only (tight optimization loop)");
|
||||
System.Console.WriteLine(" [P] AsyncPipe — streaming I/O isolation (only AsyncPipe, all test data)");
|
||||
System.Console.WriteLine($" [S] Settings — modify Warmup ({WarmupIterations}) / Iterations ({TestIterations}) / Samples ({BenchmarkSamples})");
|
||||
System.Console.WriteLine($" [S] Settings — Iteration / WireMode (current: {SelectedWireMode})");
|
||||
System.Console.WriteLine(" [Q] Quit");
|
||||
System.Console.Write("\nSelection: ");
|
||||
|
||||
|
|
@ -889,10 +904,42 @@ public static class Program
|
|||
/// Returns to the caller (which re-displays the main menu).
|
||||
/// </summary>
|
||||
private static void ShowSettingsMenu()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
System.Console.WriteLine();
|
||||
System.Console.WriteLine("─────────────────────────────────────────────");
|
||||
System.Console.WriteLine("Settings — press Enter to keep current value");
|
||||
System.Console.WriteLine("Settings");
|
||||
System.Console.WriteLine("─────────────────────────────────────────────");
|
||||
System.Console.WriteLine(" [1] Iteration — Warmup / Iterations / Samples");
|
||||
System.Console.WriteLine($" [2] WireMode — current: {SelectedWireMode}");
|
||||
System.Console.WriteLine(" [B] Back");
|
||||
System.Console.Write("\nSelection: ");
|
||||
|
||||
var key = System.Console.ReadKey(intercept: false).KeyChar;
|
||||
System.Console.WriteLine();
|
||||
|
||||
switch (char.ToLower(key))
|
||||
{
|
||||
case '1':
|
||||
ShowIterationSettingsMenu();
|
||||
break;
|
||||
case '2':
|
||||
ShowWireModeSettingsMenu();
|
||||
break;
|
||||
case 'b':
|
||||
return;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void ShowIterationSettingsMenu()
|
||||
{
|
||||
System.Console.WriteLine();
|
||||
System.Console.WriteLine("─────────────────────────────────────────────");
|
||||
System.Console.WriteLine("Iteration settings — press Enter to keep current value");
|
||||
System.Console.WriteLine("─────────────────────────────────────────────");
|
||||
System.Console.WriteLine();
|
||||
|
||||
|
|
@ -901,7 +948,42 @@ public static class Program
|
|||
BenchmarkSamples = PromptInt("BenchmarkSamples", BenchmarkSamples, min: 1);
|
||||
|
||||
System.Console.WriteLine();
|
||||
System.Console.WriteLine($"✓ Settings updated: Warmup={WarmupIterations} | Iterations={TestIterations} | Samples={BenchmarkSamples}");
|
||||
System.Console.WriteLine($"✓ Iteration settings updated: Warmup={WarmupIterations} | Iterations={TestIterations} | Samples={BenchmarkSamples}");
|
||||
}
|
||||
|
||||
/// <summary>
/// Interactive sub-menu that lets the user pick the wire mode stored in <c>SelectedWireMode</c>.
/// The menu is redrawn until [1] Compact, [2] Fast or [B] Back is pressed; any other key
/// simply shows the menu again.
/// </summary>
private static void ShowWireModeSettingsMenu()
{
    const string separator = "─────────────────────────────────────────────";

    for (;;)
    {
        System.Console.WriteLine();
        System.Console.WriteLine(separator);
        System.Console.WriteLine("WireMode settings");
        System.Console.WriteLine(separator);
        System.Console.WriteLine($"Current: {SelectedWireMode}");
        System.Console.WriteLine(" [1] Compact");
        System.Console.WriteLine(" [2] Fast");
        System.Console.WriteLine(" [B] Back");
        System.Console.Write("\nSelection: ");

        // The pressed key is echoed by ReadKey; the empty WriteLine moves output to a fresh line.
        var choice = char.ToLower(System.Console.ReadKey(intercept: false).KeyChar);
        System.Console.WriteLine();

        if (choice == 'b')
        {
            return;
        }

        if (choice == '1')
        {
            SelectedWireMode = WireMode.Compact;
            System.Console.WriteLine("✓ WireMode set to Compact");
            return;
        }

        if (choice == '2')
        {
            SelectedWireMode = WireMode.Fast;
            System.Console.WriteLine("✓ WireMode set to Fast");
            return;
        }

        // Unrecognised key: loop around and redraw the menu.
    }
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
|||
|
|
@ -226,6 +226,214 @@ public class Utf8TranscoderTests
|
|||
AssertRoundTrip("😀");
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// GetUtf8ByteCount — content classes
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Pure ASCII sentence: the counter must agree with the BCL reference.
[TestMethod]
public void GetUtf8ByteCount_AsciiOnly_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("Hello, World! Plain ASCII text.");
|
||||
|
||||
// 7 chars is one short of Vector128<ushort>.Count (8), so only the scalar tail can handle it.
[TestMethod]
public void GetUtf8ByteCount_AsciiExactly7Bytes_MatchesBcl()
{
    var input = new string('a', 7);
    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// 8 chars == Vector128<ushort>.Count: the smallest input that can enter the Vector128 path.
[TestMethod]
public void GetUtf8ByteCount_AsciiExactly8Bytes_MatchesBcl()
{
    var input = new string('a', 8);
    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// 16 chars == Vector256<ushort>.Count: the smallest input that can enter the Vector256 path.
[TestMethod]
public void GetUtf8ByteCount_AsciiExactly16Bytes_MatchesBcl()
{
    var input = new string('a', 16);
    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// 32 chars == Vector512<ushort>.Count: enters the Vector512 path on hosts with AVX-512BW.
[TestMethod]
public void GetUtf8ByteCount_AsciiExactly32Bytes_MatchesBcl()
{
    var input = new string('a', 32);
    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// Long ASCII run (500 chars) so the vectorised loops iterate many times before the tail.
[TestMethod]
public void GetUtf8ByteCount_AsciiVeryLong_500Chars_MatchesBcl()
{
    var input = new string('z', 500);
    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// Short Hungarian word containing one accented (2-byte) character.
[TestMethod]
public void GetUtf8ByteCount_HungarianShort_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("Termék");
|
||||
|
||||
// Hungarian phrase mixing ASCII letters with accented (2-byte) characters.
[TestMethod]
public void GetUtf8ByteCount_HungarianMedium_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("árvíztűrő tükörfúrógép");
|
||||
|
||||
// The Hungarian phrase repeated 20 times: a long input with accented chars at many offsets.
[TestMethod]
public void GetUtf8ByteCount_HungarianLong_MatchesBcl()
{
    var repeated = Enumerable.Repeat("árvíztűrő tükörfúrógép ", 20);
    var input = string.Concat(repeated);

    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// Chinese, Japanese and Korean BMP text (3-byte characters) separated by ASCII spaces.
[TestMethod]
public void GetUtf8ByteCount_CjkBmp_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("你好世界 こんにちは 안녕하세요");
|
||||
|
||||
// The CJK phrase repeated 30 times: a long input dominated by 3-byte characters.
[TestMethod]
public void GetUtf8ByteCount_CjkBmpLong_MatchesBcl()
{
    var repeated = Enumerable.Repeat("你好世界 ", 30);
    var input = string.Concat(repeated);

    AssertGetUtf8ByteCountMatchesBcl(input);
}
|
||||
|
||||
// Five emoji; each one is a UTF-16 surrogate pair (2 chars) that encodes to 4 UTF-8 bytes.
[TestMethod]
public void GetUtf8ByteCount_SupplementaryPlane_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("😀😁😂🎉🌟");
|
||||
|
||||
// One string containing ASCII, accented Latin, CJK and an emoji.
[TestMethod]
public void GetUtf8ByteCount_MixedAllClasses_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("ASCII Magyar:árvíz CJK:你好 Emoji:😀");
|
||||
|
||||
// Builds 50 numbered segments mixing ASCII, accented Latin and CJK, then checks BCL parity
// on the concatenated result.
[TestMethod]
public void GetUtf8ByteCount_LongMixed_MatchesBcl()
{
    var text = new StringBuilder();
    var run = 0;

    while (run < 50)
    {
        text.Append("ASCII run-");
        text.Append(run);
        text.Append(" Magyar:árvíz CJK:你好 ");
        run++;
    }

    AssertGetUtf8ByteCountMatchesBcl(text.ToString());
}
|
||||
|
||||
// An empty span must yield a byte count of zero.
[TestMethod]
public void GetUtf8ByteCount_Empty_ReturnsZero()
{
    var byteCount = Utf8Transcoder.GetUtf8ByteCount(ReadOnlySpan<char>.Empty);

    Assert.AreEqual(0, byteCount);
}
|
||||
|
||||
// Smallest non-empty input: a single ASCII character.
[TestMethod]
public void GetUtf8ByteCount_SingleAsciiChar_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("X");
|
||||
|
||||
// A single accented Latin character (2-byte UTF-8 sequence).
[TestMethod]
public void GetUtf8ByteCount_SingleHungarianChar_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("é");
|
||||
|
||||
// A single CJK BMP character (3-byte UTF-8 sequence).
[TestMethod]
public void GetUtf8ByteCount_SingleCjkChar_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("好");
|
||||
|
||||
// A single emoji: one surrogate pair, which encodes to exactly 4 UTF-8 bytes.
[TestMethod]
public void GetUtf8ByteCount_SingleEmoji_MatchesBcl()
    => AssertGetUtf8ByteCountMatchesBcl("😀");
|
||||
|
||||
// Slides an ASCII prefix of 0..64 chars in front of a Hungarian suffix so the transition from
// the ASCII region to accented (2-byte) characters lands at every offset in that range.
[TestMethod]
public void GetUtf8ByteCount_BoundaryAsciiToHungarian_MatchesBcl()
{
    const string suffix = "árvíz";

    foreach (var asciiLen in Enumerable.Range(0, 65))
    {
        var input = new string('a', asciiLen) + suffix;

        Assert.AreEqual(
            Utf8.GetByteCount(input),
            Utf8Transcoder.GetUtf8ByteCount(input.AsSpan()),
            $"asciiLen={asciiLen}");
    }
}
|
||||
|
||||
// Slides an ASCII prefix of 0..64 chars in front of a CJK suffix to stress the position of
// 3-byte sequences relative to the preceding ASCII run.
[TestMethod]
public void GetUtf8ByteCount_BoundaryAsciiToCjk_MatchesBcl()
{
    const string suffix = "你好世界";

    foreach (var asciiLen in Enumerable.Range(0, 65))
    {
        var input = new string('a', asciiLen) + suffix;

        Assert.AreEqual(
            Utf8.GetByteCount(input),
            Utf8Transcoder.GetUtf8ByteCount(input.AsSpan()),
            $"asciiLen={asciiLen}");
    }
}
|
||||
|
||||
// CRITICAL case: a surrogate pair may be split across SIMD chunks (high surrogate in chunk N,
// low surrogate in chunk N+1). Whatever the split, the pair must still total 4 bytes.
// The ASCII prefix of 0..64 chars moves the emoji across every offset in that range.
[TestMethod]
public void GetUtf8ByteCount_BoundaryAsciiToEmoji_MatchesBcl()
{
    const string suffix = "😀";

    foreach (var asciiLen in Enumerable.Range(0, 65))
    {
        var input = new string('a', asciiLen) + suffix;

        Assert.AreEqual(
            Utf8.GetByteCount(input),
            Utf8Transcoder.GetUtf8ByteCount(input.AsSpan()),
            $"asciiLen={asciiLen}");
    }
}
|
||||
|
||||
// Surrogate-pair split stress: ten consecutive emoji shifted by an ASCII prefix of 0..32 chars,
// so the pairs fall at many different offsets.
[TestMethod]
public void GetUtf8ByteCount_MultipleEmojiBoundary_MatchesBcl()
{
    const string emojiRun = "😀😁😂🎉🌟😀😁😂🎉🌟";

    foreach (var prefixLen in Enumerable.Range(0, 33))
    {
        var input = new string('a', prefixLen) + emojiRun;

        Assert.AreEqual(
            Utf8.GetByteCount(input),
            Utf8Transcoder.GetUtf8ByteCount(input.AsSpan()),
            $"prefixLen={prefixLen}");
    }
}
|
||||
|
||||
// Counter/encoder agreement: for every sample, GetUtf8ByteCount must return exactly the number
// of bytes EncodeUtf8SinglePass writes. Two-pass [VarUInt][bytes] writes rely on this, because
// the length prefix is emitted from the count before the bytes are produced.
[TestMethod]
public void GetUtf8ByteCount_AgreesWithEncodeUtf8SinglePass_AllContentClasses()
{
    string[] inputs =
    {
        "Hello",
        "árvíztűrő tükörfúrógép",
        "你好世界",
        "😀🎉🌟",
        "ASCII Magyar:árvíz CJK:你好 Emoji:😀",
        new string('z', 500),
        string.Concat(Enumerable.Repeat("árvíztűrő tükörfúrógép ", 20))
    };

    foreach (var text in inputs)
    {
        var counted = Utf8Transcoder.GetUtf8ByteCount(text.AsSpan());

        // Oversized scratch buffer (4 bytes per UTF-16 char) so the encoder never runs out of room.
        var buffer = new byte[text.Length * 4];
        var written = Utf8Transcoder.EncodeUtf8SinglePass(text.AsSpan(), buffer.AsSpan());

        // Only the first 20 chars of the sample are shown in the failure message.
        var preview = text.Substring(0, Math.Min(20, text.Length));
        Assert.AreEqual(written, counted,
            $"GetUtf8ByteCount disagrees with EncodeUtf8SinglePass for [{preview}...]");
    }
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
// Decoder-side cross-check: BCL Encoding.UTF8.GetString reference
|
||||
// ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -291,6 +499,19 @@ public class Utf8TranscoderTests
|
|||
Assert.AreEqual(original, decoded, $"Decoder output mismatch{ctx}");
|
||||
}
|
||||
|
||||
/// <summary>
/// BCL parity check: asserts that <see cref="Utf8Transcoder.GetUtf8ByteCount"/> reports the same
/// byte count for <paramref name="original"/> as <c>Utf8.GetByteCount</c> does. A mismatch means
/// the SIMD counter produced a wrong length, which would corrupt any length prefix derived from it.
/// </summary>
/// <param name="original">Input text whose UTF-8 byte length is compared.</param>
private static void AssertGetUtf8ByteCountMatchesBcl(string original)
{
    var actual = Utf8Transcoder.GetUtf8ByteCount(original.AsSpan());

    Assert.AreEqual(
        Utf8.GetByteCount(original),
        actual,
        $"GetUtf8ByteCount mismatch for input length {original.Length}");
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that DecodeUtf8SinglePass produces output identical to <see cref="Encoding.UTF8.GetString"/>
|
||||
/// for the same byte input. Catches silent decoder bugs that pass the round-trip test
|
||||
|
|
|
|||
|
|
@ -1,9 +1,22 @@
|
|||
using AyCode.Core.Serializers.Binaries;
|
||||
using System.Collections;
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace AyCode.Core.Tests.TestModels;
|
||||
|
||||
public static class BenchmarkTestDataProvider
|
||||
{
|
||||
private const int FixStrMaxLength = 31;
|
||||
private const string LongStringSuffix = "__Benchmárk_Long_String_Söffix__";
|
||||
|
||||
/// <summary>
/// Equality comparer that treats two objects as equal only when they are the very same instance,
/// regardless of any <c>Equals</c>/<c>GetHashCode</c> overrides on the objects themselves.
/// </summary>
private sealed class ReferenceComparer : IEqualityComparer<object>
{
    /// <summary>Shared instance; the comparer has no state.</summary>
    public static readonly ReferenceComparer Instance = new();

    /// <summary>Returns <c>true</c> when both arguments are the same reference (or both null).</summary>
    /// <remarks>The <c>new</c> modifier marks the intentional hiding of the static
    /// <c>object.Equals(object, object)</c>.</remarks>
    public new bool Equals(object? x, object? y)
    {
        return ReferenceEquals(x, y);
    }

    /// <summary>Returns the identity-based hash code from <see cref="RuntimeHelpers.GetHashCode(object)"/>.</summary>
    public int GetHashCode(object obj)
    {
        return RuntimeHelpers.GetHashCode(obj);
    }
}
|
||||
|
||||
public static List<TestDataSet> CreateTestDataSets(bool resetId = true)
|
||||
{
|
||||
return new List<TestDataSet>
|
||||
|
|
@ -45,6 +58,8 @@ public static class BenchmarkTestDataProvider
|
|||
sharedTag: sharedTag,
|
||||
sharedUser: sharedUser);
|
||||
|
||||
EnsureAllStringsBypassFixStr(order);
|
||||
|
||||
ClearDeepLevelRefs(order);
|
||||
|
||||
return new TestDataSet("Small (2x2x2x2)", order, iidRefPercent: 20);
|
||||
|
|
@ -77,6 +92,8 @@ public static class BenchmarkTestDataProvider
|
|||
sharedMetadata: sharedMeta,
|
||||
sharedPreferences: sharedPreferences);
|
||||
|
||||
EnsureAllStringsBypassFixStr(order);
|
||||
|
||||
ClearDeepLevelRefs(order);
|
||||
|
||||
return new TestDataSet("Medium (3x3x3x4)", order, iidRefPercent: 20);
|
||||
|
|
@ -107,6 +124,8 @@ public static class BenchmarkTestDataProvider
|
|||
sharedUser: sharedUser,
|
||||
sharedPreferences: sharedPreferences);
|
||||
|
||||
EnsureAllStringsBypassFixStr(order);
|
||||
|
||||
ClearDeepLevelRefs(order);
|
||||
|
||||
return new TestDataSet("Large (5x5x5x10)", order, iidRefPercent: 20);
|
||||
|
|
@ -153,6 +172,8 @@ public static class BenchmarkTestDataProvider
|
|||
}
|
||||
}
|
||||
|
||||
EnsureAllStringsBypassFixStr(order);
|
||||
|
||||
ClearDeepLevelRefs(order);
|
||||
|
||||
return new TestDataSet("Repeated Strings (10 items)", order, iidRefPercent: 20);
|
||||
|
|
@ -185,6 +206,8 @@ public static class BenchmarkTestDataProvider
|
|||
sharedPreferences: sharedPreferences,
|
||||
sharedCategory: sharedCategory);
|
||||
|
||||
EnsureAllStringsBypassFixStr(order);
|
||||
|
||||
ClearDeepLevelRefs(order);
|
||||
|
||||
return new TestDataSet("Deep Nested (2x4x4x8)", order, iidRefPercent: 20);
|
||||
|
|
@ -218,6 +241,65 @@ public static class BenchmarkTestDataProvider
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Walks the object graph reachable from <paramref name="root"/> and passes every readable and
/// writable public instance <see cref="string"/> property through <see cref="ToLongString"/>,
/// storing the result back on the owning object.
/// </summary>
/// <remarks>
/// <para>The traversal is iterative (explicit stack) and tracks visited objects by reference, so
/// shared instances and cycles are processed only once.</para>
/// <para>Non-string <see cref="IEnumerable"/> objects are expanded element by element and their own
/// properties are not inspected. String elements stored directly in a collection are left
/// unchanged, because only properties are rewritten.</para>
/// <para>Value-type properties are not descended into. Indexer properties are skipped because they
/// cannot be read without index arguments.</para>
/// </remarks>
/// <param name="root">Root of the graph to rewrite; <c>null</c> is a no-op.</param>
private static void EnsureAllStringsBypassFixStr(object? root)
{
    if (root == null) return;

    var visited = new HashSet<object>(ReferenceComparer.Instance);
    var stack = new Stack<object>();
    stack.Push(root);

    while (stack.Count > 0)
    {
        var current = stack.Pop();
        if (!visited.Add(current)) continue;

        // Collections: queue the elements and do not reflect over the collection object itself.
        if (current is IEnumerable enumerable && current is not string)
        {
            foreach (var item in enumerable)
            {
                if (item != null)
                    stack.Push(item);
            }
            continue;
        }

        var type = current.GetType();
        foreach (var property in type.GetProperties(BindingFlags.Instance | BindingFlags.Public))
        {
            if (!property.CanRead) continue;

            // Indexers are returned by GetProperties too; GetValue(current) without index
            // arguments would throw TargetParameterCountException for them.
            if (property.GetIndexParameters().Length != 0) continue;

            if (property.PropertyType == typeof(string))
            {
                // Read-only string properties cannot be rewritten.
                if (!property.CanWrite) continue;

                var value = (string?)property.GetValue(current);
                property.SetValue(current, ToLongString(value));
                continue;
            }

            // Value types (including enums) are not traversed.
            if (property.PropertyType.IsValueType || property.PropertyType.IsEnum)
                continue;

            var child = property.GetValue(current);
            if (child != null)
                stack.Push(child);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Returns a string longer than <c>FixStrMaxLength</c> characters.
/// </summary>
/// <remarks>
/// Null or empty input becomes a fixed placeholder plus <c>LongStringSuffix</c>. Input already
/// longer than <c>FixStrMaxLength</c> is returned unchanged. Anything else gets
/// <c>LongStringSuffix</c> appended.
/// </remarks>
/// <param name="value">Original property value; may be <c>null</c>.</param>
/// <returns>A non-null string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ToLongString(string? value) => value switch
{
    null or "" => "Benchmark_String_Value" + LongStringSuffix,
    { Length: > FixStrMaxLength } => value,
    _ => value + LongStringSuffix,
};
|
||||
}
|
||||
|
||||
public sealed class TestDataSet
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public static partial class AcBinarySerializer
|
|||
|
||||
foreach (var (stringValue, properties) in analysis)
|
||||
{
|
||||
var byteLength = Encoding.UTF8.GetByteCount(stringValue);
|
||||
var byteLength = Utf8Transcoder.GetUtf8ByteCount(stringValue.AsSpan());
|
||||
foreach (var (propPath, count) in properties)
|
||||
{
|
||||
if (!propertyStats.TryGetValue(propPath, out var list))
|
||||
|
|
|
|||
|
|
@ -188,6 +188,181 @@ internal static class Utf8Transcoder
|
|||
return dstIdx;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Counts the UTF-8 byte length produced by encoding the given UTF-16 char span.
|
||||
/// Symmetric encode-side helper to <see cref="CountUtf8Chars"/>; the value returned equals
|
||||
/// the <c>bytesWritten</c> that <see cref="EncodeUtf8SinglePass"/> would produce.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Trusted-input — assumes well-formed UTF-16 (every high surrogate paired with a low surrogate),
|
||||
/// matching <see cref="EncodeUtf8SinglePass"/>'s contract. Bypasses
|
||||
/// <see cref="System.Text.Encoding.UTF8"/>.GetByteCount virtual-dispatch + encoder-fallback overhead.
|
||||
///
|
||||
/// <para>Layered SIMD: Vector512 (32 chars/iter) on AVX-512BW hosts → Vector256 (16 chars/iter)
|
||||
/// on AVX2 hosts → Vector128 (8 chars/iter) on Apple Silicon NEON / WASM SIMD / SSE2 → scalar tail.
|
||||
/// JIT/AOT path-selection via <c>Avx512BW.IsSupported</c> / <c>Vector{N}.IsHardwareAccelerated</c>
|
||||
/// <c>[Intrinsic]</c> booleans (constant-folded dead branches per host).</para>
|
||||
///
|
||||
/// <para>Per-char UTF-8 byte contribution:</para>
|
||||
/// <list type="bullet">
|
||||
/// <item><c>c < 0x80</c> → 1 byte (ASCII)</item>
|
||||
/// <item><c>0x80 ≤ c < 0x800</c> → 2 bytes (Latin extended, Cyrillic, Greek, Hebrew, Arabic)</item>
|
||||
/// <item><c>0x800 ≤ c < 0xD800</c> or <c>c ≥ 0xE000</c> → 3 bytes (CJK BMP, other BMP)</item>
|
||||
/// <item><c>0xD800 ≤ c < 0xDC00</c> (high surrogate) → 4 bytes (whole pair encoded here)</item>
|
||||
/// <item><c>0xDC00 ≤ c < 0xE000</c> (low surrogate) → 0 bytes (absorbed by paired high surrogate)</item>
|
||||
/// </list>
|
||||
///
|
||||
/// <para>SIMD per-block: 5 popcount-on-threshold-mask operations
|
||||
/// (< 0x80, < 0x800, < 0xD800, < 0xDC00, < 0xE000). Closed-form aggregation:
|
||||
/// <c>bytes = 3*N - ascii - c_lt_0x800 + highSur - 3*lowSur</c>
|
||||
/// where <c>ascii = popcount(c < 0x80)</c>,
|
||||
/// <c>highSur = popcount(c < 0xDC00) - popcount(c < 0xD800)</c>,
|
||||
/// <c>lowSur = popcount(c < 0xE000) - popcount(c < 0xDC00)</c>.</para>
|
||||
///
|
||||
/// <para>Both <c>highSur</c> and <c>lowSur</c> must be counted independently — feature-equivalent
|
||||
/// to the per-char model (high → 4 bytes, low → 0 bytes). A natural-looking shortcut
|
||||
/// (<c>lowSur == highSur</c> for well-formed UTF-16) is FALSE within a single SIMD chunk when
|
||||
/// a surrogate pair straddles the chunk boundary; over the whole string the counts equalize
|
||||
/// but per-block they don't. Across-the-boundary correctness: a high surrogate counted in
|
||||
/// chunk N contributes 4 bytes there; its low surrogate (in chunk N+1) contributes 0 bytes —
|
||||
/// total 4 bytes per pair regardless of where the boundary falls.</para>
|
||||
///
|
||||
/// <para>Pairs with <see cref="EncodeUtf8SinglePass"/> for two-pass [VarUInt][bytes] writes in
|
||||
/// cold-fallback paths (e.g. <c>WriteFixStrDirect</c>'s non-ASCII fallback in
|
||||
/// <c>BinarySerializationContext</c>).</para>
|
||||
/// </remarks>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
internal static int GetUtf8ByteCount(ReadOnlySpan<char> src)
|
||||
{
|
||||
var byteCount = 0;
|
||||
var i = 0;
|
||||
var n = src.Length;
|
||||
ref ushort srcRef = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(src));
|
||||
|
||||
// SIMD path 1: Vector512 (32 chars/iter) on AVX-512BW hosts
|
||||
if (Avx512BW.IsSupported && n >= Vector512<ushort>.Count)
|
||||
{
|
||||
var v_0x80 = Vector512.Create((ushort)0x80);
|
||||
var v_0x800 = Vector512.Create((ushort)0x800);
|
||||
var v_0xD800 = Vector512.Create((ushort)0xD800);
|
||||
var v_0xDC00 = Vector512.Create((ushort)0xDC00);
|
||||
var v_0xE000 = Vector512.Create((ushort)0xE000);
|
||||
|
||||
do
|
||||
{
|
||||
var v = Vector512.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector512.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector512.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector512.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector512.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector512.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
||||
var highSur = c_lt_0xDC00 - c_lt_0xD800;
|
||||
var lowSur = c_lt_0xE000 - c_lt_0xDC00;
|
||||
byteCount += 3 * Vector512<ushort>.Count - c_lt_0x80 - c_lt_0x800 + highSur - 3 * lowSur;
|
||||
|
||||
i += Vector512<ushort>.Count;
|
||||
} while (n - i >= Vector512<ushort>.Count);
|
||||
}
|
||||
|
||||
// SIMD path 2: Vector256 (16 chars/iter) on AVX2 hosts; also handles AVX-512 tail < 32 chars
|
||||
if (Vector256.IsHardwareAccelerated && n - i >= Vector256<ushort>.Count)
|
||||
{
|
||||
var v_0x80 = Vector256.Create((ushort)0x80);
|
||||
var v_0x800 = Vector256.Create((ushort)0x800);
|
||||
var v_0xD800 = Vector256.Create((ushort)0xD800);
|
||||
var v_0xDC00 = Vector256.Create((ushort)0xDC00);
|
||||
var v_0xE000 = Vector256.Create((ushort)0xE000);
|
||||
|
||||
do
|
||||
{
|
||||
var v = Vector256.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector256.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector256.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector256.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector256.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector256.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
||||
var highSur = c_lt_0xDC00 - c_lt_0xD800;
|
||||
var lowSur = c_lt_0xE000 - c_lt_0xDC00;
|
||||
byteCount += 3 * Vector256<ushort>.Count - c_lt_0x80 - c_lt_0x800 + highSur - 3 * lowSur;
|
||||
|
||||
i += Vector256<ushort>.Count;
|
||||
} while (n - i >= Vector256<ushort>.Count);
|
||||
}
|
||||
|
||||
// SIMD path 3: Vector128 (8 chars/iter) on Apple Silicon NEON, WASM SIMD, legacy SSE2;
|
||||
// also handles tail < 16 from higher tiers. Cross-platform via Vector128.IsHardwareAccelerated.
|
||||
if (Vector128.IsHardwareAccelerated && n - i >= Vector128<ushort>.Count)
|
||||
{
|
||||
var v_0x80 = Vector128.Create((ushort)0x80);
|
||||
var v_0x800 = Vector128.Create((ushort)0x800);
|
||||
var v_0xD800 = Vector128.Create((ushort)0xD800);
|
||||
var v_0xDC00 = Vector128.Create((ushort)0xDC00);
|
||||
var v_0xE000 = Vector128.Create((ushort)0xE000);
|
||||
|
||||
do
|
||||
{
|
||||
var v = Vector128.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector128.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector128.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector128.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector128.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector128.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
||||
var highSur = c_lt_0xDC00 - c_lt_0xD800;
|
||||
var lowSur = c_lt_0xE000 - c_lt_0xDC00;
|
||||
byteCount += 3 * Vector128<ushort>.Count - c_lt_0x80 - c_lt_0x800 + highSur - 3 * lowSur;
|
||||
|
||||
i += Vector128<ushort>.Count;
|
||||
} while (n - i >= Vector128<ushort>.Count);
|
||||
}
|
||||
|
||||
// Scalar tail (and fallback for non-SIMD hardware).
|
||||
// CRITICAL: must use the SAME per-char accounting model as the SIMD path so that surrogate
|
||||
// pairs split across a SIMD/scalar boundary count correctly. The SIMD path counts each char
|
||||
// independently — high surrogate → 4 bytes, low surrogate → 0 bytes. The scalar tail must
|
||||
// do the same (i += 1 per char, NOT i += 2 on high surrogate). If the scalar tail
|
||||
// double-consumed surrogate pairs (i += 2 on high), a high surrogate landing in the last
|
||||
// SIMD chunk would be counted there as 4 bytes, then its low surrogate in the scalar tail
|
||||
// would re-trigger the surrogate branch and add 4 more bytes (with i += 2 advancing past
|
||||
// an unrelated next char). Net per split pair: 4 spurious bytes for the low surrogate,
// and the skipped next char (if any) is never counted at all.
|
||||
while (i < n)
|
||||
{
|
||||
var c = Unsafe.Add(ref srcRef, i);
|
||||
if (c < 0x80)
|
||||
{
|
||||
byteCount += 1;
|
||||
}
|
||||
else if (c < 0x800)
|
||||
{
|
||||
byteCount += 2;
|
||||
}
|
||||
else if (c < 0xD800)
|
||||
{
|
||||
byteCount += 3; // BMP below surrogate range
|
||||
}
|
||||
else if (c < 0xDC00)
|
||||
{
|
||||
byteCount += 4; // high surrogate → owns the 4-byte encoding for the pair
|
||||
}
|
||||
else if (c < 0xE000)
|
||||
{
|
||||
// low surrogate → 0 bytes (the paired high surrogate already accounted for the 4)
|
||||
}
|
||||
else
|
||||
{
|
||||
byteCount += 3; // BMP at or above 0xE000
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
return byteCount;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Counts UTF-16 chars produced by decoding the given UTF-8 byte span.
|
||||
/// </summary>
|
||||
|
|
|
|||
|
|
@ -99,7 +99,15 @@ public abstract class PropertyMetadataBase
|
|||
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type declaringType)
|
||||
{
|
||||
Name = prop.Name;
|
||||
NameUtf8 = Encoding.UTF8.GetBytes(prop.Name);
|
||||
// Ctor-once init: SIMD path via Utf8Transcoder (GetUtf8ByteCount + EncodeUtf8SinglePass)
|
||||
// bypasses Encoding.UTF8 virtual-dispatch + encoder-fallback overhead. Ascii.FromUtf16
|
||||
// would be slightly faster for the (overwhelmingly common) ASCII property name case, but
|
||||
// the symmetric Utf8Transcoder API keeps this consistent with the binary serializer's
|
||||
// writer-side BCL-free policy and handles non-ASCII property names without a fallback.
|
||||
var nameByteCount = Utf8Transcoder.GetUtf8ByteCount(prop.Name.AsSpan());
|
||||
var nameBytes = new byte[nameByteCount];
|
||||
Utf8Transcoder.EncodeUtf8SinglePass(prop.Name.AsSpan(), nameBytes);
|
||||
NameUtf8 = nameBytes;
|
||||
DeclaringType = declaringType;
|
||||
PropertyType = prop.PropertyType;
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue