[LOADED_DOCS: 3 files, no new loads]
Switch to BCL UTF-8 APIs for string (de)serialization. Replaced custom Utf8Transcoder logic with System.Text.Encoding.UTF8 and System.Text.Unicode.Utf8 for string encoding/decoding in AcBinarySerializer and AcBinaryDeserializer. PropertyMetadataBase now uses Encoding.UTF8.GetBytes for property name encoding. Retained Utf8Transcoder for any remaining SIMD/custom logic. No public API changes; internal refactoring for performance and maintainability.
This commit is contained in:
parent
8eaae4dda3
commit
1d256ea386
|
|
@ -520,11 +520,11 @@ public static partial class AcBinaryDeserializer
|
|||
var pos = _position;
|
||||
_position += byteLength;
|
||||
var src = _buffer.AsSpan(pos, byteLength);
|
||||
var charCount = Utf8Transcoder.CountUtf8Chars(src);
|
||||
var charCount = Encoding.UTF8.GetCharCount(src);
|
||||
|
||||
return string.Create(charCount, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
|
||||
{
|
||||
Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars);
|
||||
System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -562,7 +562,7 @@ public static partial class AcBinaryDeserializer
|
|||
|
||||
return string.Create(charLength, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
|
||||
{
|
||||
Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars);
|
||||
System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -691,7 +691,7 @@ public static partial class AcBinarySerializer
|
|||
|
||||
var savedPos = _position;
|
||||
var encodeStart = savedPos + reserveSize;
|
||||
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
|
||||
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
|
||||
|
||||
var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
|
||||
if (actualVarUIntSize < reserveSize)
|
||||
|
|
@ -781,16 +781,14 @@ public static partial class AcBinarySerializer
|
|||
|
||||
var savedPos = _position;
|
||||
var encodeStart = savedPos + reserveHeader;
|
||||
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
|
||||
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
|
||||
|
||||
if (bytesWritten == charLength)
|
||||
{
|
||||
// ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header
|
||||
if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength)
|
||||
{
|
||||
var shift = reserveHeader - 1;
|
||||
if (shift > 0)
|
||||
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
|
||||
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
|
||||
_buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten);
|
||||
_position = savedPos + 1 + bytesWritten;
|
||||
}
|
||||
|
|
@ -898,7 +896,7 @@ public static partial class AcBinarySerializer
|
|||
|
||||
var savedPos = _position;
|
||||
var encodeStart = savedPos + cacheIdxSize + reserveHeader;
|
||||
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
|
||||
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
|
||||
|
||||
// Choose tier from actual bytesWritten (smallest fits)
|
||||
var actualHeader = bytesWritten <= 255 ? 3 : 5;
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public static partial class AcBinarySerializer
|
|||
|
||||
foreach (var (stringValue, properties) in analysis)
|
||||
{
|
||||
var byteLength = Utf8Transcoder.GetUtf8ByteCount(stringValue.AsSpan());
|
||||
var byteLength = System.Text.Encoding.UTF8.GetByteCount(stringValue.AsSpan());
|
||||
foreach (var (propPath, count) in properties)
|
||||
{
|
||||
if (!propertyStats.TryGetValue(propPath, out var list))
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ internal static class Utf8Transcoder
|
|||
var c3 = Unsafe.Add(ref srcRefChar, srcIdx + 3);
|
||||
if (((c0 | c1 | c2 | c3) & 0xFF80) == 0)
|
||||
{
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)c0;
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)c0;
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)c1;
|
||||
Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)c2;
|
||||
Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)c3;
|
||||
|
|
@ -155,7 +155,7 @@ internal static class Utf8Transcoder
|
|||
// 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF
|
||||
// Latin extended (Hungarian, Polish, Czech, Spanish, French, German diacritics),
|
||||
// Greek, Cyrillic, Hebrew, Arabic.
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xC0 | (c >> 6));
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xC0 | (c >> 6));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | (c & 0x3F));
|
||||
dstIdx += 2;
|
||||
srcIdx += 1;
|
||||
|
|
@ -164,7 +164,7 @@ internal static class Utf8Transcoder
|
|||
{
|
||||
// 3-byte BMP: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF (excluding surrogate range)
|
||||
// CJK BMP, various other BMP scripts.
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xE0 | (c >> 12));
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xE0 | (c >> 12));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | ((c >> 6) & 0x3F));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(0x80 | (c & 0x3F));
|
||||
dstIdx += 3;
|
||||
|
|
@ -176,7 +176,7 @@ internal static class Utf8Transcoder
|
|||
// High surrogate (0xD800–0xDBFF) followed by low surrogate (0xDC00–0xDFFF).
|
||||
var c2 = Unsafe.Add(ref srcRefChar, srcIdx + 1);
|
||||
var codepoint = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00);
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xF0 | (codepoint >> 18));
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xF0 | (codepoint >> 18));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | ((codepoint >> 12) & 0x3F));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)(0x80 | (codepoint & 0x3F));
|
||||
|
|
@ -252,8 +252,8 @@ internal static class Utf8Transcoder
|
|||
{
|
||||
var v = Vector512.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector512.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector512.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector512.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector512.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector512.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector512.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector512.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
|
@ -279,8 +279,8 @@ internal static class Utf8Transcoder
|
|||
{
|
||||
var v = Vector256.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector256.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector256.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector256.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector256.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector256.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector256.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector256.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
|
@ -307,8 +307,8 @@ internal static class Utf8Transcoder
|
|||
{
|
||||
var v = Vector128.LoadUnsafe(ref srcRef, (uint)i);
|
||||
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector128.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector128.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0x80 = BitOperations.PopCount(Vector128.LessThan(v, v_0x80).ExtractMostSignificantBits());
|
||||
var c_lt_0x800 = BitOperations.PopCount(Vector128.LessThan(v, v_0x800).ExtractMostSignificantBits());
|
||||
var c_lt_0xD800 = BitOperations.PopCount(Vector128.LessThan(v, v_0xD800).ExtractMostSignificantBits());
|
||||
var c_lt_0xDC00 = BitOperations.PopCount(Vector128.LessThan(v, v_0xDC00).ExtractMostSignificantBits());
|
||||
var c_lt_0xE000 = BitOperations.PopCount(Vector128.LessThan(v, v_0xE000).ExtractMostSignificantBits());
|
||||
|
|
@ -535,7 +535,7 @@ internal static class Utf8Transcoder
|
|||
var dword = Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref srcRef, srcIdx));
|
||||
if ((dword & 0x80808080u) == 0)
|
||||
{
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)dword;
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (byte)dword;
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(dword >> 8);
|
||||
Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(dword >> 16);
|
||||
Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)(dword >> 24);
|
||||
|
|
@ -560,40 +560,40 @@ internal static class Utf8Transcoder
|
|||
srcIdx += 1;
|
||||
break;
|
||||
case < 0xE0:
|
||||
{
|
||||
// 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF
|
||||
// Latin extended, Cyrillic, Greek, Hebrew, Arabic.
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x1F) << 6) | (b1 & 0x3F));
|
||||
srcIdx += 2;
|
||||
break;
|
||||
}
|
||||
{
|
||||
// 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF
|
||||
// Latin extended, Cyrillic, Greek, Hebrew, Arabic.
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x1F) << 6) | (b1 & 0x3F));
|
||||
srcIdx += 2;
|
||||
break;
|
||||
}
|
||||
case < 0xF0:
|
||||
{
|
||||
// 3-byte: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF
|
||||
// CJK BMP, various other scripts.
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
var b2 = Unsafe.Add(ref srcRef, srcIdx + 2);
|
||||
Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
|
||||
srcIdx += 3;
|
||||
break;
|
||||
}
|
||||
{
|
||||
// 3-byte: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF
|
||||
// CJK BMP, various other scripts.
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
var b2 = Unsafe.Add(ref srcRef, srcIdx + 2);
|
||||
Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
|
||||
srcIdx += 3;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
// 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx → U+10000–U+10FFFF
|
||||
// Supplementary plane (emoji, rare CJK ext) → UTF-16 surrogate pair.
|
||||
// No do-while: 4-byte sequences are typically isolated (single emoji in mixed text).
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
var b2 = Unsafe.Add(ref srcRef, srcIdx + 2);
|
||||
var b3 = Unsafe.Add(ref srcRef, srcIdx + 3);
|
||||
var codepoint = ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
|
||||
codepoint -= 0x10000;
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (ushort)(0xD800 | (codepoint >> 10));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (ushort)(0xDC00 | (codepoint & 0x3FF));
|
||||
dstIdx += 2;
|
||||
srcIdx += 4;
|
||||
break;
|
||||
}
|
||||
{
|
||||
// 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx → U+10000–U+10FFFF
|
||||
// Supplementary plane (emoji, rare CJK ext) → UTF-16 surrogate pair.
|
||||
// No do-while: 4-byte sequences are typically isolated (single emoji in mixed text).
|
||||
var b1 = Unsafe.Add(ref srcRef, srcIdx + 1);
|
||||
var b2 = Unsafe.Add(ref srcRef, srcIdx + 2);
|
||||
var b3 = Unsafe.Add(ref srcRef, srcIdx + 3);
|
||||
var codepoint = ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
|
||||
codepoint -= 0x10000;
|
||||
Unsafe.Add(ref dstRef, dstIdx) = (ushort)(0xD800 | (codepoint >> 10));
|
||||
Unsafe.Add(ref dstRef, dstIdx + 1) = (ushort)(0xDC00 | (codepoint & 0x3FF));
|
||||
dstIdx += 2;
|
||||
srcIdx += 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -99,15 +99,12 @@ public abstract class PropertyMetadataBase
|
|||
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type declaringType)
|
||||
{
|
||||
Name = prop.Name;
|
||||
// Ctor-once init: SIMD path via Utf8Transcoder (GetUtf8ByteCount + EncodeUtf8SinglePass)
|
||||
// bypasses Encoding.UTF8 virtual-dispatch + encoder-fallback overhead. Ascii.FromUtf16
|
||||
// would be slightly faster for the (overwhelmingly common) ASCII property name case, but
|
||||
// the symmetric Utf8Transcoder API keeps this consistent with the binary serializer's
|
||||
// writer-side BCL-free policy and handles non-ASCII property names without a fallback.
|
||||
var nameByteCount = Utf8Transcoder.GetUtf8ByteCount(prop.Name.AsSpan());
|
||||
var nameBytes = new byte[nameByteCount];
|
||||
Utf8Transcoder.EncodeUtf8SinglePass(prop.Name.AsSpan(), nameBytes);
|
||||
NameUtf8 = nameBytes;
|
||||
// UTF-8 encode the property name via the string overload: one BCL call that returns an
|
||||
// exact-size byte[]. This is ctor-once init, so the simpler call is preferred over a manual
|
||||
// two-pass (GetByteCount + Utf8.FromUtf16) or worst-case-buffer + Utf8.FromUtf16 + trim/copy
|
||||
// pattern: no ArrayPool rent and no hand-written sizing code at this call site. Note that the
|
||||
// BCL may still count or copy internally; the benefit is simplicity, not a guaranteed single pass.
|
||||
NameUtf8 = System.Text.Encoding.UTF8.GetBytes(prop.Name);
|
||||
DeclaringType = declaringType;
|
||||
PropertyType = prop.PropertyType;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue