[LOADED_DOCS: 3 files, no new loads]

Switch to BCL UTF-8 APIs for string (de)serialization

Replaced the custom Utf8Transcoder encode/decode calls in AcBinarySerializer and AcBinaryDeserializer with their BCL equivalents: System.Text.Unicode.Utf8.FromUtf16/ToUtf16 for transcoding, and System.Text.Encoding.UTF8.GetCharCount/GetByteCount for length calculation. PropertyMetadataBase now uses Encoding.UTF8.GetBytes to encode property names. The Utf8Transcoder type itself is retained for any remaining SIMD/custom logic. No public API changes; this is an internal refactoring for performance and maintainability.
This commit is contained in:
Loretta 2026-05-07 23:54:57 +02:00
parent 8eaae4dda3
commit 1d256ea386
5 changed files with 57 additions and 62 deletions

View File

@ -520,11 +520,11 @@ public static partial class AcBinaryDeserializer
var pos = _position;
_position += byteLength;
var src = _buffer.AsSpan(pos, byteLength);
var charCount = Utf8Transcoder.CountUtf8Chars(src);
var charCount = Encoding.UTF8.GetCharCount(src);
return string.Create(charCount, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
{
Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars);
System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false);
});
}
@ -562,7 +562,7 @@ public static partial class AcBinaryDeserializer
return string.Create(charLength, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) =>
{
Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars);
System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false);
});
}

View File

@ -691,7 +691,7 @@ public static partial class AcBinarySerializer
var savedPos = _position;
var encodeStart = savedPos + reserveSize;
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
var actualVarUIntSize = VarUIntSize((uint)bytesWritten);
if (actualVarUIntSize < reserveSize)
@ -781,15 +781,13 @@ public static partial class AcBinarySerializer
var savedPos = _position;
var encodeStart = savedPos + reserveHeader;
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
if (bytesWritten == charLength)
{
// ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header
if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength)
{
var shift = reserveHeader - 1;
if (shift > 0)
_buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten));
_buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten);
_position = savedPos + 1 + bytesWritten;
@ -898,7 +896,7 @@ public static partial class AcBinarySerializer
var savedPos = _position;
var encodeStart = savedPos + cacheIdxSize + reserveHeader;
var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes));
System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false);
// Choose tier from actual bytesWritten (smallest fits)
var actualHeader = bytesWritten <= 255 ? 3 : 5;

View File

@ -117,7 +117,7 @@ public static partial class AcBinarySerializer
foreach (var (stringValue, properties) in analysis)
{
var byteLength = Utf8Transcoder.GetUtf8ByteCount(stringValue.AsSpan());
var byteLength = System.Text.Encoding.UTF8.GetByteCount(stringValue.AsSpan());
foreach (var (propPath, count) in properties)
{
if (!propertyStats.TryGetValue(propPath, out var list))

View File

@ -99,15 +99,12 @@ public abstract class PropertyMetadataBase
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type declaringType)
{
Name = prop.Name;
// Ctor-once init: SIMD path via Utf8Transcoder (GetUtf8ByteCount + EncodeUtf8SinglePass)
// bypasses Encoding.UTF8 virtual-dispatch + encoder-fallback overhead. Ascii.FromUtf16
// would be slightly faster for the (overwhelmingly common) ASCII property name case, but
// the symmetric Utf8Transcoder API keeps this consistent with the binary serializer's
// writer-side BCL-free policy and handles non-ASCII property names without a fallback.
var nameByteCount = Utf8Transcoder.GetUtf8ByteCount(prop.Name.AsSpan());
var nameBytes = new byte[nameByteCount];
Utf8Transcoder.EncodeUtf8SinglePass(prop.Name.AsSpan(), nameBytes);
NameUtf8 = nameBytes;
// Encode the property name to UTF-8 via the string overload of Encoding.UTF8.GetBytes: a single
// BCL call that returns an exactly-sized byte[]. This avoids the manual two-step
// (GetByteCount + Utf8.FromUtf16) and the worst-case-buffer + Utf8.FromUtf16 + trim/copy
// patterns: no ArrayPool rent and no extra copy in this constructor.
// NOTE(review): the BCL may still count bytes internally before allocating — "single-pass" is not guaranteed.
NameUtf8 = System.Text.Encoding.UTF8.GetBytes(prop.Name);
DeclaringType = declaringType;
PropertyType = prop.PropertyType;