diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index 7778394..d741c7f 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -520,11 +520,11 @@ public static partial class AcBinaryDeserializer var pos = _position; _position += byteLength; var src = _buffer.AsSpan(pos, byteLength); - var charCount = Utf8Transcoder.CountUtf8Chars(src); + var charCount = Encoding.UTF8.GetCharCount(src); return string.Create(charCount, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) => { - Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars); + System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false); }); } @@ -562,7 +562,7 @@ public static partial class AcBinaryDeserializer return string.Create(charLength, (Buffer: _buffer, Pos: pos, Len: byteLength), static (chars, state) => { - Utf8Transcoder.DecodeUtf8SinglePass(state.Buffer.AsSpan(state.Pos, state.Len), chars); + System.Text.Unicode.Utf8.ToUtf16(state.Buffer.AsSpan(state.Pos, state.Len), chars, out _, out _, replaceInvalidSequences: false); }); } diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 18bc650..034a406 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -691,7 +691,7 @@ public static partial class AcBinarySerializer var savedPos = _position; var encodeStart = savedPos + reserveSize; - var bytesWritten = 
Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes)); + System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); var actualVarUIntSize = VarUIntSize((uint)bytesWritten); if (actualVarUIntSize < reserveSize) @@ -781,16 +781,14 @@ public static partial class AcBinarySerializer var savedPos = _position; var encodeStart = savedPos + reserveHeader; - var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes)); + System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); if (bytesWritten == charLength) { // ASCII override — FixStrAscii (≤31) or StringAscii (>31) with compact header if (bytesWritten <= BinaryTypeCode.FixStrAsciiMaxLength) { - var shift = reserveHeader - 1; - if (shift > 0) - _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten)); + _buffer.AsSpan(encodeStart, bytesWritten).CopyTo(_buffer.AsSpan(savedPos + 1, bytesWritten)); _buffer[savedPos] = BinaryTypeCode.EncodeFixStrAscii(bytesWritten); _position = savedPos + 1 + bytesWritten; } @@ -898,7 +896,7 @@ public static partial class AcBinarySerializer var savedPos = _position; var encodeStart = savedPos + cacheIdxSize + reserveHeader; - var bytesWritten = Utf8Transcoder.EncodeUtf8SinglePass(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes)); + System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); // Choose tier from actual bytesWritten (smallest fits) var actualHeader = bytesWritten <= 255 ? 
3 : 5; diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs index ad3dfab..c99a48d 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs @@ -117,7 +117,7 @@ public static partial class AcBinarySerializer foreach (var (stringValue, properties) in analysis) { - var byteLength = Utf8Transcoder.GetUtf8ByteCount(stringValue.AsSpan()); + var byteLength = System.Text.Encoding.UTF8.GetByteCount(stringValue.AsSpan()); foreach (var (propPath, count) in properties) { if (!propertyStats.TryGetValue(propPath, out var list)) diff --git a/AyCode.Core/Serializers/Binaries/Utf8Transcoder.cs b/AyCode.Core/Serializers/Binaries/Utf8Transcoder.cs index 8fb9817..4a46534 100644 --- a/AyCode.Core/Serializers/Binaries/Utf8Transcoder.cs +++ b/AyCode.Core/Serializers/Binaries/Utf8Transcoder.cs @@ -132,7 +132,7 @@ internal static class Utf8Transcoder var c3 = Unsafe.Add(ref srcRefChar, srcIdx + 3); if (((c0 | c1 | c2 | c3) & 0xFF80) == 0) { - Unsafe.Add(ref dstRef, dstIdx) = (byte)c0; + Unsafe.Add(ref dstRef, dstIdx) = (byte)c0; Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)c1; Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)c2; Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)c3; @@ -155,7 +155,7 @@ internal static class Utf8Transcoder // 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF // Latin extended (Hungarian, Polish, Czech, Spanish, French, German diacritics), // Greek, Cyrillic, Hebrew, Arabic. - Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xC0 | (c >> 6)); + Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xC0 | (c >> 6)); Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | (c & 0x3F)); dstIdx += 2; srcIdx += 1; @@ -164,7 +164,7 @@ internal static class Utf8Transcoder { // 3-byte BMP: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF (excluding surrogate range) // CJK BMP, various other BMP scripts. 
- Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xE0 | (c >> 12)); + Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xE0 | (c >> 12)); Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | ((c >> 6) & 0x3F)); Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(0x80 | (c & 0x3F)); dstIdx += 3; @@ -176,7 +176,7 @@ internal static class Utf8Transcoder // High surrogate (0xD800–0xDBFF) followed by low surrogate (0xDC00–0xDFFF). var c2 = Unsafe.Add(ref srcRefChar, srcIdx + 1); var codepoint = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00); - Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xF0 | (codepoint >> 18)); + Unsafe.Add(ref dstRef, dstIdx) = (byte)(0xF0 | (codepoint >> 18)); Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(0x80 | ((codepoint >> 12) & 0x3F)); Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(0x80 | ((codepoint >> 6) & 0x3F)); Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)(0x80 | (codepoint & 0x3F)); @@ -252,8 +252,8 @@ internal static class Utf8Transcoder { var v = Vector512.LoadUnsafe(ref srcRef, (uint)i); - var c_lt_0x80 = BitOperations.PopCount(Vector512.LessThan(v, v_0x80).ExtractMostSignificantBits()); - var c_lt_0x800 = BitOperations.PopCount(Vector512.LessThan(v, v_0x800).ExtractMostSignificantBits()); + var c_lt_0x80 = BitOperations.PopCount(Vector512.LessThan(v, v_0x80).ExtractMostSignificantBits()); + var c_lt_0x800 = BitOperations.PopCount(Vector512.LessThan(v, v_0x800).ExtractMostSignificantBits()); var c_lt_0xD800 = BitOperations.PopCount(Vector512.LessThan(v, v_0xD800).ExtractMostSignificantBits()); var c_lt_0xDC00 = BitOperations.PopCount(Vector512.LessThan(v, v_0xDC00).ExtractMostSignificantBits()); var c_lt_0xE000 = BitOperations.PopCount(Vector512.LessThan(v, v_0xE000).ExtractMostSignificantBits()); @@ -279,8 +279,8 @@ internal static class Utf8Transcoder { var v = Vector256.LoadUnsafe(ref srcRef, (uint)i); - var c_lt_0x80 = BitOperations.PopCount(Vector256.LessThan(v, v_0x80).ExtractMostSignificantBits()); - var c_lt_0x800 = BitOperations.PopCount(Vector256.LessThan(v, 
v_0x800).ExtractMostSignificantBits()); + var c_lt_0x80 = BitOperations.PopCount(Vector256.LessThan(v, v_0x80).ExtractMostSignificantBits()); + var c_lt_0x800 = BitOperations.PopCount(Vector256.LessThan(v, v_0x800).ExtractMostSignificantBits()); var c_lt_0xD800 = BitOperations.PopCount(Vector256.LessThan(v, v_0xD800).ExtractMostSignificantBits()); var c_lt_0xDC00 = BitOperations.PopCount(Vector256.LessThan(v, v_0xDC00).ExtractMostSignificantBits()); var c_lt_0xE000 = BitOperations.PopCount(Vector256.LessThan(v, v_0xE000).ExtractMostSignificantBits()); @@ -307,8 +307,8 @@ internal static class Utf8Transcoder { var v = Vector128.LoadUnsafe(ref srcRef, (uint)i); - var c_lt_0x80 = BitOperations.PopCount(Vector128.LessThan(v, v_0x80).ExtractMostSignificantBits()); - var c_lt_0x800 = BitOperations.PopCount(Vector128.LessThan(v, v_0x800).ExtractMostSignificantBits()); + var c_lt_0x80 = BitOperations.PopCount(Vector128.LessThan(v, v_0x80).ExtractMostSignificantBits()); + var c_lt_0x800 = BitOperations.PopCount(Vector128.LessThan(v, v_0x800).ExtractMostSignificantBits()); var c_lt_0xD800 = BitOperations.PopCount(Vector128.LessThan(v, v_0xD800).ExtractMostSignificantBits()); var c_lt_0xDC00 = BitOperations.PopCount(Vector128.LessThan(v, v_0xDC00).ExtractMostSignificantBits()); var c_lt_0xE000 = BitOperations.PopCount(Vector128.LessThan(v, v_0xE000).ExtractMostSignificantBits()); @@ -535,7 +535,7 @@ internal static class Utf8Transcoder var dword = Unsafe.ReadUnaligned(ref Unsafe.Add(ref srcRef, srcIdx)); if ((dword & 0x80808080u) == 0) { - Unsafe.Add(ref dstRef, dstIdx) = (byte)dword; + Unsafe.Add(ref dstRef, dstIdx) = (byte)dword; Unsafe.Add(ref dstRef, dstIdx + 1) = (byte)(dword >> 8); Unsafe.Add(ref dstRef, dstIdx + 2) = (byte)(dword >> 16); Unsafe.Add(ref dstRef, dstIdx + 3) = (byte)(dword >> 24); @@ -560,40 +560,40 @@ internal static class Utf8Transcoder srcIdx += 1; break; case < 0xE0: - { - // 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF - // Latin extended, Cyrillic, 
Greek, Hebrew, Arabic. - var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); - Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x1F) << 6) | (b1 & 0x3F)); - srcIdx += 2; - break; - } + { + // 2-byte: 110xxxxx 10xxxxxx → U+0080–U+07FF + // Latin extended, Cyrillic, Greek, Hebrew, Arabic. + var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); + Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x1F) << 6) | (b1 & 0x3F)); + srcIdx += 2; + break; + } case < 0xF0: - { - // 3-byte: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF - // CJK BMP, various other scripts. - var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); - var b2 = Unsafe.Add(ref srcRef, srcIdx + 2); - Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F)); - srcIdx += 3; - break; - } + { + // 3-byte: 1110xxxx 10xxxxxx 10xxxxxx → U+0800–U+FFFF + // CJK BMP, various other scripts. + var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); + var b2 = Unsafe.Add(ref srcRef, srcIdx + 2); + Unsafe.Add(ref dstRef, dstIdx++) = (ushort)(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F)); + srcIdx += 3; + break; + } default: - { - // 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx → U+10000–U+10FFFF - // Supplementary plane (emoji, rare CJK ext) → UTF-16 surrogate pair. - // No do-while: 4-byte sequences are typically isolated (single emoji in mixed text). - var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); - var b2 = Unsafe.Add(ref srcRef, srcIdx + 2); - var b3 = Unsafe.Add(ref srcRef, srcIdx + 3); - var codepoint = ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); - codepoint -= 0x10000; - Unsafe.Add(ref dstRef, dstIdx) = (ushort)(0xD800 | (codepoint >> 10)); - Unsafe.Add(ref dstRef, dstIdx + 1) = (ushort)(0xDC00 | (codepoint & 0x3FF)); - dstIdx += 2; - srcIdx += 4; - break; - } + { + // 4-byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx → U+10000–U+10FFFF + // Supplementary plane (emoji, rare CJK ext) → UTF-16 surrogate pair. 
+ // No do-while: 4-byte sequences are typically isolated (single emoji in mixed text). + var b1 = Unsafe.Add(ref srcRef, srcIdx + 1); + var b2 = Unsafe.Add(ref srcRef, srcIdx + 2); + var b3 = Unsafe.Add(ref srcRef, srcIdx + 3); + var codepoint = ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); + codepoint -= 0x10000; + Unsafe.Add(ref dstRef, dstIdx) = (ushort)(0xD800 | (codepoint >> 10)); + Unsafe.Add(ref dstRef, dstIdx + 1) = (ushort)(0xDC00 | (codepoint & 0x3FF)); + dstIdx += 2; + srcIdx += 4; + break; + } } } diff --git a/AyCode.Core/Serializers/PropertyMetadataBase.cs b/AyCode.Core/Serializers/PropertyMetadataBase.cs index dd8370b..0396107 100644 --- a/AyCode.Core/Serializers/PropertyMetadataBase.cs +++ b/AyCode.Core/Serializers/PropertyMetadataBase.cs @@ -99,15 +99,12 @@ public abstract class PropertyMetadataBase [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties)] Type declaringType) { Name = prop.Name; - // Ctor-once init: SIMD path via Utf8Transcoder (GetUtf8ByteCount + EncodeUtf8SinglePass) - // bypasses Encoding.UTF8 virtual-dispatch + encoder-fallback overhead. Ascii.FromUtf16 - // would be slightly faster for the (overwhelmingly common) ASCII property name case, but - // the symmetric Utf8Transcoder API keeps this consistent with the binary serializer's - // writer-side BCL-free policy and handles non-ASCII property names without a fallback. - var nameByteCount = Utf8Transcoder.GetUtf8ByteCount(prop.Name.AsSpan()); - var nameBytes = new byte[nameByteCount]; - Utf8Transcoder.EncodeUtf8SinglePass(prop.Name.AsSpan(), nameBytes); - NameUtf8 = nameBytes; + // Single-pass UTF-8 encode via the string-overload — encode + exact-size byte[] allocation in + // one BCL call. 
Shorter than the explicit two-pass (GetByteCount + Utf8.FromUtf16) and than + // worst-case-buffer + Utf8.FromUtf16 + trim/copy patterns: no ArrayPool rent and no caller-managed + // buffer. The BCL still has to size the array internally (a counting pass, or for short inputs a + // scratch-buffer encode followed by a copy), so any speed advantage is unmeasured - TODO benchmark. + NameUtf8 = System.Text.Encoding.UTF8.GetBytes(prop.Name); DeclaringType = declaringType; PropertyType = prop.PropertyType;