diff --git a/AyCode.Benchmark/BdnSummaryAdapter.cs b/AyCode.Benchmark/BdnSummaryAdapter.cs index 5bf8c7e..9b21f28 100644 --- a/AyCode.Benchmark/BdnSummaryAdapter.cs +++ b/AyCode.Benchmark/BdnSummaryAdapter.cs @@ -90,17 +90,18 @@ public static class BdnSummaryAdapter var s = BenchmarkTestDataProvider.LongStringSuffix; return s switch { - CharsetSuffixes.Latin1FixAscii => "Latin1FixAscii", - CharsetSuffixes.AsciiShort => "AsciiShort", - CharsetSuffixes.AsciiLong => "AsciiLong", - CharsetSuffixes.Latin1Short => "Latin1Short", - CharsetSuffixes.Latin1Long => "Latin1Long", - CharsetSuffixes.CjkBmpShort => "CjkBmpShort", - CharsetSuffixes.CjkBmpLong => "CjkBmpLong", - CharsetSuffixes.CyrillicShort => "CyrillicShort", - CharsetSuffixes.CyrillicLong => "CyrillicLong", - CharsetSuffixes.MixedShort => "MixedShort", - CharsetSuffixes.MixedLong => "MixedLong", + CharsetSuffixes.AsciiFix => nameof(CharsetSuffixes.AsciiFix), + CharsetSuffixes.AsciiShort => nameof(CharsetSuffixes.AsciiShort), + CharsetSuffixes.AsciiLong => nameof(CharsetSuffixes.AsciiLong), + CharsetSuffixes.Latin1Fix => nameof(CharsetSuffixes.Latin1Fix), + CharsetSuffixes.Latin1Short => nameof(CharsetSuffixes.Latin1Short), + CharsetSuffixes.Latin1Long => nameof(CharsetSuffixes.Latin1Long), + CharsetSuffixes.CjkBmpShort => nameof(CharsetSuffixes.CjkBmpShort), + CharsetSuffixes.CjkBmpLong => nameof(CharsetSuffixes.CjkBmpLong), + CharsetSuffixes.CyrillicShort => nameof(CharsetSuffixes.CyrillicShort), + CharsetSuffixes.CyrillicLong => nameof(CharsetSuffixes.CyrillicLong), + CharsetSuffixes.MixedShort => nameof(CharsetSuffixes.MixedShort), + CharsetSuffixes.MixedLong => nameof(CharsetSuffixes.MixedLong), _ => "Custom" }; } diff --git a/AyCode.Core.Serializers.Console/Configuration.cs b/AyCode.Core.Serializers.Console/Configuration.cs index c84b930..0cf0401 100644 --- a/AyCode.Core.Serializers.Console/Configuration.cs +++ b/AyCode.Core.Serializers.Console/Configuration.cs @@ -86,17 +86,18 @@ internal static class Configuration return s switch { - CharsetSuffixes.Latin1FixAscii => "Latin1FixAscii", - CharsetSuffixes.AsciiShort => "AsciiShort", - CharsetSuffixes.AsciiLong => "AsciiLong", - CharsetSuffixes.Latin1Short => "Latin1Short", - CharsetSuffixes.Latin1Long => "Latin1Long", - CharsetSuffixes.CjkBmpShort => "CjkBmpShort", - CharsetSuffixes.CjkBmpLong => "CjkBmpLong", - CharsetSuffixes.CyrillicShort => "CyrillicShort", - CharsetSuffixes.CyrillicLong => "CyrillicLong", - CharsetSuffixes.MixedShort => "MixedShort", - CharsetSuffixes.MixedLong => "MixedLong", + CharsetSuffixes.AsciiFix => nameof(CharsetSuffixes.AsciiFix), + CharsetSuffixes.AsciiShort => nameof(CharsetSuffixes.AsciiShort), + CharsetSuffixes.AsciiLong => nameof(CharsetSuffixes.AsciiLong), + CharsetSuffixes.Latin1Fix => nameof(CharsetSuffixes.Latin1Fix), + CharsetSuffixes.Latin1Short => nameof(CharsetSuffixes.Latin1Short), + CharsetSuffixes.Latin1Long => nameof(CharsetSuffixes.Latin1Long), + CharsetSuffixes.CjkBmpShort => nameof(CharsetSuffixes.CjkBmpShort), + CharsetSuffixes.CjkBmpLong => nameof(CharsetSuffixes.CjkBmpLong), + CharsetSuffixes.CyrillicShort => nameof(CharsetSuffixes.CyrillicShort), + CharsetSuffixes.CyrillicLong => nameof(CharsetSuffixes.CyrillicLong), + CharsetSuffixes.MixedShort => nameof(CharsetSuffixes.MixedShort), + CharsetSuffixes.MixedLong => nameof(CharsetSuffixes.MixedLong), _ => "Custom" }; } diff --git a/AyCode.Core.Serializers.Console/Menu.cs b/AyCode.Core.Serializers.Console/Menu.cs index 7dce829..5466f6d 100644 --- a/AyCode.Core.Serializers.Console/Menu.cs +++ b/AyCode.Core.Serializers.Console/Menu.cs @@ -109,17 +109,18 @@ internal static class Menu System.Console.WriteLine(); System.Console.WriteLine(" All *Short = 40 char, all *Long = 280 char (= Short × 7) — length-consistent across charsets."); System.Console.WriteLine(); - System.Console.WriteLine(" [1] Latin1FixAscii — empty suffix; baseline-only short values → FixStrAscii tier"); + System.Console.WriteLine(" [1] AsciiFix — empty suffix; baseline-only short values → FixStrAscii tier"); System.Console.WriteLine(" [2] AsciiShort — 40 char pure ASCII (quic × 8) → StringAscii tier"); System.Console.WriteLine(" [3] AsciiLong — 280 char pure ASCII → StringAscii tier"); - System.Console.WriteLine(" [4] Latin1Short — 40 char Hungarian (árví × 8) → StringSmall tier"); - System.Console.WriteLine(" [5] Latin1Long — 280 char Hungarian (default) → StringMedium tier"); - System.Console.WriteLine(" [6] CjkBmpShort — 40 char CJK BMP (3-byte runs) → StringSmall tier"); - System.Console.WriteLine(" [7] CjkBmpLong — 280 char CJK BMP → StringMedium tier"); - System.Console.WriteLine(" [8] CyrillicShort — 40 char Cyrillic (2-byte runs) → StringSmall tier"); - System.Console.WriteLine(" [9] CyrillicLong — 280 char Cyrillic → StringMedium tier"); - System.Console.WriteLine(" [0] MixedShort — 40 char multi-codepage → StringSmall tier"); - System.Console.WriteLine(" [A] MixedLong — 280 char multi-codepage → StringMedium tier"); + System.Console.WriteLine(" [4] Latin1Fix — 5 char Hungarian (árví) → FixStr-lean tier"); + System.Console.WriteLine(" [5] Latin1Short — 40 char Hungarian (árví × 8) → StringSmall tier"); + System.Console.WriteLine(" [6] Latin1Long — 280 char Hungarian (default) → StringMedium tier"); + System.Console.WriteLine(" [7] CjkBmpShort — 40 char CJK BMP (3-byte runs) → StringSmall tier"); + System.Console.WriteLine(" [8] CjkBmpLong — 280 char CJK BMP → StringMedium tier"); + System.Console.WriteLine(" [9] CyrillicShort — 40 char Cyrillic (2-byte runs) → StringSmall tier"); + System.Console.WriteLine(" [0] CyrillicLong — 280 char Cyrillic → StringMedium tier"); + System.Console.WriteLine(" [A] MixedShort — 40 char multi-codepage → StringSmall tier"); + System.Console.WriteLine(" [C] MixedLong — 280 char multi-codepage → StringMedium tier"); System.Console.WriteLine(" [B] Back"); System.Console.Write("\nSelection: "); @@ -129,8 +130,8 @@ internal static class Menu switch (char.ToLower(key)) { case '1': - BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1FixAscii; - System.Console.WriteLine("✓ Charset set to Latin1FixAscii"); + BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.AsciiFix; + System.Console.WriteLine("✓ Charset set to AsciiFix"); return; case '2': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.AsciiShort; @@ -141,34 +142,38 @@ internal static class Menu System.Console.WriteLine("✓ Charset set to AsciiLong"); return; case '4': + BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1Fix; + System.Console.WriteLine("✓ Charset set to Latin1Fix"); + return; + case '5': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1Short; System.Console.WriteLine("✓ Charset set to Latin1Short"); return; - case '5': + case '6': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.Latin1Long; System.Console.WriteLine("✓ Charset set to Latin1Long"); return; - case '6': + case '7': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.CjkBmpShort; System.Console.WriteLine("✓ Charset set to CjkBmpShort"); return; - case '7': + case '8': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.CjkBmpLong; System.Console.WriteLine("✓ Charset set to CjkBmpLong"); return; - case '8': + case '9': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.CyrillicShort; System.Console.WriteLine("✓ Charset set to CyrillicShort"); return; - case '9': + case '0': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.CyrillicLong; System.Console.WriteLine("✓ Charset set to CyrillicLong"); return; - case '0': + case 'a': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.MixedShort; System.Console.WriteLine("✓ Charset set to MixedShort"); return; - case 'a': + case 'c': BenchmarkTestDataProvider.LongStringSuffix = CharsetSuffixes.MixedLong; System.Console.WriteLine("✓ Charset set to MixedLong"); return; diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs index 652237e..30e8a5d 100644 --- a/AyCode.Core.Serializers.Console/Program.cs +++ b/AyCode.Core.Serializers.Console/Program.cs @@ -149,9 +149,10 @@ public static class Program { string? suffix = arg.ToLowerInvariant() switch { - "latin1fixascii" => CharsetSuffixes.Latin1FixAscii, + "asciifix" => CharsetSuffixes.AsciiFix, "asciishort" => CharsetSuffixes.AsciiShort, "asciilong" => CharsetSuffixes.AsciiLong, + "latin1fix" => CharsetSuffixes.Latin1Fix, "latin1short" => CharsetSuffixes.Latin1Short, "latin1long" => CharsetSuffixes.Latin1Long, "cjkbmpshort" => CharsetSuffixes.CjkBmpShort, diff --git a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs index 8e95d00..59e2331 100644 --- a/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs +++ b/AyCode.Core.Serializers.SourceGenerator/AcBinarySourceGenerator.GenReader.cs @@ -242,8 +242,10 @@ public partial class AcBinarySourceGenerator sb.AppendLine($"{i} case BinaryTypeCode.StringUtf16:"); sb.AppendLine($"{i} {a} = context.ReadStringUtf16Marker();"); sb.AppendLine($"{i} break;"); - sb.AppendLine($"{i} case BinaryTypeCode.String:"); - sb.AppendLine($"{i} {a} = context.ReadUniversalLongString();"); + sb.AppendLine($"{i} case BinaryTypeCode.StringLen8:"); + sb.AppendLine($"{i} case BinaryTypeCode.StringLen16:"); + sb.AppendLine($"{i} case BinaryTypeCode.StringLen32:"); + sb.AppendLine($"{i} {a} = context.ReadUniversalLongStringByMarker({tc});"); sb.AppendLine($"{i} break;"); // Interning first-occurrence cases — see comment above. if (enableInternString) diff --git a/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs b/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs index 89560d7..93c231a 100644 --- a/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs +++ b/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs @@ -32,7 +32,7 @@ public static class CharsetSuffixes /// Empty suffix — baseline string property values stay short, hitting the /// FixStrAscii / short-string fast-path. Stress-test for short-string code paths. - public const string Latin1FixAscii = ""; + public const string AsciiFix = ""; // ── Pure ASCII (every byte < 0x80) ── // Tier: StringAscii (167) — byte→char SIMD widening, zero UTF-8 decode. @@ -47,6 +47,7 @@ public static class CharsetSuffixes // Tier: StringSmall (91) Short / StringMedium (94) Long. // UTF-8 byte count: ~72 byte Short (5 char base = 9 byte UTF-8: space+á+r+v+í), ~504 byte Long. private const string Latin1Base = " árví"; // 5 char (space + á + r + v + í) — multi-byte mix + public const string Latin1Fix = Latin1Base; // 5 char (FixStr-lean profile) public const string Latin1Short = Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base; // 40 char public const string Latin1Long = Latin1Short + Latin1Short + Latin1Short + Latin1Short diff --git a/AyCode.Core/Serializers/AcSerializerOptions.cs b/AyCode.Core/Serializers/AcSerializerOptions.cs index 29b57d7..ec1e5d0 100644 --- a/AyCode.Core/Serializers/AcSerializerOptions.cs +++ b/AyCode.Core/Serializers/AcSerializerOptions.cs @@ -110,6 +110,15 @@ public enum AcSerializerType : byte /// public enum MaxDepthBehavior : byte { + /// + /// Throw when depth limit is reached — cycle + /// detection and bug surfacing. Recommended default: an over-depth payload is almost always a bug + /// (circular reference without proper tracking, or + /// pathologically deep graph). The exception message reports the offending type and depth so the + /// failure mode is debuggable rather than silent. + /// + Throw = 0, + /// /// Replace the over-depth value with a Null marker in the wire stream — intentional shallow /// serialization. Use when sending partial graphs (e.g., client → server delta updates, view-model @@ -118,16 +127,7 @@ public enum MaxDepthBehavior : byte /// the consumer must encode that semantic in its protocol (e.g., "only modified properties are /// persisted; nested nulls are skipped, not overwritten"). /// - Truncate = 0, - - /// - /// Throw when depth limit is reached — cycle - /// detection and bug surfacing. Recommended default: an over-depth payload is almost always a bug - /// (circular reference without proper tracking, or - /// pathologically deep graph). The exception message reports the offending type and depth so the - /// failure mode is debuggable rather than silent. - /// - Throw = 1, + Truncate = 1, /// /// Skip the depth limit check entirely — maximum hot-path performance. Use only when the developer diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs index c63140f..4bc05d6 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.Read.cs @@ -700,7 +700,14 @@ public static partial class AcBinaryDeserializer [MethodImpl(MethodImplOptions.AggressiveInlining)] internal string ReadUniversalLongString() { - ReadUniversalLongStringHeader(out var charLength, out var excess); + ReadUniversalLongStringHeader(BinaryTypeCode.StringLen32, out var charLength, out var excess); + return ReadStringByUnsignedExcess(charLength, excess); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal string ReadUniversalLongStringByMarker(byte marker) + { + ReadUniversalLongStringHeader(marker, out var charLength, out var excess); return ReadStringByUnsignedExcess(charLength, excess); } @@ -712,9 +719,23 @@ public static partial class AcBinaryDeserializer } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void ReadUniversalLongStringHeader(out int charLength, out uint excess) + internal void ReadUniversalLongStringHeader(byte marker, out int charLength, out uint excess) { - charLength = (int)ReadVarUInt(); + if (marker == BinaryTypeCode.StringLen8) + { + charLength = ReadByte(); + } + else if (marker == BinaryTypeCode.StringLen16) + { + charLength = ReadUInt16Unsafe(); + } + else + { + charLength = ReadInt32Unsafe(); + if (charLength < 0) + throw new AcBinaryDeserializationException($"Invalid string header: negative charLength ({charLength}).", _position); + } + var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); if (slotSize == 1) excess = ReadByte(); @@ -804,8 +825,10 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.StringUtf16: value = ReadStringUtf16Marker(); return true; - case BinaryTypeCode.String: - ReadUniversalLongStringHeader(out charLength, out excess); + case BinaryTypeCode.StringLen8: + case BinaryTypeCode.StringLen16: + case BinaryTypeCode.StringLen32: + ReadUniversalLongStringHeader(tc, out charLength, out excess); break; case BinaryTypeCode.Null: return true; @@ -820,6 +843,7 @@ public static partial class AcBinaryDeserializer ReadUniversalFixStrHeader(tc, out charLength, out excess); break; } + // Interning marker, PropertySkip, or unknown — caller continues via short-circuit || // to TryReadStringColdPath (interning types) or leaves the property at default. return false; diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 9bf890c..bc3c6f4 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -105,7 +105,9 @@ public static partial class AcBinaryDeserializer // H2Q6 interning tier readers (Compact mode only — Big tier never engages on interning path) readers[BinaryTypeCode.StringInternFirstSmall] = static (ctx, _) => ctx.ReadAndRegisterInternedStringSmall(); readers[BinaryTypeCode.StringInternFirstMedium] = static (ctx, _) => ctx.ReadAndRegisterInternedStringMedium(); - readers[BinaryTypeCode.String] = static (ctx, _) => ctx.ReadUniversalLongString(); + readers[BinaryTypeCode.StringLen8] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen8); + readers[BinaryTypeCode.StringLen16] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen16); + readers[BinaryTypeCode.StringLen32] = static (ctx, _) => ctx.ReadUniversalLongStringByMarker(BinaryTypeCode.StringLen32); readers[BinaryTypeCode.DateTime] = static (ctx, _) => ctx.ReadDateTimeUnsafe(); readers[BinaryTypeCode.DateTimeOffset] = static (ctx, _) => ctx.ReadDateTimeOffsetUnsafe(); readers[BinaryTypeCode.TimeSpan] = static (ctx, _) => ctx.ReadTimeSpanUnsafe(); @@ -1048,8 +1050,10 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.StringUtf16: propInfo.SetValue(target, context.ReadStringUtf16Marker()); return true; - case BinaryTypeCode.String: - propInfo.SetValue(target, context.ReadUniversalLongString()); + case BinaryTypeCode.StringLen8: + case BinaryTypeCode.StringLen16: + case BinaryTypeCode.StringLen32: + propInfo.SetValue(target, context.ReadUniversalLongStringByMarker(typeCode)); return true; case BinaryTypeCode.StringEmpty: propInfo.SetValue(target, string.Empty); @@ -2041,10 +2045,27 @@ public static partial class AcBinaryDeserializer case BinaryTypeCode.Decimal: context.Skip(16); return; - case BinaryTypeCode.String: - // Skip layout: [VarUInt charLength][unsigned excess slot][bytes] + case BinaryTypeCode.StringLen8: + case BinaryTypeCode.StringLen16: + case BinaryTypeCode.StringLen32: + // Skip layout: [charLength:1|2|4 by marker][unsigned excess slot][bytes] { - var charLength = (int)context.ReadVarUInt(); + int charLength; + if (typeCode == BinaryTypeCode.StringLen8) + { + charLength = context.ReadByte(); + } + else if (typeCode == BinaryTypeCode.StringLen16) + { + charLength = context.ReadUInt16Unsafe(); + } + else + { + charLength = context.ReadInt32Unsafe(); + if (charLength < 0) + throw new AcBinaryDeserializationException($"Invalid string header while skipping: negative charLength ({charLength}).", context.Position); + } + var slotSize = BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); var excess = slotSize switch { diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index 2bec662..9c408bc 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -863,7 +863,7 @@ public static partial class AcBinarySerializer /// Header is fully determined before encode: /// /// charLength <= 31: [FixStr(marker carries charLength)][unsigned excess:1] - /// charLength > 31: [String][VarUInt(charLength)][unsigned excess:1|2|4] + /// charLength > 31: [StringLen8|StringLen16|StringLen32][charLength:1|2|4][unsigned excess:1|2|4] /// /// Body is UTF-8-encoded exactly once to the final destination (encodeStart) — no post-encode /// body shift/copy. For the current path, excess = bytesWritten - charLength is expected to be @@ -888,16 +888,16 @@ public static partial class AcBinarySerializer // Tight UTF-8 upper bound for valid UTF-16 input: max 3 bytes per UTF-16 code unit. var maxBytes = charLength * 3; var isFixStr = charLength <= BinaryTypeCode.FixStrMaxLength; + var charLengthSize = isFixStr ? 0 : charLength <= byte.MaxValue ? 1 : charLength <= ushort.MaxValue ? 2 : 4; // IMPORTANT: the slot VALUE (excess) is not known before UTF-8 encode, but the slot SIZE is. // We reserve the slot by width (1/2/4) from charLength, so encodeStart is final and no body shift is needed. var slotSize = isFixStr ? 1 : BinaryTypeCode.GetUniversalStringExcessSlotSize(charLength); - var varUIntSize = isFixStr ? 0 : VarUIntSize((uint)charLength); - var headerSize = isFixStr ? 2 : 1 + varUIntSize + slotSize; + var headerSize = isFixStr ? 2 : 1 + charLengthSize + slotSize; EnsureCapacity(headerSize + maxBytes); var headerPos = _position; - var slotPos = isFixStr ? headerPos + 1 : headerPos + 1 + varUIntSize; + var slotPos = isFixStr ? headerPos + 1 : headerPos + 1 + charLengthSize; var encodeStart = headerPos + headerSize; if (isFixStr) @@ -907,10 +907,22 @@ public static partial class AcBinarySerializer } else { - // Universal long-form string marker + VarUInt(charLength) + unsigned excess slot. - BufferAt(headerPos) = BinaryTypeCode.String; - _position = headerPos + 1; - WriteVarUIntUnsafe((uint)charLength); + // Universal long-form markers with marker-coded charLength width. + if (charLengthSize == 1) + { + BufferAt(headerPos) = BinaryTypeCode.StringLen8; + BufferAt(headerPos + 1) = unchecked((byte)charLength); + } + else if (charLengthSize == 2) + { + BufferAt(headerPos) = BinaryTypeCode.StringLen16; + Unsafe.WriteUnaligned(ref BufferAt(headerPos + 1), unchecked((ushort)charLength)); + } + else + { + BufferAt(headerPos) = BinaryTypeCode.StringLen32; + Unsafe.WriteUnaligned(ref BufferAt(headerPos + 1), charLength); + } } var status = System.Text.Unicode.Utf8.FromUtf16(value.AsSpan(), _buffer.AsSpan(encodeStart, maxBytes), out _, out var bytesWritten, replaceInvalidSequences: false); diff --git a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs index 6ea4f15..43d5323 100644 --- a/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs +++ b/AyCode.Core/Serializers/Binaries/BinaryTypeCode.cs @@ -70,8 +70,8 @@ internal static class BinaryTypeCode // Marker 91 is reserved for FastWire UTF-16 payloads: // [StringUtf16][charLen:int32 LE][UTF-16 raw bytes] // - // Universal compact-mode strings use FixStr (135..166) + String (167): - // [FixStr] / [String][VarUInt charLen][unsigned excess slot][UTF-8 bytes] + // Universal compact-mode strings use FixStr (135..166) + StringLen8/16/32 (167..169): + // [FixStr] / [StringLenN][charLen:N][unsigned excess slot][UTF-8 bytes] // // Interning tiers keep dedicated markers. public const byte StringUtf16 = SlotCount + 27; // 91 — FastWire UTF-16 marker payload @@ -128,15 +128,22 @@ internal static class BinaryTypeCode public const byte FixStrAsciiMax = FixStrMax; public const int FixStrAsciiMaxLength = FixStrMaxLength; - // Long universal string marker: 167 - // Layout: [String] [VarUInt charLength] [excess slot] [bytes] - public const byte String = SlotCount + 103; // 167 - // Backward-compatible alias (old naming) - public const byte StringAscii = String; + // Long universal string markers (marker encodes charLength field width) + // Layout: + // StringLen8 (167): [marker][charLen:1][excess slot][bytes] + // StringLen16 (168): [marker][charLen:2][excess slot][bytes] + // StringLen32 (169): [marker][charLen:4][excess slot][bytes] + public const byte StringLen8 = SlotCount + 103; // 167 + public const byte StringLen16 = SlotCount + 104; // 168 + public const byte StringLen32 = SlotCount + 105; // 169 - // Reserved slot block: 168..175 (8 slots) for future string-related markers - // (e.g., StringCompressed, StringEncoded, StringMixedAscii, etc.). Keeping the 135..167 range - // dedicated to ASCII variants for clean range-checks (see IsAsciiString below). + // Backward-compatible aliases + public const byte String = StringLen32; + public const byte StringAscii = StringLen32; + + // Reserved slot block: 170..175 (6 slots) for future string-related markers + // (e.g., StringCompressed, StringEncoded, StringMixedAscii, etc.). Keeping the 135..169 range + // dedicated to universal compact string markers for clean range-checks (see IsString below). // Flag-based header markers (must be 16-aligned for flag bits in lower nibble). // Header byte structure: (marker & 0xF0) == HeaderFlagsBase, flags in (marker & 0x0F). @@ -186,7 +193,7 @@ internal static class BinaryTypeCode || typeCode == StringEmpty || typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium - || (typeCode is >= FixStrBase and <= String); // 135..167: FixStr + String + || (typeCode is >= FixStrBase and <= StringLen32); // 135..169: FixStr + StringLen8/16/32 /// /// Check if type code is the FastWire UTF-16 string marker. @@ -203,11 +210,11 @@ internal static class BinaryTypeCode => typeCode == StringInternFirstSmall || typeCode == StringInternFirstMedium; /// - /// Check if type code is any universal string marker — FixStr (short) or String (long). - /// Single contiguous range (135..167) for branch-friendly dispatch on the reader hot path. + /// Check if type code is any universal string marker — FixStr (short) or StringLen8/16/32 (long). + /// Single contiguous range (135..169) for branch-friendly dispatch on the reader hot path. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= String; + public static bool IsStringUniversalMarker(byte typeCode) => typeCode is >= FixStrBase and <= StringLen32; [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsAsciiString(byte typeCode) => IsStringUniversalMarker(typeCode);