From 466782007d7d2eea80c6ed63cfb1272133bee577 Mon Sep 17 00:00:00 2001 From: Loretta Date: Tue, 27 Jan 2026 13:02:16 +0100 Subject: [PATCH] Refactor string interning to use position-based cache Implement a new position-based string interning mechanism in AcBinarySerializer/AcBinaryDeserializer. This approach tracks stream positions for interned strings, ensuring 100% reliable cache matching during deserialization, even when strings are skipped or reordered. The serializer now writes (position, cacheIndex) pairs in the footer for all repeated strings, and the deserializer uses this mapping for robust cache population. Removes the old buffer-based interned string logic, updates all relevant code paths, and simplifies interned string handling for greater correctness and maintainability. Also updates benchmarks and test data construction to use the new interning mode. --- AyCode.Core.Serializers.Console/Program.cs | 168 ++++++++--------- .../Serialization/QuickBenchmark.cs | 6 +- ...serializer.BinaryDeserializationContext.cs | 117 ++++++++---- .../Binaries/AcBinaryDeserializer.cs | 54 ++++-- ...rySerializer.BinarySerializationContext.cs | 176 +++++++++--------- .../Binaries/AcBinarySerializer.cs | 19 +- 6 files changed, 311 insertions(+), 229 deletions(-) diff --git a/AyCode.Core.Serializers.Console/Program.cs b/AyCode.Core.Serializers.Console/Program.cs index 246fb91..64e1369 100644 --- a/AyCode.Core.Serializers.Console/Program.cs +++ b/AyCode.Core.Serializers.Console/Program.cs @@ -24,13 +24,13 @@ namespace AyCode.Core.Serializers.Console; public static class Program { private const string ResultsDirectory = @"H:\Applications\Aycode\Source\AyCode.Core\Test_Benchmark_Results\Benchmark"; - + #if DEBUG private const string BuildConfiguration = "Debug"; #else private const string BuildConfiguration = "Release"; #endif - + // Serializer name constants private const string SerializerMessagePack = "MessagePack"; private const string SerializerAcBinaryDefault = "AcBinary (Default)"; @@ -40,9 +40,9 @@ public static class Program private const string SerializerAcJsonDefault = "AcJson (Default)"; private const string SerializerNewtonsoftJson = "Newtonsoft.Json"; private const string SerializerSystemTextJson = "System.Text.Json"; - + private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false); - + private static int WarmupIterations = 2000; private static int TestIterations = 1000; @@ -50,16 +50,16 @@ public static class Program { // Set console encoding to UTF-8 for proper Unicode character display System.Console.OutputEncoding = System.Text.Encoding.UTF8; - + var mode = args.Length > 0 ? args[0].ToLower() : "all"; - + if (mode == "quick") { WarmupIterations = 5; TestIterations = 100; mode = "all"; } - + // Profiler mode: warmup only, then exit (for memory profiler analysis) if (mode == "profiler") { @@ -95,7 +95,7 @@ public static class Program System.Console.WriteLine("\n✓ Benchmark complete!"); } - + /// /// Profiler mode: warmup only, then EXIT immediately. /// Usage: dotnet run -- profiler @@ -107,7 +107,7 @@ public static class Program System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════╝"); System.Console.WriteLine($"Build: {BuildConfiguration} | .NET: {Environment.Version}"); System.Console.WriteLine(); - + // Create medium test data TestDataFactory.ResetIdCounter(); var sharedTag = TestDataFactory.CreateTag("SharedTag"); @@ -119,10 +119,10 @@ public static class Program pointsPerMeasurement: 4, sharedTag: sharedTag, sharedUser: sharedUser); - + var options = AcBinarySerializerOptions.WithoutReferenceHandling; options.UseStringInterning = StringInterningMode.None; - + // Warmup (fills caches) System.Console.WriteLine("Warming up (10 iterations)..."); for (var i = 0; i < 10; i++) @@ -131,7 +131,7 @@ public static class Program } System.Console.WriteLine("Warmup complete. Caches are now populated."); System.Console.WriteLine(); - + // HOT PATH - this is what the profiler should capture! System.Console.WriteLine("Running hot path (1000 iterations for profiling)..."); for (var i = 0; i < 1000; i++) @@ -140,7 +140,7 @@ public static class Program } System.Console.WriteLine("Hot path complete."); System.Console.WriteLine(); - + System.Console.WriteLine(">>> ATTACH MEMORY PROFILER NOW <<<"); System.Console.WriteLine("Press any key to exit..."); System.Console.ReadKey(intercept: true); @@ -165,11 +165,11 @@ public static class Program private static TestDataSet CreateSmallTestData() { TestDataFactory.ResetIdCounter(); - + // Create shared references - IId types (only at Order/Item level) var sharedTag = TestDataFactory.CreateTag("SharedTag"); var sharedUser = TestDataFactory.CreateUser("shareduser"); - + var order = TestDataFactory.CreateOrder( itemCount: 2, palletsPerItem: 2, @@ -177,22 +177,22 @@ public static class Program pointsPerMeasurement: 2, sharedTag: sharedTag, sharedUser: sharedUser); - + // Clear deeper level refs for realistic ~10% ratio ClearDeepLevelRefs(order); - + return new TestDataSet("Small (2x2x2x2)", order, iidRefPercent: 10); } private static TestDataSet CreateMediumTestData() { TestDataFactory.ResetIdCounter(); - + // IId shared references var sharedTag = TestDataFactory.CreateTag("SharedTag"); var sharedUser = TestDataFactory.CreateUser("shareduser"); var sharedMeta = TestDataFactory.CreateMetadata("shared", withChild: true); - + // Non-IId shared reference - create separate preferences for 2 users var sharedPreferences = new UserPreferences { @@ -222,11 +222,11 @@ public static class Program private static TestDataSet CreateLargeTestData() { TestDataFactory.ResetIdCounter(); - + // IId shared references var sharedTag = TestDataFactory.CreateTag("SharedTag"); var sharedUser = TestDataFactory.CreateUser("shareduser"); - + // Non-IId shared reference var sharedPreferences = new UserPreferences { @@ -255,11 +255,11 @@ public static class Program private static TestDataSet CreateRepeatedStringsTestData() { TestDataFactory.ResetIdCounter(); - + // IId shared references var sharedTag = TestDataFactory.CreateTag("RepeatedTag"); var sharedUser = TestDataFactory.CreateUser("repeateduser"); - + // Non-IId shared reference var sharedPreferences = new UserPreferences { @@ -269,7 +269,7 @@ public static class Program EmailDigestFrequency = "weekly" }; sharedUser.Preferences = sharedPreferences; - + // Create order with many items to test string interning on repeated property names var order = TestDataFactory.CreateOrder( itemCount: 10, @@ -279,20 +279,20 @@ public static class Program sharedTag: sharedTag, sharedUser: sharedUser, sharedPreferences: sharedPreferences); - + // Set same status and ProductName on all items to test enum and string handling foreach (var item in order.Items) { item.Status = TestStatus.Processing; item.ProductName = "CommonProductName_RepeatedForTesting"; } - + // Clear deeper level refs for realistic ~10% ratio ClearDeepLevelRefs(order); return new TestDataSet("Repeated Strings (10 items)", order, iidRefPercent: 10); } - + /// /// Clears IId shared references from Pallet, Measurement, and Point levels. /// This creates a realistic ~10% IId ref ratio (only Order and Item levels have refs). @@ -306,12 +306,12 @@ public static class Program pallet.Tag = null; pallet.Inspector = null; pallet.Category = null; - + foreach (var measurement in pallet.Measurements) { measurement.Tag = null; measurement.Operator = null; - + foreach (var point in measurement.Points) { point.Tag = null; @@ -330,12 +330,12 @@ public static class Program private static TestDataSet CreateDeepNestedTestData() { TestDataFactory.ResetIdCounter(); - + // IId shared references - only at Order and Item levels for ~10% ratio var sharedTag = TestDataFactory.CreateTag("DeepTag"); var sharedUser = TestDataFactory.CreateUser("deepuser"); var sharedCategory = TestDataFactory.CreateCategory("DeepCategory"); - + // Non-IId shared reference var sharedPreferences = new UserPreferences { @@ -345,7 +345,7 @@ public static class Program EmailDigestFrequency = "monthly" }; sharedUser.Preferences = sharedPreferences; - + var order = TestDataFactory.CreateOrder( itemCount: 2, palletsPerItem: 4, @@ -355,7 +355,7 @@ public static class Program sharedUser: sharedUser, sharedPreferences: sharedPreferences, sharedCategory: sharedCategory); - + // Clear deeper level refs for realistic ~10% ratio ClearDeepLevelRefs(order); @@ -647,7 +647,7 @@ public static class Program { public string Name { get; } public TestOrder Order { get; } - + /// /// Percentage of IId shared references in the data (0-100). /// Higher values mean more deduplication benefit for Default mode. @@ -660,12 +660,12 @@ public static class Program Order = order; IIdRefPercent = iidRefPercent; } - + /// /// Gets display name including IId ref percentage if set. /// - public string DisplayName => IIdRefPercent > 0 - ? $"{Name} [{IIdRefPercent}% IId refs]" + public string DisplayName => IIdRefPercent > 0 + ? $"{Name} [{IIdRefPercent}% IId refs]" : Name; } @@ -700,7 +700,7 @@ public static class Program var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.RoundTripTimeMs).ToList(); var msgPackResult = testResults.FirstOrDefault(r => r.SerializerName == SerializerMessagePack); var acBinaryResult = testResults.FirstOrDefault(r => r.SerializerName == SerializerAcBinaryDefault); - + System.Console.WriteLine($"\n┌─ {testData.DisplayName} ─".PadRight(98, '─') + "┐"); System.Console.WriteLine($"│ {"#",-4} │ {"Serializer",-25} │ {"Size",-10} │ {"Serialize",-12} │ {"Deserialize",-12} │ {"Round-trip",-12} │"); System.Console.WriteLine($"├{"─".PadRight(6, '─')}┼{"─".PadRight(27, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(14, '─')}┼{"─".PadRight(14, '─')}┼{"─".PadRight(14, '─')}┤"); @@ -712,18 +712,18 @@ public static class Program var ser = result.SerializeTimeMs > 0 ? $"{result.SerializeTimeMs:F2} ms" : "N/A"; var des = result.DeserializeTimeMs > 0 ? $"{result.DeserializeTimeMs:F2} ms" : "N/A"; var rt = result.RoundTripTimeMs > 0 ? $"{result.RoundTripTimeMs:F2} ms" : "N/A"; - + // Highlight MessagePack and AcBinary (Default) with win/lose colors var isHighlighted = result.SerializerName is SerializerMessagePack or SerializerAcBinaryDefault; var prefix = isHighlighted ? "│►" : "│ "; var suffix = isHighlighted ? "◄│" : " │"; - + // Color logic: Green = winner (faster), Red = loser (slower) if (isHighlighted && msgPackResult != null && acBinaryResult != null) { var isMsgPack = result.SerializerName == SerializerMessagePack; var msgPackFaster = msgPackResult.RoundTripTimeMs < acBinaryResult.RoundTripTimeMs; - + if (isMsgPack) { System.Console.ForegroundColor = msgPackFaster ? ConsoleColor.Green : ConsoleColor.Red; @@ -733,15 +733,15 @@ public static class Program System.Console.ForegroundColor = msgPackFaster ? ConsoleColor.Red : ConsoleColor.Green; } } - + System.Console.WriteLine($"{prefix}{rank++,4} │ {result.SerializerName,-25} │ {size,10} │ {ser,12} │ {des,12} │ {rt,12}{suffix}"); - + if (isHighlighted) { System.Console.ResetColor(); } } - + // Footer row: AcBinary (Default) vs MessagePack comparison per column if (msgPackResult != null && acBinaryResult != null) { @@ -749,35 +749,35 @@ public static class Program var serPct = msgPackResult.SerializeTimeMs > 0 ? (acBinaryResult.SerializeTimeMs / msgPackResult.SerializeTimeMs - 1) * 100 : 0; var desPct = msgPackResult.DeserializeTimeMs > 0 ? (acBinaryResult.DeserializeTimeMs / msgPackResult.DeserializeTimeMs - 1) * 100 : 0; var rtPct = msgPackResult.RoundTripTimeMs > 0 ? (acBinaryResult.RoundTripTimeMs / msgPackResult.RoundTripTimeMs - 1) * 100 : 0; - + System.Console.WriteLine($"├{"─".PadRight(6, '─')}┴{"─".PadRight(27, '─')}┼{"─".PadRight(12, '─')}┼{"─".PadRight(14, '─')}┼{"─".PadRight(14, '─')}┼{"─".PadRight(13, '─')}┤"); System.Console.Write($"│ ► Default vs {SerializerMessagePack,-19} │ "); - + // Size System.Console.ForegroundColor = sizePct <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.Write($"{sizePct,+9:+0;-0}%"); System.Console.ResetColor(); System.Console.Write(" │ "); - + // Serialize System.Console.ForegroundColor = serPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.Write($"{serPct,+11:+0;-0}%"); System.Console.ResetColor(); System.Console.Write(" │ "); - + // Deserialize System.Console.ForegroundColor = desPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.Write($"{desPct,+11:+0;-0}%"); System.Console.ResetColor(); System.Console.Write(" │ "); - + // Round-trip System.Console.ForegroundColor = rtPct <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.Write($"{rtPct,+10:+0;-0}%"); System.Console.ResetColor(); System.Console.WriteLine(" │"); } - + System.Console.WriteLine($"└{"─".PadRight(6, '─')}─{"─".PadRight(27, '─')}┴{"─".PadRight(12, '─')}┴{"─".PadRight(14, '─')}┴{"─".PadRight(14, '─')}┴{"─".PadRight(13, '─')}┘"); } @@ -786,10 +786,10 @@ public static class Program System.Console.WriteLine("╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗"); System.Console.WriteLine("║ SUMMARY: WINNERS ║"); System.Console.WriteLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝"); - + System.Console.WriteLine($"\n{"Category",-20} │ {"Winner",-25} │ {"Avg Value",-18}"); System.Console.WriteLine($"{"─".PadRight(20, '─')}─┼─{"─".PadRight(25, '─')}─┼─{"─".PadRight(18, '─')}"); - + // Fastest Serialize var fastestSer = results.Where(r => r.SerializeTimeMs > 0) .GroupBy(r => r.SerializerName) @@ -825,16 +825,16 @@ public static class Program .FirstOrDefault(); if (fastestRt != null) System.Console.WriteLine($"{"Fastest Round-trip",-20} │ {fastestRt.Name,-25} │ {fastestRt.AvgTime,15:F2} ms"); - + // Overall AcBinary Default vs MessagePack comparison var msgPackSerResults = results.Where(r => r.SerializerName == SerializerMessagePack && r.SerializeTimeMs > 0).ToList(); var msgPackDesResults = results.Where(r => r.SerializerName == SerializerMessagePack && r.DeserializeTimeMs > 0).ToList(); var msgPackRtResults = results.Where(r => r.SerializerName == SerializerMessagePack && r.RoundTripTimeMs > 0).ToList(); - + var acBinarySerResults = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.SerializeTimeMs > 0).ToList(); var acBinaryDesResults = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.DeserializeTimeMs > 0).ToList(); var acBinaryRtResults = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.RoundTripTimeMs > 0).ToList(); - + // Skip comparison if no data available if (msgPackRtResults.Count == 0 || acBinaryRtResults.Count == 0) { @@ -843,20 +843,20 @@ public static class Program System.Console.WriteLine(" (Comparison requires both serialize and deserialize data)"); return; } - + var msgPackAvgSer = msgPackSerResults.Count > 0 ? msgPackSerResults.Average(r => r.SerializeTimeMs) : 0; var msgPackAvgDes = msgPackDesResults.Average(r => r.DeserializeTimeMs); var msgPackAvgRt = msgPackRtResults.Average(r => r.RoundTripTimeMs); var msgPackAvgSize = results.Where(r => r.SerializerName == SerializerMessagePack).Average(r => r.SerializedSize); - + var acBinaryAvgSer = acBinarySerResults.Count > 0 ? acBinarySerResults.Average(r => r.SerializeTimeMs) : 0; var acBinaryAvgDes = acBinaryDesResults.Average(r => r.DeserializeTimeMs); var acBinaryAvgRt = acBinaryRtResults.Average(r => r.RoundTripTimeMs); var acBinaryAvgSize = results.Where(r => r.SerializerName == SerializerAcBinaryDefault).Average(r => r.SerializedSize); - + System.Console.WriteLine(); System.Console.WriteLine($"── {SerializerAcBinaryDefault} vs {SerializerMessagePack} (Overall) ──"); - + // Only show serialize comparison if data available if (msgPackAvgSer > 0 && acBinaryAvgSer > 0) { @@ -865,19 +865,19 @@ public static class Program System.Console.WriteLine($" Serialize: {serPctAll:+0;-0}% ({acBinaryAvgSer:F2} ms vs {msgPackAvgSer:F2} ms)"); System.Console.ResetColor(); } - + var desPctAll = (acBinaryAvgDes / msgPackAvgDes - 1) * 100; var rtPctAll = (acBinaryAvgRt / msgPackAvgRt - 1) * 100; var sizePctAll = (acBinaryAvgSize / msgPackAvgSize - 1) * 100; - + System.Console.ForegroundColor = desPctAll <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.WriteLine($" Deserialize: {desPctAll:+0;-0}% ({acBinaryAvgDes:F2} ms vs {msgPackAvgDes:F2} ms)"); System.Console.ResetColor(); - + System.Console.ForegroundColor = rtPctAll <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.WriteLine($" Round-trip: {rtPctAll:+0;-0}% ({acBinaryAvgRt:F2} ms vs {msgPackAvgRt:F2} ms)"); System.Console.ResetColor(); - + System.Console.ForegroundColor = sizePctAll <= 0 ? ConsoleColor.Green : ConsoleColor.Red; System.Console.WriteLine($" Size: {sizePctAll:+0;-0}% ({acBinaryAvgSize:F0} B vs {msgPackAvgSize:F0} B)"); System.Console.ResetColor(); @@ -886,7 +886,7 @@ public static class Program private static void SaveResults(List results, List testDataSets) { Directory.CreateDirectory(ResultsDirectory); - + var timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss"); var baseFileName = $"Console.FullBenchmark_{BuildConfiguration}_{timestamp}"; var logFilePath = Path.Combine(ResultsDirectory, $"{baseFileName}.log"); @@ -902,14 +902,14 @@ public static class Program outputSb.AppendLine($"║ Generated: {DateTime.Now:yyyy-MM-dd HH:mm:ss}".PadRight(100) + "║"); outputSb.AppendLine("╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝"); outputSb.AppendLine(); - + outputSb.AppendLine("=== SERIALIZED BYTES: Large (5x5x5x10) - AcBinary (Default) ==="); var serializedBytes = AcBinarySerializer.Serialize(largeTestData.Order, AcBinarySerializerOptions.Default); outputSb.AppendLine($"Size: {serializedBytes.Length:N0} bytes"); outputSb.AppendLine(); outputSb.AppendLine("Hex dump:"); outputSb.AppendLine(FormatHexDump(serializedBytes)); - + File.WriteAllText(outputFilePath, outputSb.ToString(), Utf8NoBom); System.Console.WriteLine($"✓ Binary output saved to: {outputFilePath}"); } @@ -941,13 +941,13 @@ public static class Program sb.AppendLine("=== FORMATTED RESULTS BY TEST DATA ==="); sb.AppendLine($"(►) = Highlighted: {SerializerMessagePack} (baseline) and {SerializerAcBinaryDefault}"); sb.AppendLine(); - + foreach (var testData in testDataSets) { var testResults = results.Where(r => r.TestDataName == testData.DisplayName).OrderBy(r => r.RoundTripTimeMs).ToList(); var msgPackResult = testResults.FirstOrDefault(r => r.SerializerName == SerializerMessagePack); var acBinaryResult = testResults.FirstOrDefault(r => r.SerializerName == SerializerAcBinaryDefault); - + sb.AppendLine(); sb.AppendLine($"--- {testData.DisplayName} ---"); sb.AppendLine($"{"#",-4} {"Serializer",-26} {"Size",-12} {"Serialize",-14} {"Deserialize",-14} {"Round-trip",-14}"); @@ -958,15 +958,15 @@ public static class Program { var isHighlighted = result.SerializerName is SerializerMessagePack or SerializerAcBinaryDefault; var prefix = isHighlighted ? "► " : " "; - + var size = $"{result.SerializedSize:N0}"; var ser = result.SerializeTimeMs > 0 ? $"{result.SerializeTimeMs:F2} ms" : "N/A"; var des = result.DeserializeTimeMs > 0 ? $"{result.DeserializeTimeMs:F2} ms" : "N/A"; var rt = result.RoundTripTimeMs > 0 ? $"{result.RoundTripTimeMs:F2} ms" : "N/A"; - + sb.AppendLine($"{rank++,2} {prefix}{result.SerializerName,-24} {size,-12} {ser,-14} {des,-14} {rt,-14}"); } - + // Summary row for this test data if (msgPackResult != null && acBinaryResult != null) { @@ -974,44 +974,44 @@ public static class Program var serPct = msgPackResult.SerializeTimeMs > 0 ? (acBinaryResult.SerializeTimeMs / msgPackResult.SerializeTimeMs - 1) * 100 : 0; var desPct = msgPackResult.DeserializeTimeMs > 0 ? (acBinaryResult.DeserializeTimeMs / msgPackResult.DeserializeTimeMs - 1) * 100 : 0; var rtPct = msgPackResult.RoundTripTimeMs > 0 ? (acBinaryResult.RoundTripTimeMs / msgPackResult.RoundTripTimeMs - 1) * 100 : 0; - + sb.AppendLine($" {SerializerAcBinaryDefault} vs {SerializerMessagePack}: Size {sizePct:+0;-0}% │ Ser {serPct:+0;-0}% │ Des {desPct:+0;-0}% │ RT {rtPct:+0;-0}%"); } } - + // Summary comparison sb.AppendLine(); sb.AppendLine($"=== {SerializerAcBinaryDefault} vs {SerializerMessagePack} (Overall) ==="); - + var msgPackSerResults2 = results.Where(r => r.SerializerName == SerializerMessagePack && r.SerializeTimeMs > 0).ToList(); var msgPackDesResults2 = results.Where(r => r.SerializerName == SerializerMessagePack && r.DeserializeTimeMs > 0).ToList(); var msgPackRtResults2 = results.Where(r => r.SerializerName == SerializerMessagePack && r.RoundTripTimeMs > 0).ToList(); - + var acBinarySerResults2 = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.SerializeTimeMs > 0).ToList(); var acBinaryDesResults2 = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.DeserializeTimeMs > 0).ToList(); var acBinaryRtResults2 = results.Where(r => r.SerializerName == SerializerAcBinaryDefault && r.RoundTripTimeMs > 0).ToList(); - + if (msgPackSerResults2.Count > 0 && acBinarySerResults2.Count > 0) { var msgPackAvgSer2 = msgPackSerResults2.Average(r => r.SerializeTimeMs); var acBinaryAvgSer2 = acBinarySerResults2.Average(r => r.SerializeTimeMs); sb.AppendLine($" Serialize: {((acBinaryAvgSer2 / msgPackAvgSer2 - 1) * 100):+0;-0}% ({acBinaryAvgSer2:F2} ms vs {msgPackAvgSer2:F2} ms)"); } - + if (msgPackDesResults2.Count > 0 && acBinaryDesResults2.Count > 0) { var msgPackAvgDes2 = msgPackDesResults2.Average(r => r.DeserializeTimeMs); var acBinaryAvgDes2 = acBinaryDesResults2.Average(r => r.DeserializeTimeMs); sb.AppendLine($" Deserialize: {((acBinaryAvgDes2 / msgPackAvgDes2 - 1) * 100):+0;-0}% ({acBinaryAvgDes2:F2} ms vs {msgPackAvgDes2:F2} ms)"); } - + if (msgPackRtResults2.Count > 0 && acBinaryRtResults2.Count > 0) { var msgPackAvgRt2 = msgPackRtResults2.Average(r => r.RoundTripTimeMs); var acBinaryAvgRt2 = acBinaryRtResults2.Average(r => r.RoundTripTimeMs); sb.AppendLine($" Round-trip: {((acBinaryAvgRt2 / msgPackAvgRt2 - 1) * 100):+0;-0}% ({acBinaryAvgRt2:F2} ms vs {msgPackAvgRt2:F2} ms)"); } - + var msgPackAvgSize2 = results.Where(r => r.SerializerName == SerializerMessagePack).Average(r => r.SerializedSize); var acBinaryAvgSize2 = results.Where(r => r.SerializerName == SerializerAcBinaryDefault).Average(r => r.SerializedSize); sb.AppendLine($" Size: {((acBinaryAvgSize2 / msgPackAvgSize2 - 1) * 100):+0;-0}% ({acBinaryAvgSize2:F0} B vs {msgPackAvgSize2:F0} B)"); @@ -1030,7 +1030,7 @@ public static class Program { // Offset sb.Append($"{i:X8} "); - + // Hex bytes for (var j = 0; j < bytesPerLine; j++) { @@ -1038,19 +1038,19 @@ public static class Program sb.Append($"{bytes[i + j]:X2} "); else sb.Append(" "); - + if (j == 7) sb.Append(' '); // Extra space in middle } - + sb.Append(" |"); - + // ASCII representation for (var j = 0; j < bytesPerLine && i + j < bytes.Length; j++) { var b = bytes[i + j]; sb.Append(b is >= 32 and < 127 ? (char)b : '.'); } - + sb.AppendLine("|"); } return sb.ToString(); diff --git a/AyCode.Core.Tests/Serialization/QuickBenchmark.cs b/AyCode.Core.Tests/Serialization/QuickBenchmark.cs index 2d9b9a6..34edc15 100644 --- a/AyCode.Core.Tests/Serialization/QuickBenchmark.cs +++ b/AyCode.Core.Tests/Serialization/QuickBenchmark.cs @@ -16,7 +16,7 @@ public class QuickBenchmark private static readonly MessagePackSerializerOptions MsgPackOptions = ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.None); - private const int DefaultIterations = 1000; + private const int DefaultIterations = 10; #region Helper Methods @@ -426,8 +426,8 @@ public class QuickBenchmark sharedUser: sharedUser, sharedMetadata: sharedMeta); - var singleOptions = AcBinarySerializerOptions.FastMode; - singleOptions.UseStringInterning = StringInterningMode.None; + var singleOptions = AcBinarySerializerOptions.WithoutReferenceHandling; + singleOptions.UseStringInterning = StringInterningMode.All; Console.WriteLine("=== MINIMAL WARMUP TEST ==="); Console.WriteLine(); diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs index 409ff0c..53f7b13 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs @@ -17,10 +17,22 @@ public static partial class AcBinaryDeserializer { private readonly ReadOnlySpan _buffer; private int _position; - private List? _internedStrings; private List? _propertyNames; - //private Dictionary? _objectReferences; private Dictionary? _stringCache; + + /// + /// Footer entry for position-based string interning. + /// + private struct DupEntry + { + public int Position; // Stream position where string was first written + public int CacheIndex; // Index in _internStringCache + } + + // Position-based string interning: 100% reliable cache matching + private DupEntry[]? _dupEntries; // Footer: (position, cacheIndex) pairs sorted by position + private string[]? _internStringCache; // Cache for duplicated strings only + private int _dupCheckIndex; // Current position in _dupEntries /// /// Heap-allocated context class for IId-based reference tracking. @@ -68,10 +80,14 @@ public static partial class AcBinaryDeserializer { _buffer = data; _position = 0; - _internedStrings = null; _propertyNames = null; - //_objectReferences = null; _stringCache = null; + + // Position-based string interning fields + _dupEntries = null; + _internStringCache = null; + _dupCheckIndex = 0; + HasMetadata = false; IsMergeMode = false; RemoveOrphanedItems = false; @@ -98,7 +114,6 @@ public static partial class AcBinaryDeserializer var marker = ReadByteInternal(); var hasPropertyTable = false; - var hasInternTable = false; var hasInternFooter = false; var footerPosition = 0; @@ -151,44 +166,46 @@ public static partial class AcBinaryDeserializer } } - // Legacy: interned strings in header - if (hasInternTable) - { - var internCount = (int)ReadVarUInt(); - _internedStrings = new List(internCount); - for (var i = 0; i < internCount; i++) - { - _internedStrings.Add(ReadHeaderString()); - } - } - - // Footer-based: read interned strings from footer, then return to data position + // Footer-based: read string intern indices from footer if (hasInternFooter && footerPosition > 0) { - ReadFooterStrings(footerPosition); + ReadFooterStringIndices(footerPosition); } } /// - /// Reads interned strings from footer position, then returns to data position. - /// Uses seek to footer, read strings, seek back to data. + /// Reads string intern footer: [dupCount][(position, cacheIndex), ...] + /// Position-based format for 100% reliable cache matching. /// - private void ReadFooterStrings(int footerPosition) + private void ReadFooterStringIndices(int footerPosition) { // Save current position (start of data) var dataPosition = _position; - + // Seek to footer _position = footerPosition; - - // Read interned strings - var internCount = (int)ReadVarUInt(); - _internedStrings = new List(internCount); - for (var i = 0; i < internCount; i++) + + // Read dup count and (position, cacheIndex) pairs + var dupCount = (int)ReadVarUInt(); + if (dupCount == 0) { - _internedStrings.Add(ReadHeaderString()); + _dupEntries = Array.Empty(); + _internStringCache = Array.Empty(); } - + else + { + _dupEntries = new DupEntry[dupCount]; + for (var i = 0; i < dupCount; i++) + { + var position = (int)ReadVarUInt(); + var cacheIndex = (int)ReadVarUInt(); + _dupEntries[i] = new DupEntry { Position = position, CacheIndex = cacheIndex }; + } + + // Cache size: dupCount (cacheIndex is always 0, 1, 2, ..., dupCount-1) + _internStringCache = new string[dupCount]; + } + // Seek back to data position _position = dataPosition; } @@ -540,23 +557,49 @@ public static partial class AcBinaryDeserializer _position += count; } + /// + /// Registers an interned string during body read (StringInternNew). + /// Uses position-based check for 100% reliable cache matching. + /// + /// The string value read from stream + /// Stream position BEFORE reading the string (type code position) [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int RegisterInternedString(string value) + public void RegisterInternedString(string value, int streamPosition) { - _internedStrings ??= new List(); - _internedStrings.Add(value); - return _internedStrings.Count - 1; + // Fast path: no duplicates or already processed all + var entries = _dupEntries; + if (entries == null || (uint)_dupCheckIndex >= (uint)entries.Length) + return; + + // Check if this position matches the next expected duplicate + ref var entry = ref entries[_dupCheckIndex]; + if (entry.Position == streamPosition) + { + _internStringCache![entry.CacheIndex] = value; + _dupCheckIndex++; + } } + /// + /// Gets an interned string by cache index (StringInterned type code). + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public string GetInternedString(int index) + public string GetInternedString(int cacheIndex) { - if (_internedStrings == null || (uint)index >= (uint)_internedStrings.Count) + if (_internStringCache == null || (uint)cacheIndex >= (uint)_internStringCache.Length) { - throw new AcBinaryDeserializationException($"Invalid interned string index '{index}'.", _position); + throw new AcBinaryDeserializationException($"Invalid interned string cache index '{cacheIndex}'.", _position); } - return _internedStrings[index]; + var result = _internStringCache[cacheIndex]; + if (result == null) + { + throw new AcBinaryDeserializationException( + $"Interned string at cache index '{cacheIndex}' was not populated.", + _position); + } + + return result; } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 44fb88b..bdb8d03 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -60,7 +60,12 @@ public static partial class AcBinaryDeserializer RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx)); RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt())); RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty); - RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadAndRegisterInternedString(ref ctx)); + // StringInternNew: position is captured as Position-1 (after type code was read) + RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) => + { + var streamPosition = ctx.Position - 1; // Position before type code + return ReadAndRegisterInternedString(ref ctx, streamPosition); + }); RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe()); RegisterReader(BinaryTypeCode.DateTimeOffset, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeOffsetUnsafe()); RegisterReader(BinaryTypeCode.TimeSpan, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadTimeSpanUnsafe()); @@ -136,6 +141,7 @@ public static partial class AcBinaryDeserializer { context.ReadHeader(); var result = ReadValue(ref context, targetType, 0); + // Position-based string interning - no validation needed return (T?)result; } catch (AcBinaryDeserializationException) @@ -175,7 +181,9 @@ public static partial class AcBinaryDeserializer try { context.ReadHeader(); - return ReadValue(ref context, targetType, 0); + var result = ReadValue(ref context, targetType, 0); + // Position-based string interning - no validation needed + return result; } catch (AcBinaryDeserializationException) { @@ -201,6 +209,7 @@ public static partial class AcBinaryDeserializer { context.ReadHeader(); var node = (AcExpressionNode?)ReadValue(ref context, typeof(AcExpressionNode), 0); + // Position-based string interning - no validation needed if (node == null) return null; var entityType = AcSerializerCommon.GetExpressionEntityType(targetExpressionType); @@ -269,6 +278,8 @@ public static partial class AcBinaryDeserializer $"Cannot populate type '{targetType.Name}' from binary type code {typeCode}", context.Position, targetType); } + + // Position-based string interning - no validation needed } catch (AcBinaryDeserializationException) { @@ -333,6 +344,7 @@ public static partial class AcBinaryDeserializer if (elementMetadata.IsComplexType && elementMetadata.IsIId && elementMetadata.IdGetter != null) { MergeIIdCollectionWithMetadata(ref context, targetList, elementType, wrapper, 0); + // Position-based string interning - no validation needed return; } } @@ -346,6 +358,8 @@ public static partial class AcBinaryDeserializer $"Cannot populate type '{targetType.Name}' from binary type code {typeCode}", context.Position, targetType); } + + // Position-based string interning - no validation needed } catch (AcBinaryDeserializationException) { @@ -389,6 +403,7 @@ public static partial class AcBinaryDeserializer { context.ReadHeader(); var result = ReadValue(ref context, targetType, 0); + // Position-based string interning - no validation needed return new BinaryDeserializeChain(dataArray, options, chainTracker, (T?)result); } catch @@ -433,6 +448,7 @@ public static partial class AcBinaryDeserializer { context.ReadHeader(); var result = ReadValue(ref context, targetType, 0); + // Position-based string interning - no validation needed return (TResult?)result; } catch (AcBinaryDeserializationException) { throw; } @@ -473,7 +489,8 @@ public static partial class AcBinaryDeserializer $"Cannot populate type '{targetType.Name}' from binary type code {typeCode}", context.Position, targetType); } - + + // Position-based string interning - no validation needed return this; } catch (AcBinaryDeserializationException) { throw; } @@ -764,15 +781,16 @@ public static partial class AcBinaryDeserializer } /// - /// �j intern�lt string olvas�sa �s regisztr�l�sa az intern t�bl�ba. + /// Read new interned string and register it in the intern cache. + /// Position is captured BEFORE the type code was read (by caller). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context) + private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition) { var length = (int)context.ReadVarUInt(); if (length == 0) return string.Empty; var str = context.ReadStringUtf8(length); - context.RegisterInternedString(str); + context.RegisterInternedString(str, streamPosition); return str; } @@ -780,7 +798,7 @@ public static partial class AcBinaryDeserializer /// Read a string and register it in the intern table for future references. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string ReadAndInternString(ref BinaryDeserializationContext context) + private static string ReadAndInternString(ref BinaryDeserializationContext context, int streamPosition) { var length = (int)context.ReadVarUInt(); if (length == 0) return string.Empty; @@ -788,7 +806,7 @@ public static partial class AcBinaryDeserializer // Always register strings that meet the minimum intern length threshold if (str.Length >= context.MinStringInternLength) { - context.RegisterInternedString(str); + context.RegisterInternedString(str, streamPosition); } return str; @@ -1286,6 +1304,8 @@ public static partial class AcBinaryDeserializer private static void SkipValue(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData) { + // Capture position before reading type code (needed for string interning) + var streamPosition = context.Position; var typeCode = context.ReadByte(); if (typeCode == BinaryTypeCode.Null) return; @@ -1353,8 +1373,8 @@ public static partial class AcBinaryDeserializer context.ReadVarUInt(); return; case BinaryTypeCode.StringInternNew: - // �j intern�lt string - regisztr�lni kell m�g skip eset�n is - SkipAndRegisterInternedString(ref context); + // New interned string - must register even when skipping + SkipAndRegisterInternedString(ref context, streamPosition); return; case BinaryTypeCode.ByteArray: var byteLen = (int)context.ReadVarUInt(); @@ -1394,22 +1414,26 @@ public static partial class AcBinaryDeserializer } /// - /// �j intern�lt string kihagy�sa - DE regisztr�lni kell! + /// Skip a new interned string - must still register in cache. /// + /// Deserialization context + /// Position before the type code was read [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context) + private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition) { var byteLen = (int)context.ReadVarUInt(); if (byteLen == 0) return; var str = context.ReadStringUtf8(byteLen); - context.RegisterInternedString(str); + context.RegisterInternedString(str, streamPosition); } /// /// Skip a string but still register it in the intern table if it meets the length threshold. /// + /// Deserialization context + /// Position before the type code was read [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void SkipAndInternString(ref BinaryDeserializationContext context) + private static void SkipAndInternString(ref BinaryDeserializationContext context, int streamPosition) { var byteLen = (int)context.ReadVarUInt(); if (byteLen == 0) return; @@ -1417,7 +1441,7 @@ public static partial class AcBinaryDeserializer var str = context.ReadStringUtf8(byteLen); if (str.Length >= context.MinStringInternLength) { - context.RegisterInternedString(str); + context.RegisterInternedString(str, streamPosition); } } diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index a7040b2..66034e6 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -63,13 +63,18 @@ public static partial class AcBinarySerializer // Use shared reference tracker from AcSerializerCommon //private readonly AcSerializerCommon.SerializationReferenceTracker _refTracker = new(); - private Dictionary? _internedStrings; - private List? _internedStringList; + /// + /// String intern entry for tracking string occurrences. + /// StreamPosition-based approach for 100% reliable cache matching. + /// + private struct StringInternEntry + { + public int StreamPosition; // Position in stream where string was first written + public int CacheIndex; // Dense cache index (0, 1, 2, ...) - assigned at 2nd occurrence; -1 = first occurrence only + } - // Single contiguous buffer for all interned string UTF8 bytes (reused across serializations) - private byte[]? _internedStringBuffer; - private int _internedStringBufferPos; - private List? _internedStringLengths; + private Dictionary? _stringInternMap; + private int _nextCacheIndex; // Next dense cache index to assign private Dictionary? _propertyNames; private List? _propertyNameList; @@ -139,15 +144,11 @@ public static partial class AcBinarySerializer _position = 0; //_refTracker.Reset(); - ClearAndTrimIfNeeded(_internedStrings, InitialInternCapacity * 4); + ClearAndTrimIfNeeded(_stringInternMap, InitialInternCapacity * 4); ClearAndTrimIfNeeded(_propertyNames, InitialPropertyNameCapacity * 4); _propertyNameList?.Clear(); - _internedStringList?.Clear(); - _internedStringLengths?.Clear(); - - // Reset intern buffer position (no deallocation - buffer is reused!) - _internedStringBufferPos = 0; + _nextCacheIndex = 0; if (_propertyIndexBuffer != null && _propertyIndexBuffer.Length > PropertyIndexBufferMaxCache) { @@ -183,81 +184,89 @@ public static partial class AcBinarySerializer _propertyStateBuffer = null; } - // _internedStringBuffer is a simple byte[] - no pool return needed, GC handles it - _internedStringBuffer = null; } #region String Interning /// - /// Registers a string for interning. Returns the index of the string. - /// Uses single contiguous buffer for UTF8 bytes to minimize allocations. + /// Tries to intern a string. Returns true if string was seen before (write index). + /// Returns false if first occurrence (write inline). + /// Uses stream position for 100% reliable deserializer cache matching. /// + /// The string value to intern + /// Current stream position (before writing the string) + /// Output: cache index for 2+ occurrence, -1 for 1st occurrence + /// True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline) [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int RegisterInternedString(string value) + public bool TryGetInternedString(string value, int streamPosition, out int cacheIndex) { - _internedStrings ??= new Dictionary(InitialInternCapacity, StringComparer.Ordinal); - _internedStringList ??= new List(InitialInternCapacity); - _internedStringLengths ??= new List(InitialInternCapacity); + _stringInternMap ??= new Dictionary(InitialInternCapacity, StringComparer.Ordinal); - // Single operation: lookup + conditional add - ref var index = ref CollectionsMarshal.GetValueRefOrAddDefault(_internedStrings, value, out var exists); - if (exists) + ref var entry = ref CollectionsMarshal.GetValueRefOrNullRef(_stringInternMap, value); + + if (!Unsafe.IsNullRef(ref entry)) { - return index; + // 2+ occurrence: assign CacheIndex if first repeat + if (entry.CacheIndex < 0) + { + entry.CacheIndex = _nextCacheIndex++; + } + cacheIndex = entry.CacheIndex; + return true; } - // New string - add to list and write UTF8 to buffer - index = _internedStringList.Count; - _internedStringList.Add(value); - - // Calculate UTF8 byte length - var utf8Length = Ascii.IsValid(value) ? value.Length : Utf8NoBom.GetByteCount(value); - - // Ensure intern buffer has capacity - EnsureInternBufferCapacity(utf8Length); - - // Write UTF8 bytes to contiguous buffer - if (Ascii.IsValid(value)) + // 1st occurrence: store stream position + _stringInternMap[value] = new StringInternEntry { - Ascii.FromUtf16(value.AsSpan(), _internedStringBuffer.AsSpan(_internedStringBufferPos, utf8Length), out _); - } - else - { - Utf8NoBom.GetBytes(value.AsSpan(), _internedStringBuffer.AsSpan(_internedStringBufferPos, utf8Length)); - } - - _internedStringLengths.Add(utf8Length); - _internedStringBufferPos += utf8Length; - - return index; + StreamPosition = streamPosition, + CacheIndex = -1 // Not assigned until 2nd occurrence + }; + cacheIndex = -1; + return false; } /// - /// Ensures the intern buffer has enough capacity for additional bytes. - /// Initial size is calculated from MaxStringInternLength * InitialInternCapacity. + /// Returns true if there are any interned strings that occurred more than once. /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void EnsureInternBufferCapacity(int additionalBytes) + public bool HasInternedStrings => _stringInternMap is { Count: > 0 }; + + /// + /// Gets the count of strings that occurred more than once (for footer). + /// + public int GetDupCount() => _nextCacheIndex; + + /// + /// Writes the footer with (position, cacheIndex) pairs sorted by position. + /// Position-based approach ensures 100% reliable cache matching in deserializer. + /// + public void WriteInternedStringFooter() { - var required = _internedStringBufferPos + additionalBytes; - - if (_internedStringBuffer == null) + if (_stringInternMap == null || _nextCacheIndex == 0) return; + + // Collect entries with CacheIndex >= 0 (occurred more than once) + // We need to sort by StreamPosition for deserializer sequential access + Span<(int Position, int CacheIndex)> entries = _nextCacheIndex <= 64 + ? stackalloc (int, int)[_nextCacheIndex] + : new (int, int)[_nextCacheIndex]; + + var idx = 0; + foreach (var entry in _stringInternMap.Values) { - // Initial size: MaxStringInternLength * InitialInternCapacity (e.g., 64 * 32 = 2048) - var initialSize = MaxStringInternLength * InitialInternCapacity; - _internedStringBuffer = new byte[Math.Max(initialSize, required)]; - return; + if (entry.CacheIndex >= 0) + { + entries[idx++] = (entry.StreamPosition, entry.CacheIndex); + } } - - if (required <= _internedStringBuffer.Length) + + // Sort by StreamPosition (ascending) for deserializer sequential check + entries.Sort((a, b) => a.Position.CompareTo(b.Position)); + + // Write pairs: (position, cacheIndex) + for (var i = 0; i < _nextCacheIndex; i++) { - return; + WriteVarUInt((uint)entries[i].Position); + WriteVarUInt((uint)entries[i].CacheIndex); } - - // Grow buffer (double size) - var newSize = Math.Max(_internedStringBuffer.Length * 2, required); - Array.Resize(ref _internedStringBuffer, newSize); } #endregion @@ -948,8 +957,9 @@ public static partial class AcBinarySerializer public void FinalizeHeaderSections() { var hasPropertyNames = UseMetadata && _propertyNameList is { Count: > 0 }; - var hasInternTable = UseStringInterning && _internedStringList is { Count: > 0 }; - + var dupCount = UseStringInterning ? GetDupCount() : 0; + var hasInternTable = dupCount > 0; + // Calculate property names header size (strings go to footer now) var headerPayloadSize = 0; if (hasPropertyNames) @@ -976,12 +986,12 @@ public static partial class AcBinarySerializer } } - // Footer-based string interning: write strings at the end + // Footer: write indices of strings that occurred more than once var footerPosition = 0; if (hasInternTable) { footerPosition = _position; - WriteFooterStrings(); + WriteFooterStringIndices(dupCount); } // Write header @@ -999,7 +1009,7 @@ public static partial class AcBinarySerializer _buffer[_headerPosition] = AcBinarySerializerOptions.FormatVersion; _buffer[_headerPosition + 1] = flags; - + // Always write footer position if string interning is enabled in options // (even if there's no actual interned data - footer position will be 0) if (UseStringInterning) @@ -1007,25 +1017,21 @@ public static partial class AcBinarySerializer Unsafe.WriteUnaligned(ref _buffer[_headerPosition + 2], footerPosition); } } - + /// - /// Writes interned strings to the footer (end of stream). - /// Uses contiguous buffer - no re-encoding needed. + /// Writes the footer with total count (for verification) + dup count + indices. + /// Footer format: [totalStringCount][dupCount][dupIndex0][dupIndex1]... /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void WriteFooterStrings() + /// + /// Writes footer: [dupCount][(position, cacheIndex), ...] + /// Position-based format for 100% reliable deserializer matching. + /// + private void WriteFooterStringIndices(int dupCount) { - WriteVarUInt((uint)_internedStringList!.Count); - - // Write from contiguous buffer using stored lengths - var offset = 0; - for (var i = 0; i < _internedStringLengths!.Count; i++) - { - var length = _internedStringLengths[i]; - WriteVarUInt((uint)length); - WriteBytes(_internedStringBuffer.AsSpan(offset, length)); - offset += length; - } + // Dup count + (position, cacheIndex) pairs + WriteVarUInt((uint)dupCount); + WriteInternedStringFooter(); } /// diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs index 5867477..78c3bc4 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs @@ -759,7 +759,7 @@ public static partial class AcBinarySerializer /// /// Optimized string writer with FixStr for short strings. - /// Uses stackalloc for small strings to avoid allocations. + /// New interning strategy: inline on first occurrence, index on 2+. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void WriteString(string value, BinarySerializationContext context) @@ -772,16 +772,25 @@ public static partial class AcBinarySerializer // String interning: only for strings within length range // MaxStringInternLength == 0 means no max limit - if (context.UseStringInterning + if (context.UseStringInterning && value.Length >= context.MinStringInternLength && (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength)) { - var index = context.RegisterInternedString(value); + // Capture position BEFORE writing - this is where deserializer will be when reading + var streamPosition = context.Position; + if (context.TryGetInternedString(value, streamPosition, out var index)) + { + // 2+ occurrence: write index reference + context.WriteByte(BinaryTypeCode.StringInterned); + context.WriteVarUInt((uint)index); + return; + } #if DEBUG context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value); #endif - context.WriteByte(BinaryTypeCode.StringInterned); - context.WriteVarUInt((uint)index); + // 1st occurrence: write inline with StringInternNew type code + context.WriteByte(BinaryTypeCode.StringInternNew); + context.WriteStringUtf8(value); return; }