From e5d4b1091f63d8fe34d3280df2d675170c73e5b1 Mon Sep 17 00:00:00 2001 From: Loretta Date: Fri, 6 Feb 2026 09:55:28 +0100 Subject: [PATCH] Two-pass serialization for string/object interning Refactor binary serializer to use a true two-pass process for string interning and object reference tracking. Adds a scan pass to identify duplicates and assigns cache indices deterministically in first-occurrence order. Updates wire format to write explicit cache indices after *First markers. Refactors InternEntry, removes marker rewriting, and updates deserializer to match new format. Improves performance, correctness, and robustness for complex object graphs with shared references and repeated strings. --- .../Serializers/AcSerializerContextBase.cs | 7 + ...serializer.BinaryDeserializationContext.cs | 10 + .../Binaries/AcBinaryDeserializer.cs | 65 ++-- ...rySerializer.BinarySerializationContext.cs | 277 +++++++++++++----- .../Binaries/AcBinarySerializer.ScanPass.cs | 102 +++++++ .../Binaries/AcBinarySerializer.cs | 172 +++++++---- AyCode.Core/Serializers/IdentityMap.cs | 48 ++- .../Serializers/SerializationContextBase.cs | 15 +- .../Serializers/TypeMetadataWrapper.cs | 96 +++++- 9 files changed, 621 insertions(+), 171 deletions(-) create mode 100644 AyCode.Core/Serializers/Binaries/AcBinarySerializer.ScanPass.cs diff --git a/AyCode.Core/Serializers/AcSerializerContextBase.cs b/AyCode.Core/Serializers/AcSerializerContextBase.cs index 91a7755..548d850 100644 --- a/AyCode.Core/Serializers/AcSerializerContextBase.cs +++ b/AyCode.Core/Serializers/AcSerializerContextBase.cs @@ -91,6 +91,13 @@ public abstract class AcSerializerContextBase [MethodImpl(MethodImplOptions.AggressiveInlining)] public Dictionary>.ValueCollection GetWrappers() => _wrappers.Values; + /// + /// Returns the number of entries in the wrapper dictionary. + /// Reads the dictionary's Count only; the wrappers themselves are not enumerated or copied. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetWrapperCount() => _wrappers.Count; + #endregion #region Reset diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs index 9149795..dc0d113 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.BinaryDeserializationContext.cs @@ -516,6 +516,16 @@ public static partial class AcBinaryDeserializer _internCache![_nextCacheIndex++] = value; } + /// + /// Registers an interned value at a specific cache index. + /// Used when the serializer writes explicit cache indices (non-sequential). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void RegisterInternedValueAt(int cacheIndex, object value) + { + _internCache![cacheIndex] = value; + } + /// /// Gets an interned string by cache index (StringInterned type code). 
/// diff --git a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs index 1a4f4d9..d66fb5a 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.cs @@ -60,7 +60,7 @@ public static partial class AcBinaryDeserializer RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx)); RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt())); RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty); - // StringInternFirst: first occurrence of interned string - read content + register in cache + // StringInternFirst: first occurrence of interned string - read cacheIndex + content + register in cache RegisterReader(BinaryTypeCode.StringInternFirst, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadAndRegisterInternedString(ref ctx)); RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe()); @@ -798,15 +798,17 @@ public static partial class AcBinaryDeserializer } /// - /// Read interned string (StringInternFirst marker) and register in cache. + /// Read interned string (StringInternFirst marker) and register in cache at specified index. 
+ /// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes] /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context) { + var cacheIndex = (int)context.ReadVarUInt(); var length = (int)context.ReadVarUInt(); if (length == 0) return string.Empty; var str = context.ReadStringUtf8(length); - context.RegisterNextInternedValue(str); + context.RegisterInternedValueAt(cacheIndex, str); return str; } @@ -969,23 +971,25 @@ public static partial class AcBinaryDeserializer /// private static object? ReadObject(ref BinaryDeserializationContext context, Type targetType, int depth) { - return ReadObjectCore(ref context, targetType, depth, registerInCache: false); + return ReadObjectCore(ref context, targetType, depth, cacheIndex: -1); } /// /// Object olvasása első előforduláskor (ObjectRefFirst marker). - /// Wire format: [ObjectRefFirst][props...] - /// Az objektumot regisztráljuk a cache-be. + /// Wire format: [ObjectRefFirst][VarUInt cacheIndex][props...] + /// Az objektumot regisztráljuk a cache-be a megadott index-re. /// private static object? ReadObjectRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth) { - return ReadObjectCore(ref context, targetType, depth, registerInCache: true); + var cacheIndex = (int)context.ReadVarUInt(); + return ReadObjectCore(ref context, targetType, depth, cacheIndex: cacheIndex); } /// /// Object olvasás core implementáció. /// - private static object? ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache) + /// -1 = not cached, 0+ = register at this cache index + private static object? 
ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, int cacheIndex) { // Handle dictionary types if (IsDictionaryType(targetType, out var keyType, out var valueType)) @@ -999,9 +1003,9 @@ public static partial class AcBinaryDeserializer var instance = CreateInstance(targetType, metadata); if (instance == null) return null; - if (registerInCache) + if (cacheIndex >= 0) { - context.RegisterNextInternedValue(instance); + context.RegisterInternedValueAt(cacheIndex, instance); } PopulateObject(ref context, instance, wrapper, depth, skipDefaultWrite: true); @@ -1032,22 +1036,25 @@ public static partial class AcBinaryDeserializer /// private static object? ReadObjectWithMetadata(ref BinaryDeserializationContext context, Type targetType, int depth) { - return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: false); + return ReadObjectWithMetadataCore(ref context, targetType, depth, cacheIndex: -1); } /// /// Object olvasása UseMetadata módban, első tracked előfordulás (ObjectWithMetadataRefFirst marker). - /// Az objektumot regisztráljuk a cache-be. + /// Wire format: [ObjectWithMetadataRefFirst][VarUInt cacheIndex][propNameHash (4b)][...][props...] + /// Az objektumot regisztráljuk a cache-be a megadott index-re. /// private static object? ReadObjectWithMetadataRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth) { - return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: true); + var cacheIndex = (int)context.ReadVarUInt(); + return ReadObjectWithMetadataCore(ref context, targetType, depth, cacheIndex: cacheIndex); } /// /// ObjectWithMetadata olvasás core implementáció. /// - private static object? ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache) + /// -1 = not cached, 0+ = register at this cache index + private static object? 
ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, int cacheIndex) { // Inline metadata: propNameHash mindig jön var propNameHash = context.ReadInt32Raw(); @@ -1077,9 +1084,9 @@ public static partial class AcBinaryDeserializer var instance = CreateInstance(targetType, metadata); if (instance == null) return null; - if (registerInCache) + if (cacheIndex >= 0) { - context.RegisterNextInternedValue(instance); + context.RegisterInternedValueAt(cacheIndex, instance); } // CacheMap felépítése ha még nincs @@ -1471,11 +1478,14 @@ public static partial class AcBinaryDeserializer SkipObjectRefFirst(ref context, metaData); return; case BinaryTypeCode.ObjectWithMetadata: - SkipObjectWithMetadata(ref context, metaData, registerInCache: false); + SkipObjectWithMetadata(ref context, metaData, cacheIndex: -1); return; case BinaryTypeCode.ObjectWithMetadataRefFirst: - SkipObjectWithMetadata(ref context, metaData, registerInCache: true); + { + var cacheIdx = (int)context.ReadVarUInt(); + SkipObjectWithMetadata(ref context, metaData, cacheIndex: cacheIdx); return; + } case BinaryTypeCode.ObjectRef: context.ReadVarUInt(); return; @@ -1502,24 +1512,28 @@ public static partial class AcBinaryDeserializer } /// - /// Skip an interned string (StringInternFirst) - must still register in cache. + /// Skip an interned string (StringInternFirst) - must still read cacheIndex and register in cache. + /// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes] /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context) { + var cacheIndex = (int)context.ReadVarUInt(); var byteLen = (int)context.ReadVarUInt(); if (byteLen == 0) return; var str = context.ReadStringUtf8(byteLen); - context.RegisterNextInternedValue(str); + context.RegisterInternedValueAt(cacheIndex, str); } /// - /// Skip ObjectRefFirst - must register placeholder in cache. 
+ /// Skip ObjectRefFirst - must read cacheIndex and register placeholder in cache. + /// Wire format: [ObjectRefFirst][VarUInt cacheIndex][props...] /// private static void SkipObjectRefFirst(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData) { + var cacheIndex = (int)context.ReadVarUInt(); // Register placeholder (stream position as boxed int for potential lazy load) - context.RegisterNextInternedValue(context.Position); + context.RegisterInternedValueAt(cacheIndex, context.Position); SkipObject(ref context, metaData); } @@ -1554,12 +1568,13 @@ public static partial class AcBinaryDeserializer /// /// Skip ObjectWithMetadata/ObjectWithMetadataRefFirst. /// - private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, bool registerInCache) + /// -1 = not cached, 0+ = register at this cache index + private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, int cacheIndex) { - if (registerInCache) + if (cacheIndex >= 0) { // Register placeholder for potential lazy load - context.RegisterNextInternedValue(context.Position); + context.RegisterInternedValueAt(cacheIndex, context.Position); } var propNameHash = context.ReadInt32Raw(); diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs index c0f5368..987590f 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.BinarySerializationContext.cs @@ -97,6 +97,18 @@ public static partial class AcBinarySerializer private IdentityMap? 
_stringInternMap; private int _nextCacheIndex; // Next dense cache index to assign + private int _nextFirstIndex; // Next first occurrence index to assign (scan pass) + + /// + /// Next first occurrence index for scan pass. Direct access for performance. + /// + public int NextFirstIndex + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => _nextFirstIndex; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + set => _nextFirstIndex = value; + } private int[]? _propertyIndexBuffer; private byte[]? _propertyStateBuffer; @@ -118,6 +130,12 @@ public static partial class AcBinarySerializer // These properties delegate to Options for convenience public bool UseStringInterning => Options.UseStringInterning != StringInterningMode.None; + + public bool IsValidForInterningString(int strLength) + { + return strLength >= MinStringInternLength && (MaxStringInternLength == 0 || strLength <= MaxStringInternLength); + } + /// /// True if we have interning/ref tracking (cache count needed in header). /// @@ -173,6 +191,7 @@ public static partial class AcBinarySerializer //_refTracker.Reset(); _stringInternMap?.Reset(); _nextCacheIndex = 0; + _nextFirstIndex = 0; if (_propertyIndexBuffer != null && _propertyIndexBuffer.Length > PropertyIndexBufferMaxCache) { @@ -216,37 +235,50 @@ public static partial class AcBinarySerializer #region String Interning /// - /// Tries to intern a string. Returns true if string was seen before (write index). - /// Returns false if first occurrence (write inline). - /// Stores marker position for later rewriting (marker-based interning, no footer). + /// Serialize pass: looks up interned string state. + /// Returns the entry ref for caller to check IsFirstWrite and update it. 
/// - /// The string value to intern - /// Position of the type code marker (for rewriting) - /// Output: cache index for 2+ occurrence, -1 for 1st occurrence - /// True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline) [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryGetInternedString(string value, int markerPosition, out int cacheIndex) + public ref InternEntry GetInternedStringEntry(string value, out bool found) + { + if (_stringInternMap == null) + { + found = false; + return ref System.Runtime.CompilerServices.Unsafe.NullRef(); + } + + if (_stringInternMap.TryAdd(value, out var slotIndex)) + { + // Not in map (shouldn't happen after scan pass for cached strings) + found = false; + return ref _stringInternMap.GetValueRef(slotIndex); + } + + found = true; + return ref _stringInternMap.GetValueRef(slotIndex); + } + + /// + /// Scan pass: tracks a string for interning. Marks as cached on 2nd occurrence. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ScanInternString(string value) { _stringInternMap ??= new IdentityMap(); if (!_stringInternMap.TryAdd(value, out var slotIndex)) { - // 2+ occurrence: assign CacheIndex if first repeat + // 2+ occurrence: mark as cached ref var entry = ref _stringInternMap.GetValueRef(slotIndex); - if (entry.CacheIndex < 0) - { - entry.CacheIndex = _nextCacheIndex++; - } - cacheIndex = entry.CacheIndex; - return true; + if (entry.CacheIndex == -1) + entry.CacheIndex = -2; // -2 = cached, pending CacheIndex assignment + return; } - // 1st occurrence: store marker position for later rewriting + // 1st occurrence: store FirstIndex ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex); - newEntry.StreamPosition = markerPosition; - newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence - cacheIndex = -1; - return false; + newEntry.FirstIndex = _nextFirstIndex++; + newEntry.CacheIndex = -1; } /// @@ -260,78 +292,185 @@ public static partial class 
AcBinarySerializer public int GetCacheCount() => _nextCacheIndex; /// - /// Rewrites markers for all entries with CacheIndex >= 0. - /// Called at end of serialization to mark first occurrences of interned/tracked values. - /// String: String → StringInternFirst - /// Object: Object → ObjectRefFirst, ObjectWithMetadata → ObjectWithMetadataRefFirst - /// - /// IMPORTANT: CacheIndex must be reassigned in StreamPosition order, because - /// deserializer registers values sequentially as it encounters *First markers. + /// Assigns CacheIndex values in FirstIndex order after scan pass. + /// Collects all cached entries (CacheIndex == -2), sorts by FirstIndex, assigns 0, 1, 2... + /// Optimized: single pass collection, no allocations for wrapper iteration. /// - public void RewriteMarkers() + public void AssignCacheIndicesInOrder() { - if (_nextCacheIndex == 0) return; + // Fast path: no caching at all + if (_stringInternMap == null && !HasAnyIdentityMap()) + { + return; + } - // Collect all first-occurrence positions that need markers - Span positions = _nextCacheIndex <= 64 - ? stackalloc int[_nextCacheIndex] - : new int[_nextCacheIndex]; + // Count cached entries in single pass + var cachedCount = CountAllCachedEntries(); + if (cachedCount == 0) + return; + + // Collect entries for sorting + Span<(int SlotIndex, int FirstIndex, int MapType)> entries = cachedCount <= 64 + ? stackalloc (int, int, int)[cachedCount] + : new (int, int, int)[cachedCount]; var idx = 0; - // 1. 
String intern entries + // Collect from string intern map (mapType = 0) if (_stringInternMap != null) { var count = _stringInternMap.Count; for (var i = 0; i < count; i++) { ref var entry = ref _stringInternMap.GetValueRefAt(i); - if (entry.CacheIndex >= 0) + if (entry.CacheIndex == -2) + entries[idx++] = (i, entry.FirstIndex, 0); + } + } + + // Collect from wrapper identity maps - use foreach, no allocation + var wrapperIdx = 1; + foreach (var wrapper in GetWrappers()) + { + var baseMapType = wrapperIdx * 3; + CollectCachedEntries(wrapper.IdentityMapInt32, baseMapType + 0, ref entries, ref idx); + CollectCachedEntries(wrapper.IdentityMapInt64, baseMapType + 1, ref entries, ref idx); + CollectCachedEntries(wrapper.IdentityMapGuid, baseMapType + 2, ref entries, ref idx); + wrapperIdx++; + } + + // Sort by FirstIndex + var usedEntries = entries.Slice(0, idx); + usedEntries.Sort((a, b) => a.FirstIndex.CompareTo(b.FirstIndex)); + + // Assign CacheIndex in sorted order + for (var i = 0; i < idx; i++) + { + var (slotIndex, _, mapType) = usedEntries[i]; + if (mapType == 0) + { + ref var entry = ref _stringInternMap!.GetValueRefAt(slotIndex); + entry.CacheIndex = _nextCacheIndex++; + entry.IsFirstWrite = true; + } + else + { + // Find wrapper by index + var wrapperIndex = mapType / 3 - 1; + var mapIndex = mapType % 3; + var wIdx = 0; + foreach (var wrapper in GetWrappers()) { - positions[idx++] = entry.StreamPosition; + if (wIdx == wrapperIndex) + { + switch (mapIndex) + { + case 0: + ref var entry32 = ref wrapper.IdentityMapInt32!.GetValueRefAt(slotIndex); + entry32.CacheIndex = _nextCacheIndex++; + entry32.IsFirstWrite = true; + break; + case 1: + ref var entry64 = ref wrapper.IdentityMapInt64!.GetValueRefAt(slotIndex); + entry64.CacheIndex = _nextCacheIndex++; + entry64.IsFirstWrite = true; + break; + case 2: + ref var entryGuid = ref wrapper.IdentityMapGuid!.GetValueRefAt(slotIndex); + entryGuid.CacheIndex = _nextCacheIndex++; + entryGuid.IsFirstWrite = true; + break; 
+ } + break; + } + wIdx++; } } } - // 2. ID tracking entries from all wrappers - foreach (var wrapper in GetWrappers()) +#if DEBUG + // DEBUG: Print string intern map contents + if (_stringInternMap != null) { - CollectPositions(wrapper.IdentityMapInt32, ref positions, ref idx); - CollectPositions(wrapper.IdentityMapInt64, ref positions, ref idx); - CollectPositions(wrapper.IdentityMapGuid, ref positions, ref idx); - } - - // Sort by position to match deserializer's sequential registration order - var usedPositions = positions.Slice(0, idx); - usedPositions.Sort(); - - // Rewrite markers at sorted positions - for (var i = 0; i < idx; i++) - { - var pos = usedPositions[i]; - var currentMarker = _buffer[pos]; - _buffer[pos] = currentMarker switch + Console.WriteLine($"\n=== AssignCacheIndicesInOrder completed ==="); + Console.WriteLine($"Total strings in map: {_stringInternMap.Count}"); + Console.WriteLine($"Total cached (CacheIndex >= 0): {cachedCount}"); + Console.WriteLine($"NextCacheIndex: {_nextCacheIndex}"); + Console.WriteLine("String entries:"); + for (var i = 0; i < _stringInternMap.Count; i++) { - BinaryTypeCode.String => BinaryTypeCode.StringInternFirst, - BinaryTypeCode.Object => BinaryTypeCode.ObjectRefFirst, - BinaryTypeCode.ObjectWithMetadata => BinaryTypeCode.ObjectWithMetadataRefFirst, - _ => currentMarker - }; + ref var entry = ref _stringInternMap.GetValueRefAt(i); + var key = _stringInternMap.GetKeyAt(i); + Console.WriteLine($" [{i}] Key=\"{key}\" FirstIndex={entry.FirstIndex} CacheIndex={entry.CacheIndex} IsFirstWrite={entry.IsFirstWrite}"); + } + Console.WriteLine(); } +#endif } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void CollectPositions(IdentityMap? 
map, ref Span positions, ref int idx) where TKey : notnull + private bool HasAnyIdentityMap() + { + foreach (var wrapper in GetWrappers()) + { + if (wrapper.IdentityMapInt32 != null || wrapper.IdentityMapInt64 != null || wrapper.IdentityMapGuid != null) + return true; + } + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int CountAllCachedEntries() + { + var cachedCount = 0; + if (_stringInternMap != null) + { + var count = _stringInternMap.Count; + for (var i = 0; i < count; i++) + { + ref var entry = ref _stringInternMap.GetValueRefAt(i); + if (entry.CacheIndex == -2) + cachedCount++; + } + } + foreach (var wrapper in GetWrappers()) + { + cachedCount += CountCachedEntries(wrapper.IdentityMapInt32); + cachedCount += CountCachedEntries(wrapper.IdentityMapInt64); + cachedCount += CountCachedEntries(wrapper.IdentityMapGuid); + } + return cachedCount; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int CountCachedEntries(IdentityMap? map) where TKey : notnull + { + if (map == null) return 0; + var count = 0; + var mapCount = map.Count; + for (var i = 0; i < mapCount; i++) + { + ref var entry = ref map.GetValueRefAt(i); + if (entry.CacheIndex == -2) + count++; + } + return count; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void CollectCachedEntries( + IdentityMap? map, + int mapType, + ref Span<(int SlotIndex, int FirstIndex, int MapType)> entries, + ref int idx) where TKey : notnull { if (map == null) return; var count = map.Count; for (var i = 0; i < count; i++) { ref var entry = ref map.GetValueRefAt(i); - if (entry.CacheIndex >= 0) - { - positions[idx++] = entry.StreamPosition; - } + if (entry.CacheIndex == -2) + entries[idx++] = (i, entry.FirstIndex, mapType); } } @@ -340,13 +479,13 @@ public static partial class AcBinarySerializer #region Object Reference Tracking (IId + Non-IId) /// - /// Tries to track an IId object (Int32 Id). 
Uses shared _nextCacheIndex with string interning. - /// Returns true if first occurrence, false if already seen (cacheIndex assigned). + /// Tries to track an IId object (Int32 Id). + /// Returns true if first occurrence, false if already seen. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryTrackObject(TypeMetadataWrapper wrapper, object obj, out int cacheIndex) { - return TryTrack(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex); + return TryTrack(wrapper, obj, _nextFirstIndex++, out cacheIndex); } /// @@ -355,7 +494,7 @@ public static partial class AcBinarySerializer [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryTrackObjectLong(TypeMetadataWrapper wrapper, object obj, out int cacheIndex) { - return TryTrackLong(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex); + return TryTrackLong(wrapper, obj, _nextFirstIndex++, out cacheIndex); } /// @@ -364,7 +503,7 @@ public static partial class AcBinarySerializer [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryTrackObjectGuid(TypeMetadataWrapper wrapper, object obj, out int cacheIndex) { - return TryTrackGuid(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex); + return TryTrackGuid(wrapper, obj, _nextFirstIndex++, out cacheIndex); } #endregion @@ -1037,7 +1176,7 @@ public static partial class AcBinarySerializer var cacheCount = GetCacheCount(); // Rewrite markers for first occurrences (String→StringInternFirst, Object→ObjectRefFirst, etc.) 
- RewriteMarkers(); + //RewriteMarkers(); // Write header var flags = BinaryTypeCode.HeaderFlagsBase; diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.ScanPass.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.ScanPass.cs new file mode 100644 index 0000000..5a76731 --- /dev/null +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.ScanPass.cs @@ -0,0 +1,102 @@ +using System.Collections; +using static AyCode.Core.Helpers.JsonUtilities; + +namespace AyCode.Core.Serializers.Binaries; + +public static partial class AcBinarySerializer +{ + /// + /// First pass: scans object graph to identify duplicates (strings + objects). + /// Only traverses reference properties (complex types + strings). + /// Stops traversing an object after its 2nd occurrence. + /// After scan: assigns CacheIndex in FirstIndex order. + /// + private static void ScanForDuplicates(object value, Type type, BinarySerializationContext context) + { + if (!context.HasCaching) + return; + + ScanValue(value, type, context, 0); + context.AssignCacheIndicesInOrder(); + } + + private static void ScanValue(object? value, Type type, BinarySerializationContext context, int depth) + { + if (value == null || depth > context.MaxDepth) + return; + + // String → intern tracking (with length check to match serialize pass) + if (value is string str) + { + if (context.UseStringInterning && context.IsValidForInterningString(str.Length)) + { + context.ScanInternString(str); + } + + return; + } + + // Skip primitives + if (IsPrimitiveOrStringFast(type)) + return; + + // Collection → iterate elements + if (value is IEnumerable enumerable) + { + var elementType = GetCollectionElementType(type) ?? 
typeof(object); + if (!IsPrimitiveOrStringFast(elementType) || elementType == typeof(string)) + { + var nextDepth = depth + 1; + foreach (var item in enumerable) + { + if (item != null) + ScanValue(item, item.GetType(), context, nextDepth); + } + } + + return; + } + + // Object → ref tracking + recursive scan + var wrapper = context.GetWrapper(type); + var metadata = wrapper.Metadata; + + // Reference tracking for IId types (or all types when ReferenceHandling == All) + if (context.UseTypeReferenceHandling(metadata)) + { + // Direct tracking call - avoid extra indirection through context + bool isFirst; + switch (metadata.IdAccessorType) + { + case IdAccessorType.Int32: + var id32 = wrapper.RefIdGetterInt32!(value); + isFirst = wrapper.TryTrackInt32(id32, context.NextFirstIndex++, out _); + break; + case IdAccessorType.Int64: + var id64 = wrapper.RefIdGetterInt64!(value); + isFirst = wrapper.TryTrackInt64(id64, context.NextFirstIndex++, out _); + break; + case IdAccessorType.Guid: + var idGuid = wrapper.RefIdGetterGuid!(value); + isFirst = wrapper.TryTrackGuid(idGuid, context.NextFirstIndex++, out _); + break; + default: + isFirst = true; + break; + } + + if (!isFirst) + return; // 2nd occurrence → skip children + } + + // Recursive scan on reference properties only + var refProperties = metadata.ReferenceProperties; + var nextDepth2 = depth + 1; + for (var i = 0; i < refProperties.Length; i++) + { + var propValue = refProperties[i].GetValue(value); + if (propValue != null) + ScanValue(propValue, refProperties[i].PropertyType, context, nextDepth2); + } + } +} diff --git a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs index 4355185..a76092d 100644 --- a/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs +++ b/AyCode.Core/Serializers/Binaries/AcBinarySerializer.cs @@ -381,11 +381,10 @@ public static partial class AcBinarySerializer var context = BinarySerializationContextPool.Get(options); 
context.WriteHeaderPlaceholder(); - // Single-pass serialization with footer-based string interning - // - No header size estimation needed (strings go to footer) - // - No body shifting (footer is appended at the end) - // - Reference tracking happens inline via TryTrack during WriteObject - // - UseMetadata: per-type property hashes written to footer + // Two-pass serialization when caching is enabled: + // 1. Scan pass: identify duplicates (strings + objects), assign CacheIndex + // 2. Serialize pass: write data with references + ScanForDuplicates(value, runtimeType, context); WriteValue(value, runtimeType, context, 0); context.FinalizeHeaderSections(); @@ -724,19 +723,35 @@ public static partial class AcBinarySerializer && value.Length >= context.MinStringInternLength && (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength)) { - // Capture marker position BEFORE writing - var markerPosition = context.Position; - if (context.TryGetInternedString(value, markerPosition, out var index)) + ref var interEntry = ref context.GetInternedStringEntry(value, out bool found); + + if (found) { - // 2+ occurrence: write index reference - context.WriteByte(BinaryTypeCode.StringInterned); - context.WriteVarUInt((uint)index); - return; + // String was seen in scan pass + if (interEntry.CacheIndex >= 0) + { + if (interEntry.IsFirstWrite) + { + // 1st serialize occurrence of a cached string - write StringInternFirst + cacheIndex + data + interEntry.IsFirstWrite = false; + context.WriteByte(BinaryTypeCode.StringInternFirst); + context.WriteVarUInt((uint)interEntry.CacheIndex); + context.WriteStringUtf8(value); + } + else + { + // 2+ serialize occurrence: write index reference + context.WriteByte(BinaryTypeCode.StringInterned); + context.WriteVarUInt((uint)interEntry.CacheIndex); + } + return; + } + // CacheIndex < 0 means string appeared only once in scan - write as plain string } #if DEBUG 
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value); #endif - // 1st occurrence: write String marker (will be rewritten to StringInternFirst if repeated) + // String not cached (single occurrence or not found) - write plain String context.WriteByte(BinaryTypeCode.String); context.WriteStringUtf8(value); return; @@ -787,62 +802,117 @@ public static partial class AcBinarySerializer isFirstMetadataOccurrence = context.RegisterMetadataType(wrapper); } + // Reference handling: lookup entry from scan pass, check IsFirstWrite + var cachedObjectCacheIndex = -1; // -1 = not cached, 0+ = cache index for first write if (context.UseTypeReferenceHandling(metadata)) { - if (metadata.IsIId) + // Lookup by Id (IId types) or by object identity hash (non-IId types) + // Both use IdAccessorType.Int32 - for non-IId, RefIdGetterInt32 returns RuntimeHelpers.GetHashCode + switch (metadata.IdAccessorType) { - // IId típus: track by Id, ObjectRef writes cacheIndex - switch (metadata.IdAccessorType) + case IdAccessorType.Int32: { - case IdAccessorType.Int32: - if (!context.TryTrackObject(wrapper, value, out int cacheIndex32)) + var id = wrapper.RefIdGetterInt32!(value); + // For IId: skip default Id (0). 
For non-IId (hash): hash is never 0 for valid objects + if ((!metadata.IsIId || id != 0) && wrapper.TryGetEntryInt32(id, out var slotIndex)) + { + ref var entry = ref wrapper.GetEntryRefInt32(slotIndex); + if (entry.CacheIndex >= 0) { - context.WriteByte(BinaryTypeCode.ObjectRef); - context.WriteVarUInt((uint)cacheIndex32); - return; + if (entry.IsFirstWrite) + { + entry.IsFirstWrite = false; + cachedObjectCacheIndex = entry.CacheIndex; + } + else + { + // 2+ occurrence → write ObjectRef + context.WriteByte(BinaryTypeCode.ObjectRef); + context.WriteVarUInt((uint)entry.CacheIndex); + return; + } } - break; - - case IdAccessorType.Int64: - if (!context.TryTrackObjectLong(wrapper, value, out int cacheIndex64)) - { - context.WriteByte(BinaryTypeCode.ObjectRef); - context.WriteVarUInt((uint)cacheIndex64); - return; - } - break; - - case IdAccessorType.Guid: - if (!context.TryTrackObjectGuid(wrapper, value, out int cacheIndexGuid)) - { - context.WriteByte(BinaryTypeCode.ObjectRef); - context.WriteVarUInt((uint)cacheIndexGuid); - return; - } - break; + } + break; } - } - else - { - // Non-IId + RefHandling=All: track by hashcode - if (!context.TryTrackObject(wrapper, value, out int cacheIndexHash)) + + case IdAccessorType.Int64: { - context.WriteByte(BinaryTypeCode.ObjectRef); - context.WriteVarUInt((uint)cacheIndexHash); - return; + var id = wrapper.RefIdGetterInt64!(value); + if (id != 0 && wrapper.TryGetEntryInt64(id, out var slotIndex)) + { + ref var entry = ref wrapper.GetEntryRefInt64(slotIndex); + if (entry.CacheIndex >= 0) + { + if (entry.IsFirstWrite) + { + entry.IsFirstWrite = false; + cachedObjectCacheIndex = entry.CacheIndex; + } + else + { + context.WriteByte(BinaryTypeCode.ObjectRef); + context.WriteVarUInt((uint)entry.CacheIndex); + return; + } + } + } + break; + } + + case IdAccessorType.Guid: + { + var id = wrapper.RefIdGetterGuid!(value); + if (id != Guid.Empty && wrapper.TryGetEntryGuid(id, out var slotIndex)) + { + ref var entry = ref 
wrapper.GetEntryRefGuid(slotIndex); + if (entry.CacheIndex >= 0) + { + if (entry.IsFirstWrite) + { + entry.IsFirstWrite = false; + cachedObjectCacheIndex = entry.CacheIndex; + } + else + { + context.WriteByte(BinaryTypeCode.ObjectRef); + context.WriteVarUInt((uint)entry.CacheIndex); + return; + } + } + } + break; } } } - // Marker kiírása: UseMetadata → ObjectWithMetadata + inline metadata, egyébként Object + // Marker kiírása: + // - Cached object first occurrence: ObjectRefFirst/ObjectWithMetadataRefFirst + cacheIndex + // - Non-cached: Object/ObjectWithMetadata if (context.UseMetadata) { - context.WriteByte(BinaryTypeCode.ObjectWithMetadata); + if (cachedObjectCacheIndex >= 0) + { + context.WriteByte(BinaryTypeCode.ObjectWithMetadataRefFirst); + context.WriteVarUInt((uint)cachedObjectCacheIndex); + } + else + { + context.WriteByte(BinaryTypeCode.ObjectWithMetadata); + } context.WriteInlineMetadata(wrapper.Metadata, isFirstMetadataOccurrence); } else { - context.WriteByte(BinaryTypeCode.Object); + if (cachedObjectCacheIndex >= 0) + { + context.WriteByte(BinaryTypeCode.ObjectRefFirst); + context.WriteVarUInt((uint)cachedObjectCacheIndex); + } + else + { + context.WriteByte(BinaryTypeCode.Object); + } } // Write all properties (startIndex=0, including Id for IId types) diff --git a/AyCode.Core/Serializers/IdentityMap.cs b/AyCode.Core/Serializers/IdentityMap.cs index 682774a..8a9ded6 100644 --- a/AyCode.Core/Serializers/IdentityMap.cs +++ b/AyCode.Core/Serializers/IdentityMap.cs @@ -28,10 +28,12 @@ public enum IdAccessorType : byte /// public struct InternEntry { - /// Position in stream where the value was first written. - public int StreamPosition; - /// Dense cache index (0, 1, 2, ...) assigned at 2nd occurrence. -1 = first occurrence only. + /// Order of first occurrence during scan pass (0, 1, 2, ...). Used for CacheIndex assignment. + public int FirstIndex; + /// Cache index state. 
-1 = seen once in scan pass (not cached), -2 = repeated in scan pass (cached, index assignment pending), 0+ = dense cache index (0, 1, 2, ...) assigned after scan pass. 
public int CacheIndex; + /// True if this is the first serialize (write *First marker), false after (write *Ref + index). + public bool IsFirstWrite; } /// @@ -104,6 +106,36 @@ public sealed class IdentityMap : IIdentityMap where TKey : notnul return TryAddHash(key, out slotIndex); } + /// + /// Lookup only - returns true if key exists, with slotIndex for ref access. + /// Does NOT add the key if not found. + /// Use for serialize pass after scan pass has populated the map. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetEntry(TKey key, out int slotIndex) + { + if (_buckets == null) + { + slotIndex = -1; + return false; + } + + var hash = GetHashCode(key); + var bucketIdx = (hash & 0x7FFFFFFF) % _bucketsLength; + + for (var i = _buckets[bucketIdx]; i >= 0; i = _entries![i].Next) + { + if (KeyEquals(_keys![i], key)) + { + slotIndex = i; + return true; + } + } + + slotIndex = -1; + return false; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool KeyEquals(TKey a, TKey b) { @@ -260,6 +292,16 @@ public sealed class IdentityMap : IIdentityMap where TKey : notnul return ref _entries![index].Value; } + /// + /// Returns the key at the given sequential index (0..Count-1). + /// For debugging/iteration over all entries. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TKey GetKeyAt(int index) + { + return _keys![index]; + } + /// /// Resets the identity map for reuse. /// Small arrays (≤ InitialHashCapacity*5): keep and clear (faster than pool round-trip). diff --git a/AyCode.Core/Serializers/SerializationContextBase.cs b/AyCode.Core/Serializers/SerializationContextBase.cs index b96360b..931ad18 100644 --- a/AyCode.Core/Serializers/SerializationContextBase.cs +++ b/AyCode.Core/Serializers/SerializationContextBase.cs @@ -20,37 +20,36 @@ public abstract class SerializationContextBase : AcSerializ /// /// Tries to track an object with int RefId. 
- /// Returns true if first occurrence, false if already seen (cacheIndex assigned). - /// Uses shared nextCacheIndex counter (shared with string interning). + /// Returns true if first occurrence, false if already seen. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrack(TypeMetadataWrapper wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrack(TypeMetadataWrapper wrapper, object obj, int firstIndex, out int cacheIndex) { Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Int32); var id = wrapper.RefIdGetterInt32!(obj); - return wrapper.TryTrackInt32(id, streamPosition, ref nextCacheIndex, out cacheIndex); + return wrapper.TryTrackInt32(id, firstIndex, out cacheIndex); } /// /// Tries to track an object with long RefId. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrackLong(TypeMetadataWrapper wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrackLong(TypeMetadataWrapper wrapper, object obj, int firstIndex, out int cacheIndex) { Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Int64); var id = wrapper.RefIdGetterInt64!(obj); - return wrapper.TryTrackInt64(id, streamPosition, ref nextCacheIndex, out cacheIndex); + return wrapper.TryTrackInt64(id, firstIndex, out cacheIndex); } /// /// Tries to track an object with Guid RefId. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrackGuid(TypeMetadataWrapper wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrackGuid(TypeMetadataWrapper wrapper, object obj, int firstIndex, out int cacheIndex) { Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Guid); var id = wrapper.RefIdGetterGuid!(obj); - return wrapper.TryTrackGuid(id, streamPosition, ref nextCacheIndex, out cacheIndex); + return wrapper.TryTrackGuid(id, firstIndex, out cacheIndex); } #endregion diff --git a/AyCode.Core/Serializers/TypeMetadataWrapper.cs b/AyCode.Core/Serializers/TypeMetadataWrapper.cs index 827e1c4..b1f5641 100644 --- a/AyCode.Core/Serializers/TypeMetadataWrapper.cs +++ b/AyCode.Core/Serializers/TypeMetadataWrapper.cs @@ -140,27 +140,28 @@ public sealed class TypeMetadataWrapper where TMetadata : TypeMetadat /// /// Tries to track Int32 Id. Returns true if first occurrence. - /// On repeat: assigns CacheIndex from shared counter, returns cacheIndex. + /// On 2+ occurrence: marks as cached (-2), returns existing CacheIndex. + /// CacheIndex is assigned later by AssignCacheIndicesInOrder(). 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrackInt32(int id, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrackInt32(int id, int firstIndex, out int cacheIndex) { if (id == 0) { cacheIndex = -1; return true; } // Default Id - no tracking var map = IdentityMapInt32 ??= new IdentityMap(); if (!map.TryAdd(id, out var slotIndex)) { - // 2+ occurrence: assign CacheIndex if first repeat + // 2+ occurrence: mark as cached ref var entry = ref map.GetValueRef(slotIndex); - if (entry.CacheIndex < 0) - entry.CacheIndex = nextCacheIndex++; + if (entry.CacheIndex == -1) + entry.CacheIndex = -2; // -2 = cached, pending assignment cacheIndex = entry.CacheIndex; return false; } - // 1st occurrence: store stream position + // 1st occurrence: store FirstIndex ref var newEntry = ref map.GetValueRef(slotIndex); - newEntry.StreamPosition = streamPosition; + newEntry.FirstIndex = firstIndex; newEntry.CacheIndex = -1; cacheIndex = -1; return true; @@ -170,7 +171,7 @@ public sealed class TypeMetadataWrapper where TMetadata : TypeMetadat /// Tries to track Int64 Id. Returns true if first occurrence. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrackInt64(long id, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrackInt64(long id, int firstIndex, out int cacheIndex) { if (id == 0) { cacheIndex = -1; return true; } @@ -178,14 +179,14 @@ public sealed class TypeMetadataWrapper where TMetadata : TypeMetadat if (!map.TryAdd(id, out var slotIndex)) { ref var entry = ref map.GetValueRef(slotIndex); - if (entry.CacheIndex < 0) - entry.CacheIndex = nextCacheIndex++; + if (entry.CacheIndex == -1) + entry.CacheIndex = -2; cacheIndex = entry.CacheIndex; return false; } ref var newEntry = ref map.GetValueRef(slotIndex); - newEntry.StreamPosition = streamPosition; + newEntry.FirstIndex = firstIndex; newEntry.CacheIndex = -1; cacheIndex = -1; return true; @@ -195,7 +196,7 @@ public sealed class TypeMetadataWrapper where TMetadata : TypeMetadat /// Tries to track Guid Id. Returns true if first occurrence. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryTrackGuid(Guid id, int streamPosition, ref int nextCacheIndex, out int cacheIndex) + public bool TryTrackGuid(Guid id, int firstIndex, out int cacheIndex) { if (id == Guid.Empty) { cacheIndex = -1; return true; } @@ -203,18 +204,83 @@ public sealed class TypeMetadataWrapper where TMetadata : TypeMetadat if (!map.TryAdd(id, out var slotIndex)) { ref var entry = ref map.GetValueRef(slotIndex); - if (entry.CacheIndex < 0) - entry.CacheIndex = nextCacheIndex++; + if (entry.CacheIndex == -1) + entry.CacheIndex = -2; cacheIndex = entry.CacheIndex; return false; } ref var newEntry = ref map.GetValueRef(slotIndex); - newEntry.StreamPosition = streamPosition; + newEntry.FirstIndex = firstIndex; newEntry.CacheIndex = -1; cacheIndex = -1; return true; } #endregion + + #region Lookup for Serialize Pass (after scan) + + /// + /// Lookup Int32 Id entry. Returns ref to InternEntry for IsFirstWrite check. 
+ /// Use in serialize pass after scan pass has populated the map. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetEntryInt32(int id, out int slotIndex) + { + if (id == 0 || IdentityMapInt32 == null) + { + slotIndex = -1; + return false; + } + return IdentityMapInt32.TryGetEntry(id, out slotIndex); + } + + /// + /// Lookup Int64 Id entry. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetEntryInt64(long id, out int slotIndex) + { + if (id == 0 || IdentityMapInt64 == null) + { + slotIndex = -1; + return false; + } + return IdentityMapInt64.TryGetEntry(id, out slotIndex); + } + + /// + /// Lookup Guid Id entry. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryGetEntryGuid(Guid id, out int slotIndex) + { + if (id == Guid.Empty || IdentityMapGuid == null) + { + slotIndex = -1; + return false; + } + return IdentityMapGuid.TryGetEntry(id, out slotIndex); + } + + /// + /// Get ref to InternEntry at slot for Int32 map. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ref InternEntry GetEntryRefInt32(int slotIndex) => ref IdentityMapInt32!.GetValueRef(slotIndex); + + /// + /// Get ref to InternEntry at slot for Int64 map. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ref InternEntry GetEntryRefInt64(int slotIndex) => ref IdentityMapInt64!.GetValueRef(slotIndex); + + /// + /// Get ref to InternEntry at slot for Guid map. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ref InternEntry GetEntryRefGuid(int slotIndex) => ref IdentityMapGuid!.GetValueRef(slotIndex); + + #endregion }