Two-pass serialization for string/object interning

Refactor binary serializer to use a true two-pass process for string interning and object reference tracking. Adds a scan pass to identify duplicates and assigns cache indices deterministically in first-occurrence order. Updates wire format to write explicit cache indices after *First markers. Refactors InternEntry, removes marker rewriting, and updates deserializer to match new format. Improves performance, correctness, and robustness for complex object graphs with shared references and repeated strings.
This commit is contained in:
Loretta 2026-02-06 09:55:28 +01:00
parent a87dc37b8b
commit e5d4b1091f
9 changed files with 621 additions and 171 deletions

View File

@ -91,6 +91,13 @@ public abstract class AcSerializerContextBase<TMetadata, TOptions>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Dictionary<Type, TypeMetadataWrapper<TMetadata>>.ValueCollection GetWrappers() => _wrappers.Values;
/// <summary>
/// Returns the number of type metadata wrappers currently held in the
/// <c>_wrappers</c> dictionary (i.e. <c>_wrappers.Count</c>).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetWrapperCount() => _wrappers.Count;
#endregion
#region Reset

View File

@ -516,6 +516,16 @@ public static partial class AcBinaryDeserializer
_internCache![_nextCacheIndex++] = value;
}
/// <summary>
/// Stores <paramref name="value"/> in the intern cache slot <paramref name="cacheIndex"/>.
/// Intended for wire formats where the serializer writes the cache index explicitly
/// after a *First marker, so slots are not necessarily filled sequentially.
/// </summary>
/// <param name="cacheIndex">Target slot in the intern cache.</param>
/// <param name="value">Value (string or object instance) to store in that slot.</param>
/// <remarks>
/// NOTE(review): no range check is performed here and callers pass an index read
/// from the stream — confirm the cache is sized from the header before this is called.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void RegisterInternedValueAt(int cacheIndex, object value)
    => _internCache![cacheIndex] = value;
/// <summary>
/// Gets an interned string by cache index (StringInterned type code).
/// </summary>

View File

@ -60,7 +60,7 @@ public static partial class AcBinaryDeserializer
RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx));
RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt()));
RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty);
// StringInternFirst: first occurrence of interned string - read content + register in cache
// StringInternFirst: first occurrence of interned string - read cacheIndex + content + register in cache
RegisterReader(BinaryTypeCode.StringInternFirst, static (ref BinaryDeserializationContext ctx, Type _, int _) =>
ReadAndRegisterInternedString(ref ctx));
RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe());
@ -798,15 +798,17 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Read interned string (StringInternFirst marker) and register in cache.
/// Read interned string (StringInternFirst marker) and register in cache at specified index.
/// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes]
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context)
{
var cacheIndex = (int)context.ReadVarUInt();
var length = (int)context.ReadVarUInt();
if (length == 0) return string.Empty;
var str = context.ReadStringUtf8(length);
context.RegisterNextInternedValue(str);
context.RegisterInternedValueAt(cacheIndex, str);
return str;
}
@ -969,23 +971,25 @@ public static partial class AcBinaryDeserializer
/// </summary>
private static object? ReadObject(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectCore(ref context, targetType, depth, registerInCache: false);
return ReadObjectCore(ref context, targetType, depth, cacheIndex: -1);
}
/// <summary>
/// Object olvasása első előforduláskor (ObjectRefFirst marker).
/// Wire format: [ObjectRefFirst][props...]
/// Az objektumot regisztráljuk a cache-be.
/// Wire format: [ObjectRefFirst][VarUInt cacheIndex][props...]
/// Az objektumot regisztráljuk a cache-be a megadott index-re.
/// </summary>
private static object? ReadObjectRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectCore(ref context, targetType, depth, registerInCache: true);
var cacheIndex = (int)context.ReadVarUInt();
return ReadObjectCore(ref context, targetType, depth, cacheIndex: cacheIndex);
}
/// <summary>
/// Core implementation of object reading.
/// </summary>
private static object? ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
/// <param name="cacheIndex">-1 = not cached, 0+ = register at this cache index</param>
private static object? ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, int cacheIndex)
{
// Handle dictionary types
if (IsDictionaryType(targetType, out var keyType, out var valueType))
@ -999,9 +1003,9 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata);
if (instance == null) return null;
if (registerInCache)
if (cacheIndex >= 0)
{
context.RegisterNextInternedValue(instance);
context.RegisterInternedValueAt(cacheIndex, instance);
}
PopulateObject(ref context, instance, wrapper, depth, skipDefaultWrite: true);
@ -1032,22 +1036,25 @@ public static partial class AcBinaryDeserializer
/// </summary>
private static object? ReadObjectWithMetadata(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: false);
return ReadObjectWithMetadataCore(ref context, targetType, depth, cacheIndex: -1);
}
/// <summary>
/// Object olvasása UseMetadata módban, első tracked előfordulás (ObjectWithMetadataRefFirst marker).
/// Az objektumot regisztráljuk a cache-be.
/// Wire format: [ObjectWithMetadataRefFirst][VarUInt cacheIndex][propNameHash (4b)][...][props...]
/// Az objektumot regisztráljuk a cache-be a megadott index-re.
/// </summary>
private static object? ReadObjectWithMetadataRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: true);
var cacheIndex = (int)context.ReadVarUInt();
return ReadObjectWithMetadataCore(ref context, targetType, depth, cacheIndex: cacheIndex);
}
/// <summary>
/// Core implementation of ObjectWithMetadata reading.
/// </summary>
private static object? ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
/// <param name="cacheIndex">-1 = not cached, 0+ = register at this cache index</param>
private static object? ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, int cacheIndex)
{
// Inline metadata: propNameHash mindig jön
var propNameHash = context.ReadInt32Raw();
@ -1077,9 +1084,9 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata);
if (instance == null) return null;
if (registerInCache)
if (cacheIndex >= 0)
{
context.RegisterNextInternedValue(instance);
context.RegisterInternedValueAt(cacheIndex, instance);
}
// CacheMap felépítése ha még nincs
@ -1471,11 +1478,14 @@ public static partial class AcBinaryDeserializer
SkipObjectRefFirst(ref context, metaData);
return;
case BinaryTypeCode.ObjectWithMetadata:
SkipObjectWithMetadata(ref context, metaData, registerInCache: false);
SkipObjectWithMetadata(ref context, metaData, cacheIndex: -1);
return;
case BinaryTypeCode.ObjectWithMetadataRefFirst:
SkipObjectWithMetadata(ref context, metaData, registerInCache: true);
{
var cacheIdx = (int)context.ReadVarUInt();
SkipObjectWithMetadata(ref context, metaData, cacheIndex: cacheIdx);
return;
}
case BinaryTypeCode.ObjectRef:
context.ReadVarUInt();
return;
@ -1502,24 +1512,28 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Skip an interned string (StringInternFirst) - must still register in cache.
/// Skip an interned string (StringInternFirst) - must still read cacheIndex and register in cache.
/// Wire format: [StringInternFirst][VarUInt cacheIndex][VarUInt length][UTF8 bytes]
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context)
{
var cacheIndex = (int)context.ReadVarUInt();
var byteLen = (int)context.ReadVarUInt();
if (byteLen == 0) return;
var str = context.ReadStringUtf8(byteLen);
context.RegisterNextInternedValue(str);
context.RegisterInternedValueAt(cacheIndex, str);
}
/// <summary>
/// Skip ObjectRefFirst - must register placeholder in cache.
/// Skip ObjectRefFirst - must read cacheIndex and register placeholder in cache.
/// Wire format: [ObjectRefFirst][VarUInt cacheIndex][props...]
/// </summary>
private static void SkipObjectRefFirst(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
{
var cacheIndex = (int)context.ReadVarUInt();
// Register placeholder (stream position as boxed int for potential lazy load)
context.RegisterNextInternedValue(context.Position);
context.RegisterInternedValueAt(cacheIndex, context.Position);
SkipObject(ref context, metaData);
}
@ -1554,12 +1568,13 @@ public static partial class AcBinaryDeserializer
/// <summary>
/// Skip ObjectWithMetadata/ObjectWithMetadataRefFirst.
/// </summary>
private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, bool registerInCache)
/// <param name="cacheIndex">-1 = not cached, 0+ = register at this cache index</param>
private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, int cacheIndex)
{
if (registerInCache)
if (cacheIndex >= 0)
{
// Register placeholder for potential lazy load
context.RegisterNextInternedValue(context.Position);
context.RegisterInternedValueAt(cacheIndex, context.Position);
}
var propNameHash = context.ReadInt32Raw();

View File

@ -97,6 +97,18 @@ public static partial class AcBinarySerializer
private IdentityMap<string, InternEntry>? _stringInternMap;
private int _nextCacheIndex; // Next dense cache index to assign
private int _nextFirstIndex; // Next first occurrence index to assign (scan pass)
/// <summary>
/// Read/write access to the counter that hands out first-occurrence indices
/// during the scan pass. Exposed directly so callers can post-increment it.
/// </summary>
public int NextFirstIndex
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return _nextFirstIndex;
    }
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    set
    {
        _nextFirstIndex = value;
    }
}
private int[]? _propertyIndexBuffer;
private byte[]? _propertyStateBuffer;
@ -118,6 +130,12 @@ public static partial class AcBinarySerializer
// These properties delegate to Options for convenience
public bool UseStringInterning => Options.UseStringInterning != StringInterningMode.None;
// Returns true when a string of the given length falls inside the configured
// interning window: at least MinStringInternLength and, unless
// MaxStringInternLength is 0 (no upper bound), at most MaxStringInternLength.
public bool IsValidForInterningString(int strLength)
{
    if (strLength < MinStringInternLength)
        return false;

    if (MaxStringInternLength == 0)
        return true;

    return strLength <= MaxStringInternLength;
}
/// <summary>
/// True if we have interning/ref tracking (cache count needed in header).
/// </summary>
@ -173,6 +191,7 @@ public static partial class AcBinarySerializer
//_refTracker.Reset();
_stringInternMap?.Reset();
_nextCacheIndex = 0;
_nextFirstIndex = 0;
if (_propertyIndexBuffer != null && _propertyIndexBuffer.Length > PropertyIndexBufferMaxCache)
{
@ -216,37 +235,50 @@ public static partial class AcBinarySerializer
#region String Interning
/// <summary>
/// Tries to intern a string. Returns true if string was seen before (write index).
/// Returns false if first occurrence (write inline).
/// Stores marker position for later rewriting (marker-based interning, no footer).
/// Serialize pass: looks up interned string state.
/// Returns the entry ref for caller to check IsFirstWrite and update it.
/// </summary>
/// <param name="value">The string value to intern</param>
/// <param name="markerPosition">Position of the type code marker (for rewriting)</param>
/// <param name="cacheIndex">Output: cache index for 2+ occurrence, -1 for 1st occurrence</param>
/// <returns>True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline)</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetInternedString(string value, int markerPosition, out int cacheIndex)
public ref InternEntry GetInternedStringEntry(string value, out bool found)
{
    // No map means no scan pass registered any string: report "not found" and
    // hand back a null ref. Callers must check 'found' before touching the ref.
    if (_stringInternMap == null)
    {
        found = false;
        return ref System.Runtime.CompilerServices.Unsafe.NullRef<InternEntry>();
    }

    // TryAdd returns true when the key was NOT present and has just been inserted.
    var added = _stringInternMap.TryAdd(value, out var slotIndex);
    ref var entry = ref _stringInternMap.GetValueRef(slotIndex);

    if (added)
    {
        // The scan pass never saw this string (should be rare). The slot was just
        // created by this lookup, so mark it explicitly as "not cached"; otherwise
        // a later lookup of the same string would report found == true and could
        // read an uninitialised CacheIndex as a valid cache slot, emitting a
        // StringInterned reference to a value that was never written.
        entry.CacheIndex = -1;
        entry.IsFirstWrite = false;
    }

    found = !added;
    return ref entry;
}
/// <summary>
/// Scan pass: tracks a string for interning. Marks as cached on 2nd occurrence.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void ScanInternString(string value)
{
_stringInternMap ??= new IdentityMap<string, InternEntry>();
if (!_stringInternMap.TryAdd(value, out var slotIndex))
{
// 2+ occurrence: assign CacheIndex if first repeat
// 2+ occurrence: mark as cached
ref var entry = ref _stringInternMap.GetValueRef(slotIndex);
if (entry.CacheIndex < 0)
{
entry.CacheIndex = _nextCacheIndex++;
}
cacheIndex = entry.CacheIndex;
return true;
if (entry.CacheIndex == -1)
entry.CacheIndex = -2; // -2 = cached, pending CacheIndex assignment
return;
}
// 1st occurrence: store marker position for later rewriting
// 1st occurrence: store FirstIndex
ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex);
newEntry.StreamPosition = markerPosition;
newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence
cacheIndex = -1;
return false;
newEntry.FirstIndex = _nextFirstIndex++;
newEntry.CacheIndex = -1;
}
/// <summary>
@ -260,78 +292,185 @@ public static partial class AcBinarySerializer
public int GetCacheCount() => _nextCacheIndex;
/// <summary>
/// Rewrites markers for all entries with CacheIndex >= 0.
/// Called at end of serialization to mark first occurrences of interned/tracked values.
/// String: String → StringInternFirst
/// Object: Object → ObjectRefFirst, ObjectWithMetadata → ObjectWithMetadataRefFirst
///
/// IMPORTANT: CacheIndex must be reassigned in StreamPosition order, because
/// deserializer registers values sequentially as it encounters *First markers.
/// Assigns CacheIndex values in FirstIndex order after scan pass.
/// Collects all cached entries (CacheIndex == -2), sorts by FirstIndex, assigns 0, 1, 2...
/// Optimized: single pass collection, no allocations for wrapper iteration.
/// </summary>
public void RewriteMarkers()
public void AssignCacheIndicesInOrder()
{
if (_nextCacheIndex == 0) return;
// Fast path: no caching at all
if (_stringInternMap == null && !HasAnyIdentityMap())
{
return;
}
// Collect all first-occurrence positions that need markers
Span<int> positions = _nextCacheIndex <= 64
? stackalloc int[_nextCacheIndex]
: new int[_nextCacheIndex];
// Count cached entries in single pass
var cachedCount = CountAllCachedEntries();
if (cachedCount == 0)
return;
// Collect entries for sorting
Span<(int SlotIndex, int FirstIndex, int MapType)> entries = cachedCount <= 64
? stackalloc (int, int, int)[cachedCount]
: new (int, int, int)[cachedCount];
var idx = 0;
// 1. String intern entries
// Collect from string intern map (mapType = 0)
if (_stringInternMap != null)
{
var count = _stringInternMap.Count;
for (var i = 0; i < count; i++)
{
ref var entry = ref _stringInternMap.GetValueRefAt(i);
if (entry.CacheIndex >= 0)
if (entry.CacheIndex == -2)
entries[idx++] = (i, entry.FirstIndex, 0);
}
}
// Collect from wrapper identity maps - use foreach, no allocation
var wrapperIdx = 1;
foreach (var wrapper in GetWrappers())
{
var baseMapType = wrapperIdx * 3;
CollectCachedEntries(wrapper.IdentityMapInt32, baseMapType + 0, ref entries, ref idx);
CollectCachedEntries(wrapper.IdentityMapInt64, baseMapType + 1, ref entries, ref idx);
CollectCachedEntries(wrapper.IdentityMapGuid, baseMapType + 2, ref entries, ref idx);
wrapperIdx++;
}
// Sort by FirstIndex
var usedEntries = entries.Slice(0, idx);
usedEntries.Sort((a, b) => a.FirstIndex.CompareTo(b.FirstIndex));
// Assign CacheIndex in sorted order
for (var i = 0; i < idx; i++)
{
var (slotIndex, _, mapType) = usedEntries[i];
if (mapType == 0)
{
ref var entry = ref _stringInternMap!.GetValueRefAt(slotIndex);
entry.CacheIndex = _nextCacheIndex++;
entry.IsFirstWrite = true;
}
else
{
// Find wrapper by index
var wrapperIndex = mapType / 3 - 1;
var mapIndex = mapType % 3;
var wIdx = 0;
foreach (var wrapper in GetWrappers())
{
positions[idx++] = entry.StreamPosition;
if (wIdx == wrapperIndex)
{
switch (mapIndex)
{
case 0:
ref var entry32 = ref wrapper.IdentityMapInt32!.GetValueRefAt(slotIndex);
entry32.CacheIndex = _nextCacheIndex++;
entry32.IsFirstWrite = true;
break;
case 1:
ref var entry64 = ref wrapper.IdentityMapInt64!.GetValueRefAt(slotIndex);
entry64.CacheIndex = _nextCacheIndex++;
entry64.IsFirstWrite = true;
break;
case 2:
ref var entryGuid = ref wrapper.IdentityMapGuid!.GetValueRefAt(slotIndex);
entryGuid.CacheIndex = _nextCacheIndex++;
entryGuid.IsFirstWrite = true;
break;
}
break;
}
wIdx++;
}
}
}
// 2. ID tracking entries from all wrappers
foreach (var wrapper in GetWrappers())
#if DEBUG
// DEBUG: Print string intern map contents
if (_stringInternMap != null)
{
CollectPositions(wrapper.IdentityMapInt32, ref positions, ref idx);
CollectPositions(wrapper.IdentityMapInt64, ref positions, ref idx);
CollectPositions(wrapper.IdentityMapGuid, ref positions, ref idx);
}
// Sort by position to match deserializer's sequential registration order
var usedPositions = positions.Slice(0, idx);
usedPositions.Sort();
// Rewrite markers at sorted positions
for (var i = 0; i < idx; i++)
{
var pos = usedPositions[i];
var currentMarker = _buffer[pos];
_buffer[pos] = currentMarker switch
Console.WriteLine($"\n=== AssignCacheIndicesInOrder completed ===");
Console.WriteLine($"Total strings in map: {_stringInternMap.Count}");
Console.WriteLine($"Total cached (CacheIndex >= 0): {cachedCount}");
Console.WriteLine($"NextCacheIndex: {_nextCacheIndex}");
Console.WriteLine("String entries:");
for (var i = 0; i < _stringInternMap.Count; i++)
{
BinaryTypeCode.String => BinaryTypeCode.StringInternFirst,
BinaryTypeCode.Object => BinaryTypeCode.ObjectRefFirst,
BinaryTypeCode.ObjectWithMetadata => BinaryTypeCode.ObjectWithMetadataRefFirst,
_ => currentMarker
};
ref var entry = ref _stringInternMap.GetValueRefAt(i);
var key = _stringInternMap.GetKeyAt(i);
Console.WriteLine($" [{i}] Key=\"{key}\" FirstIndex={entry.FirstIndex} CacheIndex={entry.CacheIndex} IsFirstWrite={entry.IsFirstWrite}");
}
Console.WriteLine();
}
#endif
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CollectPositions<TKey>(IdentityMap<TKey, InternEntry>? map, ref Span<int> positions, ref int idx) where TKey : notnull
private bool HasAnyIdentityMap()
{
    // True as soon as one wrapper owns at least one identity map
    // (Int32, Int64 or Guid keyed); false when none of them does.
    foreach (var typeWrapper in GetWrappers())
    {
        if (typeWrapper.IdentityMapInt32 == null
            && typeWrapper.IdentityMapInt64 == null
            && typeWrapper.IdentityMapGuid == null)
        {
            continue;
        }

        return true;
    }

    return false;
}
/// <summary>
/// Counts every entry whose CacheIndex is -2 across the string intern map and
/// the Int32/Int64/Guid identity maps of all wrappers.
/// </summary>
/// <returns>Total number of entries with CacheIndex == -2.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private int CountAllCachedEntries()
{
    // The string map is counted with the same helper as the wrapper maps;
    // the helper returns 0 for a null map.
    var total = CountCachedEntries(_stringInternMap);

    foreach (var typeWrapper in GetWrappers())
    {
        total += CountCachedEntries(typeWrapper.IdentityMapInt32);
        total += CountCachedEntries(typeWrapper.IdentityMapInt64);
        total += CountCachedEntries(typeWrapper.IdentityMapGuid);
    }

    return total;
}
/// <summary>
/// Counts the entries of a single identity map whose CacheIndex is -2.
/// </summary>
/// <param name="map">Map to inspect; a null map contributes 0.</param>
/// <returns>Number of entries with CacheIndex == -2.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int CountCachedEntries<TKey>(IdentityMap<TKey, InternEntry>? map) where TKey : notnull
{
    if (map == null)
        return 0;

    var pending = 0;
    for (int slot = 0, total = map.Count; slot < total; slot++)
    {
        if (map.GetValueRefAt(slot).CacheIndex == -2)
            pending++;
    }

    return pending;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CollectCachedEntries<TKey>(
IdentityMap<TKey, InternEntry>? map,
int mapType,
ref Span<(int SlotIndex, int FirstIndex, int MapType)> entries,
ref int idx) where TKey : notnull
{
if (map == null) return;
var count = map.Count;
for (var i = 0; i < count; i++)
{
ref var entry = ref map.GetValueRefAt(i);
if (entry.CacheIndex >= 0)
{
positions[idx++] = entry.StreamPosition;
}
if (entry.CacheIndex == -2)
entries[idx++] = (i, entry.FirstIndex, mapType);
}
}
@ -340,13 +479,13 @@ public static partial class AcBinarySerializer
#region Object Reference Tracking (IId + Non-IId)
/// <summary>
/// Tries to track an IId object (Int32 Id). Uses shared _nextCacheIndex with string interning.
/// Returns true if first occurrence, false if already seen (cacheIndex assigned).
/// Tries to track an IId object (Int32 Id).
/// Returns true if first occurrence, false if already seen.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackObject(TypeMetadataWrapper<BinarySerializeTypeMetadata> wrapper, object obj, out int cacheIndex)
{
return TryTrack(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex);
return TryTrack(wrapper, obj, _nextFirstIndex++, out cacheIndex);
}
/// <summary>
@ -355,7 +494,7 @@ public static partial class AcBinarySerializer
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackObjectLong(TypeMetadataWrapper<BinarySerializeTypeMetadata> wrapper, object obj, out int cacheIndex)
{
return TryTrackLong(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex);
return TryTrackLong(wrapper, obj, _nextFirstIndex++, out cacheIndex);
}
/// <summary>
@ -364,7 +503,7 @@ public static partial class AcBinarySerializer
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackObjectGuid(TypeMetadataWrapper<BinarySerializeTypeMetadata> wrapper, object obj, out int cacheIndex)
{
return TryTrackGuid(wrapper, obj, _position, ref _nextCacheIndex, out cacheIndex);
return TryTrackGuid(wrapper, obj, _nextFirstIndex++, out cacheIndex);
}
#endregion
@ -1037,7 +1176,7 @@ public static partial class AcBinarySerializer
var cacheCount = GetCacheCount();
// Rewrite markers for first occurrences (String→StringInternFirst, Object→ObjectRefFirst, etc.)
RewriteMarkers();
//RewriteMarkers();
// Write header
var flags = BinaryTypeCode.HeaderFlagsBase;

View File

@ -0,0 +1,102 @@
using System.Collections;
using static AyCode.Core.Helpers.JsonUtilities;
namespace AyCode.Core.Serializers.Binaries;
public static partial class AcBinarySerializer
{
    /// <summary>
    /// First pass of serialization: walks the object graph to find repeated
    /// strings and repeated tracked objects, then asks the context to assign
    /// cache indices in first-occurrence order.
    /// Does nothing when the context reports that caching is disabled.
    /// </summary>
    /// <param name="value">Root value that is about to be serialized.</param>
    /// <param name="type">Runtime type of <paramref name="value"/>.</param>
    /// <param name="context">Serialization context that holds the intern/identity maps.</param>
    private static void ScanForDuplicates(object value, Type type, BinarySerializationContext context)
    {
        if (!context.HasCaching)
            return;

        ScanValue(value, type, context, 0);
        context.AssignCacheIndicesInOrder();
    }

    /// <summary>
    /// Recursively scans one value: registers internable strings, iterates
    /// collection elements, tracks reference-handled objects by id and descends
    /// into their reference properties. Traversal of a tracked object stops at
    /// its second occurrence.
    /// </summary>
    /// <param name="value">Value to scan; null is ignored.</param>
    /// <param name="type">Declared or runtime type used for metadata lookup.</param>
    /// <param name="context">Serialization context that holds the intern/identity maps.</param>
    /// <param name="depth">Current nesting depth; scanning stops beyond <c>context.MaxDepth</c>.</param>
    private static void ScanValue(object? value, Type type, BinarySerializationContext context, int depth)
    {
        if (value == null || depth > context.MaxDepth)
            return;

        // String → intern tracking (with length check to match serialize pass)
        if (value is string str)
        {
            if (context.UseStringInterning && context.IsValidForInterningString(str.Length))
            {
                context.ScanInternString(str);
            }
            return;
        }

        // Skip primitives
        if (IsPrimitiveOrStringFast(type))
            return;

        // Collection → iterate elements
        if (value is IEnumerable enumerable)
        {
            var elementType = GetCollectionElementType(type) ?? typeof(object);
            if (!IsPrimitiveOrStringFast(elementType) || elementType == typeof(string))
            {
                var nextDepth = depth + 1;
                foreach (var item in enumerable)
                {
                    // Null check is required here: item.GetType() is called below.
                    if (item != null)
                        ScanValue(item, item.GetType(), context, nextDepth);
                }
            }
            return;
        }

        // Object → ref tracking + recursive scan
        var wrapper = context.GetWrapper(type);
        var metadata = wrapper.Metadata;

        // Reference tracking for IId types (or all types when ReferenceHandling == All)
        if (context.UseTypeReferenceHandling(metadata))
        {
            // Objects carrying a default id (0 for IId Int32, 0 for Int64, Guid.Empty)
            // are never written as references by the serialize pass, so they must not
            // be tracked here either. Tracking them would make every second default-id
            // object look like a repeat: its children would be skipped by the scan and
            // a cache slot would be reserved that the serialize pass never fills.
            bool isFirst;
            switch (metadata.IdAccessorType)
            {
                case IdAccessorType.Int32:
                {
                    var id32 = wrapper.RefIdGetterInt32!(value);
                    isFirst = (metadata.IsIId && id32 == 0)
                              || wrapper.TryTrackInt32(id32, context.NextFirstIndex++, out _);
                    break;
                }
                case IdAccessorType.Int64:
                {
                    var id64 = wrapper.RefIdGetterInt64!(value);
                    isFirst = id64 == 0
                              || wrapper.TryTrackInt64(id64, context.NextFirstIndex++, out _);
                    break;
                }
                case IdAccessorType.Guid:
                {
                    var idGuid = wrapper.RefIdGetterGuid!(value);
                    isFirst = idGuid == Guid.Empty
                              || wrapper.TryTrackGuid(idGuid, context.NextFirstIndex++, out _);
                    break;
                }
                default:
                    isFirst = true;
                    break;
            }

            if (!isFirst)
                return; // 2nd occurrence → skip children
        }

        // Recursive scan on reference properties only
        var refProperties = metadata.ReferenceProperties;
        var nextDepth2 = depth + 1;
        for (var i = 0; i < refProperties.Length; i++)
        {
            var propValue = refProperties[i].GetValue(value);
            if (propValue != null)
                ScanValue(propValue, refProperties[i].PropertyType, context, nextDepth2);
        }
    }
}

View File

@ -381,11 +381,10 @@ public static partial class AcBinarySerializer
var context = BinarySerializationContextPool.Get(options);
context.WriteHeaderPlaceholder();
// Single-pass serialization with footer-based string interning
// - No header size estimation needed (strings go to footer)
// - No body shifting (footer is appended at the end)
// - Reference tracking happens inline via TryTrack during WriteObject
// - UseMetadata: per-type property hashes written to footer
// Two-pass serialization when caching is enabled:
// 1. Scan pass: identify duplicates (strings + objects), assign CacheIndex
// 2. Serialize pass: write data with references
ScanForDuplicates(value, runtimeType, context);
WriteValue(value, runtimeType, context, 0);
context.FinalizeHeaderSections();
@ -724,19 +723,35 @@ public static partial class AcBinarySerializer
&& value.Length >= context.MinStringInternLength
&& (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength))
{
// Capture marker position BEFORE writing
var markerPosition = context.Position;
if (context.TryGetInternedString(value, markerPosition, out var index))
ref var interEntry = ref context.GetInternedStringEntry(value, out bool found);
if (found)
{
// 2+ occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned);
context.WriteVarUInt((uint)index);
return;
// String was seen in scan pass
if (interEntry.CacheIndex >= 0)
{
if (interEntry.IsFirstWrite)
{
// 1st serialize occurrence of a cached string - write StringInternFirst + cacheIndex + data
interEntry.IsFirstWrite = false;
context.WriteByte(BinaryTypeCode.StringInternFirst);
context.WriteVarUInt((uint)interEntry.CacheIndex);
context.WriteStringUtf8(value);
}
else
{
// 2+ serialize occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned);
context.WriteVarUInt((uint)interEntry.CacheIndex);
}
return;
}
// CacheIndex < 0 means string appeared only once in scan - write as plain string
}
#if DEBUG
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value);
#endif
// 1st occurrence: write String marker (will be rewritten to StringInternFirst if repeated)
// String not cached (single occurrence or not found) - write plain String
context.WriteByte(BinaryTypeCode.String);
context.WriteStringUtf8(value);
return;
@ -787,62 +802,117 @@ public static partial class AcBinarySerializer
isFirstMetadataOccurrence = context.RegisterMetadataType(wrapper);
}
// Reference handling: lookup entry from scan pass, check IsFirstWrite
var cachedObjectCacheIndex = -1; // -1 = not cached, 0+ = cache index for first write
if (context.UseTypeReferenceHandling(metadata))
{
if (metadata.IsIId)
// Lookup by Id (IId types) or by object identity hash (non-IId types)
// Both use IdAccessorType.Int32 - for non-IId, RefIdGetterInt32 returns RuntimeHelpers.GetHashCode
switch (metadata.IdAccessorType)
{
// IId típus: track by Id, ObjectRef writes cacheIndex
switch (metadata.IdAccessorType)
case IdAccessorType.Int32:
{
case IdAccessorType.Int32:
if (!context.TryTrackObject(wrapper, value, out int cacheIndex32))
var id = wrapper.RefIdGetterInt32!(value);
// For IId: skip default Id (0). For non-IId (hash): hash is never 0 for valid objects
if ((!metadata.IsIId || id != 0) && wrapper.TryGetEntryInt32(id, out var slotIndex))
{
ref var entry = ref wrapper.GetEntryRefInt32(slotIndex);
if (entry.CacheIndex >= 0)
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)cacheIndex32);
return;
if (entry.IsFirstWrite)
{
entry.IsFirstWrite = false;
cachedObjectCacheIndex = entry.CacheIndex;
}
else
{
// 2+ occurrence → write ObjectRef
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)entry.CacheIndex);
return;
}
}
break;
case IdAccessorType.Int64:
if (!context.TryTrackObjectLong(wrapper, value, out int cacheIndex64))
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)cacheIndex64);
return;
}
break;
case IdAccessorType.Guid:
if (!context.TryTrackObjectGuid(wrapper, value, out int cacheIndexGuid))
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)cacheIndexGuid);
return;
}
break;
}
break;
}
}
else
{
// Non-IId + RefHandling=All: track by hashcode
if (!context.TryTrackObject(wrapper, value, out int cacheIndexHash))
case IdAccessorType.Int64:
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)cacheIndexHash);
return;
var id = wrapper.RefIdGetterInt64!(value);
if (id != 0 && wrapper.TryGetEntryInt64(id, out var slotIndex))
{
ref var entry = ref wrapper.GetEntryRefInt64(slotIndex);
if (entry.CacheIndex >= 0)
{
if (entry.IsFirstWrite)
{
entry.IsFirstWrite = false;
cachedObjectCacheIndex = entry.CacheIndex;
}
else
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)entry.CacheIndex);
return;
}
}
}
break;
}
case IdAccessorType.Guid:
{
var id = wrapper.RefIdGetterGuid!(value);
if (id != Guid.Empty && wrapper.TryGetEntryGuid(id, out var slotIndex))
{
ref var entry = ref wrapper.GetEntryRefGuid(slotIndex);
if (entry.CacheIndex >= 0)
{
if (entry.IsFirstWrite)
{
entry.IsFirstWrite = false;
cachedObjectCacheIndex = entry.CacheIndex;
}
else
{
context.WriteByte(BinaryTypeCode.ObjectRef);
context.WriteVarUInt((uint)entry.CacheIndex);
return;
}
}
}
break;
}
}
}
// Marker kiírása: UseMetadata → ObjectWithMetadata + inline metadata, egyébként Object
// Marker kiírása:
// - Cached object first occurrence: ObjectRefFirst/ObjectWithMetadataRefFirst + cacheIndex
// - Non-cached: Object/ObjectWithMetadata
if (context.UseMetadata)
{
context.WriteByte(BinaryTypeCode.ObjectWithMetadata);
if (cachedObjectCacheIndex >= 0)
{
context.WriteByte(BinaryTypeCode.ObjectWithMetadataRefFirst);
context.WriteVarUInt((uint)cachedObjectCacheIndex);
}
else
{
context.WriteByte(BinaryTypeCode.ObjectWithMetadata);
}
context.WriteInlineMetadata(wrapper.Metadata, isFirstMetadataOccurrence);
}
else
{
context.WriteByte(BinaryTypeCode.Object);
if (cachedObjectCacheIndex >= 0)
{
context.WriteByte(BinaryTypeCode.ObjectRefFirst);
context.WriteVarUInt((uint)cachedObjectCacheIndex);
}
else
{
context.WriteByte(BinaryTypeCode.Object);
}
}
// Write all properties (startIndex=0, including Id for IId types)

View File

@ -28,10 +28,12 @@ public enum IdAccessorType : byte
/// </summary>
public struct InternEntry
{
/// <summary>Position in stream where the value was first written.</summary>
// NOTE(review): the tracking code listed in this file stores both StreamPosition and FirstIndex
// on a first occurrence; presumably FirstIndex supersedes this field - confirm whether it is still read.
public int StreamPosition;
/// <summary>Order of first occurrence during scan pass (0, 1, 2, ...). Used for CacheIndex assignment.</summary>
public int FirstIndex;
/// <summary>
/// Dense cache index (0, 1, 2, ...) assigned after scan pass.
/// -1 = not cached (seen once), -2 = seen again during tracking and waiting for an index, 0+ = cache index.
/// </summary>
public int CacheIndex;
/// <summary>
/// True if this is the first serialize (write *First marker), false after (write *Ref + index).
/// The object writer clears the flag when it emits the first occurrence.
/// </summary>
public bool IsFirstWrite;
}
/// <summary>
@ -104,6 +106,36 @@ public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnul
return TryAddHash(key, out slotIndex);
}
/// <summary>
/// Read-only probe of the hash table: walks the bucket chain for <paramref name="key"/>
/// and reports the slot that holds it. Never inserts the key.
/// Intended for the serialize pass, after the scan pass has filled the map.
/// </summary>
/// <param name="key">Key to look for.</param>
/// <param name="slotIndex">Slot of the matching entry, or -1 when the key is absent.</param>
/// <returns>True when the key was found; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetEntry(TKey key, out int slotIndex)
{
    slotIndex = -1;

    // No bucket array yet means nothing was ever added.
    if (_buckets is null)
    {
        return false;
    }

    // Mask the sign bit so the modulo result is a valid bucket index.
    var bucket = (GetHashCode(key) & 0x7FFFFFFF) % _bucketsLength;
    var cursor = _buckets[bucket];
    while (cursor >= 0)
    {
        if (KeyEquals(_keys![cursor], key))
        {
            slotIndex = cursor;
            return true;
        }

        cursor = _entries![cursor].Next;
    }

    return false;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool KeyEquals(TKey a, TKey b)
{
@ -260,6 +292,16 @@ public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnul
return ref _entries![index].Value;
}
/// <summary>
/// Reads the key stored at a sequential position (0..Count-1) of the key array.
/// Meant for debugging and for iterating over every entry; the index is passed
/// straight to the array without extra validation.
/// </summary>
/// <param name="index">Sequential entry index.</param>
/// <returns>The key stored at <paramref name="index"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TKey GetKeyAt(int index) => _keys![index];
/// <summary>
/// Resets the identity map for reuse.
/// Small arrays (≤ InitialHashCapacity*5): keep and clear (faster than pool round-trip).

View File

@ -20,37 +20,36 @@ public abstract class SerializationContextBase<TMetadata, TOptions> : AcSerializ
/// <summary>
/// Tries to track an object with int RefId: reads the Id through <c>wrapper.RefIdGetterInt32</c>
/// and forwards it to <c>wrapper.TryTrackInt32</c>.
/// Returns true if first occurrence, false if already seen.
/// </summary>
/// <remarks>
/// NOTE(review): two revisions of the signature and of the forwarding call are listed back to back
/// below (streamPosition + ref nextCacheIndex vs. firstIndex). Presumably the firstIndex form is the
/// current one and the other lines are leftovers - confirm and keep only one.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrack(TypeMetadataWrapper<TMetadata> wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrack(TypeMetadataWrapper<TMetadata> wrapper, object obj, int firstIndex, out int cacheIndex)
{
// Debug-only guard: this overload is only valid for wrappers whose Id accessor is Int32.
Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Int32);
var id = wrapper.RefIdGetterInt32!(obj);
return wrapper.TryTrackInt32(id, streamPosition, ref nextCacheIndex, out cacheIndex);
return wrapper.TryTrackInt32(id, firstIndex, out cacheIndex);
}
/// <summary>
/// Tries to track an object with long RefId: reads the Id through <c>wrapper.RefIdGetterInt64</c>
/// and forwards it to <c>wrapper.TryTrackInt64</c>, returning that call's result.
/// </summary>
/// <remarks>
/// NOTE(review): two revisions of the signature and of the forwarding call are listed back to back
/// below (streamPosition + ref nextCacheIndex vs. firstIndex). Presumably the firstIndex form is the
/// current one and the other lines are leftovers - confirm and keep only one.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackLong(TypeMetadataWrapper<TMetadata> wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrackLong(TypeMetadataWrapper<TMetadata> wrapper, object obj, int firstIndex, out int cacheIndex)
{
// Debug-only guard: this overload is only valid for wrappers whose Id accessor is Int64.
Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Int64);
var id = wrapper.RefIdGetterInt64!(obj);
return wrapper.TryTrackInt64(id, streamPosition, ref nextCacheIndex, out cacheIndex);
return wrapper.TryTrackInt64(id, firstIndex, out cacheIndex);
}
/// <summary>
/// Tries to track an object with Guid RefId: reads the Id through <c>wrapper.RefIdGetterGuid</c>
/// and forwards it to <c>wrapper.TryTrackGuid</c>, returning that call's result.
/// </summary>
/// <remarks>
/// NOTE(review): two revisions of the signature and of the forwarding call are listed back to back
/// below (streamPosition + ref nextCacheIndex vs. firstIndex). Presumably the firstIndex form is the
/// current one and the other lines are leftovers - confirm and keep only one.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackGuid(TypeMetadataWrapper<TMetadata> wrapper, object obj, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrackGuid(TypeMetadataWrapper<TMetadata> wrapper, object obj, int firstIndex, out int cacheIndex)
{
// Debug-only guard: this overload is only valid for wrappers whose Id accessor is Guid.
Debug.Assert(wrapper.Metadata.IdAccessorType == IdAccessorType.Guid);
var id = wrapper.RefIdGetterGuid!(obj);
return wrapper.TryTrackGuid(id, streamPosition, ref nextCacheIndex, out cacheIndex);
return wrapper.TryTrackGuid(id, firstIndex, out cacheIndex);
}
#endregion

View File

@ -140,27 +140,28 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
/// <summary>
/// Tries to track Int32 Id. Returns true if first occurrence.
/// On repeat: assigns CacheIndex from shared counter, returns cacheIndex.
/// On 2+ occurrence: marks as cached (-2), returns existing CacheIndex.
/// CacheIndex is assigned later by AssignCacheIndicesInOrder().
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackInt32(int id, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrackInt32(int id, int firstIndex, out int cacheIndex)
{
if (id == 0) { cacheIndex = -1; return true; } // Default Id - no tracking
var map = IdentityMapInt32 ??= new IdentityMap<int, InternEntry>();
if (!map.TryAdd(id, out var slotIndex))
{
// 2+ occurrence: assign CacheIndex if first repeat
// 2+ occurrence: mark as cached
ref var entry = ref map.GetValueRef(slotIndex);
if (entry.CacheIndex < 0)
entry.CacheIndex = nextCacheIndex++;
if (entry.CacheIndex == -1)
entry.CacheIndex = -2; // -2 = cached, pending assignment
cacheIndex = entry.CacheIndex;
return false;
}
// 1st occurrence: store stream position
// 1st occurrence: store FirstIndex
ref var newEntry = ref map.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition;
newEntry.FirstIndex = firstIndex;
newEntry.CacheIndex = -1;
cacheIndex = -1;
return true;
@ -170,7 +171,7 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
/// Tries to track Int64 Id. Returns true if first occurrence.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackInt64(long id, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrackInt64(long id, int firstIndex, out int cacheIndex)
{
if (id == 0) { cacheIndex = -1; return true; }
@ -178,14 +179,14 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
if (!map.TryAdd(id, out var slotIndex))
{
ref var entry = ref map.GetValueRef(slotIndex);
if (entry.CacheIndex < 0)
entry.CacheIndex = nextCacheIndex++;
if (entry.CacheIndex == -1)
entry.CacheIndex = -2;
cacheIndex = entry.CacheIndex;
return false;
}
ref var newEntry = ref map.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition;
newEntry.FirstIndex = firstIndex;
newEntry.CacheIndex = -1;
cacheIndex = -1;
return true;
@ -195,7 +196,7 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
/// Tries to track Guid Id. Returns true if first occurrence.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryTrackGuid(Guid id, int streamPosition, ref int nextCacheIndex, out int cacheIndex)
public bool TryTrackGuid(Guid id, int firstIndex, out int cacheIndex)
{
if (id == Guid.Empty) { cacheIndex = -1; return true; }
@ -203,18 +204,83 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
if (!map.TryAdd(id, out var slotIndex))
{
ref var entry = ref map.GetValueRef(slotIndex);
if (entry.CacheIndex < 0)
entry.CacheIndex = nextCacheIndex++;
if (entry.CacheIndex == -1)
entry.CacheIndex = -2;
cacheIndex = entry.CacheIndex;
return false;
}
ref var newEntry = ref map.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition;
newEntry.FirstIndex = firstIndex;
newEntry.CacheIndex = -1;
cacheIndex = -1;
return true;
}
#endregion
#region Lookup for Serialize Pass (after scan)
/// <summary>
/// Looks up the tracking slot for an Int32 Id without adding anything.
/// Use in the serialize pass, after the scan pass has populated the map; the slot can then be
/// passed to <c>GetEntryRefInt32</c> to inspect <c>IsFirstWrite</c> / <c>CacheIndex</c>.
/// </summary>
/// <param name="id">Id to look up; 0 (default Id) is never tracked.</param>
/// <param name="slotIndex">Slot of the entry, or -1 when there is none.</param>
/// <returns>True when an entry for <paramref name="id"/> exists; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetEntryInt32(int id, out int slotIndex)
{
    if (id != 0 && IdentityMapInt32 is { } map)
    {
        return map.TryGetEntry(id, out slotIndex);
    }

    // Default Id or no map created yet: nothing to find.
    slotIndex = -1;
    return false;
}
/// <summary>
/// Looks up the tracking slot for an Int64 Id without adding anything.
/// </summary>
/// <param name="id">Id to look up; 0 (default Id) is never tracked.</param>
/// <param name="slotIndex">Slot of the entry, or -1 when there is none.</param>
/// <returns>True when an entry for <paramref name="id"/> exists; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetEntryInt64(long id, out int slotIndex)
{
    if (id != 0 && IdentityMapInt64 is { } map)
    {
        return map.TryGetEntry(id, out slotIndex);
    }

    // Default Id or no map created yet: nothing to find.
    slotIndex = -1;
    return false;
}
/// <summary>
/// Looks up the tracking slot for a Guid Id without adding anything.
/// </summary>
/// <param name="id">Id to look up; <see cref="Guid.Empty"/> is never tracked.</param>
/// <param name="slotIndex">Slot of the entry, or -1 when there is none.</param>
/// <returns>True when an entry for <paramref name="id"/> exists; otherwise false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetEntryGuid(Guid id, out int slotIndex)
{
    if (id != Guid.Empty && IdentityMapGuid is { } map)
    {
        return map.TryGetEntry(id, out slotIndex);
    }

    // Empty Id or no map created yet: nothing to find.
    slotIndex = -1;
    return false;
}
/// <summary>
/// Returns a writable reference to the <see cref="InternEntry"/> stored at the given slot of the Int32 map.
/// The map is dereferenced without a null check, so it must already exist.
/// </summary>
/// <param name="slotIndex">Slot index, e.g. as produced by <c>TryGetEntryInt32</c>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref InternEntry GetEntryRefInt32(int slotIndex)
{
    var map = IdentityMapInt32!;
    return ref map.GetValueRef(slotIndex);
}
/// <summary>
/// Returns a writable reference to the <see cref="InternEntry"/> stored at the given slot of the Int64 map.
/// The map is dereferenced without a null check, so it must already exist.
/// </summary>
/// <param name="slotIndex">Slot index, e.g. as produced by <c>TryGetEntryInt64</c>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref InternEntry GetEntryRefInt64(int slotIndex)
{
    var map = IdentityMapInt64!;
    return ref map.GetValueRef(slotIndex);
}
/// <summary>
/// Returns a writable reference to the <see cref="InternEntry"/> stored at the given slot of the Guid map.
/// The map is dereferenced without a null check, so it must already exist.
/// </summary>
/// <param name="slotIndex">Slot index, e.g. as produced by <c>TryGetEntryGuid</c>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref InternEntry GetEntryRefGuid(int slotIndex)
{
    var map = IdentityMapGuid!;
    return ref map.GetValueRef(slotIndex);
}
#endregion
}