Refactor string interning to use IdentityMap and InternEntry

Replaces Dictionary-based string interning in AcBinarySerializer.BinarySerializationContext with a new IdentityMap<string, InternEntry> approach. Introduces the InternEntry struct for efficient tracking of stream position and cache index. Updates all related logic and iteration to use the new IdentityMap API, improving performance, memory usage, and code clarity for interned string and identity tracking during serialization.
This commit is contained in:
Loretta 2026-02-01 10:52:53 +01:00
parent 1c41eba96e
commit 23af1fc98b
2 changed files with 52 additions and 24 deletions

View File

@ -97,17 +97,7 @@ public static partial class AcBinarySerializer
// Use shared reference tracker from AcSerializerCommon
//private readonly AcSerializerCommon.SerializationReferenceTracker _refTracker = new();
/// <summary>
/// String intern entry for tracking string occurrences.
/// StreamPosition-based approach for 100% reliable cache matching.
/// </summary>
private struct StringInternEntry
{
public int StreamPosition; // Position in stream where string was first written
public int CacheIndex; // Dense cache index (0, 1, 2, ...) - assigned at 2nd occurrence; -1 = first occurrence only
}
private Dictionary<string, StringInternEntry>? _stringInternMap;
private IdentityMap<string, InternEntry>? _stringInternMap;
private int _nextCacheIndex; // Next dense cache index to assign
private Dictionary<string, int>? _propertyNames;
@ -181,7 +171,7 @@ public static partial class AcBinarySerializer
_position = 0;
//_refTracker.Reset();
ClearAndTrimIfNeeded(_stringInternMap, InitialInternCapacity * 4);
_stringInternMap?.Reset();
ClearAndTrimIfNeeded(_propertyNames, InitialPropertyNameCapacity * 4);
_propertyNameList?.Clear();
@ -240,13 +230,12 @@ public static partial class AcBinarySerializer
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetInternedString(string value, int streamPosition, out int cacheIndex)
{
_stringInternMap ??= new Dictionary<string, StringInternEntry>(InitialInternCapacity, StringComparer.Ordinal);
_stringInternMap ??= new IdentityMap<string, InternEntry>();
ref var entry = ref CollectionsMarshal.GetValueRefOrNullRef(_stringInternMap, value);
if (!Unsafe.IsNullRef(ref entry))
if (!_stringInternMap.TryAdd(value, out var slotIndex))
{
// 2+ occurrence: assign CacheIndex if first repeat
ref var entry = ref _stringInternMap.GetValueRef(slotIndex);
if (entry.CacheIndex < 0)
{
entry.CacheIndex = _nextCacheIndex++;
@ -256,11 +245,9 @@ public static partial class AcBinarySerializer
}
// 1st occurrence: store stream position
_stringInternMap[value] = new StringInternEntry
{
StreamPosition = streamPosition,
CacheIndex = -1 // Not assigned until 2nd occurrence
};
ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition;
newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence
cacheIndex = -1;
return false;
}
@ -268,7 +255,7 @@ public static partial class AcBinarySerializer
/// <summary>
/// Returns true if there are any interned strings that occurred more than once.
/// </summary>
public bool HasInternedStrings => _stringInternMap is { Count: > 0 };
public bool HasInternedStrings => _stringInternMap != null && _stringInternMap.Count > 0;
/// <summary>
/// Gets the count of strings that occurred more than once (for footer).
@ -290,8 +277,10 @@ public static partial class AcBinarySerializer
: new (int, int)[_nextCacheIndex];
var idx = 0;
foreach (var entry in _stringInternMap.Values)
var count = _stringInternMap.Count;
for (var i = 0; i < count; i++)
{
ref var entry = ref _stringInternMap.GetValueRefAt(i);
if (entry.CacheIndex >= 0)
{
entries[idx++] = (entry.StreamPosition, entry.CacheIndex);

View File

@ -22,6 +22,18 @@ public enum IdAccessorType : byte
/// <summary>Id is Guid.</summary>
Guid = 3,
}
/// <summary>
/// Common entry for tracking interned values (strings and IId objects) during serialization.
/// Used as TValue in IdentityMap&lt;TKey, InternEntry&gt;.
/// </summary>
/// <remarks>
/// Both fields are public and mutable on purpose: the string-interning path takes a <c>ref</c> to the
/// stored entry (via <c>IdentityMap.GetValueRef</c>) and writes the fields in place instead of copying the struct.
/// </remarks>
public struct InternEntry
{
/// <summary>Position in stream where the value was first written. Written when the key is first added.</summary>
public int StreamPosition;
/// <summary>
/// Dense cache index (0, 1, 2, ...) assigned at 2nd occurrence. -1 = first occurrence only.
/// Note that <c>default(InternEntry)</c> has CacheIndex == 0, not -1, so the code that adds a new
/// entry has to store -1 explicitly to mark it as "seen once".
/// </summary>
public int CacheIndex;
}
/// <summary>
/// Interface for identity maps used in serialization tracking.
/// Enables type-safe Reset() without knowing the generic type parameter.
@ -111,6 +123,23 @@ public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnul
return TryAddHash(key, out _);
}
/// <summary>
/// Registers <paramref name="key"/> in the map and hands back the slot it occupies,
/// so the caller can reach the associated value by reference through <c>GetValueRef(slotIndex)</c>.
/// Thin public wrapper over the private hash-insert routine.
/// </summary>
/// <param name="key">The key to register.</param>
/// <param name="slotIndex">
/// Slot index reported by the underlying insert. The string-interning caller uses it
/// both when the key was newly added and when it was already present.
/// </param>
/// <returns>
/// <c>true</c> when this is the first occurrence (the key was added);
/// <c>false</c> when the key had already been seen.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryAdd(TKey key, out int slotIndex) => TryAddHash(key, out slotIndex);
/// <summary>
/// Gets the number of entries currently tracked by the hash table.
/// Intended as the exclusive upper bound when iterating values with <c>GetValueRefAt</c>.
/// </summary>
public int Count
{
    get => _count;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool TryAddSmallInt(int key)
{
@ -360,7 +389,7 @@ public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnul
/// <summary>
/// Returns a reference to the value at the given slot index.
/// Use with slotIndex from TryAddHash for in-place value modification.
/// Use with slotIndex from TryAdd for in-place value modification.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref TValue GetValueRef(int slotIndex)
@ -368,6 +397,16 @@ public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnul
return ref _entries![slotIndex].Value;
}
/// <summary>
/// Returns a reference to the value stored at the given sequential index (0..Count-1).
/// Meant for walking every entry in order, e.g. when writing the footer.
/// </summary>
/// <param name="index">Position in the entry array; not range-checked beyond the array's own bounds check.</param>
/// <returns>A writable reference to the value held in that entry.</returns>
/// <remarks>
/// NOTE(review): this indexes the entry array exactly as <c>GetValueRef</c> does, so iterating
/// 0..Count-1 presumably relies on entries being stored densely in insertion order - confirm against the insert path.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ref TValue GetValueRefAt(int index) => ref _entries![index].Value;
/// <summary>
/// Resets the identity map for reuse.
/// Small arrays (≤ InitialHashCapacity*5): keep and clear (faster than pool round-trip).