// AyCode.Core/AyCode.Core/Serializers/IdentityMap.cs
//
// 388 lines
// 13 KiB
// C#

using System;
using System.Buffers;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace AyCode.Core.Serializers;
#region IId Reference Tracking
/// <summary>
/// Specifies the accessor type for IId.Id property to enable typed getter dispatch without boxing.
/// Stored as a single byte; the numeric values are assigned explicitly.
/// </summary>
public enum IdAccessorType : byte
{
/// <summary>
/// Default value (0). NOTE(review): presumably means no typed Id accessor applies — confirm against callers.
/// </summary>
None = 0,
/// <summary>Id is int (most common).</summary>
Int32 = 1,
/// <summary>Id is long.</summary>
Int64 = 2,
/// <summary>Id is Guid.</summary>
Guid = 3,
}
/// <summary>
/// Common entry for tracking interned values (strings and IId objects) during serialization.
/// Used as TValue in IdentityMap&lt;TKey, InternEntry&gt;.
/// </summary>
/// <remarks>
/// This is a mutable struct with public fields; it is meant to be updated in place through the
/// ref returned by the identity map rather than copied.
/// Note that <c>default(InternEntry)</c> has <see cref="CacheIndex"/> == 0 and
/// <see cref="IsFirstWrite"/> == false, so code that creates an entry must assign the
/// "not cached" sentinel (-1) explicitly if it relies on it.
/// </remarks>
public struct InternEntry
{
/// <summary>Scan visit index of first occurrence. Used to create WriteDuplicateEntry on 2nd occurrence.</summary>
public int FirstIndex;
/// <summary>Dense cache index (0, 1, 2, ...) assigned after scan pass. -1 = not cached, 0+ = cache index.</summary>
public int CacheIndex;
/// <summary>True if this is the first serialize (write *First marker), false after (write *Ref + index).</summary>
public bool IsFirstWrite;
}
/// <summary>
/// Pre-computed write instruction for duplicate strings and IId object references.
/// Built during scan pass, sorted by VisitIndex, consumed sequentially by write pass cursor.
/// Eliminates IdentityMap lookups and redundant getter calls from the write hot path.
/// </summary>
/// <remarks>
/// Plain mutable data carrier: all members are public fields and the struct has no behavior of its own.
/// </remarks>
public struct WriteDuplicateEntry
{
/// <summary>Sequential visit index matching the write pass traversal order.</summary>
public int VisitIndex;
/// <summary>Cache/intern index to write (intern index for strings, cache index for IId objects).</summary>
public int CacheMapIndex;
/// <summary>True = first occurrence (StringFirst / RefFirst). False = subsequent reference (StringRef / ObjRef).</summary>
public bool IsFirst;
/// <summary>Non-null for StringFirst: the interned string value (avoids getter call). Null for all other cases.</summary>
public string? Value;
}
/// <summary>
/// Non-generic interface for identity maps used in serialization tracking.
/// Lets a caller reset a map without knowing its generic type arguments.
/// </summary>
public interface IIdentityMap
{
/// <summary>
/// Resets the identity map so it can be reused for the next serialization.
/// </summary>
/// <param name="preRentBuckets">
/// If true, the implementation may rent its backing arrays for the next run immediately
/// instead of lazily on the next add. Defaults to false.
/// </param>
void Reset(bool preRentBuckets = false);
}
/// <summary>
/// Identity map that assigns a dense, insertion-ordered slot index to every distinct key and stores
/// one <typeparamref name="TValue"/> per key. Intended for tracking values during
/// serialization/deserialization.
/// <para>
/// Implemented as a chained hash table over three parallel arrays rented from
/// <see cref="ArrayPool{T}.Shared"/>: bucket heads, entries (value + chain link) and keys.
/// Slots are handed out sequentially (0, 1, 2, ...) and are never removed individually, so
/// indices <c>0..Count-1</c> enumerate all entries in insertion order. Adding an entry performs
/// no per-entry heap allocation.
/// </para>
/// <para>
/// The class performs no synchronization; an instance must not be used by several threads at once.
/// A ref obtained from <see cref="GetValueRef"/> or <see cref="GetValueRefAt"/> points into the
/// rented entries array and becomes stale as soon as a later <see cref="TryAdd"/> grows the table
/// or <see cref="Reset"/> is called.
/// </para>
/// </summary>
/// <typeparam name="TKey">The key type (int, long, Guid and string have specialized hash/equality paths).</typeparam>
/// <typeparam name="TValue">The value type stored next to each key.</typeparam>
public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnull
{
    /// <summary>One hash table entry: the stored value plus the link to the next entry of the same bucket.</summary>
    private struct HashSlot
    {
        public TValue Value;
        public int Next; // index of the next slot in the bucket chain (-1 = end of chain)
    }

    /// <summary>Capacity requested for the first rent when no larger capacity is remembered.</summary>
    private const int InitialHashCapacity = 16;

    /// <summary>
    /// Largest bucket array length that <see cref="Reset"/> keeps and clears in place;
    /// anything larger is handed back to the pool.
    /// </summary>
    private const int MaxRetainedCapacity = InitialHashCapacity * 5;

    private int[]? _buckets;       // bucket index -> index of the first entry in the chain (-1 = empty)
    private HashSlot[]? _entries;  // values and chain links, indexed by slot
    private TKey[]? _keys;         // keys, indexed by slot (parallel to _entries)
    private int _count;            // number of used slots
    private int _bucketsLength;    // length of _buckets (modulo base); also the remembered capacity while _buckets is null
    private int _entriesLength;    // number of slots usable before the table has to grow

    // Evaluated once per closed generic type; used to select the specialized hash/equality path.
    private static readonly bool IsInt32 = typeof(TKey) == typeof(int);
    private static readonly bool IsInt64 = typeof(TKey) == typeof(long);
    private static readonly bool IsGuid = typeof(TKey) == typeof(Guid);
    private static readonly bool IsString = typeof(TKey) == typeof(string);

    /// <summary>
    /// Number of entries in the hash table. Use with <see cref="GetValueRefAt"/> / <see cref="GetKeyAt"/> for iteration.
    /// </summary>
    public int Count => _count;

    /// <summary>Creates an empty map. Backing arrays are rented lazily on the first <see cref="TryAdd"/>.</summary>
    public IdentityMap()
    {
    }

    /// <summary>
    /// Adds <paramref name="key"/> if it is not present yet.
    /// </summary>
    /// <param name="key">The key to add or find.</param>
    /// <param name="slotIndex">
    /// Slot of the key (newly created or already existing). Pass it to <see cref="GetValueRef"/>
    /// to read or write the value. A newly created slot holds <c>default(TValue)</c>.
    /// </param>
    /// <returns>True if the key was added (first occurrence); false if it was already present.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryAdd(TKey key, out int slotIndex)
    {
        return TryAddHash(key, out slotIndex);
    }

    /// <summary>
    /// Lookup only: finds the slot of <paramref name="key"/> without adding it.
    /// Use for the serialize pass after the scan pass has populated the map.
    /// </summary>
    /// <param name="key">The key to look up.</param>
    /// <param name="slotIndex">Slot of the key when found; -1 otherwise.</param>
    /// <returns>True if the key exists in the map.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryGetEntry(TKey key, out int slotIndex)
    {
        // No arrays rented yet (never used, or released by Reset): the map is empty.
        if (_buckets is null)
        {
            slotIndex = -1;
            return false;
        }

        var bucketIdx = (GetHashCode(key) & 0x7FFFFFFF) % _bucketsLength;
        for (var i = _buckets[bucketIdx]; i >= 0; i = _entries![i].Next)
        {
            if (KeyEquals(_keys![i], key))
            {
                slotIndex = i;
                return true;
            }
        }

        slotIndex = -1;
        return false;
    }

    /// <summary>
    /// Compares two keys. int, string (ordinal), long and Guid are compared directly;
    /// every other key type goes through <see cref="EqualityComparer{T}.Default"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool KeyEquals(TKey a, TKey b)
    {
        if (IsInt32)
        {
            return Unsafe.As<TKey, int>(ref a) == Unsafe.As<TKey, int>(ref b);
        }

        if (IsString)
        {
            // Must stay ordinal to agree with the ordinal hash used in GetHashCode.
            return string.Equals(Unsafe.As<TKey, string>(ref a), Unsafe.As<TKey, string>(ref b), StringComparison.Ordinal);
        }

        if (IsInt64)
        {
            return Unsafe.As<TKey, long>(ref a) == Unsafe.As<TKey, long>(ref b);
        }

        if (IsGuid)
        {
            return Unsafe.As<TKey, Guid>(ref a) == Unsafe.As<TKey, Guid>(ref b);
        }

        return EqualityComparer<TKey>.Default.Equals(a, b);
    }

    /// <summary>
    /// Implementation of <see cref="TryAdd"/>: searches the bucket chain for the key and appends a
    /// new slot at index <c>Count</c> when it is not found, growing the table first if it is full.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private bool TryAddHash(TKey key, out int slotIndex)
    {
        var hash = GetHashCode(key);

        // Lazy init: arrays are rented on first use (and again after Reset released them).
        if (_buckets is null)
        {
            InitHashTable(InitialHashCapacity);
        }

        var bucketIdx = (hash & 0x7FFFFFFF) % _bucketsLength;

        // Search the chain for an existing entry.
        for (var i = _buckets![bucketIdx]; i >= 0; i = _entries![i].Next)
        {
            if (KeyEquals(_keys![i], key))
            {
                slotIndex = i;
                return false; // already seen
            }
        }

        // Grow when all slots are used; the bucket count changes, so the bucket index must be recomputed.
        if (_count >= _entriesLength)
        {
            Resize();
            bucketIdx = (hash & 0x7FFFFFFF) % _bucketsLength;
        }

        // Append the new slot and push it onto the front of its bucket chain.
        // Assigning a fresh HashSlot also resets Value to default, so stale pool contents never leak out.
        slotIndex = _count++;
        _keys![slotIndex] = key;
        _entries![slotIndex] = new HashSlot { Next = _buckets![bucketIdx] };
        _buckets[bucketIdx] = slotIndex;
        return true;
    }

    /// <summary>
    /// Computes the hash of a key. int keys hash to themselves, strings use the ordinal hash,
    /// long and Guid use their own <c>GetHashCode</c>; other types fall back to <c>key.GetHashCode()</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int GetHashCode(TKey key)
    {
        if (IsInt32)
        {
            return Unsafe.As<TKey, int>(ref key);
        }

        if (IsInt64)
        {
            return Unsafe.As<TKey, long>(ref key).GetHashCode();
        }

        if (IsGuid)
        {
            return Unsafe.As<TKey, Guid>(ref key).GetHashCode();
        }

        if (IsString)
        {
            return string.GetHashCode(Unsafe.As<TKey, string>(ref key), StringComparison.Ordinal);
        }

        return key.GetHashCode();
    }

    /// <summary>
    /// Rents the three backing arrays and marks every bucket as empty.
    /// </summary>
    /// <param name="capacity">Minimum capacity; a larger capacity remembered by <see cref="Reset"/> takes precedence.</param>
    private void InitHashTable(int capacity)
    {
        // _bucketsLength still holds the capacity remembered by Reset (0 on a fresh instance).
        var actualCapacity = Math.Max(capacity, _bucketsLength);

        _buckets = ArrayPool<int>.Shared.Rent(actualCapacity);
        _bucketsLength = _buckets.Length;
        Array.Fill(_buckets, -1, 0, _bucketsLength);

        // Entries and keys are not cleared: a slot is fully overwritten before it becomes reachable.
        _entries = ArrayPool<HashSlot>.Shared.Rent(actualCapacity);
        _keys = ArrayPool<TKey>.Shared.Rent(actualCapacity);

        // A slot needs room in both parallel arrays.
        _entriesLength = Math.Min(_entries.Length, _keys.Length);
        _count = 0;
    }

    /// <summary>
    /// Doubles the table: rents larger arrays, copies the used slots, rebuilds all bucket chains
    /// and returns the old arrays to the pool. Slot indices of existing entries do not change.
    /// </summary>
    private void Resize()
    {
        var newCapacity = _bucketsLength * 2;

        var newBuckets = ArrayPool<int>.Shared.Rent(newCapacity);
        var newBucketsLength = newBuckets.Length;
        Array.Fill(newBuckets, -1, 0, newBucketsLength);

        var newEntries = ArrayPool<HashSlot>.Shared.Rent(newCapacity);
        var newKeys = ArrayPool<TKey>.Shared.Rent(newCapacity);

        // Only the used prefix carries data; the remainder is overwritten before use.
        Array.Copy(_entries!, newEntries, _count);
        Array.Copy(_keys!, newKeys, _count);

        // Rebuild bucket chains for the new bucket count.
        for (var i = 0; i < _count; i++)
        {
            var bucketIdx = (GetHashCode(newKeys[i]) & 0x7FFFFFFF) % newBucketsLength;
            newEntries[i].Next = newBuckets[bucketIdx];
            newBuckets[bucketIdx] = i;
        }

        // Return the old arrays. Arrays that can hold object references are cleared on return,
        // otherwise the pool would keep the keys/values alive (same concern Reset handles).
        // For reference-free types the flags are false and no clearing work is done.
        ArrayPool<int>.Shared.Return(_buckets!);
        ArrayPool<HashSlot>.Shared.Return(_entries!, clearArray: RuntimeHelpers.IsReferenceOrContainsReferences<HashSlot>());
        ArrayPool<TKey>.Shared.Return(_keys!, clearArray: RuntimeHelpers.IsReferenceOrContainsReferences<TKey>());

        _buckets = newBuckets;
        _bucketsLength = newBucketsLength;
        _entries = newEntries;
        _keys = newKeys;
        _entriesLength = Math.Min(newEntries.Length, newKeys.Length);
    }

    /// <summary>
    /// Returns a reference to the value at the given slot index.
    /// Use with slotIndex from <see cref="TryAdd"/> / <see cref="TryGetEntry"/> for in-place value modification.
    /// </summary>
    /// <param name="slotIndex">A slot index previously returned by this map. It is not validated here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref TValue GetValueRef(int slotIndex)
    {
        return ref _entries![slotIndex].Value;
    }

    /// <summary>
    /// Returns a reference to the value at the given sequential index (0..Count-1).
    /// For iteration over all entries (e.g., footer writing).
    /// </summary>
    /// <param name="index">Sequential index; not validated against <see cref="Count"/> here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref TValue GetValueRefAt(int index)
    {
        return ref _entries![index].Value;
    }

    /// <summary>
    /// Returns the key at the given sequential index (0..Count-1).
    /// For debugging/iteration over all entries.
    /// </summary>
    /// <param name="index">Sequential index; not validated against <see cref="Count"/> here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public TKey GetKeyAt(int index)
    {
        return _keys![index];
    }

    /// <summary>
    /// Resets the identity map for reuse.
    /// Small tables (bucket array length ≤ InitialHashCapacity*5) keep their arrays and are cleared in place.
    /// Large tables return their arrays to the pool and remember half the capacity
    /// (but at least InitialHashCapacity*5) for the next use.
    /// </summary>
    /// <param name="preRentBuckets">
    /// Only relevant for large tables: if true, arrays of the remembered capacity are rented
    /// immediately (useful for shifting that work off the hot path); if false they are rented
    /// lazily by the next <see cref="TryAdd"/>. Small tables keep their arrays either way.
    /// </param>
    public void Reset(bool preRentBuckets = false)
    {
        // Nothing rented, nothing to reset.
        if (_buckets is null)
        {
            return;
        }

        // Drop key/value references held by used slots so the GC can collect them,
        // whether the arrays are kept or handed back to the pool.
        if (_count > 0)
        {
            Array.Clear(_entries!, 0, _count);
            Array.Clear(_keys!, 0, _count);
        }

        _count = 0;

        // Small arrays: keep and clear (cheaper than a pool round-trip).
        if (_bucketsLength <= MaxRetainedCapacity)
        {
            Array.Fill(_buckets, -1, 0, _bucketsLength);
            return;
        }

        // Large arrays: return to the pool and shrink the capacity used next time.
        var nextCapacity = Math.Max(_bucketsLength / 2, MaxRetainedCapacity);

        // The used prefix was cleared above, so no additional clearing is requested here.
        ArrayPool<int>.Shared.Return(_buckets);
        ArrayPool<HashSlot>.Shared.Return(_entries!, false);
        ArrayPool<TKey>.Shared.Return(_keys!, false);

        if (preRentBuckets)
        {
            _buckets = ArrayPool<int>.Shared.Rent(nextCapacity);
            _bucketsLength = _buckets.Length;
            Array.Fill(_buckets, -1, 0, _bucketsLength);

            _entries = ArrayPool<HashSlot>.Shared.Rent(nextCapacity);
            _keys = ArrayPool<TKey>.Shared.Rent(nextCapacity);
            _entriesLength = Math.Min(_entries.Length, _keys.Length);
        }
        else
        {
            _buckets = null;
            _entries = null;
            _keys = null;
            _bucketsLength = nextCapacity; // remembered for the next InitHashTable
            _entriesLength = 0;
        }
    }
}
#endregion