388 lines
13 KiB
C#
388 lines
13 KiB
C#
using System;
|
|
using System.Buffers;
|
|
using System.Collections.Generic;
|
|
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
|
|
namespace AyCode.Core.Serializers;
|
|
|
|
#region IId Reference Tracking
|
|
|
|
/// <summary>
/// Identifies the CLR type of an <c>IId.Id</c> property so that a typed getter
/// can be dispatched without boxing the id value.
/// </summary>
public enum IdAccessorType : byte
{
    /// <summary>Zero/default value: no id accessor type is specified.</summary>
    None = 0,

    /// <summary>Id is <see cref="int"/> (most common).</summary>
    Int32 = 1,

    /// <summary>Id is <see cref="long"/>.</summary>
    Int64 = 2,

    /// <summary>Id is <see cref="System.Guid"/>.</summary>
    Guid = 3,
}
|
|
/// <summary>
/// Common entry for tracking interned values (strings and IId objects) during serialization.
/// Used as the <c>TValue</c> of <c>IdentityMap&lt;TKey, InternEntry&gt;</c>.
/// </summary>
/// <remarks>
/// <c>default(InternEntry)</c> has <see cref="CacheIndex"/> equal to 0, not -1, so code that
/// relies on the "-1 = not cached" sentinel must assign it explicitly after creating an entry.
/// </remarks>
public struct InternEntry
{
    /// <summary>Scan visit index of first occurrence. Used to create WriteDuplicateEntry on 2nd occurrence.</summary>
    public int FirstIndex;

    /// <summary>Dense cache index (0, 1, 2, ...) assigned after scan pass. -1 = not cached, 0+ = cache index.</summary>
    public int CacheIndex;

    /// <summary>True if this is the first serialize (write *First marker), false after (write *Ref + index).</summary>
    public bool IsFirstWrite;
}
|
|
|
|
/// <summary>
/// Pre-computed write instruction for duplicate strings and IId object references.
/// Built during the scan pass, sorted by <see cref="VisitIndex"/>, and consumed sequentially
/// by the write pass cursor, which removes identity-map lookups and redundant getter calls
/// from the write hot path.
/// </summary>
public struct WriteDuplicateEntry
{
    /// <summary>Sequential visit index matching the write pass traversal order.</summary>
    public int VisitIndex;

    /// <summary>Cache/intern index to write (intern index for strings, cache index for IId objects).</summary>
    public int CacheMapIndex;

    /// <summary>True = first occurrence (StringFirst / RefFirst). False = subsequent reference (StringRef / ObjRef).</summary>
    public bool IsFirst;

    /// <summary>Non-null for StringFirst: the interned string value (avoids getter call). Null for all other cases.</summary>
    public string? Value;
}
|
|
|
|
/// <summary>
/// Non-generic view of an identity map used for serialization tracking.
/// Lets callers invoke <see cref="Reset"/> without knowing the map's generic type arguments.
/// </summary>
public interface IIdentityMap
{
    /// <summary>
    /// Resets the identity map so it can be reused for another serialization.
    /// </summary>
    /// <param name="preRentBuckets">If true, pre-rent arrays at the next capacity.</param>
    void Reset(bool preRentBuckets = false);
}
|
|
|
|
|
|
/// <summary>
/// High-performance identity map for tracking keys (IId values, strings, ...) during
/// serialization/deserialization.
/// <para>
/// Every key goes through a single chained hash table: a bucket array stores the head slot of
/// each chain, and parallel entry/key arrays store the slots. All three arrays are rented from
/// <see cref="ArrayPool{T}.Shared"/>, so there is no Dictionary overhead and no per-entry allocation.
/// </para>
/// <para>
/// Slots are dense and assigned in insertion order (the i-th added key lives at slot i), which is
/// what <see cref="GetValueRefAt"/> and <see cref="GetKeyAt"/> use for iteration. Slot indices stay
/// valid when the table grows, but a <c>ref</c> obtained from <see cref="GetValueRef"/> points into
/// the current entry array and must not be used after a later <see cref="TryAdd"/> or
/// <see cref="Reset"/>, because those may swap or return that array.
/// </para>
/// <para>The type performs no synchronization of its own.</para>
/// </summary>
/// <typeparam name="TKey">The key type (int, long, Guid and string have specialized hashing/equality).</typeparam>
/// <typeparam name="TValue">The value type stored per key.</typeparam>
public sealed class IdentityMap<TKey, TValue> : IIdentityMap where TKey : notnull
{
    // One slot of the hash table: the stored value plus the link to the next slot of the same bucket.
    private struct HashSlot
    {
        public TValue Value;
        public int Next; // next slot index in chain (-1 = end)
    }

    // Hash table storage (all arrays are rented from ArrayPool<T>.Shared).
    private int[]? _buckets;       // bucket index -> first slot index of the chain (-1 = empty)
    private HashSlot[]? _entries;  // slots, dense in [0, _count)
    private TKey[]? _keys;         // key of each slot, parallel to _entries
    private int _count;            // number of used slots
    private int _bucketsLength;    // rented bucket length (modulo base); while _buckets is null: remembered capacity hint
    private int _entriesLength;    // number of slots usable in BOTH _entries and _keys

    private const int InitialHashCapacity = 16;

    // Tables up to this size are kept and cleared on Reset instead of being returned to the pool.
    private const int SmallTableThreshold = InitialHashCapacity * 5;

    // Type checks, evaluated once per generic instantiation.
    private static readonly bool IsInt32 = typeof(TKey) == typeof(int);
    private static readonly bool IsInt64 = typeof(TKey) == typeof(long);
    private static readonly bool IsGuid = typeof(TKey) == typeof(Guid);
    private static readonly bool IsString = typeof(TKey) == typeof(string);

    /// <summary>
    /// Number of entries in the hash table. Use with <see cref="GetValueRefAt"/> for iteration.
    /// </summary>
    public int Count => _count;

    /// <summary>Creates an empty map. No arrays are rented until the first <see cref="TryAdd"/>.</summary>
    public IdentityMap()
    {
    }

    /// <summary>
    /// Adds <paramref name="key"/> if it has not been seen yet.
    /// </summary>
    /// <param name="key">The key to add or find.</param>
    /// <param name="slotIndex">
    /// Slot index of the key (new or existing). Pass it to <see cref="GetValueRef"/> to read or
    /// write the value; a newly added slot starts with <c>default(TValue)</c>.
    /// </param>
    /// <returns>True if the key was added (first occurrence); false if it was already present.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryAdd(TKey key, out int slotIndex)
    {
        return TryAddHash(key, out slotIndex);
    }

    /// <summary>
    /// Lookup only: finds <paramref name="key"/> without adding it.
    /// Intended for the serialize pass after the scan pass has populated the map.
    /// </summary>
    /// <param name="key">The key to look up.</param>
    /// <param name="slotIndex">Slot index of the key, or -1 when the key is not present.</param>
    /// <returns>True if the key exists; otherwise false.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryGetEntry(TKey key, out int slotIndex)
    {
        // No table rented yet (never used, or released by Reset): nothing can be found.
        if (_buckets is null)
        {
            slotIndex = -1;
            return false;
        }

        slotIndex = FindSlot(key, ComputeHash(key));
        return slotIndex >= 0;
    }

    /// <summary>
    /// Returns a reference to the value at the given slot index.
    /// Use with the slot index from <see cref="TryAdd"/> or <see cref="TryGetEntry"/> for in-place modification.
    /// </summary>
    /// <param name="slotIndex">A slot index in <c>[0, Count)</c>; not validated.</param>
    /// <returns>A reference into the current entry array.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref TValue GetValueRef(int slotIndex)
    {
        return ref _entries![slotIndex].Value;
    }

    /// <summary>
    /// Returns the value at the given sequential index (0..Count-1).
    /// For iteration over all entries (e.g., footer writing).
    /// </summary>
    /// <param name="index">A sequential index in <c>[0, Count)</c>; not validated.</param>
    /// <returns>A reference into the current entry array.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref TValue GetValueRefAt(int index)
    {
        return ref _entries![index].Value;
    }

    /// <summary>
    /// Returns the key at the given sequential index (0..Count-1).
    /// For debugging/iteration over all entries.
    /// </summary>
    /// <param name="index">A sequential index in <c>[0, Count)</c>; not validated.</param>
    /// <returns>The key stored at that slot.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public TKey GetKeyAt(int index)
    {
        return _keys![index];
    }

    /// <summary>
    /// Resets the identity map for reuse.
    /// Small tables (bucket length at most InitialHashCapacity*5) are kept and cleared, which is
    /// cheaper than a pool round-trip. Larger tables are returned to the pool and half of their
    /// capacity is remembered for the next use.
    /// </summary>
    /// <param name="preRentBuckets">
    /// If true, a large table is immediately replaced by freshly rented arrays at the remembered
    /// capacity (useful to move the rent cost off the hot path, e.g. into an async clear).
    /// </param>
    public void Reset(bool preRentBuckets = false)
    {
        if (_buckets is null)
        {
            return;
        }

        // Drop references held by used slots so that neither this map nor the pool keeps them alive.
        ClearUsedSlots();
        _count = 0;

        if (_bucketsLength <= SmallTableThreshold)
        {
            // Small table: keep the arrays, just empty every bucket chain.
            Array.Fill(_buckets, -1, 0, _bucketsLength);
            return;
        }

        // Large table: give the arrays back and remember half of the capacity.
        var nextCapacity = Math.Max(_bucketsLength / 2, SmallTableThreshold);

        ArrayPool<int>.Shared.Return(_buckets);
        ArrayPool<HashSlot>.Shared.Return(_entries!);
        ArrayPool<TKey>.Shared.Return(_keys!);

        // Never keep pointing at arrays that now belong to the pool.
        _buckets = null;
        _entries = null;
        _keys = null;
        _bucketsLength = nextCapacity; // remembered for the next InitHashTable
        _entriesLength = 0;

        if (preRentBuckets)
        {
            RentTables(nextCapacity);
        }
    }

    // Adds the key to the chained hash table, renting/growing the arrays as needed.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private bool TryAddHash(TKey key, out int slotIndex)
    {
        var hash = ComputeHash(key);

        // Lazy init: arrays are rented on first use.
        if (_buckets is null)
        {
            InitHashTable(InitialHashCapacity);
        }

        slotIndex = FindSlot(key, hash);
        if (slotIndex >= 0)
        {
            return false; // already seen
        }

        if (_count >= _entriesLength)
        {
            Resize();
        }

        // Compute the bucket only now: Resize may have changed the bucket count.
        var bucket = BucketIndex(hash, _bucketsLength);

        // Push the new slot at the head of its bucket chain. Assigning a fresh HashSlot also
        // resets Value, which matters because rented arrays are not cleared.
        slotIndex = _count++;
        _keys![slotIndex] = key;
        _entries![slotIndex] = new HashSlot { Next = _buckets![bucket] };
        _buckets[bucket] = slotIndex;
        return true;
    }

    // Walks the bucket chain for the given hash; returns the slot index of the key or -1.
    // Requires the table to be rented (_buckets is not null).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private int FindSlot(TKey key, int hash)
    {
        var entries = _entries!;
        var keys = _keys!;

        for (var i = _buckets![BucketIndex(hash, _bucketsLength)]; i >= 0; i = entries[i].Next)
        {
            if (KeyEquals(keys[i], key))
            {
                return i;
            }
        }

        return -1;
    }

    // Maps a hash code to a bucket; the mask drops the sign bit so the remainder is non-negative.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int BucketIndex(int hash, int bucketCount)
    {
        return (hash & 0x7FFFFFFF) % bucketCount;
    }

    // Key equality specialized for the known key types (no virtual comparer call for them).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool KeyEquals(TKey a, TKey b)
    {
        if (IsInt32)
        {
            return Unsafe.As<TKey, int>(ref a) == Unsafe.As<TKey, int>(ref b);
        }

        if (IsString)
        {
            // Ordinal comparison; string.Equals already short-circuits on reference equality.
            return string.Equals(Unsafe.As<TKey, string>(ref a), Unsafe.As<TKey, string>(ref b), StringComparison.Ordinal);
        }

        if (IsInt64)
        {
            return Unsafe.As<TKey, long>(ref a) == Unsafe.As<TKey, long>(ref b);
        }

        if (IsGuid)
        {
            return Unsafe.As<TKey, Guid>(ref a) == Unsafe.As<TKey, Guid>(ref b);
        }

        // Fallback for other types
        return EqualityComparer<TKey>.Default.Equals(a, b);
    }

    // Hash code specialized for the known key types; must stay consistent with KeyEquals.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ComputeHash(TKey key)
    {
        if (IsInt32) return Unsafe.As<TKey, int>(ref key);
        if (IsInt64) return Unsafe.As<TKey, long>(ref key).GetHashCode();
        if (IsGuid) return Unsafe.As<TKey, Guid>(ref key).GetHashCode();
        if (IsString) return string.GetHashCode(Unsafe.As<TKey, string>(ref key), StringComparison.Ordinal);

        return key.GetHashCode();
    }

    // Rents the initial arrays, honoring the capacity remembered by a previous Reset if it is larger.
    private void InitHashTable(int capacity)
    {
        RentTables(Math.Max(capacity, _bucketsLength));
    }

    // Rents all three arrays for at least `capacity` slots and installs them as an empty table.
    private void RentTables(int capacity)
    {
        var buckets = ArrayPool<int>.Shared.Rent(capacity);
        Array.Fill(buckets, -1); // rented arrays contain stale data; every bucket must start empty

        var entries = ArrayPool<HashSlot>.Shared.Rent(capacity);
        var keys = ArrayPool<TKey>.Shared.Rent(capacity);

        _buckets = buckets;
        _bucketsLength = buckets.Length;
        _entries = entries;
        _keys = keys;

        // Rent only guarantees a minimum length, so never assume both arrays are equally long.
        _entriesLength = Math.Min(entries.Length, keys.Length);
        _count = 0;
    }

    // Doubles the table: copies the used slots (slot indices are preserved) and rebuilds the chains.
    private void Resize()
    {
        var newCapacity = Math.Max(_bucketsLength, _entriesLength) * 2;

        var newBuckets = ArrayPool<int>.Shared.Rent(newCapacity);
        var newBucketsLength = newBuckets.Length;
        Array.Fill(newBuckets, -1);

        var newEntries = ArrayPool<HashSlot>.Shared.Rent(newCapacity);
        var newKeys = ArrayPool<TKey>.Shared.Rent(newCapacity);

        // Only the used prefix carries meaning; the rest is overwritten when slots are added.
        Array.Copy(_entries!, newEntries, _count);
        Array.Copy(_keys!, newKeys, _count);

        // Rebuild bucket chains for the new bucket count.
        for (var i = 0; i < _count; i++)
        {
            var bucket = BucketIndex(ComputeHash(newKeys[i]), newBucketsLength);
            newEntries[i].Next = newBuckets[bucket];
            newBuckets[bucket] = i;
        }

        // Release references from the old arrays before pooling them; otherwise the pool would
        // keep the keys/values reachable (same reason Reset clears before returning).
        ClearUsedSlots();
        ArrayPool<int>.Shared.Return(_buckets!);
        ArrayPool<HashSlot>.Shared.Return(_entries!);
        ArrayPool<TKey>.Shared.Return(_keys!);

        _buckets = newBuckets;
        _bucketsLength = newBucketsLength;
        _entries = newEntries;
        _keys = newKeys;
        _entriesLength = Math.Min(newEntries.Length, newKeys.Length);
    }

    // Clears the used prefix of the current entry/key arrays when they can hold object references.
    // Pure value-type data needs no clearing: slots are fully overwritten when they are reused.
    private void ClearUsedSlots()
    {
        if (_count == 0)
        {
            return;
        }

        if (RuntimeHelpers.IsReferenceOrContainsReferences<HashSlot>())
        {
            Array.Clear(_entries!, 0, _count);
        }

        if (RuntimeHelpers.IsReferenceOrContainsReferences<TKey>())
        {
            Array.Clear(_keys!, 0, _count);
        }
    }
}
|
|
|
|
#endregion
|