AyCode.Core/AyCode.Core/Serializers/Binaries/AcBinaryDeserializer.Binary...

634 lines
24 KiB
C#

using System;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
namespace AyCode.Core.Serializers.Binaries;
public static partial class AcBinaryDeserializer
{
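/// <summary>
/// Binary deserialization context: a cursor-based reader over a binary payload.
/// Uses composition with BinaryDeserializationContextClass for IId-based tracking.
/// NOTE(review): this comment previously said "Public for generated serializers", but the
/// struct is declared internal - confirm the intended visibility.
/// </summary>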
internal ref struct BinaryDeserializationContext
{
// Payload being deserialized; every read method indexes or slices this span.
private readonly ReadOnlySpan<byte> _buffer;
// Index into _buffer of the next byte to read; advanced by the read/skip methods.
private int _position;
// Interned-string table: filled by ReadFooterStrings and RegisterInternedString,
// looked up by GetInternedString. Null until first needed.
private List<string>? _internedStrings;
// Property-name table: filled by ReadHeader when the header declares one,
// looked up by GetPropertyName. Null otherwise.
private List<string>? _propertyNames;
//private Dictionary<int, object>? _objectReferences;
// Decoded-string cache keyed by the hash from ComputeStringHashFull;
// created on first use by ReadStringUtf8Cached.
private Dictionary<int, string>? _stringCache;
/// <summary>
/// Heap-allocated context class for IId-based reference tracking.
/// Also holds Options - all options-derived properties delegate to ContextClass.Options.
/// </summary>
public readonly BinaryDeserializationContextClass ContextClass;
/// <summary>
/// True when the header parsed by ReadHeader declared a property-name table.
/// </summary>
public bool HasMetadata { get; private set; }
// Disabled member (kept for reference; plain comment so it does not document the next property):
// Convenience property - true if any reference handling is enabled.
//public readonly bool HasReferenceHandling => ContextClass.ReferenceHandling != ReferenceHandlingMode.None;
/// <summary>
/// Merge-mode flag; the constructor initialises it to false.
/// NOTE(review): not read by any code in this struct - presumably consulted by the
/// populate/merge logic elsewhere; confirm.
/// </summary>
public bool IsMergeMode { readonly get; set; }
/// <summary>
/// Orphan-removal flag; the constructor initialises it to false.
/// NOTE(review): not read by any code in this struct - presumably used together with
/// merge mode elsewhere; confirm.
/// </summary>
public bool RemoveOrphanedItems { readonly get; set; }
/// <summary>
/// True when the read position has reached (or passed) the end of the payload.
/// </summary>
public readonly bool IsAtEnd => _position >= _buffer.Length;
/// <summary>
/// Current read position (index of the next byte to read).
/// </summary>
public readonly int Position => _position;
// Options-derived properties - delegate to ContextClass.Options
public readonly byte MinStringInternLength => ContextClass.Options.MinStringInternLength;
public readonly bool UseStringCaching => ContextClass.Options.UseStringCaching;
public readonly int MaxCachedStringLength => ContextClass.Options.MaxCachedStringLength;
/// <summary>
/// Chain reference tracker for maintaining object identity across chain operations.
/// Only set when in chain mode (CreateDeserializeChain).
/// </summary>
public AcSerializerCommon.ChainReferenceTracker? ChainTracker { readonly get; set; }
/// <summary>
/// Returns true if in chain mode (ChainTracker is set).
/// </summary>
public readonly bool IsChainMode => ChainTracker != null;
/// <summary>
/// Creates a context over <paramref name="data"/> using
/// <c>AcBinarySerializerOptions.Default</c> and a newly created
/// <c>BinaryDeserializationContextClass</c>.
/// </summary>
/// <param name="data">Binary payload to read.</param>
public BinaryDeserializationContext(ReadOnlySpan<byte> data)
    : this(
        data: data,
        options: AcBinarySerializerOptions.Default,
        contextClass: new BinaryDeserializationContextClass())
{
}
/// <summary>
/// Creates a context over <paramref name="data"/> with the supplied options and a newly
/// created <c>BinaryDeserializationContextClass</c>.
/// </summary>
/// <param name="data">Binary payload to read.</param>
/// <param name="options">Options handed to the context class.</param>
public BinaryDeserializationContext(ReadOnlySpan<byte> data, AcBinarySerializerOptions options)
    : this(
        data: data,
        options: options,
        contextClass: new BinaryDeserializationContextClass())
{
}
/// <summary>
/// Creates a context over <paramref name="data"/> that uses the given context class.
/// The reader starts at offset 0 with no tables or caches, all mode flags off and no
/// chain tracker; the context class is then reset with <paramref name="options"/>.
/// </summary>
/// <param name="data">Binary payload to read.</param>
/// <param name="options">Options passed to <c>contextClass.Reset</c>.</param>
/// <param name="contextClass">Context class instance to use for this run.</param>
public BinaryDeserializationContext(ReadOnlySpan<byte> data, AcBinarySerializerOptions options, BinaryDeserializationContextClass contextClass)
{
    // Reader state: beginning of the payload, nothing decoded yet.
    _buffer = data;
    _position = 0;
    _propertyNames = null;
    _internedStrings = null;
    _stringCache = null;

    // Mode flags and chain tracker start out disabled/unset.
    HasMetadata = false;
    RemoveOrphanedItems = false;
    IsMergeMode = false;
    ChainTracker = null;

    ContextClass = contextClass;

    // Per the original note: Reset installs the options on the context class and clears
    // whatever state a previous run left behind.
    contextClass.Reset(options);
}
/// <summary>
/// Parses the payload header: format version, header marker/flags, the optional footer
/// position, the optional property-name table and - when a positive footer position is
/// present - the interned-string table stored at that position.
/// On return the read position is just past the header (and property table, if any).
/// </summary>
/// <remarks>
/// Side effect: the decoded reference-handling mode is written to
/// <c>ContextClass.Options.ReferenceHandling</c>.
/// NOTE(review): if that options object is shared between contexts (for example a default
/// instance), this write is visible to every user of it - confirm this is intended.
/// </remarks>
/// <exception cref="AcBinaryDeserializationException">
/// The payload is shorter than two bytes, the version or marker is not supported, or the
/// property table count cannot fit in the remaining payload.
/// </exception>
public void ReadHeader()
{
    if (_buffer.Length < 2)
    {
        throw new AcBinaryDeserializationException("Binary payload is too short to contain a header.");
    }

    var version = ReadByteInternal();
    if (version != AcBinarySerializerOptions.FormatVersion)
    {
        throw new AcBinaryDeserializationException(
            $"Unsupported binary format version '{version}'. Expected '{AcBinarySerializerOptions.FormatVersion}'.",
            _position - 1);
    }

    var marker = ReadByteInternal();
    var hasPropertyTable = false;
    var footerPosition = 0;

    if (marker == BinaryTypeCode.MetadataHeader)
    {
        hasPropertyTable = true;
        ContextClass.Options.ReferenceHandling = ReferenceHandlingMode.OnlyId; // Legacy: assume OnlyId
    }
    else if (marker == BinaryTypeCode.NoMetadataHeader)
    {
        ContextClass.Options.ReferenceHandling = ReferenceHandlingMode.OnlyId; // Legacy: assume OnlyId
    }
    else if ((marker & 0xF0) == BinaryTypeCode.HeaderFlagsBase)
    {
        // Flag-style header: the low nibble carries the individual flags.
        var flags = (byte)(marker & 0x0F);
        hasPropertyTable = (flags & BinaryTypeCode.HeaderFlag_Metadata) != 0;

        // Reference handling is encoded in two separate bits; "All" wins over "OnlyId".
        var hasOnlyId = (flags & BinaryTypeCode.HeaderFlag_RefHandling_OnlyId) != 0;
        var hasAll = (flags & BinaryTypeCode.HeaderFlag_RefHandling_All) != 0;
        ContextClass.Options.ReferenceHandling = hasAll ? ReferenceHandlingMode.All
            : hasOnlyId ? ReferenceHandlingMode.OnlyId
            : ReferenceHandlingMode.None;

        // A 4-byte little-endian footer position follows the marker when the flag is set.
        if ((flags & BinaryTypeCode.HeaderFlag_HasFooterPosition) != 0)
        {
            EnsureAvailable(4);
            footerPosition = BinaryPrimitives.ReadInt32LittleEndian(_buffer.Slice(_position, 4));
            _position += 4;
        }
    }
    else
    {
        throw new AcBinaryDeserializationException(
            $"Unsupported binary header marker '{marker}'.",
            _position - 1);
    }

    HasMetadata = hasPropertyTable;
    if (hasPropertyTable)
    {
        var rawCount = ReadVarUInt();

        // The count comes from the payload. Each entry needs at least one byte (its length
        // prefix), so a count above the remaining byte count can only be corrupt data;
        // rejecting it here avoids a negative or enormous list capacity.
        if (rawCount > (uint)(_buffer.Length - _position))
        {
            throw new AcBinaryDeserializationException(
                $"Invalid property table count '{rawCount}'.",
                _position);
        }

        var propertyCount = (int)rawCount;
        _propertyNames = new List<string>(propertyCount);
        for (var i = 0; i < propertyCount; i++)
        {
            _propertyNames.Add(ReadHeaderString());
        }
    }

    // The legacy in-header intern table was never enabled by any marker above, so its
    // unreachable decoding branch has been removed.

    // Footer-based: read interned strings from footer, then return to data position.
    // Only a positive footer position counts as "footer present".
    if (footerPosition > 0)
    {
        ReadFooterStrings(footerPosition);
    }
}
/// <summary>
/// Reads the interned-string table located at <paramref name="footerPosition"/> and then
/// restores the read position to where it was on entry (the start of the data section).
/// </summary>
/// <param name="footerPosition">Absolute offset of the footer inside the payload.</param>
/// <exception cref="AcBinaryDeserializationException">
/// The footer position lies outside the payload, or the declared string count cannot fit
/// in the bytes that follow it.
/// </exception>
private void ReadFooterStrings(int footerPosition)
{
    // The offset comes from the payload; validate it before seeking so a corrupt value
    // is reported as a deserialization error rather than an out-of-range access.
    if ((uint)footerPosition >= (uint)_buffer.Length)
    {
        throw new AcBinaryDeserializationException(
            $"Invalid footer position '{footerPosition}'.",
            _position);
    }

    // Save current position (start of data)
    var dataPosition = _position;

    // Seek to footer
    _position = footerPosition;

    // Each entry needs at least one byte (its length prefix); a larger count is corrupt
    // and would otherwise become a negative or enormous list capacity.
    var rawCount = ReadVarUInt();
    if (rawCount > (uint)(_buffer.Length - _position))
    {
        throw new AcBinaryDeserializationException(
            $"Invalid interned string count '{rawCount}'.",
            _position);
    }

    var internCount = (int)rawCount;
    _internedStrings = new List<string>(internCount);
    for (var i = 0; i < internCount; i++)
    {
        _internedStrings.Add(ReadHeaderString());
    }

    // Seek back to data position
    _position = dataPosition;
}
/// <summary>
/// Reads one byte and advances the position by one.
/// </summary>
/// <returns>The byte at the current position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte ReadByte()
{
    return ReadByteInternal();
}
/// <summary>
/// Reads the byte at the current position and advances by one.
/// </summary>
/// <exception cref="AcBinaryDeserializationException">The position is at or past the end of the payload.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private byte ReadByteInternal()
{
    var index = _position;
    if (index < _buffer.Length)
    {
        // Advance first, then fetch - the same order as a post-increment indexer.
        _position = index + 1;
        return _buffer[index];
    }

    throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
}
/// <summary>
/// Returns the byte at the current position without advancing.
/// </summary>
/// <exception cref="AcBinaryDeserializationException">The position is at or past the end of the payload.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte PeekByte()
{
    return _position < _buffer.Length
        ? _buffer[_position]
        : throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
}
/// <summary>
/// Reads a little-endian 16-bit signed integer and advances by two bytes.
/// The read is bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public short ReadInt16Unsafe()
{
    const int size = sizeof(short);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    return BinaryPrimitives.ReadInt16LittleEndian(source);
}
/// <summary>
/// Reads a little-endian 16-bit unsigned integer and advances by two bytes.
/// The read is bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ushort ReadUInt16Unsafe()
{
    const int size = sizeof(ushort);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    return BinaryPrimitives.ReadUInt16LittleEndian(source);
}
/// <summary>
/// Reads a little-endian 16-bit code unit, returns it as a <see cref="char"/> and
/// advances by two bytes. The read is bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char ReadCharUnsafe()
{
    const int size = sizeof(char);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    var codeUnit = BinaryPrimitives.ReadUInt16LittleEndian(source);
    return (char)codeUnit;
}
/// <summary>
/// Reads four bytes as a little-endian 32-bit pattern, reinterprets it as a
/// <see cref="float"/> and advances by four bytes. Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public float ReadSingleUnsafe()
{
    const int size = sizeof(float);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    var rawBits = BinaryPrimitives.ReadInt32LittleEndian(source);
    return BitConverter.Int32BitsToSingle(rawBits);
}
/// <summary>
/// Reads eight bytes as a little-endian 64-bit pattern, reinterprets it as a
/// <see cref="double"/> and advances by eight bytes. Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public double ReadDoubleUnsafe()
{
    const int size = sizeof(double);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    var rawBits = BinaryPrimitives.ReadInt64LittleEndian(source);
    return BitConverter.Int64BitsToDouble(rawBits);
}
/// <summary>
/// Reads a 16-byte decimal (four 32-bit parts: lo, mid, hi, flags) and advances by
/// sixteen bytes. Bounds-checked through EnsureAvailable.
/// </summary>
/// <remarks>
/// The parts are decoded explicitly as little-endian, matching every other multi-byte
/// reader in this type. On little-endian hosts this is identical to the former
/// host-order reinterpretation of the bytes.
/// NOTE(review): confirm the writer emits the four parts little-endian on all targets.
/// </remarks>
/// <exception cref="AcBinaryDeserializationException">
/// Fewer than sixteen bytes remain, or the encoded scale is greater than 28.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public decimal ReadDecimalUnsafe()
{
    EnsureAvailable(16);

    var raw = _buffer.Slice(_position, 16);
    var lo = BinaryPrimitives.ReadInt32LittleEndian(raw);
    var mid = BinaryPrimitives.ReadInt32LittleEndian(raw.Slice(4));
    var hi = BinaryPrimitives.ReadInt32LittleEndian(raw.Slice(8));
    var flags = BinaryPrimitives.ReadInt32LittleEndian(raw.Slice(12));

    // Sign is the top bit of the flags word; scale is taken from bits 16-22.
    var isNegative = (flags & unchecked((int)0x80000000)) != 0;
    var scale = (byte)((flags >> 16) & 0x7F);

    // The decimal constructor rejects scale > 28 with ArgumentOutOfRangeException; report
    // corrupt data with the same exception type as the other readers instead.
    if (scale > 28)
    {
        throw new AcBinaryDeserializationException($"Invalid decimal scale '{scale}'.", _position);
    }

    _position += 16;
    return new decimal(lo, mid, hi, isNegative, scale);
}
/// <summary>
/// Reads a <see cref="DateTime"/> stored as eight little-endian tick bytes followed by
/// one <see cref="DateTimeKind"/> byte, and advances by nine bytes.
/// Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public DateTime ReadDateTimeUnsafe()
{
    const int size = sizeof(long) + sizeof(byte);
    EnsureAvailable(size);

    var raw = _buffer.Slice(_position, size);
    var ticks = BinaryPrimitives.ReadInt64LittleEndian(raw);
    var kind = (DateTimeKind)raw[sizeof(long)];
    _position += size;

    return new DateTime(ticks, kind);
}
/// <summary>
/// Reads a <see cref="DateTimeOffset"/> stored as eight little-endian UTC tick bytes
/// followed by a little-endian 16-bit offset in minutes, and advances by ten bytes.
/// Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public DateTimeOffset ReadDateTimeOffsetUnsafe()
{
    const int size = sizeof(long) + sizeof(short);
    EnsureAvailable(size);

    var raw = _buffer.Slice(_position, size);
    var utcTicks = BinaryPrimitives.ReadInt64LittleEndian(raw);
    var offsetMinutes = BinaryPrimitives.ReadInt16LittleEndian(raw.Slice(sizeof(long)));
    _position += size;

    // Build the instant in UTC first, then re-express it at the stored offset.
    var utcInstant = new DateTimeOffset(new DateTime(utcTicks, DateTimeKind.Utc));
    var offset = TimeSpan.FromMinutes(offsetMinutes);
    return utcInstant.ToOffset(offset);
}
/// <summary>
/// Reads a <see cref="TimeSpan"/> stored as eight little-endian tick bytes and advances
/// by eight bytes. Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TimeSpan ReadTimeSpanUnsafe()
{
    const int size = sizeof(long);
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    _position += size;
    return new TimeSpan(BinaryPrimitives.ReadInt64LittleEndian(source));
}
/// <summary>
/// Reads a <see cref="Guid"/> from the next sixteen bytes (using the span-based Guid
/// constructor) and advances by sixteen bytes. Bounds-checked through EnsureAvailable.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Guid ReadGuidUnsafe()
{
    const int size = 16;
    EnsureAvailable(size);

    var source = _buffer.Slice(_position, size);
    var result = new Guid(source);
    _position += size;
    return result;
}
/// <summary>
/// Reads a ZigZag-encoded signed 32-bit integer stored as a variable-length unsigned
/// value (see ReadVarUInt, which provides the fast path for short encodings).
/// </summary>
/// <returns>The decoded signed value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int ReadVarInt()
{
    var encoded = ReadVarUInt();

    // ZigZag decode. The shift happens on the unsigned value, before the cast, so the
    // full uint range is handled (e.g. int.MaxValue encodes to a value above int.MaxValue).
    var magnitude = (int)(encoded >> 1);
    var signMask = -(int)(encoded & 1); // 0 for even (non-negative), -1 for odd (negative)
    return magnitude ^ signMask;
}
/// <summary>
/// Optimized VarUInt reader with fast path for 1-2 byte values.
/// Most VarInts in real data are small (property indices, array lengths, etc.)
/// Each byte carries seven payload bits, least-significant group first; a set high bit
/// means another byte follows.
/// </summary>
/// <returns>The decoded unsigned value.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The payload ends before the value is complete, or the encoding is invalid.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public uint ReadVarUInt()
{
    // Check the first byte explicitly: an unchecked index at the end of the payload would
    // raise IndexOutOfRangeException instead of the deserialization exception every other
    // reader uses. The unsigned comparison also rejects a negative position.
    if ((uint)_position >= (uint)_buffer.Length)
    {
        throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
    }

    // Fast path: single byte (0-127)
    var b0 = _buffer[_position];
    if ((b0 & 0x80) == 0)
    {
        _position++;
        return b0;
    }

    // Fast path: two bytes (128-16383)
    if (_position + 1 < _buffer.Length)
    {
        var b1 = _buffer[_position + 1];
        if ((b1 & 0x80) == 0)
        {
            _position += 2;
            return (uint)(b0 & 0x7F) | ((uint)b1 << 7);
        }
    }

    // Slow path: 3+ bytes (also reports truncation after a continuation byte)
    return ReadVarUIntSlow();
}
/// <summary>
/// General VarUInt decoder: reads seven payload bits per byte, least-significant group
/// first, until a byte without the continuation bit (0x80) is found.
/// </summary>
/// <returns>The decoded unsigned value.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The payload ends mid-value, or the encoding is longer than five bytes.
/// </exception>
private uint ReadVarUIntSlow()
{
    uint value = 0;

    // A 32-bit value needs at most five 7-bit groups (shifts 0, 7, 14, 21, 28).
    // The earlier limit let a sixth byte through with a shift of 35; C# masks a 32-bit
    // shift count to its low five bits, so that byte was OR-ed in at shift 3 and silently
    // corrupted the result.
    for (var shift = 0; shift < 35; shift += 7)
    {
        var b = ReadByteInternal();
        value |= (uint)(b & 0x7F) << shift;
        if ((b & 0x80) == 0)
        {
            return value;
        }
    }

    throw new AcBinaryDeserializationException("Invalid VarUInt encoding.", _position);
}
/// <summary>
/// Reads a ZigZag-encoded signed 64-bit integer stored as a variable-length unsigned
/// value (see ReadVarULong).
/// </summary>
/// <returns>The decoded signed value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public long ReadVarLong()
{
    var encoded = ReadVarULong();

    // ZigZag decode: the low bit selects the sign, the remaining bits are the magnitude.
    var magnitude = (long)(encoded >> 1);
    var signMask = -((long)encoded & 1); // 0 for even (non-negative), -1 for odd (negative)
    return magnitude ^ signMask;
}
/// <summary>
/// Reads a variable-length unsigned 64-bit integer: seven payload bits per byte,
/// least-significant group first, until a byte without the continuation bit (0x80).
/// </summary>
/// <returns>The decoded unsigned value.</returns>
/// <exception cref="AcBinaryDeserializationException">
/// The payload ends mid-value, or the encoding is longer than ten bytes.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ulong ReadVarULong()
{
    ulong value = 0;

    // A 64-bit value needs at most ten 7-bit groups (shifts 0, 7, ..., 63).
    // The earlier limit let an eleventh byte through with a shift of 70; C# masks a 64-bit
    // shift count to its low six bits, so that byte was OR-ed in at shift 6 and silently
    // corrupted the result.
    for (var shift = 0; shift < 70; shift += 7)
    {
        var b = ReadByteInternal();
        value |= (ulong)(b & 0x7F) << shift;
        if ((b & 0x80) == 0)
        {
            return value;
        }
    }

    throw new AcBinaryDeserializationException("Invalid VarULong encoding.", _position);
}
/// <summary>
/// Copies the next <paramref name="length"/> bytes into a new array and advances past them.
/// </summary>
/// <param name="length">Number of bytes to read; zero returns the shared empty array.</param>
/// <returns>A new array holding the bytes, or <c>Array.Empty</c> for zero length.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public byte[] ReadBytes(int length)
{
    if (length == 0) return Array.Empty<byte>();

    EnsureAvailable(length);

    // Uninitialized allocation is fine: every element is overwritten by the copy below.
    var copy = GC.AllocateUninitializedArray<byte>(length);
    var source = _buffer.Slice(_position, length);
    source.CopyTo(copy);

    _position += length;
    return copy;
}
/// <summary>
/// Decodes the next <paramref name="length"/> bytes as UTF-8 and advances past them.
/// When string caching is enabled and the length is within MaxCachedStringLength, the
/// read goes through the string cache instead (originally added as a WASM optimization).
/// </summary>
/// <param name="length">Byte length of the encoded string; zero returns <c>string.Empty</c>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ReadStringUtf8(int length)
{
    if (length == 0) return string.Empty;

    EnsureAvailable(length);

    // Short strings may be served from the cache to reduce allocations.
    var useCache = UseStringCaching && length <= MaxCachedStringLength;
    if (!useCache)
    {
        var decoded = Utf8NoBom.GetString(_buffer.Slice(_position, length));
        _position += length;
        return decoded;
    }

    return ReadStringUtf8Cached(length);
}
/// <summary>
/// Read string with caching - reduces allocations in WASM.
/// Looks the bytes up in the string cache by hash and returns the cached instance only
/// when it is verified to be the same text; otherwise decodes the bytes and stores the
/// result under that hash. Advances the position by <paramref name="length"/> either way.
/// </summary>
/// <param name="length">Byte length of the encoded string; the caller has already bounds-checked it.</param>
private string ReadStringUtf8Cached(int length)
{
    // CRITICAL FIX (2025-01-24): Use full-content hash to avoid collisions.
    // Property names like "Creator" and "Created" have the same length (7) and the same
    // first 4 bytes, which collided under the old hash and made WASM deserialization
    // assign an int value to a DateTime property.
    // DO NOT REMOVE OR SIMPLIFY THE HASH FUNCTION - see ComputeStringHashFull().
    var slice = _buffer.Slice(_position, length);
    var hash = ComputeStringHashFull(slice);
    _stringCache ??= new Dictionary<int, string>(128);

    // A matching hash plus matching length is not proof of equality: the hash is only
    // 32 bits, and for inputs longer than 32 bytes it samples three 8-byte windows, so
    // strings that differ elsewhere share a hash. Verify the content before reusing it.
    if (_stringCache.TryGetValue(hash, out var cached) && IsSameAsciiText(cached, slice))
    {
        _position += length;
        return cached;
    }

    // Miss, collision, or non-ASCII text: decode and (re)store under this hash.
    var value = Utf8NoBom.GetString(slice);
    _stringCache[hash] = value;
    _position += length;
    return value;

    // True when every byte is ASCII and equals the char at the same index. Allocation-free.
    // Non-ASCII input always yields false; it never matched the former
    // "char count == byte count" test either, so it is still decoded on every read.
    static bool IsSameAsciiText(string candidate, ReadOnlySpan<byte> utf8)
    {
        if (candidate.Length != utf8.Length)
        {
            return false;
        }

        for (var i = 0; i < utf8.Length; i++)
        {
            var b = utf8[i];
            if (b >= 0x80 || candidate[i] != b)
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Computes the cache key for a UTF-8 byte sequence. Inputs of up to 32 bytes are hashed
/// in full; longer inputs are hashed from their length plus three 8-byte samples.
///
/// CRITICAL FIX (2025-01-24): DO NOT MODIFY THIS FUNCTION!
///
/// PROBLEM: The original hash function only used first 4 bytes + length.
/// This caused hash collisions in WASM for similar property names like:
/// - "Creator" vs "Created" (both 7 bytes, both start with "Crea")
/// - "Modifier" vs "Modified" (both 8 bytes, both start with "Modi")
///
/// SYMPTOM: In WASM, when "Created" was cached first, reading "Creator" returned
/// "Created" from cache, causing a type mismatch (int value assigned to a DateTime property).
/// Error: "Cannot set property 'Created' - PropertyType: DateTime, ValueType: Int32"
///
/// FIX: Hash ALL bytes for strings of up to 32 bytes (covers all typical property names).
/// This removes the systematic collisions between similar property names.
///
/// PERFORMANCE (per the original author): ~5% slower hash computation, but
/// zero-allocation cache hits. This is acceptable for the reliability improvement.
///
/// NOTE(review): the result is a 32-bit hash, so two different inputs can still share a
/// value; for inputs longer than 32 bytes, inputs that differ only outside the sampled
/// windows always do. Callers should not treat equal hashes as equal content.
/// </summary>
/// <param name="data">UTF-8 bytes of the string to hash.</param>
/// <returns>Hash code produced by <see cref="HashCode"/> for the bytes described above.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ComputeStringHashFull(ReadOnlySpan<byte> data)
{
// For strings up to 32 bytes (covers most property names), hash ALL bytes
// so that near-identical names such as Creator/Created get distinct inputs.
if (data.Length <= 32)
{
var hash = new HashCode();
hash.AddBytes(data);
return hash.ToHashCode();
}
// For longer strings (rare for property names), use a sampling strategy:
// length, then first 8 bytes, last 8 bytes, and 8 bytes around the middle.
// The amount of hashed data is fixed (24 bytes + length) regardless of input size.
// The Add/AddBytes order below is part of the hash definition - keep it as is.
var h = new HashCode();
h.Add(data.Length);
h.AddBytes(data.Slice(0, 8));
h.AddBytes(data.Slice(data.Length - 8, 8));
// Middle window: starts 4 bytes before the midpoint; always in range because Length > 32 here.
h.AddBytes(data.Slice(data.Length / 2 - 4, 8));
return h.ToHashCode();
}
/// <summary>
/// Advances the read position by <paramref name="count"/> bytes without decoding them.
/// </summary>
/// <param name="count">Number of bytes to skip; checked against the remaining payload by EnsureAvailable.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Skip(int count)
{
    EnsureAvailable(count);
    _position = _position + count;
}
/// <summary>
/// Appends <paramref name="value"/> to the interned-string table, creating the table on
/// first use.
/// </summary>
/// <param name="value">String to add.</param>
/// <returns>Zero-based index of the added entry.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int RegisterInternedString(string value)
{
    var table = _internedStrings ??= new List<string>();

    // The new entry lands at the current end of the table.
    var index = table.Count;
    table.Add(value);
    return index;
}
/// <summary>
/// Returns the interned string stored at <paramref name="index"/>.
/// </summary>
/// <param name="index">Zero-based index into the interned-string table.</param>
/// <exception cref="AcBinaryDeserializationException">
/// No table exists, or the index is negative or past the end of the table.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string GetInternedString(int index)
{
    var table = _internedStrings;

    // The unsigned comparison covers both "negative" and "too large" in one test.
    if (table != null && (uint)index < (uint)table.Count)
    {
        return table[index];
    }

    throw new AcBinaryDeserializationException($"Invalid interned string index '{index}'.", _position);
}
/// <summary>
/// Returns the property name stored at <paramref name="index"/> in the table read by
/// ReadHeader.
/// </summary>
/// <param name="index">Zero-based index into the property-name table.</param>
/// <exception cref="AcBinaryDeserializationException">
/// No table exists, or the index is negative or past the end of the table.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string GetPropertyName(int index)
{
    var names = _propertyNames;

    // The unsigned comparison covers both "negative" and "too large" in one test.
    if (names != null && (uint)index < (uint)names.Count)
    {
        return names[index];
    }

    throw new AcBinaryDeserializationException($"Invalid property metadata index '{index}'.", _position);
}
/// <summary>
/// Registers <paramref name="instance"/> under <paramref name="refId"/> by forwarding to
/// <c>ContextClass.TryGetOrStoreInt32</c>. The result of that call is ignored.
/// </summary>
/// <param name="wrapper">Type metadata wrapper passed through to the context class.</param>
/// <param name="refId">Reference id; must not be zero.</param>
/// <param name="instance">Object to associate with the id.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="refId"/> is zero.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void RegisterObject(TypeMetadataWrapper<BinaryDeserializeTypeMetadata> wrapper, int refId, object instance)
{
    // Throw a specific exception type instead of bare System.Exception; existing
    // catch (Exception) handlers still observe it.
    if (refId == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(refId), refId, "refId == 0");
    }

    ContextClass.TryGetOrStoreInt32(wrapper, instance, refId);
}
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
//public object? GetReferencedObject(TypeMetadataWrapper<BinaryDeserializeTypeMetadata> wrapper, int refId)
//{
// //if (refId <= 0)
// //{
// // return null;
// //}
// //if (_objectReferences == null || !_objectReferences.TryGetValue(refId, out var value))
// //{
// // throw new AcBinaryDeserializationException($"Unknown object reference id '{refId}'.", _position);
// //}
// //return value;
//}
/// <summary>
/// Verifies that <paramref name="length"/> more bytes can be read from the current position.
/// </summary>
/// <param name="length">Number of bytes the caller is about to consume.</param>
/// <exception cref="AcBinaryDeserializationException">
/// <paramref name="length"/> is negative, or fewer than <paramref name="length"/> bytes remain.
/// </exception>
private void EnsureAvailable(int length)
{
    // Lengths are frequently derived from payload data (for example a VarUInt cast to int),
    // so a negative value means corrupt input. Without this check a negative length passed
    // the range test below, letting Skip move backwards and Slice throw a non-domain exception.
    if (length < 0)
    {
        throw new AcBinaryDeserializationException($"Invalid negative length '{length}' in binary payload.", _position);
    }

    // Written as a subtraction so that position + length cannot overflow.
    if (_position > _buffer.Length - length)
    {
        throw new AcBinaryDeserializationException("Unexpected end of binary payload.", _position);
    }
}
/// <summary>
/// Reads one length-prefixed string: a VarUInt byte length followed by that many UTF-8 bytes.
/// Used for the property-name and interned-string tables.
/// </summary>
private string ReadHeaderString() => ReadStringUtf8((int)ReadVarUInt());
#region IId Reference Cache - Delegates to ContextClass
// Documentation for the disabled GetOrRegisterIIdObject method below (kept as a plain
// comment because an XML doc comment must be followed by a declaration):
// After PopulateObject, checks if we should reuse an existing IId object.
// Delegates to ContextClass which uses AcSerializerContextBase infrastructure.
// Returns the object to use (either the new one or an existing cached one).
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
//public object GetOrRegisterIIdObject(object newObj, TypeMetadataWrapper<BinaryDeserializeTypeMetadata> wrapper)
//{
// return ContextClass.GetOrRegisterIIdObject(newObj, wrapper);
//}
#endregion
}
}