Switch to marker-based interned value serialization

Refactor interned string/object tracking from footer-based to marker-based format.
- Serializer rewrites type code markers for first occurrences (StringInternFirst, ObjectRefFirst, ObjectWithMetadataRefFirst).
- Header now stores a VarUInt cache count instead of the 4-byte footer position.
- Deserializer registers interned values sequentially as markers are encountered, eliminating footer parsing.
- Simplified registration logic and updated BinaryTypeCode constants.
- Improves cache locality, format compactness, and performance.
This commit is contained in:
Loretta 2026-02-05 08:03:44 +01:00
parent 097c1e8efe
commit e8a0d36e43
6 changed files with 200 additions and 229 deletions

View File

@ -19,13 +19,10 @@ public static partial class AcBinaryDeserializer
private int _position; private int _position;
private Dictionary<int, string>? _stringCache; private Dictionary<int, string>? _stringCache;
// Position-based interning: flat int[] for cache-friendly access // Marker-based interning: sequential cache (no footer needed)
// Layout: [pos0, cacheIdx0, pos1, cacheIdx1, ...] - pairs sorted by position // StringInternFirst/ObjectRefFirst markers register values in order
// Shared for string interning AND IId object references
private int[]? _dupData; // Footer data: (position, cacheIndex) pairs as flat int array
private object?[]? _internCache; // Shared cache for interned strings AND IId objects private object?[]? _internCache; // Shared cache for interned strings AND IId objects
private int _dupCheckIndex; // Current index in _dupData (increments by 2) private int _nextCacheIndex; // Next index to assign when registering
private int _nextDupPosition; // Cached next dup position - avoids array access in hot path
/// <summary> /// <summary>
/// Heap-allocated context class for IId-based reference tracking. /// Heap-allocated context class for IId-based reference tracking.
@ -75,11 +72,9 @@ public static partial class AcBinaryDeserializer
_position = 0; _position = 0;
_stringCache = null; _stringCache = null;
// Position-based interning fields (shared: string + IId) // Marker-based interning fields
_dupData = null;
_internCache = null; _internCache = null;
_dupCheckIndex = 0; _nextCacheIndex = 0;
_nextDupPosition = int.MaxValue;
HasMetadata = false; HasMetadata = false;
IsMergeMode = false; IsMergeMode = false;
@ -107,8 +102,6 @@ public static partial class AcBinaryDeserializer
var marker = ReadByteInternal(); var marker = ReadByteInternal();
var hasPropertyTable = false; var hasPropertyTable = false;
var hasInternFooter = false;
var footerPosition = 0;
if (marker == BinaryTypeCode.MetadataHeader) if (marker == BinaryTypeCode.MetadataHeader)
{ {
@ -130,14 +123,16 @@ public static partial class AcBinaryDeserializer
: hasOnlyId ? ReferenceHandlingMode.OnlyId : hasOnlyId ? ReferenceHandlingMode.OnlyId
: ReferenceHandlingMode.None; : ReferenceHandlingMode.None;
// Read footer position if flag is set // Read cache count if flag is set (marker-based format)
var hasFooterPosition = (flags & BinaryTypeCode.HeaderFlag_HasFooterPosition) != 0; var hasCacheCount = (flags & BinaryTypeCode.HeaderFlag_HasCacheCount) != 0;
if (hasFooterPosition) if (hasCacheCount)
{ {
EnsureAvailable(4); var cacheCount = (int)ReadVarUInt();
footerPosition = BinaryPrimitives.ReadInt32LittleEndian(_buffer.Slice(_position, 4)); if (cacheCount > 0)
_position += 4; {
hasInternFooter = footerPosition > 0; _internCache = ContextClass.RentInternCache(cacheCount);
ContextClass.SetInternCacheUsed(cacheCount);
}
} }
} }
else else
@ -148,58 +143,6 @@ public static partial class AcBinaryDeserializer
} }
HasMetadata = hasPropertyTable; HasMetadata = hasPropertyTable;
// Footer-based: read intern indices and metadata from footer
if (hasInternFooter && footerPosition > 0)
{
ReadFooterIndices(footerPosition);
}
}
/// <summary>
/// Reads intern footer: [dupCount][pos0][idx0][pos1][idx1]...
/// Shared for string interning AND IId object references.
/// VarUInt format read into flat int[] for fast hot path access.
/// Arrays are pooled via ContextClass for zero steady-state allocation.
/// </summary>
private void ReadFooterIndices(int footerPosition)
{
// Save current position (start of data)
var dataPosition = _position;
// Seek to footer
_position = footerPosition;
// Read dup count (intern entries)
var dupCount = (int)ReadVarUInt();
if (dupCount == 0)
{
_dupData = Array.Empty<int>();
_internCache = Array.Empty<object?>();
_nextDupPosition = int.MaxValue;
}
else
{
// Read VarUInt pairs into pooled flat int[]
var intCount = dupCount * 2;
_dupData = ContextClass.RentDupData(intCount);
for (var i = 0; i < dupCount; i++)
{
_dupData[i * 2] = (int)ReadVarUInt(); // position
_dupData[i * 2 + 1] = (int)ReadVarUInt(); // cacheIndex
}
_internCache = ContextClass.RentInternCache(dupCount);
ContextClass.SetInternCacheUsed(dupCount);
// Cache first dup position for ultra-fast hot path
_nextDupPosition = _dupData[0];
}
// Metadata is now inline in the body (not in footer).
// No ReadMetadataFooter() call needed.
// Seek back to data position
_position = dataPosition;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
@ -563,30 +506,14 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Registers an interned value (string or IId object) during body read. /// Registers an interned value (string or object) in the cache.
/// Uses position-based check for 100% reliable cache matching. /// Called when StringInternFirst/ObjectRefFirst marker is encountered.
/// Ultra-fast: single int comparison in hot path. /// Sequential: values are registered in order (0, 1, 2, ...).
/// </summary> /// </summary>
/// <param name="value">The value read from stream (string or IId object)</param>
/// <param name="streamPosition">Stream position BEFORE reading the value (type code position)</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public void RegisterInternedValue(object value, int streamPosition) public void RegisterNextInternedValue(object value)
{ {
// Ultra-fast hot path: single int comparison _internCache![_nextCacheIndex++] = value;
if (streamPosition != _nextDupPosition)
return;
// Match! Store in cache and advance to next dup position
// _dupData layout: [pos0, cacheIdx0, pos1, cacheIdx1, ...]
var data = _dupData!;
var idx = _dupCheckIndex;
_internCache![data[idx + 1]] = value; // cacheIndex is at odd positions
idx += 2;
_dupCheckIndex = idx;
_nextDupPosition = idx < data.Length
? data[idx] // next position is at even index
: int.MaxValue;
} }
/// <summary> /// <summary>

View File

@ -204,29 +204,27 @@ public static partial class AcBinaryDeserializer
return typeCode switch return typeCode switch
{ {
BinaryTypeCode.Null => null, BinaryTypeCode.Null => null,
BinaryTypeCode.Object => ReadObjectWithMapping(ref context, destType, indexMapping, depth), BinaryTypeCode.Object => ReadObjectWithMapping(ref context, destType, indexMapping, depth, registerInCache: false),
BinaryTypeCode.ObjectRefFirst => ReadObjectWithMapping(ref context, destType, indexMapping, depth, registerInCache: true),
_ => ReadValue(ref context, destType, depth) // Primitives, arrays, etc. use normal path _ => ReadValue(ref context, destType, depth) // Primitives, arrays, etc. use normal path
}; };
} }
/// <summary> /// <summary>
/// Reads an object using index mapping for property resolution. /// Reads an object using index mapping for property resolution.
/// Note: Object marker already consumed by caller.
/// </summary> /// </summary>
private static object? ReadObjectWithMapping(ref BinaryDeserializationContext context, Type destType, int[] indexMapping, int depth) private static object? ReadObjectWithMapping(ref BinaryDeserializationContext context, Type destType, int[] indexMapping, int depth, bool registerInCache)
{ {
// Note: streamPosition captured before Object type code (already consumed by caller)
var streamPosition = context.Position - 1;
var wrapper = context.ContextClass.GetWrapper(destType); var wrapper = context.ContextClass.GetWrapper(destType);
var metadata = wrapper.Metadata; var metadata = wrapper.Metadata;
// Wire format: [Object][props...] - no refId prefix in new format
var obj = CreateInstance(destType, metadata); var obj = CreateInstance(destType, metadata);
if (obj != null) if (obj != null)
{ {
// Register in shared intern cache BEFORE populate (position-based sequential check) if (registerInCache)
if (context.ContextClass.UseTypeReferenceHandling(metadata))
{ {
context.RegisterInternedValue(obj, streamPosition); context.RegisterNextInternedValue(obj);
} }
PopulateObjectWithMapping(ref context, obj, destType, indexMapping, depth); PopulateObjectWithMapping(ref context, obj, destType, indexMapping, depth);
@ -318,21 +316,19 @@ public static partial class AcBinaryDeserializer
} }
// Handle nested complex objects - reuse existing if available // Handle nested complex objects - reuse existing if available
if (peekCode == BinaryTypeCode.Object && propInfo.IsComplexType) if ((peekCode == BinaryTypeCode.Object || peekCode == BinaryTypeCode.ObjectRefFirst) && propInfo.IsComplexType)
{ {
var existingObj = propInfo.GetValue(target); var existingObj = propInfo.GetValue(target);
if (existingObj != null) if (existingObj != null)
{ {
var objStreamPos = context.Position; // position of Object type code var registerInCache = peekCode == BinaryTypeCode.ObjectRefFirst;
context.ReadByte(); // consume Object marker context.ReadByte(); // consume Object/ObjectRefFirst marker
// Register in shared intern cache BEFORE populate (position-based sequential check) if (registerInCache)
if (context.ContextClass.UseTypeReferenceHandling(wrapper.Metadata))
{ {
context.RegisterInternedValue(existingObj, objStreamPos); context.RegisterNextInternedValue(existingObj);
} }
// Wire format: [Object][props...] - no refId prefix in new format
PopulateObjectCore(ref context, existingObj, wrapper, nextDepth, skipDefaultWrite: false); PopulateObjectCore(ref context, existingObj, wrapper, nextDepth, skipDefaultWrite: false);
return; return;
} }

View File

@ -60,19 +60,18 @@ public static partial class AcBinaryDeserializer
RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx)); RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx));
RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt())); RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt()));
RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty); RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty);
// StringInternNew: position is captured as Position-1 (after type code was read) // StringInternFirst: first occurrence of interned string - read content + register in cache
RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) => RegisterReader(BinaryTypeCode.StringInternFirst, static (ref BinaryDeserializationContext ctx, Type _, int _) =>
{ ReadAndRegisterInternedString(ref ctx));
var streamPosition = ctx.Position - 1; // Position before type code
return ReadAndRegisterInternedString(ref ctx, streamPosition);
});
RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe()); RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe());
RegisterReader(BinaryTypeCode.DateTimeOffset, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeOffsetUnsafe()); RegisterReader(BinaryTypeCode.DateTimeOffset, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeOffsetUnsafe());
RegisterReader(BinaryTypeCode.TimeSpan, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadTimeSpanUnsafe()); RegisterReader(BinaryTypeCode.TimeSpan, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadTimeSpanUnsafe());
RegisterReader(BinaryTypeCode.Guid, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadGuidUnsafe()); RegisterReader(BinaryTypeCode.Guid, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadGuidUnsafe());
RegisterReader(BinaryTypeCode.Enum, static (ref BinaryDeserializationContext ctx, Type type, int _) => ReadEnumValue(ref ctx, type)); RegisterReader(BinaryTypeCode.Enum, static (ref BinaryDeserializationContext ctx, Type type, int _) => ReadEnumValue(ref ctx, type));
RegisterReader(BinaryTypeCode.Object, ReadObject); RegisterReader(BinaryTypeCode.Object, ReadObject);
RegisterReader(BinaryTypeCode.ObjectRefFirst, ReadObjectRefFirst);
RegisterReader(BinaryTypeCode.ObjectWithMetadata, ReadObjectWithMetadata); RegisterReader(BinaryTypeCode.ObjectWithMetadata, ReadObjectWithMetadata);
RegisterReader(BinaryTypeCode.ObjectWithMetadataRefFirst, ReadObjectWithMetadataRefFirst);
RegisterReader(BinaryTypeCode.ObjectRef, ReadObjectRef); RegisterReader(BinaryTypeCode.ObjectRef, ReadObjectRef);
RegisterReader(BinaryTypeCode.Array, ReadArray); RegisterReader(BinaryTypeCode.Array, ReadArray);
RegisterReader(BinaryTypeCode.Dictionary, ReadDictionary); RegisterReader(BinaryTypeCode.Dictionary, ReadDictionary);
@ -799,16 +798,15 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Read new interned string and register it in the intern cache. /// Read interned string (StringInternFirst marker) and register in cache.
/// Position is captured BEFORE the type code was read (by caller).
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition) private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context)
{ {
var length = (int)context.ReadVarUInt(); var length = (int)context.ReadVarUInt();
if (length == 0) return string.Empty; if (length == 0) return string.Empty;
var str = context.ReadStringUtf8(length); var str = context.ReadStringUtf8(length);
context.RegisterInternedValue(str, streamPosition); context.RegisterNextInternedValue(str);
return str; return str;
} }
@ -966,15 +964,29 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Root object olvasása. /// Object olvasása (nem tracked, vagy UseMetadata nélkül).
/// Wire format: [Object][props 0-tól...] - Id a props-ban, nincs extra /// Wire format: [Object][props...]
/// UseMetadata esetén a root = footer entry 0 (nincs footer index a body-ban).
/// </summary> /// </summary>
private static object? ReadObject(ref BinaryDeserializationContext context, Type targetType, int depth) private static object? ReadObject(ref BinaryDeserializationContext context, Type targetType, int depth)
{ {
// Capture stream position of the Object type code (already consumed) return ReadObjectCore(ref context, targetType, depth, registerInCache: false);
var streamPosition = context.Position - 1; }
/// <summary>
/// Object olvasása első előforduláskor (ObjectRefFirst marker).
/// Wire format: [ObjectRefFirst][props...]
/// Az objektumot regisztráljuk a cache-be.
/// </summary>
private static object? ReadObjectRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectCore(ref context, targetType, depth, registerInCache: true);
}
/// <summary>
/// Object olvasás core implementáció.
/// </summary>
private static object? ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
{
// Handle dictionary types // Handle dictionary types
if (IsDictionaryType(targetType, out var keyType, out var valueType)) if (IsDictionaryType(targetType, out var keyType, out var valueType))
{ {
@ -987,12 +999,11 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata); var instance = CreateInstance(targetType, metadata);
if (instance == null) return null; if (instance == null) return null;
if (context.ContextClass.UseTypeReferenceHandling(metadata)) if (registerInCache)
{ {
context.RegisterInternedValue(instance, streamPosition); context.RegisterNextInternedValue(instance);
} }
// UseMetadata: root object is now ObjectWithMetadata marker — no footer entry 0 handling needed here.
PopulateObject(ref context, instance, wrapper, depth, skipDefaultWrite: true); PopulateObject(ref context, instance, wrapper, depth, skipDefaultWrite: true);
// ChainMode kezelés // ChainMode kezelés
@ -1014,17 +1025,30 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Object olvasása UseMetadata módban (inline metadata). /// Object olvasása UseMetadata módban (nem tracked).
/// Wire format: /// Wire format:
/// Első előfordulás: [ObjectWithMetadata][propNameHash (4b)][propCount (VarUInt)][hash0 (4b)][hash1]...[props...] /// Első előfordulás: [ObjectWithMetadata][propNameHash (4b)][propCount (VarUInt)][hash0..N][props...]
/// Ismételt: [ObjectWithMetadata][propNameHash (4b)][props...] /// Ismételt: [ObjectWithMetadata][propNameHash (4b)][props...]
/// A propNameHash-ből a ContextClass megkeresi a source hash-eket (lineáris, kis tömb).
/// Ha nincs találat → első előfordulás, beolvassuk a hash-eket inline-ból.
/// </summary> /// </summary>
private static object? ReadObjectWithMetadata(ref BinaryDeserializationContext context, Type targetType, int depth) private static object? ReadObjectWithMetadata(ref BinaryDeserializationContext context, Type targetType, int depth)
{ {
var streamPosition = context.Position - 1; return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: false);
}
/// <summary>
/// Object olvasása UseMetadata módban, első tracked előfordulás (ObjectWithMetadataRefFirst marker).
/// Az objektumot regisztráljuk a cache-be.
/// </summary>
private static object? ReadObjectWithMetadataRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: true);
}
/// <summary>
/// ObjectWithMetadata olvasás core implementáció.
/// </summary>
private static object? ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
{
// Inline metadata: propNameHash mindig jön // Inline metadata: propNameHash mindig jön
var propNameHash = context.ReadInt32Raw(); var propNameHash = context.ReadInt32Raw();
@ -1053,12 +1077,12 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata); var instance = CreateInstance(targetType, metadata);
if (instance == null) return null; if (instance == null) return null;
if (context.ContextClass.UseTypeReferenceHandling(metadata)) if (registerInCache)
{ {
context.RegisterInternedValue(instance, streamPosition); context.RegisterNextInternedValue(instance);
} }
// CacheMap felépítése ha még nincs (1x per target type × source type kombináció) // CacheMap felépítése ha még nincs
if (wrapper.CacheMap == null) if (wrapper.CacheMap == null)
BuildCacheMap(wrapper, sourceHashes); BuildCacheMap(wrapper, sourceHashes);
@ -1362,8 +1386,6 @@ public static partial class AcBinaryDeserializer
private static void SkipValue(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData) private static void SkipValue(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
{ {
// Capture position before reading type code (needed for string interning)
var streamPosition = context.Position;
var typeCode = context.ReadByte(); var typeCode = context.ReadByte();
if (typeCode == BinaryTypeCode.Null) return; if (typeCode == BinaryTypeCode.Null) return;
@ -1395,7 +1417,7 @@ public static partial class AcBinaryDeserializer
context.Skip(2); context.Skip(2);
return; return;
case BinaryTypeCode.Int32: case BinaryTypeCode.Int32:
context.ReadVarInt(); // Skip VarInt context.ReadVarInt();
return; return;
case BinaryTypeCode.UInt32: case BinaryTypeCode.UInt32:
context.ReadVarUInt(); context.ReadVarUInt();
@ -1424,15 +1446,14 @@ public static partial class AcBinaryDeserializer
context.Skip(16); context.Skip(16);
return; return;
case BinaryTypeCode.String: case BinaryTypeCode.String:
// Sima string - nem regisztrálunk
SkipPlainString(ref context); SkipPlainString(ref context);
return; return;
case BinaryTypeCode.StringInterned: case BinaryTypeCode.StringInterned:
context.ReadVarUInt(); context.ReadVarUInt();
return; return;
case BinaryTypeCode.StringInternNew: case BinaryTypeCode.StringInternFirst:
// New interned string - must register even when skipping // First occurrence - must register even when skipping
SkipAndRegisterInternedString(ref context, streamPosition); SkipAndRegisterInternedString(ref context);
return; return;
case BinaryTypeCode.ByteArray: case BinaryTypeCode.ByteArray:
var byteLen = (int)context.ReadVarUInt(); var byteLen = (int)context.ReadVarUInt();
@ -1446,11 +1467,17 @@ public static partial class AcBinaryDeserializer
case BinaryTypeCode.Object: case BinaryTypeCode.Object:
SkipObject(ref context, metaData); SkipObject(ref context, metaData);
return; return;
case BinaryTypeCode.ObjectRefFirst:
SkipObjectRefFirst(ref context, metaData);
return;
case BinaryTypeCode.ObjectWithMetadata: case BinaryTypeCode.ObjectWithMetadata:
SkipObjectWithMetadata(ref context, metaData); SkipObjectWithMetadata(ref context, metaData, registerInCache: false);
return;
case BinaryTypeCode.ObjectWithMetadataRefFirst:
SkipObjectWithMetadata(ref context, metaData, registerInCache: true);
return; return;
case BinaryTypeCode.ObjectRef: case BinaryTypeCode.ObjectRef:
context.ReadVarInt(); context.ReadVarUInt();
return; return;
case BinaryTypeCode.Array: case BinaryTypeCode.Array:
SkipArray(ref context, metaData); SkipArray(ref context, metaData);
@ -1475,17 +1502,25 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Skip a new interned string - must still register in cache. /// Skip an interned string (StringInternFirst) - must still register in cache.
/// </summary> /// </summary>
/// <param name="context">Deserialization context</param>
/// <param name="streamPosition">Position before the type code was read</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition) private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context)
{ {
var byteLen = (int)context.ReadVarUInt(); var byteLen = (int)context.ReadVarUInt();
if (byteLen == 0) return; if (byteLen == 0) return;
var str = context.ReadStringUtf8(byteLen); var str = context.ReadStringUtf8(byteLen);
context.RegisterInternedValue(str, streamPosition); context.RegisterNextInternedValue(str);
}
/// <summary>
/// Skip ObjectRefFirst - must register placeholder in cache.
/// </summary>
private static void SkipObjectRefFirst(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
{
// Register placeholder (stream position as boxed int for potential lazy load)
context.RegisterNextInternedValue(context.Position);
SkipObject(ref context, metaData);
} }
///// <summary> ///// <summary>
@ -1517,18 +1552,21 @@ public static partial class AcBinaryDeserializer
} }
/// <summary> /// <summary>
/// Skip ObjectWithMetadata: inline metadata-ból olvassuk a propCount-ot. /// Skip ObjectWithMetadata/ObjectWithMetadataRefFirst.
/// Ha az adott propNameHash-hez már van source hash → propCount onnan.
/// Ha első előfordulás → propCount + hash-ek a stream-ből.
/// </summary> /// </summary>
private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData) private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, bool registerInCache)
{ {
if (registerInCache)
{
// Register placeholder for potential lazy load
context.RegisterNextInternedValue(context.Position);
}
var propNameHash = context.ReadInt32Raw(); var propNameHash = context.ReadInt32Raw();
var sourceHashes = context.ContextClass.FindSourceHashes(propNameHash); var sourceHashes = context.ContextClass.FindSourceHashes(propNameHash);
if (sourceHashes == null) if (sourceHashes == null)
{ {
// Első előfordulás: propCount + hash-ek jönnek a stream-ben
var propCount = (int)context.ReadVarUInt(); var propCount = (int)context.ReadVarUInt();
sourceHashes = new int[propCount]; sourceHashes = new int[propCount];
for (var i = 0; i < propCount; i++) for (var i = 0; i < propCount; i++)

View File

@ -119,13 +119,9 @@ public static partial class AcBinarySerializer
// These properties delegate to Options for convenience // These properties delegate to Options for convenience
public bool UseStringInterning => Options.UseStringInterning != StringInterningMode.None; public bool UseStringInterning => Options.UseStringInterning != StringInterningMode.None;
/// <summary> /// <summary>
/// True if we need footer position in header (string interning OR reference handling OR metadata). /// True if we have interning/ref tracking (cache count needed in header).
/// </summary> /// </summary>
/// <summary> public bool HasCaching => UseStringInterning || ReferenceHandling != ReferenceHandlingMode.None;
/// True if we need footer position in header (string interning OR reference handling).
/// UseMetadata no longer uses footer — metadata is inline in the body.
/// </summary>
public bool HasFooter => UseStringInterning || ReferenceHandling != ReferenceHandlingMode.None;
public bool UseMetadata => Options.UseMetadata; public bool UseMetadata => Options.UseMetadata;
public byte MinStringInternLength => Options.MinStringInternLength; public byte MinStringInternLength => Options.MinStringInternLength;
public byte MaxStringInternLength => Options.MaxStringInternLength; public byte MaxStringInternLength => Options.MaxStringInternLength;
@ -222,14 +218,14 @@ public static partial class AcBinarySerializer
/// <summary> /// <summary>
/// Tries to intern a string. Returns true if string was seen before (write index). /// Tries to intern a string. Returns true if string was seen before (write index).
/// Returns false if first occurrence (write inline). /// Returns false if first occurrence (write inline).
/// Uses stream position for 100% reliable deserializer cache matching. /// Stores marker position for later rewriting (marker-based interning, no footer).
/// </summary> /// </summary>
/// <param name="value">The string value to intern</param> /// <param name="value">The string value to intern</param>
/// <param name="streamPosition">Current stream position (before writing the string)</param> /// <param name="markerPosition">Position of the type code marker (for rewriting)</param>
/// <param name="cacheIndex">Output: cache index for 2+ occurrence, -1 for 1st occurrence</param> /// <param name="cacheIndex">Output: cache index for 2+ occurrence, -1 for 1st occurrence</param>
/// <returns>True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline)</returns> /// <returns>True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline)</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetInternedString(string value, int streamPosition, out int cacheIndex) public bool TryGetInternedString(string value, int markerPosition, out int cacheIndex)
{ {
_stringInternMap ??= new IdentityMap<string, InternEntry>(); _stringInternMap ??= new IdentityMap<string, InternEntry>();
@ -245,9 +241,9 @@ public static partial class AcBinarySerializer
return true; return true;
} }
// 1st occurrence: store stream position // 1st occurrence: store marker position for later rewriting
ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex); ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition; newEntry.StreamPosition = markerPosition;
newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence
cacheIndex = -1; cacheIndex = -1;
return false; return false;
@ -259,23 +255,27 @@ public static partial class AcBinarySerializer
public bool HasInternedStrings => _stringInternMap != null && _stringInternMap.Count > 0; public bool HasInternedStrings => _stringInternMap != null && _stringInternMap.Count > 0;
/// <summary> /// <summary>
/// Gets the count of strings that occurred more than once (for footer). /// Gets the count of cached values (string intern + object ref that occurred more than once).
/// </summary> /// </summary>
public int GetDupCount() => _nextCacheIndex; public int GetCacheCount() => _nextCacheIndex;
/// <summary> /// <summary>
/// Writes the merged footer with (position, cacheIndex) pairs sorted by position. /// Rewrites markers for all entries with CacheIndex >= 0.
/// Collects entries from string interning AND ID tracking (all wrappers). /// Called at end of serialization to mark first occurrences of interned/tracked values.
/// VarUInt format for compact size, deserializer reads into flat int[]. /// String: String → StringInternFirst
/// Object: Object → ObjectRefFirst, ObjectWithMetadata → ObjectWithMetadataRefFirst
///
/// IMPORTANT: CacheIndex must be reassigned in StreamPosition order, because
/// deserializer registers values sequentially as it encounters *First markers.
/// </summary> /// </summary>
public void WriteInternedFooter() public void RewriteMarkers()
{ {
if (_nextCacheIndex == 0) return; if (_nextCacheIndex == 0) return;
// Collect ALL entries with CacheIndex >= 0 (string + ID, all occurred more than once) // Collect all first-occurrence positions that need markers
Span<(int Position, int CacheIndex)> entries = _nextCacheIndex <= 64 Span<int> positions = _nextCacheIndex <= 64
? stackalloc (int, int)[_nextCacheIndex] ? stackalloc int[_nextCacheIndex]
: new (int, int)[_nextCacheIndex]; : new int[_nextCacheIndex];
var idx = 0; var idx = 0;
@ -287,33 +287,41 @@ public static partial class AcBinarySerializer
{ {
ref var entry = ref _stringInternMap.GetValueRefAt(i); ref var entry = ref _stringInternMap.GetValueRefAt(i);
if (entry.CacheIndex >= 0) if (entry.CacheIndex >= 0)
entries[idx++] = (entry.StreamPosition, entry.CacheIndex); {
positions[idx++] = entry.StreamPosition;
}
} }
} }
// 2. ID tracking entries from all wrappers // 2. ID tracking entries from all wrappers
foreach (var wrapper in GetWrappers()) foreach (var wrapper in GetWrappers())
{ {
CollectInternEntries(wrapper.IdentityMapInt32, ref entries, ref idx); CollectPositions(wrapper.IdentityMapInt32, ref positions, ref idx);
CollectInternEntries(wrapper.IdentityMapInt64, ref entries, ref idx); CollectPositions(wrapper.IdentityMapInt64, ref positions, ref idx);
CollectInternEntries(wrapper.IdentityMapGuid, ref entries, ref idx); CollectPositions(wrapper.IdentityMapGuid, ref positions, ref idx);
} }
// Sort by StreamPosition (ascending) for deserializer sequential check // Sort by position to match deserializer's sequential registration order
var usedEntries = entries.Slice(0, idx); var usedPositions = positions.Slice(0, idx);
usedEntries.Sort((a, b) => a.Position.CompareTo(b.Position)); usedPositions.Sort();
// Write pairs as VarUInt for compact size // Rewrite markers at sorted positions
for (var i = 0; i < idx; i++) for (var i = 0; i < idx; i++)
{ {
WriteVarUInt((uint)usedEntries[i].Position); var pos = usedPositions[i];
WriteVarUInt((uint)usedEntries[i].CacheIndex); var currentMarker = _buffer[pos];
_buffer[pos] = currentMarker switch
{
BinaryTypeCode.String => BinaryTypeCode.StringInternFirst,
BinaryTypeCode.Object => BinaryTypeCode.ObjectRefFirst,
BinaryTypeCode.ObjectWithMetadata => BinaryTypeCode.ObjectWithMetadataRefFirst,
_ => currentMarker
};
} }
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CollectInternEntries<TKey>(IdentityMap<TKey, InternEntry>? map, private static void CollectPositions<TKey>(IdentityMap<TKey, InternEntry>? map, ref Span<int> positions, ref int idx) where TKey : notnull
ref Span<(int Position, int CacheIndex)> entries, ref int idx) where TKey : notnull
{ {
if (map == null) return; if (map == null) return;
var count = map.Count; var count = map.Count;
@ -321,7 +329,9 @@ public static partial class AcBinarySerializer
{ {
ref var entry = ref map.GetValueRefAt(i); ref var entry = ref map.GetValueRefAt(i);
if (entry.CacheIndex >= 0) if (entry.CacheIndex >= 0)
entries[idx++] = (entry.StreamPosition, entry.CacheIndex); {
positions[idx++] = entry.StreamPosition;
}
} }
} }
@ -1007,59 +1017,54 @@ public static partial class AcBinarySerializer
private int _headerPosition; private int _headerPosition;
// Footer-based string interning: no estimation or shifting needed // Marker-based interning: no footer needed
// Header: [version][flags][footerPosition (4 bytes, only if string interning)] // Header: [version][flags][cacheCount (VarUInt, if caching enabled)]
// Body: data with StringInterned indices // Body: data with markers (StringInternFirst, ObjectRefFirst, etc.)
// Footer: interned strings table
public void WriteHeaderPlaceholder() public void WriteHeaderPlaceholder()
{ {
// Header layout: // Header layout:
// [0] version (1 byte) // [0] version (1 byte)
// [1] flags (1 byte) // [1] flags (1 byte)
// [2-5] footer position (4 bytes, if footer is needed) // [2+] cache count (VarUInt, max 5 bytes, if caching enabled)
EnsureCapacity(HasFooter ? 6 : 2); EnsureCapacity(HasCaching ? 7 : 2);
_headerPosition = _position; _headerPosition = _position;
_position += HasFooter ? 6 : 2; _position += HasCaching ? 7 : 2; // Reserve max VarUInt size
} }
public void FinalizeHeaderSections() public void FinalizeHeaderSections()
{ {
var dupCount = GetDupCount(); // Shared counter: string intern + ID tracking var cacheCount = GetCacheCount();
var hasInternTable = dupCount > 0;
// Footer: write merged intern entries (string + ID) // Rewrite markers for first occurrences (String→StringInternFirst, Object→ObjectRefFirst, etc.)
// Metadata footer is no longer written here — metadata is inline in the body. RewriteMarkers();
var footerPosition = 0;
if (hasInternTable)
{
footerPosition = _position;
// Intern footer
WriteVarUInt((uint)dupCount);
WriteInternedFooter();
}
// Write header // Write header
var flags = BinaryTypeCode.HeaderFlagsBase; var flags = BinaryTypeCode.HeaderFlagsBase;
if (UseMetadata) if (UseMetadata)
flags |= BinaryTypeCode.HeaderFlag_Metadata; flags |= BinaryTypeCode.HeaderFlag_Metadata;
// Encode ReferenceHandlingMode using separate bits
if (ReferenceHandling == ReferenceHandlingMode.OnlyId) if (ReferenceHandling == ReferenceHandlingMode.OnlyId)
flags |= BinaryTypeCode.HeaderFlag_RefHandling_OnlyId; flags |= BinaryTypeCode.HeaderFlag_RefHandling_OnlyId;
else if (ReferenceHandling == ReferenceHandlingMode.All) else if (ReferenceHandling == ReferenceHandlingMode.All)
flags |= (byte)(BinaryTypeCode.HeaderFlag_RefHandling_OnlyId | BinaryTypeCode.HeaderFlag_RefHandling_All); flags |= (byte)(BinaryTypeCode.HeaderFlag_RefHandling_OnlyId | BinaryTypeCode.HeaderFlag_RefHandling_All);
// Set footer position flag if footer is needed if (HasCaching)
if (HasFooter) flags |= BinaryTypeCode.HeaderFlag_HasCacheCount;
flags |= BinaryTypeCode.HeaderFlag_HasFooterPosition;
_buffer[_headerPosition] = AcBinarySerializerOptions.FormatVersion; _buffer[_headerPosition] = AcBinarySerializerOptions.FormatVersion;
_buffer[_headerPosition + 1] = flags; _buffer[_headerPosition + 1] = flags;
// Write footer position if footer is needed // Write cache count and compact header if needed
if (HasFooter) if (HasCaching)
{ {
Unsafe.WriteUnaligned(ref _buffer[_headerPosition + 2], footerPosition); var headerEnd = WriteVarUIntAt(_headerPosition + 2, (uint)cacheCount);
var reserved = _headerPosition + 7;
if (headerEnd < reserved)
{
// Shift body left to remove unused header bytes
var shift = reserved - headerEnd;
_buffer.AsSpan(reserved, _position - reserved).CopyTo(_buffer.AsSpan(headerEnd));
_position -= shift;
}
} }
} }

View File

@ -707,7 +707,7 @@ public static partial class AcBinarySerializer
/// <summary> /// <summary>
/// Optimized string writer with FixStr for short strings. /// Optimized string writer with FixStr for short strings.
/// New interning strategy: inline on first occurrence, index on 2+. /// Marker-based interning: write String marker, rewrite to StringInternFirst at end if needed.
/// </summary> /// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteString(string value, BinarySerializationContext context) private static void WriteString(string value, BinarySerializationContext context)
@ -724,9 +724,9 @@ public static partial class AcBinarySerializer
&& value.Length >= context.MinStringInternLength && value.Length >= context.MinStringInternLength
&& (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength)) && (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength))
{ {
// Capture position BEFORE writing - this is where deserializer will be when reading // Capture marker position BEFORE writing
var streamPosition = context.Position; var markerPosition = context.Position;
if (context.TryGetInternedString(value, streamPosition, out var index)) if (context.TryGetInternedString(value, markerPosition, out var index))
{ {
// 2+ occurrence: write index reference // 2+ occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned); context.WriteByte(BinaryTypeCode.StringInterned);
@ -736,8 +736,8 @@ public static partial class AcBinarySerializer
#if DEBUG #if DEBUG
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value); context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value);
#endif #endif
// 1st occurrence: write inline with StringInternNew type code // 1st occurrence: write String marker (will be rewritten to StringInternFirst if repeated)
context.WriteByte(BinaryTypeCode.StringInternNew); context.WriteByte(BinaryTypeCode.String);
context.WriteStringUtf8(value); context.WriteStringUtf8(value);
return; return;
} }

View File

@ -28,10 +28,10 @@ internal static class BinaryTypeCode
public const byte Char = 14; public const byte Char = 14;
// String types (16-19) // String types (16-19)
public const byte String = 16; // Inline UTF8 string public const byte String = 16; // Inline UTF8 string (non-interned)
public const byte StringInterned = 17; // Reference to interned string by index public const byte StringInterned = 17; // Reference to interned string by index (2+ occurrence)
public const byte StringEmpty = 18; // Empty string marker public const byte StringEmpty = 18; // Empty string marker
public const byte StringInternNew = 19; // New interned string - full content + register in table public const byte StringInternFirst = 19; // First occurrence of interned string - read content + register in cache
// Date/Time types (20-23) // Date/Time types (20-23)
public const byte DateTime = 20; public const byte DateTime = 20;
@ -43,13 +43,17 @@ internal static class BinaryTypeCode
public const byte Enum = 24; public const byte Enum = 24;
// Complex types (25-31) // Complex types (25-31)
public const byte Object = 25; // Start of object public const byte Object = 25; // Start of object (non-tracked OR first occurrence when ref tracking)
public const byte ObjectEnd = 26; // End of object marker public const byte ObjectEnd = 26; // End of object marker
public const byte ObjectRef = 27; // Reference to previously serialized object public const byte ObjectRef = 27; // Reference to previously serialized object (2+ occurrence)
public const byte Array = 28; // Start of array/list public const byte Array = 28; // Start of array/list
public const byte Dictionary = 29; // Start of dictionary public const byte Dictionary = 29; // Start of dictionary
public const byte ByteArray = 30; // Optimized byte[] storage public const byte ByteArray = 30; // Optimized byte[] storage
public const byte ObjectWithMetadata = 31; // Object with metadata footer index (UseMetadata nested objects) public const byte ObjectWithMetadata = 31; // Object with metadata (UseMetadata mode, non-tracked OR first occurrence)
// Extended markers for first occurrence tracking (66-67, after FixStr range)
public const byte ObjectRefFirst = 66; // First occurrence of tracked object (ref handling enabled)
public const byte ObjectWithMetadataRefFirst = 67; // First occurrence of tracked object with metadata
// Special markers (32+, for header/meta) // Special markers (32+, for header/meta)
// Header flags byte structure (for values >= 64): // Header flags byte structure (for values >= 64):
@ -77,7 +81,8 @@ internal static class BinaryTypeCode
// None = both false, OnlyId = 0x02, All = 0x06 (both bits set) // None = both false, OnlyId = 0x02, All = 0x06 (both bits set)
public const byte HeaderFlag_RefHandling_OnlyId = 0x02; public const byte HeaderFlag_RefHandling_OnlyId = 0x02;
public const byte HeaderFlag_RefHandling_All = 0x04; public const byte HeaderFlag_RefHandling_All = 0x04;
public const byte HeaderFlag_HasFooterPosition = 0x08; // Bit 3: 4-byte footer position follows flags public const byte HeaderFlag_HasFooterPosition = 0x08; // Bit 3: 4-byte footer position follows flags (legacy)
public const byte HeaderFlag_HasCacheCount = 0x08; // Bit 3 (reused): VarUInt cache count follows flags (new marker-based format)
// Compact integer variants (for VarInt optimization) // Compact integer variants (for VarInt optimization)
public const byte Int32Tiny = 192; // -16 to 63 stored in single byte (value = code - 192 - 16) public const byte Int32Tiny = 192; // -16 to 63 stored in single byte (value = code - 192 - 16)