Switch to marker-based interned value serialization

Refactor interned string/object tracking from footer-based to marker-based format.
- Serializer rewrites type code markers for first occurrences (StringInternFirst, ObjectRefFirst, ObjectWithMetadataRefFirst).
- Header now stores VarUInt cache count instead of footer position.
- Deserializer registers interned values sequentially as markers are encountered, eliminating footer parsing.
- Simplified registration logic and updated BinaryTypeCode constants.
- Improves cache locality, format compactness, and performance.
This commit is contained in:
Loretta 2026-02-05 08:03:44 +01:00
parent 097c1e8efe
commit e8a0d36e43
6 changed files with 200 additions and 229 deletions

View File

@ -19,13 +19,10 @@ public static partial class AcBinaryDeserializer
private int _position;
private Dictionary<int, string>? _stringCache;
// Position-based interning: flat int[] for cache-friendly access
// Layout: [pos0, cacheIdx0, pos1, cacheIdx1, ...] - pairs sorted by position
// Shared for string interning AND IId object references
private int[]? _dupData; // Footer data: (position, cacheIndex) pairs as flat int array
// Marker-based interning: sequential cache (no footer needed)
// StringInternFirst/ObjectRefFirst markers register values in order
private object?[]? _internCache; // Shared cache for interned strings AND IId objects
private int _dupCheckIndex; // Current index in _dupData (increments by 2)
private int _nextDupPosition; // Cached next dup position - avoids array access in hot path
private int _nextCacheIndex; // Next index to assign when registering
/// <summary>
/// Heap-allocated context class for IId-based reference tracking.
@ -75,11 +72,9 @@ public static partial class AcBinaryDeserializer
_position = 0;
_stringCache = null;
// Position-based interning fields (shared: string + IId)
_dupData = null;
// Marker-based interning fields
_internCache = null;
_dupCheckIndex = 0;
_nextDupPosition = int.MaxValue;
_nextCacheIndex = 0;
HasMetadata = false;
IsMergeMode = false;
@ -107,8 +102,6 @@ public static partial class AcBinaryDeserializer
var marker = ReadByteInternal();
var hasPropertyTable = false;
var hasInternFooter = false;
var footerPosition = 0;
if (marker == BinaryTypeCode.MetadataHeader)
{
@ -130,14 +123,16 @@ public static partial class AcBinaryDeserializer
: hasOnlyId ? ReferenceHandlingMode.OnlyId
: ReferenceHandlingMode.None;
// Read footer position if flag is set
var hasFooterPosition = (flags & BinaryTypeCode.HeaderFlag_HasFooterPosition) != 0;
if (hasFooterPosition)
// Read cache count if flag is set (marker-based format)
var hasCacheCount = (flags & BinaryTypeCode.HeaderFlag_HasCacheCount) != 0;
if (hasCacheCount)
{
EnsureAvailable(4);
footerPosition = BinaryPrimitives.ReadInt32LittleEndian(_buffer.Slice(_position, 4));
_position += 4;
hasInternFooter = footerPosition > 0;
var cacheCount = (int)ReadVarUInt();
if (cacheCount > 0)
{
_internCache = ContextClass.RentInternCache(cacheCount);
ContextClass.SetInternCacheUsed(cacheCount);
}
}
}
else
@ -148,58 +143,6 @@ public static partial class AcBinaryDeserializer
}
HasMetadata = hasPropertyTable;
// Footer-based: read intern indices and metadata from footer
if (hasInternFooter && footerPosition > 0)
{
ReadFooterIndices(footerPosition);
}
}
/// <summary>
/// Reads intern footer: [dupCount][pos0][idx0][pos1][idx1]...
/// Shared for string interning AND IId object references.
/// VarUInt format read into flat int[] for fast hot path access.
/// Arrays are pooled via ContextClass for zero steady-state allocation.
/// </summary>
private void ReadFooterIndices(int footerPosition)
{
// Save current position (start of data)
var dataPosition = _position;
// Seek to footer
_position = footerPosition;
// Read dup count (intern entries)
var dupCount = (int)ReadVarUInt();
if (dupCount == 0)
{
_dupData = Array.Empty<int>();
_internCache = Array.Empty<object?>();
_nextDupPosition = int.MaxValue;
}
else
{
// Read VarUInt pairs into pooled flat int[]
var intCount = dupCount * 2;
_dupData = ContextClass.RentDupData(intCount);
for (var i = 0; i < dupCount; i++)
{
_dupData[i * 2] = (int)ReadVarUInt(); // position
_dupData[i * 2 + 1] = (int)ReadVarUInt(); // cacheIndex
}
_internCache = ContextClass.RentInternCache(dupCount);
ContextClass.SetInternCacheUsed(dupCount);
// Cache first dup position for ultra-fast hot path
_nextDupPosition = _dupData[0];
}
// Metadata is now inline in the body (not in footer).
// No ReadMetadataFooter() call needed.
// Seek back to data position
_position = dataPosition;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@ -563,30 +506,14 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Registers an interned value (string or IId object) during body read.
/// Uses position-based check for 100% reliable cache matching.
/// Ultra-fast: single int comparison in hot path.
/// Registers an interned value (string or object) in the cache.
/// Called when StringInternFirst/ObjectRefFirst marker is encountered.
/// Sequential: values are registered in order (0, 1, 2, ...).
/// </summary>
/// <param name="value">The value read from stream (string or IId object)</param>
/// <param name="streamPosition">Stream position BEFORE reading the value (type code position)</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void RegisterInternedValue(object value, int streamPosition)
public void RegisterNextInternedValue(object value)
{
// Ultra-fast hot path: single int comparison
if (streamPosition != _nextDupPosition)
return;
// Match! Store in cache and advance to next dup position
// _dupData layout: [pos0, cacheIdx0, pos1, cacheIdx1, ...]
var data = _dupData!;
var idx = _dupCheckIndex;
_internCache![data[idx + 1]] = value; // cacheIndex is at odd positions
idx += 2;
_dupCheckIndex = idx;
_nextDupPosition = idx < data.Length
? data[idx] // next position is at even index
: int.MaxValue;
_internCache![_nextCacheIndex++] = value;
}
/// <summary>

View File

@ -204,29 +204,27 @@ public static partial class AcBinaryDeserializer
return typeCode switch
{
BinaryTypeCode.Null => null,
BinaryTypeCode.Object => ReadObjectWithMapping(ref context, destType, indexMapping, depth),
BinaryTypeCode.Object => ReadObjectWithMapping(ref context, destType, indexMapping, depth, registerInCache: false),
BinaryTypeCode.ObjectRefFirst => ReadObjectWithMapping(ref context, destType, indexMapping, depth, registerInCache: true),
_ => ReadValue(ref context, destType, depth) // Primitives, arrays, etc. use normal path
};
}
/// <summary>
/// Reads an object using index mapping for property resolution.
/// Note: Object marker already consumed by caller.
/// </summary>
private static object? ReadObjectWithMapping(ref BinaryDeserializationContext context, Type destType, int[] indexMapping, int depth)
private static object? ReadObjectWithMapping(ref BinaryDeserializationContext context, Type destType, int[] indexMapping, int depth, bool registerInCache)
{
// Note: streamPosition captured before Object type code (already consumed by caller)
var streamPosition = context.Position - 1;
var wrapper = context.ContextClass.GetWrapper(destType);
var metadata = wrapper.Metadata;
// Wire format: [Object][props...] - no refId prefix in new format
var obj = CreateInstance(destType, metadata);
if (obj != null)
{
// Register in shared intern cache BEFORE populate (position-based sequential check)
if (context.ContextClass.UseTypeReferenceHandling(metadata))
if (registerInCache)
{
context.RegisterInternedValue(obj, streamPosition);
context.RegisterNextInternedValue(obj);
}
PopulateObjectWithMapping(ref context, obj, destType, indexMapping, depth);
@ -318,21 +316,19 @@ public static partial class AcBinaryDeserializer
}
// Handle nested complex objects - reuse existing if available
if (peekCode == BinaryTypeCode.Object && propInfo.IsComplexType)
if ((peekCode == BinaryTypeCode.Object || peekCode == BinaryTypeCode.ObjectRefFirst) && propInfo.IsComplexType)
{
var existingObj = propInfo.GetValue(target);
if (existingObj != null)
{
var objStreamPos = context.Position; // position of Object type code
context.ReadByte(); // consume Object marker
var registerInCache = peekCode == BinaryTypeCode.ObjectRefFirst;
context.ReadByte(); // consume Object/ObjectRefFirst marker
// Register in shared intern cache BEFORE populate (position-based sequential check)
if (context.ContextClass.UseTypeReferenceHandling(wrapper.Metadata))
if (registerInCache)
{
context.RegisterInternedValue(existingObj, objStreamPos);
context.RegisterNextInternedValue(existingObj);
}
// Wire format: [Object][props...] - no refId prefix in new format
PopulateObjectCore(ref context, existingObj, wrapper, nextDepth, skipDefaultWrite: false);
return;
}

View File

@ -60,19 +60,18 @@ public static partial class AcBinaryDeserializer
RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx));
RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt()));
RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty);
// StringInternNew: position is captured as Position-1 (after type code was read)
RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) =>
{
var streamPosition = ctx.Position - 1; // Position before type code
return ReadAndRegisterInternedString(ref ctx, streamPosition);
});
// StringInternFirst: first occurrence of interned string - read content + register in cache
RegisterReader(BinaryTypeCode.StringInternFirst, static (ref BinaryDeserializationContext ctx, Type _, int _) =>
ReadAndRegisterInternedString(ref ctx));
RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe());
RegisterReader(BinaryTypeCode.DateTimeOffset, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeOffsetUnsafe());
RegisterReader(BinaryTypeCode.TimeSpan, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadTimeSpanUnsafe());
RegisterReader(BinaryTypeCode.Guid, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadGuidUnsafe());
RegisterReader(BinaryTypeCode.Enum, static (ref BinaryDeserializationContext ctx, Type type, int _) => ReadEnumValue(ref ctx, type));
RegisterReader(BinaryTypeCode.Object, ReadObject);
RegisterReader(BinaryTypeCode.ObjectRefFirst, ReadObjectRefFirst);
RegisterReader(BinaryTypeCode.ObjectWithMetadata, ReadObjectWithMetadata);
RegisterReader(BinaryTypeCode.ObjectWithMetadataRefFirst, ReadObjectWithMetadataRefFirst);
RegisterReader(BinaryTypeCode.ObjectRef, ReadObjectRef);
RegisterReader(BinaryTypeCode.Array, ReadArray);
RegisterReader(BinaryTypeCode.Dictionary, ReadDictionary);
@ -799,16 +798,15 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Read new interned string and register it in the intern cache.
/// Position is captured BEFORE the type code was read (by caller).
/// Read interned string (StringInternFirst marker) and register in cache.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition)
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context)
{
var length = (int)context.ReadVarUInt();
if (length == 0) return string.Empty;
var str = context.ReadStringUtf8(length);
context.RegisterInternedValue(str, streamPosition);
context.RegisterNextInternedValue(str);
return str;
}
@ -966,15 +964,29 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Root object olvasása.
/// Wire format: [Object][props 0-tól...] - Id a props-ban, nincs extra
/// UseMetadata esetén a root = footer entry 0 (nincs footer index a body-ban).
/// Object olvasása (nem tracked, vagy UseMetadata nélkül).
/// Wire format: [Object][props...]
/// </summary>
private static object? ReadObject(ref BinaryDeserializationContext context, Type targetType, int depth)
{
// Capture stream position of the Object type code (already consumed)
var streamPosition = context.Position - 1;
return ReadObjectCore(ref context, targetType, depth, registerInCache: false);
}
/// <summary>
/// Object olvasása első előforduláskor (ObjectRefFirst marker).
/// Wire format: [ObjectRefFirst][props...]
/// Az objektumot regisztráljuk a cache-be.
/// </summary>
private static object? ReadObjectRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectCore(ref context, targetType, depth, registerInCache: true);
}
/// <summary>
/// Object olvasás core implementáció.
/// </summary>
private static object? ReadObjectCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
{
// Handle dictionary types
if (IsDictionaryType(targetType, out var keyType, out var valueType))
{
@ -987,12 +999,11 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata);
if (instance == null) return null;
if (context.ContextClass.UseTypeReferenceHandling(metadata))
if (registerInCache)
{
context.RegisterInternedValue(instance, streamPosition);
context.RegisterNextInternedValue(instance);
}
// UseMetadata: root object is now ObjectWithMetadata marker — no footer entry 0 handling needed here.
PopulateObject(ref context, instance, wrapper, depth, skipDefaultWrite: true);
// ChainMode kezelés
@ -1014,17 +1025,30 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Object olvasása UseMetadata módban (inline metadata).
/// Object olvasása UseMetadata módban (nem tracked).
/// Wire format:
/// Első előfordulás: [ObjectWithMetadata][propNameHash (4b)][propCount (VarUInt)][hash0 (4b)][hash1]...[props...]
/// Első előfordulás: [ObjectWithMetadata][propNameHash (4b)][propCount (VarUInt)][hash0..N][props...]
/// Ismételt: [ObjectWithMetadata][propNameHash (4b)][props...]
/// A propNameHash-ből a ContextClass megkeresi a source hash-eket (lineáris, kis tömb).
/// Ha nincs találat → első előfordulás, beolvassuk a hash-eket inline-ból.
/// </summary>
private static object? ReadObjectWithMetadata(ref BinaryDeserializationContext context, Type targetType, int depth)
{
var streamPosition = context.Position - 1;
return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: false);
}
/// <summary>
/// Object olvasása UseMetadata módban, első tracked előfordulás (ObjectWithMetadataRefFirst marker).
/// Az objektumot regisztráljuk a cache-be.
/// </summary>
private static object? ReadObjectWithMetadataRefFirst(ref BinaryDeserializationContext context, Type targetType, int depth)
{
return ReadObjectWithMetadataCore(ref context, targetType, depth, registerInCache: true);
}
/// <summary>
/// ObjectWithMetadata olvasás core implementáció.
/// </summary>
private static object? ReadObjectWithMetadataCore(ref BinaryDeserializationContext context, Type targetType, int depth, bool registerInCache)
{
// Inline metadata: propNameHash mindig jön
var propNameHash = context.ReadInt32Raw();
@ -1053,12 +1077,12 @@ public static partial class AcBinaryDeserializer
var instance = CreateInstance(targetType, metadata);
if (instance == null) return null;
if (context.ContextClass.UseTypeReferenceHandling(metadata))
if (registerInCache)
{
context.RegisterInternedValue(instance, streamPosition);
context.RegisterNextInternedValue(instance);
}
// CacheMap felépítése ha még nincs (1x per target type × source type kombináció)
// CacheMap felépítése ha még nincs
if (wrapper.CacheMap == null)
BuildCacheMap(wrapper, sourceHashes);
@ -1362,8 +1386,6 @@ public static partial class AcBinaryDeserializer
private static void SkipValue(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
{
// Capture position before reading type code (needed for string interning)
var streamPosition = context.Position;
var typeCode = context.ReadByte();
if (typeCode == BinaryTypeCode.Null) return;
@ -1395,7 +1417,7 @@ public static partial class AcBinaryDeserializer
context.Skip(2);
return;
case BinaryTypeCode.Int32:
context.ReadVarInt(); // Skip VarInt
context.ReadVarInt();
return;
case BinaryTypeCode.UInt32:
context.ReadVarUInt();
@ -1424,15 +1446,14 @@ public static partial class AcBinaryDeserializer
context.Skip(16);
return;
case BinaryTypeCode.String:
// Sima string - nem regisztrálunk
SkipPlainString(ref context);
return;
case BinaryTypeCode.StringInterned:
context.ReadVarUInt();
return;
case BinaryTypeCode.StringInternNew:
// New interned string - must register even when skipping
SkipAndRegisterInternedString(ref context, streamPosition);
case BinaryTypeCode.StringInternFirst:
// First occurrence - must register even when skipping
SkipAndRegisterInternedString(ref context);
return;
case BinaryTypeCode.ByteArray:
var byteLen = (int)context.ReadVarUInt();
@ -1446,11 +1467,17 @@ public static partial class AcBinaryDeserializer
case BinaryTypeCode.Object:
SkipObject(ref context, metaData);
return;
case BinaryTypeCode.ObjectRefFirst:
SkipObjectRefFirst(ref context, metaData);
return;
case BinaryTypeCode.ObjectWithMetadata:
SkipObjectWithMetadata(ref context, metaData);
SkipObjectWithMetadata(ref context, metaData, registerInCache: false);
return;
case BinaryTypeCode.ObjectWithMetadataRefFirst:
SkipObjectWithMetadata(ref context, metaData, registerInCache: true);
return;
case BinaryTypeCode.ObjectRef:
context.ReadVarInt();
context.ReadVarUInt();
return;
case BinaryTypeCode.Array:
SkipArray(ref context, metaData);
@ -1475,17 +1502,25 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Skip a new interned string - must still register in cache.
/// Skip an interned string (StringInternFirst) - must still register in cache.
/// </summary>
/// <param name="context">Deserialization context</param>
/// <param name="streamPosition">Position before the type code was read</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition)
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context)
{
var byteLen = (int)context.ReadVarUInt();
if (byteLen == 0) return;
var str = context.ReadStringUtf8(byteLen);
context.RegisterInternedValue(str, streamPosition);
context.RegisterNextInternedValue(str);
}
/// <summary>
/// Skip ObjectRefFirst - must register placeholder in cache.
/// </summary>
private static void SkipObjectRefFirst(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
{
// Register placeholder (stream position as boxed int for potential lazy load)
context.RegisterNextInternedValue(context.Position);
SkipObject(ref context, metaData);
}
///// <summary>
@ -1517,18 +1552,21 @@ public static partial class AcBinaryDeserializer
}
/// <summary>
/// Skip ObjectWithMetadata: inline metadata-ból olvassuk a propCount-ot.
/// Ha az adott propNameHash-hez már van source hash → propCount onnan.
/// Ha első előfordulás → propCount + hash-ek a stream-ből.
/// Skip ObjectWithMetadata/ObjectWithMetadataRefFirst.
/// </summary>
private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
private static void SkipObjectWithMetadata(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData, bool registerInCache)
{
if (registerInCache)
{
// Register placeholder for potential lazy load
context.RegisterNextInternedValue(context.Position);
}
var propNameHash = context.ReadInt32Raw();
var sourceHashes = context.ContextClass.FindSourceHashes(propNameHash);
if (sourceHashes == null)
{
// Első előfordulás: propCount + hash-ek jönnek a stream-ben
var propCount = (int)context.ReadVarUInt();
sourceHashes = new int[propCount];
for (var i = 0; i < propCount; i++)

View File

@ -119,13 +119,9 @@ public static partial class AcBinarySerializer
// These properties delegate to Options for convenience
public bool UseStringInterning => Options.UseStringInterning != StringInterningMode.None;
/// <summary>
/// True if we need footer position in header (string interning OR reference handling OR metadata).
/// True if we have interning/ref tracking (cache count needed in header).
/// </summary>
/// <summary>
/// True if we need footer position in header (string interning OR reference handling).
/// UseMetadata no longer uses footer — metadata is inline in the body.
/// </summary>
public bool HasFooter => UseStringInterning || ReferenceHandling != ReferenceHandlingMode.None;
public bool HasCaching => UseStringInterning || ReferenceHandling != ReferenceHandlingMode.None;
public bool UseMetadata => Options.UseMetadata;
public byte MinStringInternLength => Options.MinStringInternLength;
public byte MaxStringInternLength => Options.MaxStringInternLength;
@ -222,14 +218,14 @@ public static partial class AcBinarySerializer
/// <summary>
/// Tries to intern a string. Returns true if string was seen before (write index).
/// Returns false if first occurrence (write inline).
/// Uses stream position for 100% reliable deserializer cache matching.
/// Stores marker position for later rewriting (marker-based interning, no footer).
/// </summary>
/// <param name="value">The string value to intern</param>
/// <param name="streamPosition">Current stream position (before writing the string)</param>
/// <param name="markerPosition">Position of the type code marker (for rewriting)</param>
/// <param name="cacheIndex">Output: cache index for 2+ occurrence, -1 for 1st occurrence</param>
/// <returns>True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline)</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetInternedString(string value, int streamPosition, out int cacheIndex)
public bool TryGetInternedString(string value, int markerPosition, out int cacheIndex)
{
_stringInternMap ??= new IdentityMap<string, InternEntry>();
@ -245,9 +241,9 @@ public static partial class AcBinarySerializer
return true;
}
// 1st occurrence: store stream position
// 1st occurrence: store marker position for later rewriting
ref var newEntry = ref _stringInternMap.GetValueRef(slotIndex);
newEntry.StreamPosition = streamPosition;
newEntry.StreamPosition = markerPosition;
newEntry.CacheIndex = -1; // Not assigned until 2nd occurrence
cacheIndex = -1;
return false;
@ -259,23 +255,27 @@ public static partial class AcBinarySerializer
public bool HasInternedStrings => _stringInternMap != null && _stringInternMap.Count > 0;
/// <summary>
/// Gets the count of strings that occurred more than once (for footer).
/// Gets the count of cached values (string intern + object ref that occurred more than once).
/// </summary>
public int GetDupCount() => _nextCacheIndex;
public int GetCacheCount() => _nextCacheIndex;
/// <summary>
/// Writes the merged footer with (position, cacheIndex) pairs sorted by position.
/// Collects entries from string interning AND ID tracking (all wrappers).
/// VarUInt format for compact size, deserializer reads into flat int[].
/// Rewrites markers for all entries with CacheIndex >= 0.
/// Called at end of serialization to mark first occurrences of interned/tracked values.
/// String: String → StringInternFirst
/// Object: Object → ObjectRefFirst, ObjectWithMetadata → ObjectWithMetadataRefFirst
///
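/// IMPORTANT: the deserializer registers values sequentially as it encounters *First markers,
/// so the CacheIndex values referenced in the body must follow StreamPosition (first-occurrence) order.
/// NOTE(review): the code visible here only sorts positions and rewrites marker bytes; it does not
/// reassign CacheIndex - confirm that indices are assigned in first-occurrence order elsewhere.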
/// </summary>
public void WriteInternedFooter()
public void RewriteMarkers()
{
if (_nextCacheIndex == 0) return;
// Collect ALL entries with CacheIndex >= 0 (string + ID, all occurred more than once)
Span<(int Position, int CacheIndex)> entries = _nextCacheIndex <= 64
? stackalloc (int, int)[_nextCacheIndex]
: new (int, int)[_nextCacheIndex];
// Collect all first-occurrence positions that need markers
Span<int> positions = _nextCacheIndex <= 64
? stackalloc int[_nextCacheIndex]
: new int[_nextCacheIndex];
var idx = 0;
@ -287,33 +287,41 @@ public static partial class AcBinarySerializer
{
ref var entry = ref _stringInternMap.GetValueRefAt(i);
if (entry.CacheIndex >= 0)
entries[idx++] = (entry.StreamPosition, entry.CacheIndex);
{
positions[idx++] = entry.StreamPosition;
}
}
}
// 2. ID tracking entries from all wrappers
foreach (var wrapper in GetWrappers())
{
CollectInternEntries(wrapper.IdentityMapInt32, ref entries, ref idx);
CollectInternEntries(wrapper.IdentityMapInt64, ref entries, ref idx);
CollectInternEntries(wrapper.IdentityMapGuid, ref entries, ref idx);
CollectPositions(wrapper.IdentityMapInt32, ref positions, ref idx);
CollectPositions(wrapper.IdentityMapInt64, ref positions, ref idx);
CollectPositions(wrapper.IdentityMapGuid, ref positions, ref idx);
}
// Sort by StreamPosition (ascending) for deserializer sequential check
var usedEntries = entries.Slice(0, idx);
usedEntries.Sort((a, b) => a.Position.CompareTo(b.Position));
// Sort by position to match deserializer's sequential registration order
var usedPositions = positions.Slice(0, idx);
usedPositions.Sort();
// Write pairs as VarUInt for compact size
// Rewrite markers at sorted positions
for (var i = 0; i < idx; i++)
{
WriteVarUInt((uint)usedEntries[i].Position);
WriteVarUInt((uint)usedEntries[i].CacheIndex);
var pos = usedPositions[i];
var currentMarker = _buffer[pos];
_buffer[pos] = currentMarker switch
{
BinaryTypeCode.String => BinaryTypeCode.StringInternFirst,
BinaryTypeCode.Object => BinaryTypeCode.ObjectRefFirst,
BinaryTypeCode.ObjectWithMetadata => BinaryTypeCode.ObjectWithMetadataRefFirst,
_ => currentMarker
};
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void CollectInternEntries<TKey>(IdentityMap<TKey, InternEntry>? map,
ref Span<(int Position, int CacheIndex)> entries, ref int idx) where TKey : notnull
private static void CollectPositions<TKey>(IdentityMap<TKey, InternEntry>? map, ref Span<int> positions, ref int idx) where TKey : notnull
{
if (map == null) return;
var count = map.Count;
@ -321,7 +329,9 @@ public static partial class AcBinarySerializer
{
ref var entry = ref map.GetValueRefAt(i);
if (entry.CacheIndex >= 0)
entries[idx++] = (entry.StreamPosition, entry.CacheIndex);
{
positions[idx++] = entry.StreamPosition;
}
}
}
@ -1007,59 +1017,54 @@ public static partial class AcBinarySerializer
private int _headerPosition;
// Footer-based string interning: no estimation or shifting needed
// Header: [version][flags][footerPosition (4 bytes, only if string interning)]
// Body: data with StringInterned indices
// Footer: interned strings table
// Marker-based interning: no footer needed
// Header: [version][flags][cacheCount (VarUInt, if caching enabled)]
// Body: data with markers (StringInternFirst, ObjectRefFirst, etc.)
public void WriteHeaderPlaceholder()
{
// Header layout:
// [0] version (1 byte)
// [1] flags (1 byte)
// [2-5] footer position (4 bytes, if footer is needed)
EnsureCapacity(HasFooter ? 6 : 2);
// [2+] cache count (VarUInt, max 5 bytes, if caching enabled)
EnsureCapacity(HasCaching ? 7 : 2);
_headerPosition = _position;
_position += HasFooter ? 6 : 2;
_position += HasCaching ? 7 : 2; // Reserve max VarUInt size
}
public void FinalizeHeaderSections()
{
var dupCount = GetDupCount(); // Shared counter: string intern + ID tracking
var hasInternTable = dupCount > 0;
var cacheCount = GetCacheCount();
// Footer: write merged intern entries (string + ID)
// Metadata footer is no longer written here — metadata is inline in the body.
var footerPosition = 0;
if (hasInternTable)
{
footerPosition = _position;
// Intern footer
WriteVarUInt((uint)dupCount);
WriteInternedFooter();
}
// Rewrite markers for first occurrences (String→StringInternFirst, Object→ObjectRefFirst, etc.)
RewriteMarkers();
// Write header
var flags = BinaryTypeCode.HeaderFlagsBase;
if (UseMetadata)
flags |= BinaryTypeCode.HeaderFlag_Metadata;
// Encode ReferenceHandlingMode using separate bits
if (ReferenceHandling == ReferenceHandlingMode.OnlyId)
flags |= BinaryTypeCode.HeaderFlag_RefHandling_OnlyId;
else if (ReferenceHandling == ReferenceHandlingMode.All)
flags |= (byte)(BinaryTypeCode.HeaderFlag_RefHandling_OnlyId | BinaryTypeCode.HeaderFlag_RefHandling_All);
// Set footer position flag if footer is needed
if (HasFooter)
flags |= BinaryTypeCode.HeaderFlag_HasFooterPosition;
if (HasCaching)
flags |= BinaryTypeCode.HeaderFlag_HasCacheCount;
_buffer[_headerPosition] = AcBinarySerializerOptions.FormatVersion;
_buffer[_headerPosition + 1] = flags;
// Write footer position if footer is needed
if (HasFooter)
// Write cache count and compact header if needed
if (HasCaching)
{
Unsafe.WriteUnaligned(ref _buffer[_headerPosition + 2], footerPosition);
var headerEnd = WriteVarUIntAt(_headerPosition + 2, (uint)cacheCount);
var reserved = _headerPosition + 7;
if (headerEnd < reserved)
{
// Shift body left to remove unused header bytes
var shift = reserved - headerEnd;
_buffer.AsSpan(reserved, _position - reserved).CopyTo(_buffer.AsSpan(headerEnd));
_position -= shift;
}
}
}

View File

@ -707,7 +707,7 @@ public static partial class AcBinarySerializer
/// <summary>
/// Optimized string writer with FixStr for short strings.
/// New interning strategy: inline on first occurrence, index on 2+.
/// Marker-based interning: write String marker, rewrite to StringInternFirst at end if needed.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteString(string value, BinarySerializationContext context)
@ -724,9 +724,9 @@ public static partial class AcBinarySerializer
&& value.Length >= context.MinStringInternLength
&& (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength))
{
// Capture position BEFORE writing - this is where deserializer will be when reading
var streamPosition = context.Position;
if (context.TryGetInternedString(value, streamPosition, out var index))
// Capture marker position BEFORE writing
var markerPosition = context.Position;
if (context.TryGetInternedString(value, markerPosition, out var index))
{
// 2+ occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned);
@ -736,8 +736,8 @@ public static partial class AcBinarySerializer
#if DEBUG
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value);
#endif
// 1st occurrence: write inline with StringInternNew type code
context.WriteByte(BinaryTypeCode.StringInternNew);
// 1st occurrence: write String marker (will be rewritten to StringInternFirst if repeated)
context.WriteByte(BinaryTypeCode.String);
context.WriteStringUtf8(value);
return;
}

View File

@ -28,10 +28,10 @@ internal static class BinaryTypeCode
public const byte Char = 14;
// String types (16-19)
public const byte String = 16; // Inline UTF8 string
public const byte StringInterned = 17; // Reference to interned string by index
public const byte String = 16; // Inline UTF8 string (non-interned)
public const byte StringInterned = 17; // Reference to interned string by index (2+ occurrence)
public const byte StringEmpty = 18; // Empty string marker
public const byte StringInternNew = 19; // New interned string - full content + register in table
public const byte StringInternFirst = 19; // First occurrence of interned string - read content + register in cache
// Date/Time types (20-23)
public const byte DateTime = 20;
@ -43,13 +43,17 @@ internal static class BinaryTypeCode
public const byte Enum = 24;
// Complex types (25-31)
public const byte Object = 25; // Start of object
public const byte Object = 25; // Start of object (non-tracked OR first occurrence when ref tracking)
public const byte ObjectEnd = 26; // End of object marker
public const byte ObjectRef = 27; // Reference to previously serialized object
public const byte ObjectRef = 27; // Reference to previously serialized object (2+ occurrence)
public const byte Array = 28; // Start of array/list
public const byte Dictionary = 29; // Start of dictionary
public const byte ByteArray = 30; // Optimized byte[] storage
public const byte ObjectWithMetadata = 31; // Object with metadata footer index (UseMetadata nested objects)
public const byte ObjectWithMetadata = 31; // Object with metadata (UseMetadata mode, non-tracked OR first occurrence)
// Extended markers for first occurrence tracking (66-67, after FixStr range)
public const byte ObjectRefFirst = 66; // First occurrence of tracked object (ref handling enabled)
public const byte ObjectWithMetadataRefFirst = 67; // First occurrence of tracked object with metadata
// Special markers (32+, for header/meta)
// Header flags byte structure (for values >= 64):
@ -77,7 +81,8 @@ internal static class BinaryTypeCode
// None = both false, OnlyId = 0x02, All = 0x06 (both bits set)
public const byte HeaderFlag_RefHandling_OnlyId = 0x02;
public const byte HeaderFlag_RefHandling_All = 0x04;
public const byte HeaderFlag_HasFooterPosition = 0x08; // Bit 3: 4-byte footer position follows flags
public const byte HeaderFlag_HasFooterPosition = 0x08; // Bit 3: 4-byte footer position follows flags (legacy)
public const byte HeaderFlag_HasCacheCount = 0x08; // Bit 3 (reused): VarUInt cache count follows flags (new marker-based format)
// Compact integer variants (for VarInt optimization)
public const byte Int32Tiny = 192; // -16 to 63 stored in single byte (value = code - 192 - 16)