Refactor string interning to use position-based cache
Implement a new position-based string interning mechanism in AcBinarySerializer/AcBinaryDeserializer. This approach tracks stream positions for interned strings, ensuring 100% reliable cache matching during deserialization, even when strings are skipped or reordered. The serializer now writes (position, cacheIndex) pairs in the footer for all repeated strings, and the deserializer uses this mapping for robust cache population. Removes the old buffer-based interned string logic, updates all relevant code paths, and simplifies interned string handling for greater correctness and maintainability. Also updates benchmarks and test data construction to use the new interning mode.
This commit is contained in:
parent
11ac2beb71
commit
466782007d
|
|
@ -16,7 +16,7 @@ public class QuickBenchmark
|
|||
private static readonly MessagePackSerializerOptions MsgPackOptions =
|
||||
ContractlessStandardResolver.Options.WithCompression(MessagePackCompression.None);
|
||||
|
||||
private const int DefaultIterations = 1000;
|
||||
private const int DefaultIterations = 10;
|
||||
|
||||
#region Helper Methods
|
||||
|
||||
|
|
@ -426,8 +426,8 @@ public class QuickBenchmark
|
|||
sharedUser: sharedUser,
|
||||
sharedMetadata: sharedMeta);
|
||||
|
||||
var singleOptions = AcBinarySerializerOptions.FastMode;
|
||||
singleOptions.UseStringInterning = StringInterningMode.None;
|
||||
var singleOptions = AcBinarySerializerOptions.WithoutReferenceHandling;
|
||||
singleOptions.UseStringInterning = StringInterningMode.All;
|
||||
|
||||
Console.WriteLine("=== MINIMAL WARMUP TEST ===");
|
||||
Console.WriteLine();
|
||||
|
|
|
|||
|
|
@ -17,11 +17,23 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
private readonly ReadOnlySpan<byte> _buffer;
|
||||
private int _position;
|
||||
private List<string>? _internedStrings;
|
||||
private List<string>? _propertyNames;
|
||||
//private Dictionary<int, object>? _objectReferences;
|
||||
private Dictionary<int, string>? _stringCache;
|
||||
|
||||
/// <summary>
|
||||
/// Footer entry for position-based string interning.
|
||||
/// </summary>
|
||||
private struct DupEntry
|
||||
{
|
||||
public int Position; // Stream position where string was first written
|
||||
public int CacheIndex; // Index in _internStringCache
|
||||
}
|
||||
|
||||
// Position-based string interning: 100% reliable cache matching
|
||||
private DupEntry[]? _dupEntries; // Footer: (position, cacheIndex) pairs sorted by position
|
||||
private string[]? _internStringCache; // Cache for duplicated strings only
|
||||
private int _dupCheckIndex; // Current position in _dupEntries
|
||||
|
||||
/// <summary>
|
||||
/// Heap-allocated context class for IId-based reference tracking.
|
||||
/// Also holds Options - all options-derived properties delegate to ContextClass.Options.
|
||||
|
|
@ -68,10 +80,14 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
_buffer = data;
|
||||
_position = 0;
|
||||
_internedStrings = null;
|
||||
_propertyNames = null;
|
||||
//_objectReferences = null;
|
||||
_stringCache = null;
|
||||
|
||||
// Position-based string interning fields
|
||||
_dupEntries = null;
|
||||
_internStringCache = null;
|
||||
_dupCheckIndex = 0;
|
||||
|
||||
HasMetadata = false;
|
||||
IsMergeMode = false;
|
||||
RemoveOrphanedItems = false;
|
||||
|
|
@ -98,7 +114,6 @@ public static partial class AcBinaryDeserializer
|
|||
|
||||
var marker = ReadByteInternal();
|
||||
var hasPropertyTable = false;
|
||||
var hasInternTable = false;
|
||||
var hasInternFooter = false;
|
||||
var footerPosition = 0;
|
||||
|
||||
|
|
@ -151,29 +166,18 @@ public static partial class AcBinaryDeserializer
|
|||
}
|
||||
}
|
||||
|
||||
// Legacy: interned strings in header
|
||||
if (hasInternTable)
|
||||
{
|
||||
var internCount = (int)ReadVarUInt();
|
||||
_internedStrings = new List<string>(internCount);
|
||||
for (var i = 0; i < internCount; i++)
|
||||
{
|
||||
_internedStrings.Add(ReadHeaderString());
|
||||
}
|
||||
}
|
||||
|
||||
// Footer-based: read interned strings from footer, then return to data position
|
||||
// Footer-based: read string intern indices from footer
|
||||
if (hasInternFooter && footerPosition > 0)
|
||||
{
|
||||
ReadFooterStrings(footerPosition);
|
||||
ReadFooterStringIndices(footerPosition);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads interned strings from footer position, then returns to data position.
|
||||
/// Uses seek to footer, read strings, seek back to data.
|
||||
/// Reads string intern footer: [dupCount][(position, cacheIndex), ...]
|
||||
/// Position-based format for 100% reliable cache matching.
|
||||
/// </summary>
|
||||
private void ReadFooterStrings(int footerPosition)
|
||||
private void ReadFooterStringIndices(int footerPosition)
|
||||
{
|
||||
// Save current position (start of data)
|
||||
var dataPosition = _position;
|
||||
|
|
@ -181,12 +185,25 @@ public static partial class AcBinaryDeserializer
|
|||
// Seek to footer
|
||||
_position = footerPosition;
|
||||
|
||||
// Read interned strings
|
||||
var internCount = (int)ReadVarUInt();
|
||||
_internedStrings = new List<string>(internCount);
|
||||
for (var i = 0; i < internCount; i++)
|
||||
// Read dup count and (position, cacheIndex) pairs
|
||||
var dupCount = (int)ReadVarUInt();
|
||||
if (dupCount == 0)
|
||||
{
|
||||
_internedStrings.Add(ReadHeaderString());
|
||||
_dupEntries = Array.Empty<DupEntry>();
|
||||
_internStringCache = Array.Empty<string>();
|
||||
}
|
||||
else
|
||||
{
|
||||
_dupEntries = new DupEntry[dupCount];
|
||||
for (var i = 0; i < dupCount; i++)
|
||||
{
|
||||
var position = (int)ReadVarUInt();
|
||||
var cacheIndex = (int)ReadVarUInt();
|
||||
_dupEntries[i] = new DupEntry { Position = position, CacheIndex = cacheIndex };
|
||||
}
|
||||
|
||||
// Cache size: dupCount (cacheIndex is always 0, 1, 2, ..., dupCount-1)
|
||||
_internStringCache = new string[dupCount];
|
||||
}
|
||||
|
||||
// Seek back to data position
|
||||
|
|
@ -540,23 +557,49 @@ public static partial class AcBinaryDeserializer
|
|||
_position += count;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers an interned string during body read (StringInternNew).
|
||||
/// Uses position-based check for 100% reliable cache matching.
|
||||
/// </summary>
|
||||
/// <param name="value">The string value read from stream</param>
|
||||
/// <param name="streamPosition">Stream position BEFORE reading the string (type code position)</param>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public int RegisterInternedString(string value)
|
||||
public void RegisterInternedString(string value, int streamPosition)
|
||||
{
|
||||
_internedStrings ??= new List<string>();
|
||||
_internedStrings.Add(value);
|
||||
return _internedStrings.Count - 1;
|
||||
// Fast path: no duplicates or already processed all
|
||||
var entries = _dupEntries;
|
||||
if (entries == null || (uint)_dupCheckIndex >= (uint)entries.Length)
|
||||
return;
|
||||
|
||||
// Check if this position matches the next expected duplicate
|
||||
ref var entry = ref entries[_dupCheckIndex];
|
||||
if (entry.Position == streamPosition)
|
||||
{
|
||||
_internStringCache![entry.CacheIndex] = value;
|
||||
_dupCheckIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets an interned string by cache index (StringInterned type code).
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public string GetInternedString(int index)
|
||||
public string GetInternedString(int cacheIndex)
|
||||
{
|
||||
if (_internedStrings == null || (uint)index >= (uint)_internedStrings.Count)
|
||||
if (_internStringCache == null || (uint)cacheIndex >= (uint)_internStringCache.Length)
|
||||
{
|
||||
throw new AcBinaryDeserializationException($"Invalid interned string index '{index}'.", _position);
|
||||
throw new AcBinaryDeserializationException($"Invalid interned string cache index '{cacheIndex}'.", _position);
|
||||
}
|
||||
|
||||
return _internedStrings[index];
|
||||
var result = _internStringCache[cacheIndex];
|
||||
if (result == null)
|
||||
{
|
||||
throw new AcBinaryDeserializationException(
|
||||
$"Interned string at cache index '{cacheIndex}' was not populated.",
|
||||
_position);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
|
|
|
|||
|
|
@ -60,7 +60,12 @@ public static partial class AcBinaryDeserializer
|
|||
RegisterReader(BinaryTypeCode.String, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadPlainString(ref ctx));
|
||||
RegisterReader(BinaryTypeCode.StringInterned, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.GetInternedString((int)ctx.ReadVarUInt()));
|
||||
RegisterReader(BinaryTypeCode.StringEmpty, static (ref BinaryDeserializationContext _, Type _, int _) => string.Empty);
|
||||
RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) => ReadAndRegisterInternedString(ref ctx));
|
||||
// StringInternNew: position is captured as Position-1 (after type code was read)
|
||||
RegisterReader(BinaryTypeCode.StringInternNew, static (ref BinaryDeserializationContext ctx, Type _, int _) =>
|
||||
{
|
||||
var streamPosition = ctx.Position - 1; // Position before type code
|
||||
return ReadAndRegisterInternedString(ref ctx, streamPosition);
|
||||
});
|
||||
RegisterReader(BinaryTypeCode.DateTime, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeUnsafe());
|
||||
RegisterReader(BinaryTypeCode.DateTimeOffset, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadDateTimeOffsetUnsafe());
|
||||
RegisterReader(BinaryTypeCode.TimeSpan, static (ref BinaryDeserializationContext ctx, Type _, int _) => ctx.ReadTimeSpanUnsafe());
|
||||
|
|
@ -136,6 +141,7 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
context.ReadHeader();
|
||||
var result = ReadValue(ref context, targetType, 0);
|
||||
// Position-based string interning - no validation needed
|
||||
return (T?)result;
|
||||
}
|
||||
catch (AcBinaryDeserializationException)
|
||||
|
|
@ -175,7 +181,9 @@ public static partial class AcBinaryDeserializer
|
|||
try
|
||||
{
|
||||
context.ReadHeader();
|
||||
return ReadValue(ref context, targetType, 0);
|
||||
var result = ReadValue(ref context, targetType, 0);
|
||||
// Position-based string interning - no validation needed
|
||||
return result;
|
||||
}
|
||||
catch (AcBinaryDeserializationException)
|
||||
{
|
||||
|
|
@ -201,6 +209,7 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
context.ReadHeader();
|
||||
var node = (AcExpressionNode?)ReadValue(ref context, typeof(AcExpressionNode), 0);
|
||||
// Position-based string interning - no validation needed
|
||||
if (node == null) return null;
|
||||
|
||||
var entityType = AcSerializerCommon.GetExpressionEntityType(targetExpressionType);
|
||||
|
|
@ -269,6 +278,8 @@ public static partial class AcBinaryDeserializer
|
|||
$"Cannot populate type '{targetType.Name}' from binary type code {typeCode}",
|
||||
context.Position, targetType);
|
||||
}
|
||||
|
||||
// Position-based string interning - no validation needed
|
||||
}
|
||||
catch (AcBinaryDeserializationException)
|
||||
{
|
||||
|
|
@ -333,6 +344,7 @@ public static partial class AcBinaryDeserializer
|
|||
if (elementMetadata.IsComplexType && elementMetadata.IsIId && elementMetadata.IdGetter != null)
|
||||
{
|
||||
MergeIIdCollectionWithMetadata(ref context, targetList, elementType, wrapper, 0);
|
||||
// Position-based string interning - no validation needed
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -346,6 +358,8 @@ public static partial class AcBinaryDeserializer
|
|||
$"Cannot populate type '{targetType.Name}' from binary type code {typeCode}",
|
||||
context.Position, targetType);
|
||||
}
|
||||
|
||||
// Position-based string interning - no validation needed
|
||||
}
|
||||
catch (AcBinaryDeserializationException)
|
||||
{
|
||||
|
|
@ -389,6 +403,7 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
context.ReadHeader();
|
||||
var result = ReadValue(ref context, targetType, 0);
|
||||
// Position-based string interning - no validation needed
|
||||
return new BinaryDeserializeChain<T>(dataArray, options, chainTracker, (T?)result);
|
||||
}
|
||||
catch
|
||||
|
|
@ -433,6 +448,7 @@ public static partial class AcBinaryDeserializer
|
|||
{
|
||||
context.ReadHeader();
|
||||
var result = ReadValue(ref context, targetType, 0);
|
||||
// Position-based string interning - no validation needed
|
||||
return (TResult?)result;
|
||||
}
|
||||
catch (AcBinaryDeserializationException) { throw; }
|
||||
|
|
@ -474,6 +490,7 @@ public static partial class AcBinaryDeserializer
|
|||
context.Position, targetType);
|
||||
}
|
||||
|
||||
// Position-based string interning - no validation needed
|
||||
return this;
|
||||
}
|
||||
catch (AcBinaryDeserializationException) { throw; }
|
||||
|
|
@ -764,15 +781,16 @@ public static partial class AcBinaryDeserializer
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Új internált string olvasása és regisztrálása az intern táblába.
|
||||
/// Read new interned string and register it in the intern cache.
|
||||
/// Position is captured BEFORE the type code was read (by caller).
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context)
|
||||
private static string ReadAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition)
|
||||
{
|
||||
var length = (int)context.ReadVarUInt();
|
||||
if (length == 0) return string.Empty;
|
||||
var str = context.ReadStringUtf8(length);
|
||||
context.RegisterInternedString(str);
|
||||
context.RegisterInternedString(str, streamPosition);
|
||||
return str;
|
||||
}
|
||||
|
||||
|
|
@ -780,7 +798,7 @@ public static partial class AcBinaryDeserializer
|
|||
/// Read a string and register it in the intern table for future references.
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static string ReadAndInternString(ref BinaryDeserializationContext context)
|
||||
private static string ReadAndInternString(ref BinaryDeserializationContext context, int streamPosition)
|
||||
{
|
||||
var length = (int)context.ReadVarUInt();
|
||||
if (length == 0) return string.Empty;
|
||||
|
|
@ -788,7 +806,7 @@ public static partial class AcBinaryDeserializer
|
|||
// Always register strings that meet the minimum intern length threshold
|
||||
if (str.Length >= context.MinStringInternLength)
|
||||
{
|
||||
context.RegisterInternedString(str);
|
||||
context.RegisterInternedString(str, streamPosition);
|
||||
}
|
||||
|
||||
return str;
|
||||
|
|
@ -1286,6 +1304,8 @@ public static partial class AcBinaryDeserializer
|
|||
|
||||
private static void SkipValue(ref BinaryDeserializationContext context, BinaryDeserializeTypeMetadata metaData)
|
||||
{
|
||||
// Capture position before reading type code (needed for string interning)
|
||||
var streamPosition = context.Position;
|
||||
var typeCode = context.ReadByte();
|
||||
|
||||
if (typeCode == BinaryTypeCode.Null) return;
|
||||
|
|
@ -1353,8 +1373,8 @@ public static partial class AcBinaryDeserializer
|
|||
context.ReadVarUInt();
|
||||
return;
|
||||
case BinaryTypeCode.StringInternNew:
|
||||
// Új internált string - regisztrálni kell még skip esetén is
|
||||
SkipAndRegisterInternedString(ref context);
|
||||
// New interned string - must register even when skipping
|
||||
SkipAndRegisterInternedString(ref context, streamPosition);
|
||||
return;
|
||||
case BinaryTypeCode.ByteArray:
|
||||
var byteLen = (int)context.ReadVarUInt();
|
||||
|
|
@ -1394,22 +1414,26 @@ public static partial class AcBinaryDeserializer
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Új internált string kihagyása - DE regisztrálni kell!
|
||||
/// Skip a new interned string - must still register in cache.
|
||||
/// </summary>
|
||||
/// <param name="context">Deserialization context</param>
|
||||
/// <param name="streamPosition">Position before the type code was read</param>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context)
|
||||
private static void SkipAndRegisterInternedString(ref BinaryDeserializationContext context, int streamPosition)
|
||||
{
|
||||
var byteLen = (int)context.ReadVarUInt();
|
||||
if (byteLen == 0) return;
|
||||
var str = context.ReadStringUtf8(byteLen);
|
||||
context.RegisterInternedString(str);
|
||||
context.RegisterInternedString(str, streamPosition);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Skip a string but still register it in the intern table if it meets the length threshold.
|
||||
/// </summary>
|
||||
/// <param name="context">Deserialization context</param>
|
||||
/// <param name="streamPosition">Position before the type code was read</param>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void SkipAndInternString(ref BinaryDeserializationContext context)
|
||||
private static void SkipAndInternString(ref BinaryDeserializationContext context, int streamPosition)
|
||||
{
|
||||
var byteLen = (int)context.ReadVarUInt();
|
||||
if (byteLen == 0) return;
|
||||
|
|
@ -1417,7 +1441,7 @@ public static partial class AcBinaryDeserializer
|
|||
var str = context.ReadStringUtf8(byteLen);
|
||||
if (str.Length >= context.MinStringInternLength)
|
||||
{
|
||||
context.RegisterInternedString(str);
|
||||
context.RegisterInternedString(str, streamPosition);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,13 +63,18 @@ public static partial class AcBinarySerializer
|
|||
// Use shared reference tracker from AcSerializerCommon
|
||||
//private readonly AcSerializerCommon.SerializationReferenceTracker _refTracker = new();
|
||||
|
||||
private Dictionary<string, int>? _internedStrings;
|
||||
private List<string>? _internedStringList;
|
||||
/// <summary>
|
||||
/// String intern entry for tracking string occurrences.
|
||||
/// StreamPosition-based approach for 100% reliable cache matching.
|
||||
/// </summary>
|
||||
private struct StringInternEntry
|
||||
{
|
||||
public int StreamPosition; // Position in stream where string was first written
|
||||
public int CacheIndex; // Dense cache index (0, 1, 2, ...) - assigned at 2nd occurrence; -1 = first occurrence only
|
||||
}
|
||||
|
||||
// Single contiguous buffer for all interned string UTF8 bytes (reused across serializations)
|
||||
private byte[]? _internedStringBuffer;
|
||||
private int _internedStringBufferPos;
|
||||
private List<int>? _internedStringLengths;
|
||||
private Dictionary<string, StringInternEntry>? _stringInternMap;
|
||||
private int _nextCacheIndex; // Next dense cache index to assign
|
||||
|
||||
private Dictionary<string, int>? _propertyNames;
|
||||
private List<string>? _propertyNameList;
|
||||
|
|
@ -139,15 +144,11 @@ public static partial class AcBinarySerializer
|
|||
_position = 0;
|
||||
|
||||
//_refTracker.Reset();
|
||||
ClearAndTrimIfNeeded(_internedStrings, InitialInternCapacity * 4);
|
||||
ClearAndTrimIfNeeded(_stringInternMap, InitialInternCapacity * 4);
|
||||
ClearAndTrimIfNeeded(_propertyNames, InitialPropertyNameCapacity * 4);
|
||||
|
||||
_propertyNameList?.Clear();
|
||||
_internedStringList?.Clear();
|
||||
_internedStringLengths?.Clear();
|
||||
|
||||
// Reset intern buffer position (no deallocation - buffer is reused!)
|
||||
_internedStringBufferPos = 0;
|
||||
_nextCacheIndex = 0;
|
||||
|
||||
if (_propertyIndexBuffer != null && _propertyIndexBuffer.Length > PropertyIndexBufferMaxCache)
|
||||
{
|
||||
|
|
@ -183,81 +184,89 @@ public static partial class AcBinarySerializer
|
|||
_propertyStateBuffer = null;
|
||||
}
|
||||
|
||||
// _internedStringBuffer is a simple byte[] - no pool return needed, GC handles it
|
||||
_internedStringBuffer = null;
|
||||
}
|
||||
|
||||
#region String Interning
|
||||
|
||||
/// <summary>
|
||||
/// Registers a string for interning. Returns the index of the string.
|
||||
/// Uses single contiguous buffer for UTF8 bytes to minimize allocations.
|
||||
/// Tries to intern a string. Returns true if string was seen before (write index).
|
||||
/// Returns false if first occurrence (write inline).
|
||||
/// Uses stream position for 100% reliable deserializer cache matching.
|
||||
/// </summary>
|
||||
/// <param name="value">The string value to intern</param>
|
||||
/// <param name="streamPosition">Current stream position (before writing the string)</param>
|
||||
/// <param name="cacheIndex">Output: cache index for 2+ occurrence, -1 for 1st occurrence</param>
|
||||
/// <returns>True if 2+ occurrence (write cacheIndex), false if 1st occurrence (write inline)</returns>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public int RegisterInternedString(string value)
|
||||
public bool TryGetInternedString(string value, int streamPosition, out int cacheIndex)
|
||||
{
|
||||
_internedStrings ??= new Dictionary<string, int>(InitialInternCapacity, StringComparer.Ordinal);
|
||||
_internedStringList ??= new List<string>(InitialInternCapacity);
|
||||
_internedStringLengths ??= new List<int>(InitialInternCapacity);
|
||||
_stringInternMap ??= new Dictionary<string, StringInternEntry>(InitialInternCapacity, StringComparer.Ordinal);
|
||||
|
||||
// Single operation: lookup + conditional add
|
||||
ref var index = ref CollectionsMarshal.GetValueRefOrAddDefault(_internedStrings, value, out var exists);
|
||||
if (exists)
|
||||
ref var entry = ref CollectionsMarshal.GetValueRefOrNullRef(_stringInternMap, value);
|
||||
|
||||
if (!Unsafe.IsNullRef(ref entry))
|
||||
{
|
||||
return index;
|
||||
// 2+ occurrence: assign CacheIndex if first repeat
|
||||
if (entry.CacheIndex < 0)
|
||||
{
|
||||
entry.CacheIndex = _nextCacheIndex++;
|
||||
}
|
||||
cacheIndex = entry.CacheIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
// New string - add to list and write UTF8 to buffer
|
||||
index = _internedStringList.Count;
|
||||
_internedStringList.Add(value);
|
||||
|
||||
// Calculate UTF8 byte length
|
||||
var utf8Length = Ascii.IsValid(value) ? value.Length : Utf8NoBom.GetByteCount(value);
|
||||
|
||||
// Ensure intern buffer has capacity
|
||||
EnsureInternBufferCapacity(utf8Length);
|
||||
|
||||
// Write UTF8 bytes to contiguous buffer
|
||||
if (Ascii.IsValid(value))
|
||||
// 1st occurrence: store stream position
|
||||
_stringInternMap[value] = new StringInternEntry
|
||||
{
|
||||
Ascii.FromUtf16(value.AsSpan(), _internedStringBuffer.AsSpan(_internedStringBufferPos, utf8Length), out _);
|
||||
}
|
||||
else
|
||||
{
|
||||
Utf8NoBom.GetBytes(value.AsSpan(), _internedStringBuffer.AsSpan(_internedStringBufferPos, utf8Length));
|
||||
}
|
||||
|
||||
_internedStringLengths.Add(utf8Length);
|
||||
_internedStringBufferPos += utf8Length;
|
||||
|
||||
return index;
|
||||
StreamPosition = streamPosition,
|
||||
CacheIndex = -1 // Not assigned until 2nd occurrence
|
||||
};
|
||||
cacheIndex = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Ensures the intern buffer has enough capacity for additional bytes.
|
||||
/// Initial size is calculated from MaxStringInternLength * InitialInternCapacity.
|
||||
/// Returns true if any string has been registered for interning (including strings seen only once).
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private void EnsureInternBufferCapacity(int additionalBytes)
|
||||
{
|
||||
var required = _internedStringBufferPos + additionalBytes;
|
||||
public bool HasInternedStrings => _stringInternMap is { Count: > 0 };
|
||||
|
||||
if (_internedStringBuffer == null)
|
||||
/// <summary>
|
||||
/// Gets the count of strings that occurred more than once (for footer).
|
||||
/// </summary>
|
||||
public int GetDupCount() => _nextCacheIndex;
|
||||
|
||||
/// <summary>
|
||||
/// Writes the footer with (position, cacheIndex) pairs sorted by position.
|
||||
/// Position-based approach ensures 100% reliable cache matching in deserializer.
|
||||
/// </summary>
|
||||
public void WriteInternedStringFooter()
|
||||
{
|
||||
// Initial size: MaxStringInternLength * InitialInternCapacity (e.g., 64 * 32 = 2048)
|
||||
var initialSize = MaxStringInternLength * InitialInternCapacity;
|
||||
_internedStringBuffer = new byte[Math.Max(initialSize, required)];
|
||||
return;
|
||||
if (_stringInternMap == null || _nextCacheIndex == 0) return;
|
||||
|
||||
// Collect entries with CacheIndex >= 0 (occurred more than once)
|
||||
// We need to sort by StreamPosition for deserializer sequential access
|
||||
Span<(int Position, int CacheIndex)> entries = _nextCacheIndex <= 64
|
||||
? stackalloc (int, int)[_nextCacheIndex]
|
||||
: new (int, int)[_nextCacheIndex];
|
||||
|
||||
var idx = 0;
|
||||
foreach (var entry in _stringInternMap.Values)
|
||||
{
|
||||
if (entry.CacheIndex >= 0)
|
||||
{
|
||||
entries[idx++] = (entry.StreamPosition, entry.CacheIndex);
|
||||
}
|
||||
}
|
||||
|
||||
if (required <= _internedStringBuffer.Length)
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Sort by StreamPosition (ascending) for deserializer sequential check
|
||||
entries.Sort((a, b) => a.Position.CompareTo(b.Position));
|
||||
|
||||
// Grow buffer (double size)
|
||||
var newSize = Math.Max(_internedStringBuffer.Length * 2, required);
|
||||
Array.Resize(ref _internedStringBuffer, newSize);
|
||||
// Write pairs: (position, cacheIndex)
|
||||
for (var i = 0; i < _nextCacheIndex; i++)
|
||||
{
|
||||
WriteVarUInt((uint)entries[i].Position);
|
||||
WriteVarUInt((uint)entries[i].CacheIndex);
|
||||
}
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
|
@ -948,7 +957,8 @@ public static partial class AcBinarySerializer
|
|||
public void FinalizeHeaderSections()
|
||||
{
|
||||
var hasPropertyNames = UseMetadata && _propertyNameList is { Count: > 0 };
|
||||
var hasInternTable = UseStringInterning && _internedStringList is { Count: > 0 };
|
||||
var dupCount = UseStringInterning ? GetDupCount() : 0;
|
||||
var hasInternTable = dupCount > 0;
|
||||
|
||||
// Calculate property names header size (strings go to footer now)
|
||||
var headerPayloadSize = 0;
|
||||
|
|
@ -976,12 +986,12 @@ public static partial class AcBinarySerializer
|
|||
}
|
||||
}
|
||||
|
||||
// Footer-based string interning: write strings at the end
|
||||
// Footer: write indices of strings that occurred more than once
|
||||
var footerPosition = 0;
|
||||
if (hasInternTable)
|
||||
{
|
||||
footerPosition = _position;
|
||||
WriteFooterStrings();
|
||||
WriteFooterStringIndices(dupCount);
|
||||
}
|
||||
|
||||
// Write header
|
||||
|
|
@ -1009,23 +1019,19 @@ public static partial class AcBinarySerializer
|
|||
}
|
||||
|
||||
/// <summary>
|
||||
/// Writes interned strings to the footer (end of stream).
|
||||
/// Uses contiguous buffer - no re-encoding needed.
|
||||
/// Writes the string-intern footer: the duplicate count followed by (position, cacheIndex) pairs.
|
||||
/// Footer format: [dupCount][(position, cacheIndex), ...]
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private void WriteFooterStrings()
|
||||
/// <summary>
|
||||
/// Writes footer: [dupCount][(position, cacheIndex), ...]
|
||||
/// Position-based format for 100% reliable deserializer matching.
|
||||
/// </summary>
|
||||
private void WriteFooterStringIndices(int dupCount)
|
||||
{
|
||||
WriteVarUInt((uint)_internedStringList!.Count);
|
||||
|
||||
// Write from contiguous buffer using stored lengths
|
||||
var offset = 0;
|
||||
for (var i = 0; i < _internedStringLengths!.Count; i++)
|
||||
{
|
||||
var length = _internedStringLengths[i];
|
||||
WriteVarUInt((uint)length);
|
||||
WriteBytes(_internedStringBuffer.AsSpan(offset, length));
|
||||
offset += length;
|
||||
}
|
||||
// Dup count + (position, cacheIndex) pairs
|
||||
WriteVarUInt((uint)dupCount);
|
||||
WriteInternedStringFooter();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
|
|||
|
|
@ -759,7 +759,7 @@ public static partial class AcBinarySerializer
|
|||
|
||||
/// <summary>
|
||||
/// Optimized string writer with FixStr for short strings.
|
||||
/// Uses stackalloc for small strings to avoid allocations.
|
||||
/// New interning strategy: inline on first occurrence, index on 2+.
|
||||
/// </summary>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void WriteString(string value, BinarySerializationContext context)
|
||||
|
|
@ -776,12 +776,21 @@ public static partial class AcBinarySerializer
|
|||
&& value.Length >= context.MinStringInternLength
|
||||
&& (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength))
|
||||
{
|
||||
var index = context.RegisterInternedString(value);
|
||||
// Capture position BEFORE writing - this is where deserializer will be when reading
|
||||
var streamPosition = context.Position;
|
||||
if (context.TryGetInternedString(value, streamPosition, out var index))
|
||||
{
|
||||
// 2+ occurrence: write index reference
|
||||
context.WriteByte(BinaryTypeCode.StringInterned);
|
||||
context.WriteVarUInt((uint)index);
|
||||
return;
|
||||
}
|
||||
#if DEBUG
|
||||
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value);
|
||||
#endif
|
||||
context.WriteByte(BinaryTypeCode.StringInterned);
|
||||
context.WriteVarUInt((uint)index);
|
||||
// 1st occurrence: write inline with StringInternNew type code
|
||||
context.WriteByte(BinaryTypeCode.StringInternNew);
|
||||
context.WriteStringUtf8(value);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue