Improve string interning logic in AcBinarySerializer

- Respect both global and property-level [AcStringIntern] settings for string interning
- Add UseStringPropertyInterning method and flag-based caching in property accessors for fast runtime checks
- Update scan and write passes to use property-level interning decisions
- Introduce FilteredReferenceProperties for efficient scan filtering in TypeMetadataWrapper
- Refactor benchmarks to use correct serializer options
- Add TODOs and minor cleanups for clarity and future improvements
This commit is contained in:
Loretta 2026-02-14 11:07:26 +01:00
parent 7e7918e071
commit bfab7c16b9
6 changed files with 73 additions and 47 deletions

View File

@ -212,15 +212,15 @@ public static class Program
{
// AcBinary variants
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.Default, SerializerAcBinaryDefault),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.WithoutReferenceHandling, SerializerAcBinaryNoRef),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryFastMode),
//new AcBinaryBenchmark(testData.Order, new AcBinarySerializerOptions { UseStringInterning = StringInterningMode.None }, SerializerAcBinaryNoIntern),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryDefault),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryNoRef),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.Default, SerializerAcBinaryDefault),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.WithoutReferenceHandling, SerializerAcBinaryNoRef),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryFastMode),
new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryNoIntern),
new AcBinaryBenchmark(testData.Order, new AcBinarySerializerOptions { UseStringInterning = StringInterningMode.None }, SerializerAcBinaryNoIntern),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryDefault),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryNoRef),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryFastMode),
//new AcBinaryBenchmark(testData.Order, AcBinarySerializerOptions.FastMode, SerializerAcBinaryNoIntern),
// AcJson
new AcJsonBenchmark(testData.Order, AcJsonSerializerOptions.Default, SerializerAcJsonDefault),

View File

@ -42,6 +42,7 @@ public static partial class AcBinarySerializer
{
var isStringCollectionElementType = false;
//TODO: For collections we should also check UseStringPropertyInterning - J.
if (metadata.ElementNeedsScan &&
!((isStringCollectionElementType = ReferenceEquals(metadata.CollectionElementType, StringType)) && !context.UseStringInterning))
{
@ -120,6 +121,8 @@ public static partial class AcBinarySerializer
if (prop.AccessorType == PropertyAccessorType.String)
{
if (!prop.UseStringPropertyInterning(context.Options.UseStringInterning)) continue;
// Fast path: typed getter for string
var str2 = prop.GetString(value);
if (str2 != null && context.IsValidForInterningString(str2.Length))

View File

@ -842,42 +842,33 @@ public static partial class AcBinarySerializer
// String interning: only for strings within length range
// MaxStringInternLength == 0 means no max limit
if (context.UseStringInterning
&& value.Length >= context.MinStringInternLength
&& (context.MaxStringInternLength == 0 || value.Length <= context.MaxStringInternLength))
//TODO: prop.UseStringPropertyInterning should be used here! - J.
if (context.UseStringInterning && context.IsValidForInterningString(value.Length))
{
ref var interEntry = ref context.GetInternedStringEntry(value, out bool found);
if (found)
if (found && interEntry.CacheIndex >= 0)
{
// String was seen in scan pass
if (interEntry.CacheIndex >= 0)
if (interEntry.IsFirstWrite)
{
if (interEntry.IsFirstWrite)
{
// 1st serialize occurrence of a cached string - write StringInternFirst + cacheIndex + data
interEntry.IsFirstWrite = false;
context.WriteByte(BinaryTypeCode.StringInternFirst);
context.WriteVarUInt((uint)interEntry.CacheIndex);
context.WriteStringUtf8(value);
}
else
{
// 2+ serialize occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned);
context.WriteVarUInt((uint)interEntry.CacheIndex);
}
return;
// 1st serialize occurrence of a cached string - write StringInternFirst + cacheIndex + data
interEntry.IsFirstWrite = false;
context.WriteByte(BinaryTypeCode.StringInternFirst);
context.WriteVarUInt((uint)interEntry.CacheIndex);
context.WriteStringUtf8(value);
}
// CacheIndex < 0 means string appeared only once in scan - write as plain string
else
{
// 2+ serialize occurrence: write index reference
context.WriteByte(BinaryTypeCode.StringInterned);
context.WriteVarUInt((uint)interEntry.CacheIndex);
}
return;
}
// CacheIndex < 0 or not found → single occurrence, fall through to FixStr/String path
#if DEBUG
context.OnStringInterned?.Invoke(context.CurrentPropertyPath, value);
#endif
// String not cached (single occurrence or not found) - write plain String
context.WriteByte(BinaryTypeCode.String);
context.WriteStringUtf8(value);
return;
}
// Fast path for short strings: check length first (cheap), then ASCII

View File

@ -26,13 +26,18 @@ public abstract class BinaryPropertyAccessorBase : PropertyAccessorBase
public int ComplexPropertyIndex { get; internal set; } = -1;
/// <summary>
/// Cached string intern attribute value for this property.
/// null = no attribute (use global StringInterningMode setting)
/// true = [AcStringIntern(true)] - always intern
/// false = [AcStringIntern(false)] - never intern
/// Cached [AcStringIntern] attribute value for this property.
/// null = no attribute (follow global StringInterningMode)
/// true = [AcStringIntern(true)] — force intern
/// false = [AcStringIntern(false)] — force skip
/// </summary>
public bool? IsStringInternProperty { get; }
private readonly byte _interningFlags;
/// <summary>
/// Tells whether this property's strings should be interned under the given mode.
/// The answer is a single bit test against the flags cached for this accessor.
/// </summary>
/// <param name="stringInterningMode">Interning mode whose numeric value selects the bit to test.</param>
/// <returns>
/// <c>true</c> when the bit at position <c>(int)stringInterningMode</c> is set in the cached flags;
/// otherwise <c>false</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool UseStringPropertyInterning(StringInterningMode stringInterningMode)
{
    // Build the single-bit mask for the requested mode, then probe the cached flags with it.
    var modeMask = 1 << (int)stringInterningMode;
    return (_interningFlags & modeMask) != 0;
}
/// <summary>
/// Object getter for property filter context.
/// </summary>
@ -51,10 +56,18 @@ public abstract class BinaryPropertyAccessorBase : PropertyAccessorBase
: base(prop, declaringType)
{
// All typed getters are initialized in PropertyAccessorBase
if (AccessorType == PropertyAccessorType.String)
{
// Cache [AcStringIntern] attribute (inherit: true to check base class properties)
var internAttr = prop.GetCustomAttribute<AcStringInternAttribute>(inherit: true);
var stringInternAttributeValue = internAttr?.Enabled;
// Cache string intern attribute (inherit: true to check base class properties)
var attr = prop.GetCustomAttribute<AcStringInternAttribute>(inherit: true);
IsStringInternProperty = attr?.Enabled;
byte flags = 0;
if (stringInternAttributeValue == true) flags |= (1 << (int)StringInterningMode.Attribute);
if (stringInternAttributeValue != false) flags |= (1 << (int)StringInterningMode.All);
_interningFlags = flags;
}
ExpectedTypeCode = ComputeExpectedTypeCode(AccessorType);
}

View File

@ -1,6 +1,7 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;
using AyCode.Core.Serializers.Binaries;
using static AyCode.Core.Helpers.JsonUtilities;
namespace AyCode.Core.Serializers;
@ -85,7 +86,7 @@ public abstract class PropertyMetadataBase
/// The accessor type for fast typed getter/setter dispatch.
/// </summary>
public PropertyAccessorType AccessorType { get; }
/// <summary>
/// Compiled getter delegate for reading property values (boxed).
/// Used by serialize (for reading values) and deserialize (for Populate/Merge to get existing references).
@ -98,19 +99,20 @@ public abstract class PropertyMetadataBase
NameUtf8 = Encoding.UTF8.GetBytes(prop.Name);
DeclaringType = declaringType;
PropertyType = prop.PropertyType;
var underlying = Nullable.GetUnderlyingType(PropertyType);
IsNullable = underlying != null;
UnderlyingType = underlying ?? PropertyType;
PropertyTypeCode = Type.GetTypeCode(UnderlyingType);
// Pre-compute: is this a complex type that needs recursive handling?
IsComplexType = !IsPrimitiveOrStringFast(PropertyType);
PropertyNameHash = FnvHash.ComputeString(Name);
AccessorType = DetermineAccessorType(PropertyType);
_dynamicGetter = AcSerializerCommon.CreateCompiledGetter(declaringType, prop);
_dynamicGetter = AcSerializerCommon.CreateCompiledGetter(declaringType, prop);
}
/// <summary>

View File

@ -58,6 +58,20 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
/// </summary>
internal TypeMetadataWrapper<TMetadata>?[]? PropertyTypeWrappers;
/// <summary>
/// Options-filtered subset of metadata.ReferenceProperties for the scan pass.
/// Built lazily on first scan pass call, stable during session, cleared in ResetTracking.
/// Filters applied at build time (0 runtime checks in scan loop):
/// - StringInterningMode: None → strings excluded; Attribute → only [AcStringIntern(true)];
/// All → [AcStringIntern(false)] excluded
/// - IsComplexType: always included
///
/// TODO: PropertyFilter support — when PropertyFilter is set, pre-filter with
/// IsMetadataPhase=true (instance=null). Properties returning false are excluded.
/// Instance-dependent filtering remains in write pass only.
/// </summary>
internal BinaryPropertyAccessorBase[]? FilteredReferenceProperties;
#region Typed IdentityMaps - No generic type checks in hot path!
/// <summary>
@ -169,6 +183,9 @@ public sealed class TypeMetadataWrapper<TMetadata> where TMetadata : TypeMetadat
MetadataFooterIndex = -1;
CacheMap = null;
// Options may change between sessions (pool reuse) → rebuild on next scan
FilteredReferenceProperties = null;
if (SmallIdBitmap != null)
Array.Clear(SmallIdBitmap);