// AyCode.Core/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs
//
// 511 lines
// 22 KiB
// C#

using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Tests.TestModels;
/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkStringSupport.ToLongString</c>. The benchmark appends the configured suffix (once) to
/// every writable string property whose length is ≤ <c>FixStrMaxLength</c> across the test data graph
/// (via reflection in <c>BenchmarkStringSupport.EnsureAllStringsBypassFixStr</c>), producing benchmark
/// payloads with a controlled UTF-8 content profile.
///
/// Switch by assigning to <see cref="BenchmarkTestDataProvider.LongStringSuffix"/> from the interactive
/// Settings → Charset submenu (or programmatically). The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
///
/// NOTE(review): <c>ToLongString</c> compares <c>string.Length</c> (UTF-16 code units) against
/// <c>FixStrMaxLength</c> and appends the suffix exactly once. With the shorter presets below, a very
/// short baseline value (e.g. "dark") plus the suffix can still be ≤ 31 chars — confirm whether the
/// serializer's FixStr boundary is measured in chars or UTF-8 bytes, and whether this is intended.
/// </summary>
public static class CharsetSuffixes
{
/// <summary>Empty suffix — short baseline strings (e.g. "SharedTag") stay short, hitting
/// the FixStr fast-path. Stress-test for FixStr / short-string code paths. Only the suffix is
/// empty: non-empty baseline property values are left exactly as the data factory produced them.
/// Despite the "FixAscii" name, this option does NOT change baseline values to ASCII — it
/// suppresses the suffix that would otherwise lengthen every short property. Null / empty
/// properties are still replaced by the "Benchmark_String_Value" placeholder.</summary>
public const string Latin1FixAscii = "";
/// <summary>Short Latin1 mixed (Hungarian, ~24 char) — typical European i18n payload, short
/// multi-byte runs. Below the 32-char FixStr boundary on the suffix alone; combined with a
/// sufficiently long baseline value it pushes the property past the boundary.
/// NOTE(review): baselines shorter than ~9 chars stay at or below the boundary — confirm.</summary>
public const string Latin1Short = " árvíztűrő tükörfúrógép";
/// <summary>Long Latin1 mixed (~47 char) — exceeds the 32-char FixStr boundary on the suffix alone,
/// exercising the StringSmall+ tier path with Latin1 mixed content (Hungarian accented letters).</summary>
public const string Latin1Long = " árvíztűrő tükörfúrógép a magyar betűzés tesztje";
/// <summary>CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) — long homogeneous
/// 3-byte UTF-8 runs. Primary win region for V4N2 Phase 3 SIMD multi-byte transcoder work.
/// The suffix alone is well under 32 UTF-16 chars (see the review note on the class).</summary>
public const string CjkBmp = " 你好世界 こんにちは 안녕하세요";
/// <summary>Cyrillic (Russian / Ukrainian / etc.) — long homogeneous 2-byte runs, different shape
/// than Hungarian mixed (where 2-byte chars are short interspersed runs).
/// The suffix alone is under 32 UTF-16 chars (see the review note on the class).</summary>
public const string Cyrillic = " Привет мир дорогой друг";
/// <summary>Mixed full-spectrum (Hungarian + CJK + Cyrillic + emoji surrogate pairs) — multi-tier
/// coverage in one payload. Stresses surrogate-pair handling in the UTF-8 transcoder.
/// The emoji occupies two UTF-16 code units (one surrogate pair); the suffix alone is under
/// 32 UTF-16 chars (see the review note on the class).</summary>
public const string Mixed = " árvíz 你好 Привет 😀";
}
// ============================================================================================
// Cross-family shared state. The charset suffix is a global benchmark configuration — settable
// once via the interactive Menu, applied uniformly to every family's data construction. Lives in
// a non-generic helper so it ISN'T per-closed-generic (which would cause the Menu setter to affect
// only one family). The <see cref="BenchmarkTestDataProvider.LongStringSuffix"/> forwarding
// property preserves the existing Menu.cs API surface.
// ============================================================================================
/// <summary>
/// Non-generic holder for the benchmark string-augmentation state and logic. Being non-generic, its
/// static <see cref="LongStringSuffix"/> field exists exactly once, independent of how many closed
/// generic provider types read or write it.
/// </summary>
internal static class BenchmarkStringSupport
{
    /// <summary>
    /// Largest <see cref="string.Length"/> (UTF-16 code units) that <see cref="ToLongString"/> still
    /// treats as "short" and therefore lengthens.
    /// </summary>
    internal const int FixStrMaxLength = 31;

    /// <summary>Suffix appended to short strings; defaults to <c>CharsetSuffixes.Latin1Long</c>.</summary>
    internal static string LongStringSuffix = CharsetSuffixes.Latin1Long;

    /// <summary>
    /// Walks the object graph reachable from <paramref name="root"/> and rewrites every public,
    /// readable and writable instance property of type <see cref="string"/> through
    /// <see cref="ToLongString"/>.
    /// </summary>
    /// <remarks>
    /// Traversal is iterative (explicit stack) and tracks visited objects by reference identity, so
    /// cyclic graphs and shared instances are processed once. Objects implementing
    /// <see cref="IEnumerable"/> (other than strings) are only enumerated — their own properties are
    /// not inspected. String elements stored directly inside collections are not modified; only
    /// string <em>properties</em> are. Indexer properties are skipped.
    /// </remarks>
    /// <param name="root">Graph root; <c>null</c> is a no-op.</param>
    internal static void EnsureAllStringsBypassFixStr(object? root)
    {
        if (root is null) return;

        // Identity-based set: value-equal but distinct instances must each be visited.
        var visited = new HashSet<object>(ReferenceEqualityComparer.Instance);
        var pending = new Stack<object>();
        pending.Push(root);

        while (pending.Count > 0)
        {
            var current = pending.Pop();
            if (!visited.Add(current)) continue;

            if (current is IEnumerable sequence && current is not string)
            {
                foreach (var element in sequence)
                {
                    // Strings have no writable string properties to rewrite, so there is nothing
                    // to gain from reflecting over them.
                    if (element is null or string) continue;
                    pending.Push(element);
                }
                continue;
            }

            foreach (var property in current.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public))
            {
                if (!property.CanRead) continue;

                // Indexers are reported as properties too, but GetValue/SetValue without index
                // arguments throws TargetParameterCountException for them.
                if (property.GetIndexParameters().Length != 0) continue;

                var propertyType = property.PropertyType;

                if (propertyType == typeof(string))
                {
                    if (!property.CanWrite) continue;
                    var text = (string?)property.GetValue(current);
                    property.SetValue(current, ToLongString(text));
                    continue;
                }

                // Value types (enums included) cannot hold a nested reference graph we could
                // mutate in place through a boxed copy.
                if (propertyType.IsValueType) continue;

                var child = property.GetValue(current);
                if (child is not null)
                    pending.Push(child);
            }
        }
    }

    /// <summary>
    /// Maps a property value to its benchmark form: null / empty becomes the placeholder
    /// <c>"Benchmark_String_Value"</c> plus the suffix; values already longer than
    /// <see cref="FixStrMaxLength"/> are returned unchanged; everything else gets the suffix
    /// appended once.
    /// </summary>
    /// <remarks>
    /// The suffix is appended a single time, so with an empty or short
    /// <see cref="LongStringSuffix"/> the result may still be at or below
    /// <see cref="FixStrMaxLength"/>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static string ToLongString(string? value)
    {
        if (string.IsNullOrEmpty(value))
            return "Benchmark_String_Value" + LongStringSuffix;

        return value.Length > FixStrMaxLength
            ? value
            : value + LongStringSuffix;
    }
}
// ============================================================================================
// Generic test-data provider. One closing-generic alias per family — see
// <see cref="BenchmarkTestDataProvider"/> (the <c>_All_True</c> family, MSTEST-compatible name) and
// <see cref="BenchmarkTestDataProvider_All_False"/> (the <c>_All_False</c> family, Phase 1 benchmark
// target). The five cell-creator methods + ClearDeepLevelRefs are written once on the generic base,
// using the constrained <c>TestDataFactory&lt;TOrder, ...&gt;</c> for per-family element creation.
// ============================================================================================
/// <summary>
/// Builds the benchmark data cells (small, medium, large, repeated-strings, deep-nested) for one
/// model family. The ten type parameters select the family's concrete order / item / pallet /
/// measurement / point / shared-object types; element construction is delegated to the matching
/// closed <c>TestDataFactory</c>.
/// </summary>
public abstract class BenchmarkTestDataProvider<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>
    where TOrder : TestOrderBase<TItem, TTag, TUser, TCategory, TMetadata, TPreferences>, new()
    where TItem : TestOrderItemBase<TPallet, TTag, TUser, TMetadata, TOrder, TPreferences>, new()
    where TPallet : TestPalletBase<TMeasurement, TTag, TUser, TCategory, TMetadata, TItem, TPreferences>, new()
    where TMeasurement : TestMeasurementBase<TPoint, TTag, TUser, TPallet, TPreferences>, new()
    where TPoint : TestMeasurementPointBase<TTag, TUser, TMeasurement, TPreferences>, new()
    where TTag : SharedTagBase, new()
    where TUser : SharedUserBase<TPreferences>, new()
    where TCategory : SharedCategoryBase, new()
    where TMetadata : MetadataInfoBase<TMetadata>, new()
    where TPreferences : UserPreferencesBase, new()
{
    /// <summary>IId-reference share recorded on every cell produced by this provider.</summary>
    private const int CellIIdRefPercent = 20;

    /// <summary>
    /// Suffix appended to short string properties while the benchmark data is built. The value is
    /// not stored on this generic type: both accessors forward to
    /// <see cref="BenchmarkStringSupport.LongStringSuffix"/>, a field on a non-generic class, so
    /// every closed provider type reads and writes the same single value.
    /// </summary>
    public static string LongStringSuffix
    {
        get { return BenchmarkStringSupport.LongStringSuffix; }
        set { BenchmarkStringSupport.LongStringSuffix = value; }
    }

    /// <summary>
    /// Thin forwarding layer over the closed <c>TestDataFactory</c> so the ten-parameter type list
    /// is spelled out here only, not at every call site in the cell builders.
    /// </summary>
    private static class Factory
    {
        public static void ResetIdCounter()
        {
            TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.ResetIdCounter();
        }

        public static TTag CreateTag(string? name = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateTag(name);
        }

        public static TUser CreateUser(string? username = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateUser(username);
        }

        public static TCategory CreateCategory(string? name = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateCategory(name);
        }

        public static TMetadata CreateMetadata(string? key = null, bool withChild = false)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateMetadata(key, withChild);
        }

        public static TOrder CreateOrder(
            int itemCount, int palletsPerItem, int measurementsPerPallet, int pointsPerMeasurement,
            TTag? sharedTag = null, TUser? sharedUser = null, TMetadata? sharedMetadata = null,
            TPreferences? sharedPreferences = null, TCategory? sharedCategory = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateOrder(
                itemCount, palletsPerItem, measurementsPerPallet, pointsPerMeasurement,
                sharedTag, sharedUser, sharedMetadata, sharedPreferences, sharedCategory);
        }
    }

    /// <summary>
    /// Builds all five benchmark cells, in the fixed order small, medium, large, repeated-strings,
    /// deep-nested.
    /// </summary>
    /// <param name="resetId">Forwarded to every cell builder; when true each builder resets the
    /// factory id counter before creating its graph.</param>
    /// <returns>A new list holding the five cells.</returns>
    public static List<TestDataSet> CreateTestDataSets(bool resetId = true)
    {
        var cells = new List<TestDataSet>();
        cells.Add(CreateSmallTestData(resetId));
        cells.Add(CreateMediumTestData(resetId));
        cells.Add(CreateLargeTestData(resetId));
        cells.Add(CreateRepeatedStringsTestData(resetId));
        cells.Add(CreateDeepNestedTestData(resetId));
        return cells;
    }

    /// <summary>Resets the factory id counter when the caller asked for it.</summary>
    private static void BeginCell(bool resetId)
    {
        if (!resetId) return;
        Factory.ResetIdCounter();
    }

    /// <summary>Creates a preferences object with the four given settings.</summary>
    private static TPreferences BuildPreferences(
        string theme, string language, bool notificationsEnabled, string emailDigestFrequency)
    {
        return new TPreferences
        {
            Theme = theme,
            Language = language,
            NotificationsEnabled = notificationsEnabled,
            EmailDigestFrequency = emailDigestFrequency
        };
    }

    /// <summary>
    /// Common tail of every cell builder: lengthen the short strings, drop the deep-level shared
    /// references, then wrap the order in a named cell.
    /// </summary>
    private static TestDataSet<TOrder> CompleteCell(string cellName, TOrder order)
    {
        BenchmarkStringSupport.EnsureAllStringsBypassFixStr(order);
        ClearDeepLevelRefs(order);
        return new TestDataSet<TOrder>(cellName, order, iidRefPercent: CellIIdRefPercent);
    }

    /// <summary>2 items × 2 pallets × 2 measurements × 2 points, sharing one tag and one user.</summary>
    private static TestDataSet<TOrder> CreateSmallTestData(bool resetId = true)
    {
        BeginCell(resetId);

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");

        var order = Factory.CreateOrder(2, 2, 2, 2, sharedTag: tag, sharedUser: user);
        return CompleteCell("Small (2x2x2x2)", order);
    }

    /// <summary>
    /// 3 items × 3 pallets × 3 measurements × 4 points, sharing a tag, a user, a metadata node
    /// (created with a child) and a preferences object.
    /// </summary>
    private static TestDataSet<TOrder> CreateMediumTestData(bool resetId = true)
    {
        BeginCell(resetId);

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");
        var metadata = Factory.CreateMetadata("shared", withChild: true);
        var preferences = BuildPreferences("dark", "hungarian", true, "weekly");
        user.Preferences = preferences;

        var order = Factory.CreateOrder(
            3, 3, 3, 4,
            sharedTag: tag,
            sharedUser: user,
            sharedMetadata: metadata,
            sharedPreferences: preferences);
        return CompleteCell("Medium (3x3x3x4)", order);
    }

    /// <summary>
    /// 5 items × 5 pallets × 5 measurements × 10 points, sharing a tag, a user and a preferences
    /// object.
    /// </summary>
    private static TestDataSet<TOrder> CreateLargeTestData(bool resetId = true)
    {
        BeginCell(resetId);

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");
        var preferences = BuildPreferences("light", "german", false, "daily");
        user.Preferences = preferences;

        var order = Factory.CreateOrder(
            5, 5, 5, 10,
            sharedTag: tag,
            sharedUser: user,
            sharedPreferences: preferences);
        return CompleteCell("Large (5x5x5x10)", order);
    }

    /// <summary>
    /// 10 items × 2 pallets × 2 measurements × 2 points where every item carries the same product
    /// name and every pallet the same pallet code.
    /// </summary>
    private static TestDataSet<TOrder> CreateRepeatedStringsTestData(bool resetId = true)
    {
        BeginCell(resetId);

        var tag = Factory.CreateTag("RepeatedTag");
        var user = Factory.CreateUser("repeateduser");
        var preferences = BuildPreferences("dark", "hungarian", true, "weekly");
        user.Preferences = preferences;

        var order = Factory.CreateOrder(
            10, 2, 2, 2,
            sharedTag: tag,
            sharedUser: user,
            sharedPreferences: preferences);

        // Identical ProductName on every item and identical PalletCode on every pallet give the
        // payload its repeated-string content (intended to exercise string-interning
        // deduplication; the target is roughly a 20% repeated-string share). Both baselines are
        // short ASCII, at most FixStrMaxLength chars, so the string pass in CompleteCell appends
        // the active charset suffix to them and the selected charset — not a hard-coded
        // baseline — decides their UTF-8 content profile.
        foreach (var orderItem in order.Items)
        {
            orderItem.Status = TestStatus.Processing;
            orderItem.ProductName = "ProductName";

            foreach (var itemPallet in orderItem.Pallets)
                itemPallet.PalletCode = "PalletCode";
        }

        return CompleteCell("Repeated Strings (10 items)", order);
    }

    /// <summary>
    /// 2 items × 4 pallets × 4 measurements × 8 points, sharing a tag, a user, a category and a
    /// preferences object.
    /// </summary>
    private static TestDataSet<TOrder> CreateDeepNestedTestData(bool resetId = true)
    {
        BeginCell(resetId);

        var tag = Factory.CreateTag("DeepTag");
        var user = Factory.CreateUser("deepuser");
        var category = Factory.CreateCategory("DeepCategory");
        var preferences = BuildPreferences("light", "french", false, "monthly");
        user.Preferences = preferences;

        var order = Factory.CreateOrder(
            2, 4, 4, 8,
            sharedTag: tag,
            sharedUser: user,
            sharedPreferences: preferences,
            sharedCategory: category);
        return CompleteCell("Deep Nested (2x4x4x8)", order);
    }

    /// <summary>
    /// Nulls out shared references below the item level so the overall IId-reference share stays
    /// near the ~20% the cells advertise.
    /// </summary>
    /// <remarks>
    /// Pallet level: only <c>Category</c> is cleared; <c>Tag</c> and <c>Inspector</c> are
    /// deliberately kept because they supply most of the intended reference share (they used to
    /// be cleared as well). Measurement and point levels: every shared reference is cleared so
    /// deep-tree references do not push the share above the target.
    /// </remarks>
    private static void ClearDeepLevelRefs(TOrder order)
    {
        foreach (var orderItem in order.Items)
        {
            foreach (var itemPallet in orderItem.Pallets)
            {
                // Tag and Inspector intentionally stay set at this level.
                itemPallet.Category = null;

                foreach (var palletMeasurement in itemPallet.Measurements)
                {
                    palletMeasurement.Tag = null;
                    palletMeasurement.Operator = null;

                    foreach (var measurementPoint in palletMeasurement.Points)
                    {
                        measurementPoint.Tag = null;
                        measurementPoint.Verifier = null;
                    }
                }
            }
        }
    }
}
// ============================================================================================
// Closing-generic aliases for the provider. Same pattern as the factory: a bare-name class for
// MSTEST backward compatibility (kept on _All_True), and a _All_False suffix variant for the
// Phase 1 benchmark target. The static <c>LongStringSuffix</c> forwarding property lives on the
// generic base above — accessible identically through either alias (<c>BenchmarkTestDataProvider.LongStringSuffix</c>
// or <c>BenchmarkTestDataProvider_All_False.LongStringSuffix</c>), both routing to the same
// <see cref="BenchmarkStringSupport.LongStringSuffix"/> shared field. Symmetric API surface across
// families — no per-alias asymmetry.
// ============================================================================================
/// <summary>
/// <c>_All_True</c> family provider — preserves the bare-name API surface
/// (<c>BenchmarkTestDataProvider.CreateTestDataSets()</c>) that the SGen-vs-runtime compatibility
/// test depends on. Declares no members of its own: <c>CreateTestDataSets</c> and
/// <c>LongStringSuffix</c> are the static members inherited from the generic base, closed over the
/// ten <c>*_All_True</c> model types listed below.
/// </summary>
public sealed class BenchmarkTestDataProvider : BenchmarkTestDataProvider<
TestOrder_All_True, TestOrderItem_All_True, TestPallet_All_True, TestMeasurement_All_True, TestMeasurementPoint_All_True,
SharedTag_All_True, SharedUser_All_True, SharedCategory_All_True, MetadataInfo_All_True, UserPreferences_All_True>
{
}
/// <summary>
/// <c>_All_False</c> family provider — Phase 1 benchmark target. Declares no members of its own and
/// inherits the generic cell-creator methods unchanged. Because the base is closed over the ten
/// <c>*_All_False</c> model types listed below, the cell methods delegate element creation to the
/// <c>TestDataFactory</c> closed over the same types and therefore produce
/// <c>TestOrder_All_False</c> graphs (the only object the cell methods construct directly with
/// <c>new</c> is the shared preferences instance).
/// </summary>
public sealed class BenchmarkTestDataProvider_All_False : BenchmarkTestDataProvider<
TestOrder_All_False, TestOrderItem_All_False, TestPallet_All_False, TestMeasurement_All_False, TestMeasurementPoint_All_False,
SharedTag_All_False, SharedUser_All_False, SharedCategory_All_False, MetadataInfo_All_False, UserPreferences_All_False>
{
}
// ============================================================================================
// TestDataSet — abstract metadata base + generic-ordered concrete. Orchestration code iterates
// over the base type (Name/DisplayName/TypeName/IIdRefPercent only); concrete consumers
// (CreateSerializers, Output binary-output dump) downcast to TestDataSet<TOrder> to access the
// typed Order.
// ============================================================================================
/// <summary>
/// Order-type-agnostic description of one benchmark cell: its name, its IId-reference share and a
/// type-keyed registry of the object graphs ("variants") prepared for it.
/// </summary>
public abstract class TestDataSet
{
    // Registry of variant graphs keyed by the static type argument used at registration time
    // (typeof(T)), not by the runtime type of the instance. Intended for multi-variant dispatch:
    // a cell can hold one graph per model family and each benchmark asks for the one it needs.
    private readonly Dictionary<Type, object> _variants = new();

    /// <summary>Initializes the cell metadata.</summary>
    /// <param name="name">Human-readable cell name.</param>
    /// <param name="iidRefPercent">Share of IId shared references in the data; not range-checked.</param>
    protected TestDataSet(string name, int iidRefPercent)
    {
        Name = name;
        IIdRefPercent = iidRefPercent;
    }

    /// <summary>Cell name as passed to the constructor.</summary>
    public string Name { get; }

    /// <summary>
    /// Percentage of IId shared references in the data (0-100).
    /// Higher values mean more deduplication benefit for Default mode.
    /// </summary>
    public int IIdRefPercent { get; }

    /// <summary>Short type name describing the cell's primary graph.</summary>
    public abstract string TypeName { get; }

    /// <summary>
    /// Gets the display name: <see cref="Name"/> followed by the IId ref percentage when that
    /// percentage is greater than zero, otherwise just <see cref="Name"/>.
    /// </summary>
    public string DisplayName => IIdRefPercent > 0
        ? $"{Name} [{IIdRefPercent}% IId refs]"
        : Name;

    /// <summary>
    /// Registers a variant graph for this cell under the key <c>typeof(T)</c>. Registering the
    /// same <typeparamref name="T"/> again replaces the earlier entry (last write wins).
    /// </summary>
    /// <param name="variant">The graph instance to store; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="variant"/> is null.</exception>
    public void RegisterVariant<T>(T variant) where T : class
    {
        // A null entry would make HasOrder<T>() report true while GetOrder<T>() hands back null
        // despite its non-nullable return type, so reject it at the source.
        ArgumentNullException.ThrowIfNull(variant);
        _variants[typeof(T)] = variant;
    }

    /// <summary>
    /// Gets the variant registered under <c>typeof(T)</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">No variant is registered for
    /// <typeparamref name="T"/>; the message lists the registered type names.</exception>
    public T GetOrder<T>() where T : class
    {
        if (_variants.TryGetValue(typeof(T), out var v)) return (T)v;
        throw new InvalidOperationException($"Variant '{typeof(T).Name}' not registered for cell '{Name}' (registered: {string.Join(", ", _variants.Keys.Select(k => k.Name))})");
    }

    /// <summary>
    /// Reports whether a variant is registered under <c>typeof(T)</c>. Use to gate optional
    /// benchmarks whose variant may not be prepared in every cell.
    /// </summary>
    public bool HasOrder<T>() where T : class => _variants.ContainsKey(typeof(T));
}

/// <summary>
/// Benchmark cell with a strongly typed primary graph. The primary graph is exposed through
/// <see cref="Order"/> and is also registered as the variant for <typeparamref name="TOrder"/>.
/// </summary>
public sealed class TestDataSet<TOrder> : TestDataSet
    where TOrder : class
{
    /// <summary>The primary graph of this cell.</summary>
    public TOrder Order { get; }

    /// <summary>Creates the cell and registers <paramref name="order"/> as its primary variant.</summary>
    /// <param name="name">Human-readable cell name.</param>
    /// <param name="order">Primary graph; must not be null.</param>
    /// <param name="iidRefPercent">Share of IId shared references in the data; defaults to 0.</param>
    /// <exception cref="ArgumentNullException"><paramref name="order"/> is null.</exception>
    public TestDataSet(string name, TOrder order, int iidRefPercent = 0)
        : base(name, iidRefPercent)
    {
        // Checked here (not only inside RegisterVariant) so the exception names "order".
        ArgumentNullException.ThrowIfNull(order);
        Order = order;
        RegisterVariant(order); // primary registers itself
    }

    /// <summary>Runtime type name of <see cref="Order"/>.</summary>
    public override string TypeName => Order.GetType().Name;
}