// Source path: AyCode.Core/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs
// Repository-viewer metadata captured with this copy: 548 lines, 26 KiB, C#.
// The viewer flagged "ambiguous Unicode characters" in this file. The charset suffix literals
// below contain non-ASCII text (accented Latin, CJK, Cyrillic) on purpose, so that warning is expected.

using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Tests.TestModels;
/// <summary>
/// Charset suffix presets for the per-property string augmentation done by
/// <c>BenchmarkStringSupport.ToLongString</c>. The benchmark appends the configured suffix to every
/// short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection in
/// <c>BenchmarkStringSupport.EnsureAllStringsBypassFixStr</c>), producing long-string benchmark payloads
/// with a controlled UTF-8 content profile.
///
/// Select a preset by assigning it to <see cref="BenchmarkTestDataProvider.LongStringSuffix"/>, either from
/// the interactive Settings → Charset submenu or programmatically. The active charset is recorded in the
/// .LLM markdown output header so per-charset bench files are self-documenting.
/// </summary>
public static class CharsetSuffixes
{
    // ─────────────────────────────────────────────────────────────────────────
    // Every preset has the same UTF-16 length (char count, NOT UTF-8 byte count):
    //   *Short =  40 char (5-char base × 8 repetitions) → StringSmall / StringAscii tier
    //   *Long  = 280 char (Short × 7)                   → StringMedium / StringAscii tier
    //
    // Keeping the length equal across charsets isolates the workload variable to UTF-8 byte
    // content (1-byte ASCII vs 2-byte Latin1 / Cyrillic vs 3-byte CJK vs mixed), so wire-size and
    // encode/decode cost differences are pure charset effects, not length effects.
    //
    // Everything is assembled by const concatenation so the values are compile-time constants
    // (usable as attribute / DataRow source). The private *X2 / *X4 / *ShortX3 constants are
    // only stepping stones of that concatenation: base×2 → ×4 → ×8 (= Short), Short×3+×3+×1 (= Long).
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>Empty suffix — baseline string property values stay short, hitting the
    /// <c>FixStrAscii</c> / short-string fast-path. Stress-test for short-string code paths.</summary>
    public const string Latin1FixAscii = "";

    // ── Pure ASCII (every byte < 0x80) ──
    // Tier: StringAscii (167) — byte→char SIMD widening, zero UTF-8 decode.
    // UTF-8 byte count: 40 byte (Short), 280 byte (Long) — 1:1 char:byte.
    private const string AsciiBase = " quic"; // 5 char ASCII
    private const string AsciiX2 = AsciiBase + AsciiBase;
    private const string AsciiX4 = AsciiX2 + AsciiX2;
    public const string AsciiShort = AsciiX4 + AsciiX4; // 8 × base = 40 char
    private const string AsciiShortX3 = AsciiShort + AsciiShort + AsciiShort;
    public const string AsciiLong = AsciiShortX3 + AsciiShortX3 + AsciiShort; // 7 × Short = 280 char

    // ── Latin1 (Hungarian proxy) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // The base uses only á and í (2 byte each in UTF-8) interleaved with ASCII; it contains no ő/ű.
    // UTF-8 byte count: 5 char base = 7 byte (space 1 + á 2 + r 1 + v 1 + í 2)
    //   → 56 byte Short, 392 byte Long.
    private const string Latin1Base = " árví"; // 5 char (space + á + r + v + í) — multi-byte mix
    private const string Latin1X2 = Latin1Base + Latin1Base;
    private const string Latin1X4 = Latin1X2 + Latin1X2;
    public const string Latin1Short = Latin1X4 + Latin1X4; // 8 × base = 40 char
    private const string Latin1ShortX3 = Latin1Short + Latin1Short + Latin1Short;
    public const string Latin1Long = Latin1ShortX3 + Latin1ShortX3 + Latin1Short; // 7 × Short = 280 char

    // ── CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 13 byte (1 ASCII space + 4 × 3-byte CJK)
    //   → 104 byte Short, 728 byte Long.
    // Homogeneous 3-byte runs — primary win region for SIMD multi-byte transcoder.
    private const string CjkBmpBase = " 你好世界"; // 5 char (space + 4 Chinese)
    private const string CjkBmpX2 = CjkBmpBase + CjkBmpBase;
    private const string CjkBmpX4 = CjkBmpX2 + CjkBmpX2;
    public const string CjkBmpShort = CjkBmpX4 + CjkBmpX4; // 8 × base = 40 char
    private const string CjkBmpShortX3 = CjkBmpShort + CjkBmpShort + CjkBmpShort;
    public const string CjkBmpLong = CjkBmpShortX3 + CjkBmpShortX3 + CjkBmpShort; // 7 × Short = 280 char

    // ── Cyrillic (Russian / Ukrainian) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 9 byte (1 ASCII space + 4 × 2-byte Cyrillic)
    //   → 72 byte Short, 504 byte Long.
    // Homogeneous 2-byte runs — different shape than the interspersed Latin1 preset.
    private const string CyrillicBase = " Прив"; // 5 char (space + 4 Cyrillic)
    private const string CyrillicX2 = CyrillicBase + CyrillicBase;
    private const string CyrillicX4 = CyrillicX2 + CyrillicX2;
    public const string CyrillicShort = CyrillicX4 + CyrillicX4; // 8 × base = 40 char
    private const string CyrillicShortX3 = CyrillicShort + CyrillicShort + CyrillicShort;
    public const string CyrillicLong = CyrillicShortX3 + CyrillicShortX3 + CyrillicShort; // 7 × Short = 280 char

    // ── Mixed (multi-codepage in one payload) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 10 byte (1 ASCII space + 1 × 2-byte Hungarian
    //   + 1 × 3-byte CJK + 2 × 2-byte Cyrillic) → 80 byte Short, 560 byte Long.
    // No surrogate pairs (keeps UTF-16 length predictable); cross-tier transcoder coverage
    // in one payload.
    private const string MixedBase = " á你Пй"; // 5 char (space + Hungarian + Chinese + 2× Cyrillic)
    private const string MixedX2 = MixedBase + MixedBase;
    private const string MixedX4 = MixedX2 + MixedX2;
    public const string MixedShort = MixedX4 + MixedX4; // 8 × base = 40 char
    private const string MixedShortX3 = MixedShort + MixedShort + MixedShort;
    public const string MixedLong = MixedShortX3 + MixedShortX3 + MixedShort; // 7 × Short = 280 char
}
// ============================================================================================
// Cross-family shared state. The charset suffix is a global benchmark configuration — settable
// once via the interactive Menu, applied uniformly to every family's data construction. It lives
// in a non-generic helper so it ISN'T per-closed-generic (which would cause the Menu setter to
// affect only one family). The <see cref="BenchmarkTestDataProvider.LongStringSuffix"/> forwarding
// property preserves the existing Menu.cs API surface.
// ============================================================================================
internal static class BenchmarkStringSupport
{
    /// <summary>
    /// Length threshold in UTF-16 chars used by <see cref="ToLongString"/>: values longer than this
    /// are returned unchanged, everything else gets <see cref="LongStringSuffix"/> appended.
    /// NOTE(review): presumably mirrors the serializer's FixStr length limit — confirm against the serializer.
    /// </summary>
    internal const int FixStrMaxLength = 31;

    /// <summary>Suffix appended to short string values; initialised to <c>CharsetSuffixes.Latin1Long</c>.</summary>
    internal static string LongStringSuffix = CharsetSuffixes.Latin1Long;

    /// <summary>
    /// Identity-based comparer for the visited set, so two distinct objects that happen to be
    /// value-equal are both walked and an object reachable via several paths is walked once.
    /// </summary>
    private sealed class ReferenceComparer : IEqualityComparer<object>
    {
        public static readonly ReferenceComparer Instance = new();

        // 'new' because this instance method hides the inherited static object.Equals(object, object).
        public new bool Equals(object? x, object? y) => ReferenceEquals(x, y);

        public int GetHashCode(object obj) => RuntimeHelpers.GetHashCode(obj);
    }

    /// <summary>
    /// Walks the object graph reachable from <paramref name="root"/> and rewrites every readable and
    /// writable public instance <c>string</c> property through <see cref="ToLongString"/>.
    /// </summary>
    /// <remarks>
    /// Traversal is iterative (explicit stack) and tracks visited objects by reference, so cyclic
    /// graphs terminate. Objects implementing <see cref="IEnumerable"/> are treated purely as
    /// containers: their elements are visited, their own properties are not. Properties whose
    /// declared type is a value type are not descended into. Indexer properties are skipped because
    /// they cannot be read without index arguments.
    /// </remarks>
    /// <param name="root">Graph root; <c>null</c> is a no-op.</param>
    internal static void EnsureAllStringsBypassFixStr(object? root)
    {
        if (root is null)
        {
            return;
        }

        var visited = new HashSet<object>(ReferenceComparer.Instance);
        var pending = new Stack<object>();
        pending.Push(root);

        while (pending.Count > 0)
        {
            var current = pending.Pop();
            if (!visited.Add(current))
            {
                continue;
            }

            // string is IEnumerable<char>; it must not be unpacked char by char.
            if (current is IEnumerable sequence && current is not string)
            {
                foreach (var element in sequence)
                {
                    if (element is not null && IsWorthVisiting(element))
                    {
                        pending.Push(element);
                    }
                }

                continue;
            }

            foreach (var property in current.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public))
            {
                // Indexers show up as properties; GetValue(current) without index arguments would
                // throw TargetParameterCountException, so they are skipped up front.
                if (!property.CanRead || property.GetIndexParameters().Length != 0)
                {
                    continue;
                }

                var propertyType = property.PropertyType;
                if (propertyType == typeof(string))
                {
                    if (property.CanWrite)
                    {
                        var text = (string?)property.GetValue(current);
                        property.SetValue(current, ToLongString(text));
                    }

                    continue;
                }

                // Enums are value types too, so this single check covers both.
                if (propertyType.IsValueType)
                {
                    continue;
                }

                var child = property.GetValue(current);
                if (child is not null)
                {
                    pending.Push(child);
                }
            }
        }
    }

    /// <summary>
    /// Filters collection elements that can never contribute a string property to rewrite:
    /// strings themselves (immutable) and boxed primitives / enums. Other boxed structs are still
    /// visited because their properties may expose reference-typed values.
    /// </summary>
    private static bool IsWorthVisiting(object element)
    {
        if (element is string)
        {
            return false;
        }

        var elementType = element.GetType();
        return !elementType.IsPrimitive && !elementType.IsEnum;
    }

    /// <summary>
    /// Produces the long form of <paramref name="value"/>: a placeholder plus suffix for null/empty
    /// input, the value unchanged when it is already longer than <see cref="FixStrMaxLength"/>,
    /// otherwise the value with <see cref="LongStringSuffix"/> appended.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static string ToLongString(string? value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return "Benchmark_String_Value" + LongStringSuffix;
        }

        return value.Length > FixStrMaxLength
            ? value
            : value + LongStringSuffix;
    }
}
// ============================================================================================
// Generic test-data provider. One closed-generic alias per family — see
// <see cref="BenchmarkTestDataProvider"/> (the <c>_All_True</c> family, MSTEST-compatible name) and
// <see cref="BenchmarkTestDataProvider_All_False"/> (the <c>_All_False</c> family, Phase 1 benchmark
// target). The five cell-creator methods + ClearDeepLevelRefs are written once on this generic base
// and use the constrained <c>TestDataFactory&lt;TOrder, ...&gt;</c> for per-family element creation.
// ============================================================================================
public abstract class BenchmarkTestDataProvider<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>
    where TOrder : TestOrderBase<TItem, TTag, TUser, TCategory, TMetadata, TPreferences>, new()
    where TItem : TestOrderItemBase<TPallet, TTag, TUser, TMetadata, TOrder, TPreferences>, new()
    where TPallet : TestPalletBase<TMeasurement, TTag, TUser, TCategory, TMetadata, TItem, TPreferences>, new()
    where TMeasurement : TestMeasurementBase<TPoint, TTag, TUser, TPallet, TPreferences>, new()
    where TPoint : TestMeasurementPointBase<TTag, TUser, TMeasurement, TPreferences>, new()
    where TTag : SharedTagBase, new()
    where TUser : SharedUserBase<TPreferences>, new()
    where TCategory : SharedCategoryBase, new()
    where TMetadata : MetadataInfoBase<TMetadata>, new()
    where TPreferences : UserPreferencesBase, new()
{
    /// <summary>
    /// Active long-string suffix appended to short string properties while benchmark data is built.
    /// Reads and writes go straight to <see cref="BenchmarkStringSupport.LongStringSuffix"/>, a
    /// non-generic shared field, so the value is the same no matter which closed alias is used
    /// (<c>BenchmarkTestDataProvider.LongStringSuffix</c> or
    /// <c>BenchmarkTestDataProvider_All_False.LongStringSuffix</c>). A static field declared directly
    /// on this generic base would instead exist once per closed generic, and the Menu setter would
    /// only affect whichever alias it addressed.
    /// </summary>
    public static string LongStringSuffix
    {
        get { return BenchmarkStringSupport.LongStringSuffix; }
        set { BenchmarkStringSupport.LongStringSuffix = value; }
    }

    /// <summary>
    /// Thin forwarding shim over the matching closed <c>TestDataFactory</c>, so the Create* methods
    /// below do not have to spell out the ten type arguments on every call.
    /// </summary>
    private static class Factory
    {
        public static void ResetIdCounter()
        {
            TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.ResetIdCounter();
        }

        public static TTag CreateTag(string? name = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateTag(name);
        }

        public static TUser CreateUser(string? username = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateUser(username);
        }

        public static TCategory CreateCategory(string? name = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateCategory(name);
        }

        public static TMetadata CreateMetadata(string? key = null, bool withChild = false)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateMetadata(key, withChild);
        }

        public static TOrder CreateOrder(
            int itemCount, int palletsPerItem, int measurementsPerPallet, int pointsPerMeasurement,
            TTag? sharedTag = null, TUser? sharedUser = null, TMetadata? sharedMetadata = null,
            TPreferences? sharedPreferences = null, TCategory? sharedCategory = null)
        {
            return TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateOrder(
                itemCount, palletsPerItem, measurementsPerPallet, pointsPerMeasurement,
                sharedTag, sharedUser, sharedMetadata, sharedPreferences, sharedCategory);
        }
    }

    /// <summary>
    /// Builds all five benchmark cells in a fixed order: Small, Medium, Large, Repeated Strings,
    /// Deep Nested.
    /// </summary>
    /// <param name="resetId">Passed to every cell builder; when true each builder resets the
    /// factory id counter before creating its graph.</param>
    public static List<TestDataSet> CreateTestDataSets(bool resetId = true)
    {
        var cells = new List<TestDataSet>();
        cells.Add(CreateSmallTestData(resetId));
        cells.Add(CreateMediumTestData(resetId));
        cells.Add(CreateLargeTestData(resetId));
        cells.Add(CreateRepeatedStringsTestData(resetId));
        cells.Add(CreateDeepNestedTestData(resetId));
        return cells;
    }

    /// <summary>
    /// Common tail of every cell builder: lengthens the graph's short strings, clears the deep-level
    /// shared references, then wraps the graph in a data set declared with a 20% IId-ref share.
    /// </summary>
    private static TestDataSet<TOrder> Package(string cellName, TOrder graph)
    {
        BenchmarkStringSupport.EnsureAllStringsBypassFixStr(graph);
        ClearDeepLevelRefs(graph);
        return new TestDataSet<TOrder>(cellName, graph, iidRefPercent: 20);
    }

    /// <summary>2×2×2×2 graph with a shared tag and a shared user.</summary>
    private static TestDataSet<TOrder> CreateSmallTestData(bool resetId = true)
    {
        if (resetId)
        {
            Factory.ResetIdCounter();
        }

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");

        var graph = Factory.CreateOrder(
            itemCount: 2, palletsPerItem: 2, measurementsPerPallet: 2, pointsPerMeasurement: 2,
            sharedTag: tag, sharedUser: user);

        return Package("Small (2x2x2x2)", graph);
    }

    /// <summary>3×3×3×4 graph with shared tag, user, metadata (with child) and preferences.</summary>
    private static TestDataSet<TOrder> CreateMediumTestData(bool resetId = true)
    {
        if (resetId)
        {
            Factory.ResetIdCounter();
        }

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");
        var metadata = Factory.CreateMetadata("shared", withChild: true);
        var preferences = new TPreferences
        {
            Theme = "dark",
            Language = "hungarian",
            NotificationsEnabled = true,
            EmailDigestFrequency = "weekly"
        };
        user.Preferences = preferences;

        var graph = Factory.CreateOrder(
            itemCount: 3, palletsPerItem: 3, measurementsPerPallet: 3, pointsPerMeasurement: 4,
            sharedTag: tag, sharedUser: user, sharedMetadata: metadata, sharedPreferences: preferences);

        return Package("Medium (3x3x3x4)", graph);
    }

    /// <summary>5×5×5×10 graph with shared tag, user and preferences.</summary>
    private static TestDataSet<TOrder> CreateLargeTestData(bool resetId = true)
    {
        if (resetId)
        {
            Factory.ResetIdCounter();
        }

        var tag = Factory.CreateTag("SharedTag");
        var user = Factory.CreateUser("shareduser");
        var preferences = new TPreferences
        {
            Theme = "light",
            Language = "german",
            NotificationsEnabled = false,
            EmailDigestFrequency = "daily"
        };
        user.Preferences = preferences;

        var graph = Factory.CreateOrder(
            itemCount: 5, palletsPerItem: 5, measurementsPerPallet: 5, pointsPerMeasurement: 10,
            sharedTag: tag, sharedUser: user, sharedPreferences: preferences);

        return Package("Large (5x5x5x10)", graph);
    }

    /// <summary>
    /// 10×2×2×2 graph in which every item shares one ProductName and every pallet one PalletCode.
    /// </summary>
    private static TestDataSet<TOrder> CreateRepeatedStringsTestData(bool resetId = true)
    {
        if (resetId)
        {
            Factory.ResetIdCounter();
        }

        var tag = Factory.CreateTag("RepeatedTag");
        var user = Factory.CreateUser("repeateduser");
        var preferences = new TPreferences
        {
            Theme = "dark",
            Language = "hungarian",
            NotificationsEnabled = true,
            EmailDigestFrequency = "weekly"
        };
        user.Preferences = preferences;

        var graph = Factory.CreateOrder(
            itemCount: 10, palletsPerItem: 2, measurementsPerPallet: 2, pointsPerMeasurement: 2,
            sharedTag: tag, sharedUser: user, sharedPreferences: preferences);

        // Repeated string fields — ProductName on items + PalletCode on pallets. Both recur across
        // the hierarchy, exercising string-interning deduplication on the Default preset (which has
        // UseStringInterning = All). Targeting ~20% repeated-string share overall.
        // The baselines are short ASCII (≤ FixStrMaxLength), so EnsureAllStringsBypassFixStr appends
        // the active charset suffix — the payload's UTF-8 content profile is then governed entirely
        // by the selected charset, not by hard-coded Hungarian baseline values.
        foreach (var orderItem in graph.Items)
        {
            orderItem.Status = TestStatus.Processing;
            orderItem.ProductName = "ProductName";

            foreach (var itemPallet in orderItem.Pallets)
            {
                itemPallet.PalletCode = "PalletCode";
            }
        }

        return Package("Repeated Strings (10 items)", graph);
    }

    /// <summary>2×4×4×8 graph with shared tag, user, category and preferences.</summary>
    private static TestDataSet<TOrder> CreateDeepNestedTestData(bool resetId = true)
    {
        if (resetId)
        {
            Factory.ResetIdCounter();
        }

        var tag = Factory.CreateTag("DeepTag");
        var user = Factory.CreateUser("deepuser");
        var category = Factory.CreateCategory("DeepCategory");
        var preferences = new TPreferences
        {
            Theme = "light",
            Language = "french",
            NotificationsEnabled = false,
            EmailDigestFrequency = "monthly"
        };
        user.Preferences = preferences;

        var graph = Factory.CreateOrder(
            itemCount: 2, palletsPerItem: 4, measurementsPerPallet: 4, pointsPerMeasurement: 8,
            sharedTag: tag, sharedUser: user, sharedPreferences: preferences, sharedCategory: category);

        return Package("Deep Nested (2x4x4x8)", graph);
    }

    /// <summary>
    /// Nulls the shared references below the item level that would otherwise push the IId-ref share
    /// above the ~20% the test data targets.
    /// </summary>
    private static void ClearDeepLevelRefs(TOrder order)
    {
        // Pallet level: Tag and Inspector are deliberately KEPT (they used to be cleared) because
        // they contribute the bulk of the ~20% IId-ref share; only Category is cleared here so the
        // share stays moderate. Measurement and point levels are cleared entirely so deep-tree ref
        // noise does not skew the share upward.
        foreach (var orderItem in order.Items)
        {
            foreach (var itemPallet in orderItem.Pallets)
            {
                itemPallet.Category = null;

                foreach (var palletMeasurement in itemPallet.Measurements)
                {
                    palletMeasurement.Tag = null;
                    palletMeasurement.Operator = null;

                    foreach (var measurementPoint in palletMeasurement.Points)
                    {
                        measurementPoint.Tag = null;
                        measurementPoint.Verifier = null;
                    }
                }
            }
        }
    }
}
// ============================================================================================
// Closed-generic aliases for the provider. Same pattern as the factory: a bare-name class for
// MSTEST backward compatibility (kept on _All_True), and a _All_False suffix variant for the
// Phase 1 benchmark target. The static <c>LongStringSuffix</c> forwarding property lives on the
// generic base above, so it is reachable identically through either alias
// (<c>BenchmarkTestDataProvider.LongStringSuffix</c> or
// <c>BenchmarkTestDataProvider_All_False.LongStringSuffix</c>); both route to the same
// <see cref="BenchmarkStringSupport.LongStringSuffix"/> shared field. The API surface is therefore
// symmetric across families — no per-alias asymmetry.
// ============================================================================================
/// <summary>
/// <c>_All_True</c> family provider: closes the generic base over the <c>*_All_True</c> model types
/// and adds no members of its own. It preserves the bare-name API surface
/// (<c>BenchmarkTestDataProvider.CreateTestDataSets()</c>) that the SGen-vs-runtime compatibility
/// test depends on. <c>LongStringSuffix</c> is inherited from the generic base.
/// </summary>
public sealed class BenchmarkTestDataProvider : BenchmarkTestDataProvider<
TestOrder_All_True, TestOrderItem_All_True, TestPallet_All_True, TestMeasurement_All_True, TestMeasurementPoint_All_True,
SharedTag_All_True, SharedUser_All_True, SharedCategory_All_True, MetadataInfo_All_True, UserPreferences_All_True>
{
// Intentionally empty: every member comes from the generic base.
}
/// <summary>
/// <c>_All_False</c> family provider — Phase 1 benchmark target. Closes the generic base over the
/// <c>*_All_False</c> model types and inherits the generic cell-creator methods unchanged, so the
/// graphs those methods build are <c>TestOrder_All_False</c> graphs. Adds no members of its own.
/// </summary>
public sealed class BenchmarkTestDataProvider_All_False : BenchmarkTestDataProvider<
TestOrder_All_False, TestOrderItem_All_False, TestPallet_All_False, TestMeasurement_All_False, TestMeasurementPoint_All_False,
SharedTag_All_False, SharedUser_All_False, SharedCategory_All_False, MetadataInfo_All_False, UserPreferences_All_False>
{
// Intentionally empty: every member comes from the generic base.
}
// ============================================================================================
// TestDataSet — abstract metadata base + generic-ordered concrete. Orchestration code iterates
// over the base type (Name/DisplayName/TypeName/IIdRefPercent only); concrete consumers
// (CreateSerializers, Output binary-output dump) downcast to TestDataSet<TOrder> to access the
// typed Order.
// ============================================================================================
public abstract class TestDataSet
{
    /// <summary>Cell name as passed to the constructor.</summary>
    public string Name { get; }

    /// <summary>
    /// Percentage of IId shared references in the data (0-100).
    /// Higher values mean more deduplication benefit for Default mode.
    /// </summary>
    public int IIdRefPercent { get; }

    // Type-keyed variant registry. Phase 2 multi-variant dispatch: AcBinary's options preset
    // decides which variant graph it serializes (FastMode → _All_False, Default → _All_True),
    // while MemPack/MsgPack canonically use one (typically _All_True). The cells build all
    // known variants upfront and register them here so CreateSerializers can hand each benchmark
    // its matching graph instance. Values are never null (enforced by RegisterVariant).
    private readonly Dictionary<Type, object> _variants = new();

    /// <summary>Stores the cell name and its declared IId-ref percentage.</summary>
    protected TestDataSet(string name, int iidRefPercent)
    {
        Name = name;
        IIdRefPercent = iidRefPercent;
    }

    /// <summary>Name of the type this cell reports as its payload type; defined by the subclass.</summary>
    public abstract string TypeName { get; }

    /// <summary>
    /// Gets display name including IId ref percentage if set (i.e. greater than zero).
    /// </summary>
    public string DisplayName
    {
        get
        {
            return IIdRefPercent > 0
                ? $"{Name} [{IIdRefPercent}% IId refs]"
                : Name;
        }
    }

    /// <summary>
    /// Register a variant graph for this cell under the key <c>typeof(T)</c>. Called by builders.
    /// Idempotent on the same type (last-write-wins, no error) so an alias's primary registration
    /// is harmless even if cross-registration adds the same variant later.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="variant"/> is null. Rejected here
    /// because a null entry would make <see cref="HasOrder{T}"/> report true while
    /// <see cref="GetOrder{T}"/> silently returned null.</exception>
    public void RegisterVariant<T>(T variant) where T : class
    {
        if (variant is null)
        {
            throw new ArgumentNullException(nameof(variant));
        }

        _variants[typeof(T)] = variant;
    }

    /// <summary>
    /// Get a registered variant by type. Throws <see cref="InvalidOperationException"/> if not
    /// registered — fail-fast surfaces a mismatch between the variant a benchmark expects and
    /// what the cell-builder populated. The message lists the types that are registered.
    /// </summary>
    public T GetOrder<T>() where T : class
    {
        if (_variants.TryGetValue(typeof(T), out var registered))
        {
            return (T)registered;
        }

        throw new InvalidOperationException($"Variant '{typeof(T).Name}' not registered for cell '{Name}' (registered: {string.Join(", ", _variants.Keys.Select(k => k.Name))})");
    }

    /// <summary>
    /// Check whether a variant is registered. Use to gate optional benchmarks that may not have
    /// their variant prepared in every cell.
    /// </summary>
    public bool HasOrder<T>() where T : class
    {
        return _variants.ContainsKey(typeof(T));
    }
}
/// <summary>
/// Concrete data set that carries the typed order graph of a cell and registers that graph as
/// the cell's primary variant.
/// </summary>
/// <typeparam name="TOrder">Root type of the order graph held by this cell.</typeparam>
public sealed class TestDataSet<TOrder> : TestDataSet
    where TOrder : class
{
    /// <summary>Creates the cell and registers <paramref name="order"/> under <c>typeof(TOrder)</c>.</summary>
    /// <param name="name">Cell name.</param>
    /// <param name="order">Order graph exposed through <see cref="Order"/>.</param>
    /// <param name="iidRefPercent">Declared IId-ref percentage; defaults to 0.</param>
    public TestDataSet(string name, TOrder order, int iidRefPercent = 0)
        : base(name, iidRefPercent)
    {
        Order = order;

        // The primary graph registers itself so GetOrder<TOrder>() resolves without extra setup.
        RegisterVariant<TOrder>(order);
    }

    /// <summary>The typed order graph of this cell.</summary>
    public TOrder Order { get; }

    /// <summary>Runtime type name of <see cref="Order"/> (not the static <c>TOrder</c> name).</summary>
    public override string TypeName
    {
        get { return Order.GetType().Name; }
    }
}