// AyCode.Core/AyCode.Core.Tests/TestModels/BenchmarkTestDataProvider.cs
//
// 376 lines
// 14 KiB
// C#

using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Tests.TestModels;
/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkTestDataProvider.ToLongString</c>. The benchmark applies the configured suffix
/// to every short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection
/// in <c>BenchmarkTestDataProvider.EnsureAllStringsBypassFixStr</c>), producing long-string
/// benchmark payloads with a controlled UTF-8 content profile.
///
/// Switch by assigning to <see cref="BenchmarkTestDataProvider.LongStringSuffix"/> from the interactive
/// Settings → Charset submenu (or programmatically). The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
/// </summary>
/// <remarks>
/// The suffix is appended exactly once per property, and only when the current value is at most
/// <c>FixStrMaxLength</c> chars long (or null/empty, in which case a fixed placeholder is used as the
/// base). The resulting length is therefore baseline length + suffix length; a short suffix on a
/// very short baseline can still end up at or below <c>FixStrMaxLength</c>.
/// </remarks>
public static class CharsetSuffixes
{
/// <summary>Empty suffix — short Hungarian baseline strings (e.g. "SharedTag_All_True") stay short, hitting
/// the FixStr fast-path. Stress-test for FixStr / short-string code paths. Note: the baseline
/// property values remain Hungarian; only the suffix is empty. Despite the "FixAscii" name, this
/// option does NOT change baseline values to ASCII — it suppresses the suffix that would otherwise
/// push every property past the FixStr boundary.
/// NOTE(review): the baselines visible in this file ("SharedTag_All_True", "dark", "ProductName", …)
/// are ASCII; "Hungarian" presumably refers to values produced elsewhere — confirm.</summary>
public const string Latin1FixAscii = "";
/// <summary>Short Latin1 mixed (Hungarian, ~24 char) — typical European i18n payload, short
/// multi-byte runs. Below the 32-char FixStr boundary on the suffix alone. Because the suffix is
/// appended only once, the combined value exceeds <c>FixStrMaxLength</c> only when the baseline is
/// long enough; very short baselines (e.g. "dark") remain at or below it with this preset.</summary>
public const string Latin1Short = " árvíztűrő tükörfúrógép";
/// <summary>Long Latin1 mixed (~47 char) — exceeds the 32-char FixStr boundary on the suffix alone,
/// exercising the StringSmall+ tier path with Latin1 mixed content (Hungarian accented letters).</summary>
public const string Latin1Long = " árvíztűrő tükörfúrógép a magyar betűzés tesztje";
/// <summary>CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) — long homogeneous
/// 3-byte UTF-8 runs. Primary win region for V4N2 Phase 3 SIMD multi-byte transcoder work.
/// The suffix alone is shorter than <c>FixStrMaxLength</c> chars, so the final char length depends
/// on the baseline it is appended to.</summary>
public const string CjkBmp = " 你好世界 こんにちは 안녕하세요";
/// <summary>Cyrillic (Russian / Ukrainian / etc.) — long homogeneous 2-byte runs, different shape
/// than Hungarian mixed (where 2-byte chars are short interspersed runs).
/// The suffix alone is shorter than <c>FixStrMaxLength</c> chars, so the final char length depends
/// on the baseline it is appended to.</summary>
public const string Cyrillic = " Привет мир дорогой друг";
/// <summary>Mixed full-spectrum (Hungarian + CJK + Cyrillic + emoji surrogate pairs) — multi-tier
/// coverage in one payload. Stresses surrogate-pair handling in the UTF-8 transcoder.
/// The suffix alone is shorter than <c>FixStrMaxLength</c> chars, so the final char length depends
/// on the baseline it is appended to.</summary>
public const string Mixed = " árvíz 你好 Привет 😀";
}
public static class BenchmarkTestDataProvider
{
/// <summary>
/// Inclusive upper bound, compared against <see cref="string.Length"/> (UTF-16 chars), below which
/// <c>ToLongString</c> treats a value as short and appends <see cref="LongStringSuffix"/>; longer
/// values are returned unchanged.
/// NOTE(review): presumably mirrors the serializer's FixStr length limit — confirm that the serializer
/// also measures in chars rather than UTF-8 bytes.
/// </summary>
private const int FixStrMaxLength = 31;
/// <summary>
/// Active long-string suffix appended to short string properties during benchmark data construction.
/// Defaults to <see cref="CharsetSuffixes.Latin1Long"/> (~47-char Latin1 mixed) — backward-compatible
/// in spirit with the prior fixed default (Latin1 mixed family, ~32 char). Switch from
/// <see cref="CharsetSuffixes"/> to measure other UTF-8 content profiles.
/// The value is read by <c>ToLongString</c> each time a data set is built, so an assignment only
/// affects data sets created afterwards.
/// </summary>
public static string LongStringSuffix = CharsetSuffixes.Latin1Long;
/// <summary>
/// Equality comparer that considers two objects equal only when they are the very same instance.
/// Used to track visited nodes by identity, independent of any overridden Equals/GetHashCode.
/// </summary>
private sealed class ReferenceComparer : IEqualityComparer<object>
{
    /// <summary>Shared comparer instance.</summary>
    public static readonly ReferenceComparer Instance = new();

    /// <summary>Returns true when <paramref name="x"/> and <paramref name="y"/> are the same reference (or both null).</summary>
    // The 'new' modifier is needed because this instance method hides the inherited static
    // object.Equals(object, object).
    public new bool Equals(object? x, object? y)
    {
        return object.ReferenceEquals(x, y);
    }

    /// <summary>Returns the identity-based hash code of <paramref name="obj"/>.</summary>
    public int GetHashCode(object obj)
    {
        return RuntimeHelpers.GetHashCode(obj);
    }
}
/// <summary>
/// Builds all benchmark data sets in a fixed order: small, medium, large, repeated-strings, deep-nested.
/// </summary>
/// <param name="resetId">Forwarded unchanged to every individual data-set builder.</param>
/// <returns>A new list holding the five data sets in the order listed above.</returns>
public static List<TestDataSet> CreateTestDataSets(bool resetId = true)
{
    var dataSets = new List<TestDataSet>();

    // The builders run strictly in this sequence; each one receives the same resetId flag.
    dataSets.Add(CreateSmallTestData(resetId));
    dataSets.Add(CreateMediumTestData(resetId));
    dataSets.Add(CreateLargeTestData(resetId));
    dataSets.Add(CreateRepeatedStringsTestData(resetId));
    dataSets.Add(CreateDeepNestedTestData(resetId));

    return dataSets;
}
/// <summary>
/// Builds the "Small (2x2x2x2)" data set: an order with 2 items, 2 pallets per item,
/// 2 measurements per pallet and 2 points per measurement, sharing one tag and one user.
/// </summary>
/// <param name="resetId">When true, <c>TestDataFactory.ResetIdCounter()</c> is called before anything is created.</param>
/// <returns>The data set, labelled with an IId-ref percentage of 20.</returns>
private static TestDataSet CreateSmallTestData(bool resetId = true)
{
    if (resetId)
    {
        TestDataFactory.ResetIdCounter();
    }

    var tag = TestDataFactory.CreateTag("SharedTag_All_True");
    var user = TestDataFactory.CreateUser("shareduser");

    var smallOrder = TestDataFactory.CreateOrder(
        itemCount: 2,
        palletsPerItem: 2,
        measurementsPerPallet: 2,
        pointsPerMeasurement: 2,
        sharedTag: tag,
        sharedUser: user);

    // Strings are extended first, while the full graph is still reachable; the deep-level
    // references are removed afterwards.
    EnsureAllStringsBypassFixStr(smallOrder);
    ClearDeepLevelRefs(smallOrder);

    return new TestDataSet(name: "Small (2x2x2x2)", order: smallOrder, iidRefPercent: 20);
}
/// <summary>
/// Builds the "Medium (3x3x3x4)" data set: an order with 3 items, 3 pallets per item,
/// 3 measurements per pallet and 4 points per measurement, sharing one tag, one user,
/// one metadata object (created with a child) and one preferences object.
/// </summary>
/// <param name="resetId">When true, <c>TestDataFactory.ResetIdCounter()</c> is called before anything is created.</param>
/// <returns>The data set, labelled with an IId-ref percentage of 20.</returns>
private static TestDataSet CreateMediumTestData(bool resetId = true)
{
    if (resetId)
    {
        TestDataFactory.ResetIdCounter();
    }

    var tag = TestDataFactory.CreateTag("SharedTag_All_True");
    var user = TestDataFactory.CreateUser("shareduser");
    var metadata = TestDataFactory.CreateMetadata("shared", withChild: true);

    var preferences = new UserPreferences_All_True
    {
        Theme = "dark",
        Language = "hungarian",
        NotificationsEnabled = true,
        EmailDigestFrequency = "weekly"
    };

    // The same preferences instance is attached to the shared user and also handed to the factory.
    user.Preferences = preferences;

    var mediumOrder = TestDataFactory.CreateOrder(
        itemCount: 3,
        palletsPerItem: 3,
        measurementsPerPallet: 3,
        pointsPerMeasurement: 4,
        sharedTag: tag,
        sharedUser: user,
        sharedMetadata: metadata,
        sharedPreferences: preferences);

    // Strings are extended first, while the full graph is still reachable; the deep-level
    // references are removed afterwards.
    EnsureAllStringsBypassFixStr(mediumOrder);
    ClearDeepLevelRefs(mediumOrder);

    return new TestDataSet(name: "Medium (3x3x3x4)", order: mediumOrder, iidRefPercent: 20);
}
/// <summary>
/// Builds the "Large (5x5x5x10)" data set: an order with 5 items, 5 pallets per item,
/// 5 measurements per pallet and 10 points per measurement, sharing one tag, one user
/// and one preferences object.
/// </summary>
/// <param name="resetId">When true, <c>TestDataFactory.ResetIdCounter()</c> is called before anything is created.</param>
/// <returns>The data set, labelled with an IId-ref percentage of 20.</returns>
private static TestDataSet CreateLargeTestData(bool resetId = true)
{
    if (resetId)
    {
        TestDataFactory.ResetIdCounter();
    }

    var tag = TestDataFactory.CreateTag("SharedTag_All_True");
    var user = TestDataFactory.CreateUser("shareduser");

    var preferences = new UserPreferences_All_True
    {
        Theme = "light",
        Language = "german",
        NotificationsEnabled = false,
        EmailDigestFrequency = "daily"
    };

    // The same preferences instance is attached to the shared user and also handed to the factory.
    user.Preferences = preferences;

    var largeOrder = TestDataFactory.CreateOrder(
        itemCount: 5,
        palletsPerItem: 5,
        measurementsPerPallet: 5,
        pointsPerMeasurement: 10,
        sharedTag: tag,
        sharedUser: user,
        sharedPreferences: preferences);

    // Strings are extended first, while the full graph is still reachable; the deep-level
    // references are removed afterwards.
    EnsureAllStringsBypassFixStr(largeOrder);
    ClearDeepLevelRefs(largeOrder);

    return new TestDataSet(name: "Large (5x5x5x10)", order: largeOrder, iidRefPercent: 20);
}
/// <summary>
/// Builds the "Repeated Strings (10 items)" data set: an order with 10 items, 2 pallets per item,
/// 2 measurements per pallet and 2 points per measurement, in which every item carries the same
/// product name and every pallet the same pallet code.
/// </summary>
/// <param name="resetId">When true, <c>TestDataFactory.ResetIdCounter()</c> is called before anything is created.</param>
/// <returns>The data set, labelled with an IId-ref percentage of 20.</returns>
private static TestDataSet CreateRepeatedStringsTestData(bool resetId = true)
{
    if (resetId)
    {
        TestDataFactory.ResetIdCounter();
    }

    var tag = TestDataFactory.CreateTag("RepeatedTag");
    var user = TestDataFactory.CreateUser("repeateduser");

    var preferences = new UserPreferences_All_True
    {
        Theme = "dark",
        Language = "hungarian",
        NotificationsEnabled = true,
        EmailDigestFrequency = "weekly"
    };

    // The same preferences instance is attached to the shared user and also handed to the factory.
    user.Preferences = preferences;

    var repeatedOrder = TestDataFactory.CreateOrder(
        itemCount: 10,
        palletsPerItem: 2,
        measurementsPerPallet: 2,
        pointsPerMeasurement: 2,
        sharedTag: tag,
        sharedUser: user,
        sharedPreferences: preferences);

    // Give every item the same ProductName and every pallet the same PalletCode so the payload
    // contains many equal strings. Both baselines are short ASCII (<= FixStrMaxLength), so the
    // string pass below appends the active LongStringSuffix to each of them and the non-ASCII
    // content of these fields comes from the selected charset only.
    // NOTE(review): per the original author's note this targets a ~20% repeated-string share and
    // exercises string-interning deduplication on the Default preset — not verifiable from this file.
    foreach (var orderItem in repeatedOrder.Items)
    {
        orderItem.Status = TestStatus.Processing;
        orderItem.ProductName = "ProductName";

        foreach (var itemPallet in orderItem.Pallets)
        {
            itemPallet.PalletCode = "PalletCode";
        }
    }

    // Strings are extended first, while the full graph is still reachable; the deep-level
    // references are removed afterwards.
    EnsureAllStringsBypassFixStr(repeatedOrder);
    ClearDeepLevelRefs(repeatedOrder);

    return new TestDataSet(name: "Repeated Strings (10 items)", order: repeatedOrder, iidRefPercent: 20);
}
/// <summary>
/// Builds the "Deep Nested (2x4x4x8)" data set: an order with 2 items, 4 pallets per item,
/// 4 measurements per pallet and 8 points per measurement, sharing one tag, one user,
/// one category and one preferences object.
/// </summary>
/// <param name="resetId">When true, <c>TestDataFactory.ResetIdCounter()</c> is called before anything is created.</param>
/// <returns>The data set, labelled with an IId-ref percentage of 20.</returns>
private static TestDataSet CreateDeepNestedTestData(bool resetId = true)
{
    if (resetId)
    {
        TestDataFactory.ResetIdCounter();
    }

    var tag = TestDataFactory.CreateTag("DeepTag");
    var user = TestDataFactory.CreateUser("deepuser");
    var category = TestDataFactory.CreateCategory("DeepCategory");

    var preferences = new UserPreferences_All_True
    {
        Theme = "light",
        Language = "french",
        NotificationsEnabled = false,
        EmailDigestFrequency = "monthly"
    };

    // The same preferences instance is attached to the shared user and also handed to the factory.
    user.Preferences = preferences;

    var deepOrder = TestDataFactory.CreateOrder(
        itemCount: 2,
        palletsPerItem: 4,
        measurementsPerPallet: 4,
        pointsPerMeasurement: 8,
        sharedTag: tag,
        sharedUser: user,
        sharedPreferences: preferences,
        sharedCategory: category);

    // Strings are extended first, while the full graph is still reachable; the deep-level
    // references are removed afterwards.
    EnsureAllStringsBypassFixStr(deepOrder);
    ClearDeepLevelRefs(deepOrder);

    return new TestDataSet(name: "Deep Nested (2x4x4x8)", order: deepOrder, iidRefPercent: 20);
}
/// <summary>
/// Nulls out selected shared references below the item level of <paramref name="order"/>:
/// <c>Category</c> on every pallet, <c>Tag</c>/<c>Operator</c> on every measurement and
/// <c>Tag</c>/<c>Verifier</c> on every point. Pallet-level <c>Tag</c> and <c>Inspector</c> are left intact.
/// </summary>
/// <param name="order">Order whose items, pallets, measurements and points are modified in place.</param>
private static void ClearDeepLevelRefs(TestOrder_All_True order)
{
    // Intent (per the data-set design): the pallet-level Tag and Inspector references stay in place
    // because they are meant to provide most of the ~20% IId-ref share; everything deeper is
    // cleared so that deep-tree references do not push that share higher.
    foreach (var orderItem in order.Items)
    {
        foreach (var currentPallet in orderItem.Pallets)
        {
            // Only Category is dropped at pallet level; Tag and Inspector are deliberately kept.
            currentPallet.Category = null;

            foreach (var currentMeasurement in currentPallet.Measurements)
            {
                currentMeasurement.Tag = null;
                currentMeasurement.Operator = null;

                foreach (var measurementPoint in currentMeasurement.Points)
                {
                    measurementPoint.Tag = null;
                    measurementPoint.Verifier = null;
                }
            }
        }
    }
}
/// <summary>
/// Walks the object graph reachable from <paramref name="root"/> and replaces the value of every
/// readable and writable public instance <see cref="string"/> property with
/// <c>ToLongString(value)</c>.
/// </summary>
/// <remarks>
/// <para>The traversal is iterative (explicit stack) and tracks visited instances by reference, so
/// shared objects are processed once and cyclic graphs terminate.</para>
/// <para>Objects implementing <see cref="IEnumerable"/> are expanded into their non-null elements
/// and are not reflected on themselves. Strings that are direct collection elements are not
/// replaced — only property values are rewritten.</para>
/// <para>Indexer properties are skipped: they cannot be read without index arguments, and calling
/// <see cref="PropertyInfo.GetValue(object)"/> on them would throw
/// <see cref="TargetParameterCountException"/>. Value-typed properties (enums included) are not
/// descended into.</para>
/// </remarks>
/// <param name="root">Root of the graph to process; <c>null</c> is a no-op.</param>
private static void EnsureAllStringsBypassFixStr(object? root)
{
    if (root is null) return;

    var visited = new HashSet<object>(ReferenceComparer.Instance);
    var pending = new Stack<object>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();
        if (!visited.Add(current)) continue;

        // A string reached here came out of a collection or an object-typed property. It is a
        // leaf: it is immutable and exposes no writable string property, so there is nothing to do.
        if (current is string) continue;

        if (current is IEnumerable sequence)
        {
            foreach (var element in sequence)
            {
                if (element is not null)
                    pending.Push(element);
            }
            continue;
        }

        foreach (var property in current.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public))
        {
            if (!property.CanRead) continue;

            // Indexers (this[...]) require arguments; reading them without any would throw.
            if (property.GetIndexParameters().Length > 0) continue;

            var propertyType = property.PropertyType;

            if (propertyType == typeof(string))
            {
                // Read-only string properties cannot be rewritten and have nothing to descend into.
                if (property.CanWrite)
                {
                    var value = (string?)property.GetValue(current);
                    property.SetValue(current, ToLongString(value));
                }
                continue;
            }

            // Enums are value types too, so this single check covers both.
            if (propertyType.IsValueType) continue;

            var child = property.GetValue(current);
            if (child is not null)
                pending.Push(child);
        }
    }
}
/// <summary>
/// Returns the benchmark form of <paramref name="value"/>:
/// a null or empty value becomes <c>"Benchmark_String_Value"</c> followed by <c>LongStringSuffix</c>;
/// a value longer than <c>FixStrMaxLength</c> chars is returned unchanged;
/// any other value gets <c>LongStringSuffix</c> appended once.
/// </summary>
/// <param name="value">Current property value; may be null.</param>
/// <returns>The possibly extended string; never null.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ToLongString(string? value)
{
    return value switch
    {
        // Nothing to extend — use the fixed placeholder as the base.
        null or "" => "Benchmark_String_Value" + LongStringSuffix,

        // Already past the short-string threshold — leave as is.
        { Length: > FixStrMaxLength } => value,

        // Short value — append the active suffix exactly once.
        _ => value + LongStringSuffix,
    };
}
}
/// <summary>
/// A named benchmark payload: the root object to serialize plus descriptive metadata.
/// </summary>
/// <typeparam name="TOrder">Type of the root object held in <see cref="Order"/>.</typeparam>
public class TestDataSet<TOrder>
{
    /// <summary>
    /// Creates a data set.
    /// </summary>
    /// <param name="name">Base name of the data set.</param>
    /// <param name="order">Root object of the data set.</param>
    /// <param name="iidRefPercent">Percentage of IId shared references in the data (0-100); 0 means "not set".</param>
    public TestDataSet(string name, TOrder order, int iidRefPercent = 0)
    {
        Name = name;
        Order = order;
        IIdRefPercent = iidRefPercent;
    }

    /// <summary>Base name of the data set, without the IId-ref annotation.</summary>
    public string Name { get; }

    /// <summary>Root object of the data set.</summary>
    public TOrder Order { get; }

    /// <summary>
    /// Percentage of IId shared references in the data (0-100).
    /// Higher values mean more deduplication benefit for Default mode.
    /// </summary>
    public int IIdRefPercent { get; }

    /// <summary>
    /// Simple name of the runtime type of <see cref="Order"/>. When <see cref="Order"/> is null the
    /// declared type <typeparamref name="TOrder"/> is used instead, so the property never throws.
    /// </summary>
    public string TypeName => Order?.GetType().Name ?? typeof(TOrder).Name;

    /// <summary>
    /// Gets display name including IId ref percentage if set.
    /// </summary>
    public string DisplayName => IIdRefPercent > 0
        ? $"{Name} [{IIdRefPercent}% IId refs]"
        : Name;
}
/// <summary>
/// Non-generic form of <see cref="TestDataSet{TOrder}"/> with the order type fixed to
/// <see cref="TestOrder_All_True"/>.
/// </summary>
public sealed class TestDataSet : TestDataSet<TestOrder_All_True>
{
    /// <summary>
    /// Creates a data set by forwarding all arguments unchanged to the generic base constructor.
    /// </summary>
    /// <param name="name">Base name of the data set.</param>
    /// <param name="order">Root order of the data set.</param>
    /// <param name="iidRefPercent">Percentage of IId shared references in the data (0-100).</param>
    public TestDataSet(string name, TestOrder_All_True order, int iidRefPercent = 0) : base(name, order, iidRefPercent) { }
}