using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;

namespace AyCode.Core.Tests.TestModels;

/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkStringSupport.ToLongString</c>. The benchmark applies the configured suffix to every
/// short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection in
/// <c>BenchmarkStringSupport.EnsureAllStringsBypassFixStr</c>), producing long-string benchmark payloads
/// with a controlled UTF-8 content profile.
/// <para>
/// Switch by assigning to <c>BenchmarkTestDataProvider.LongStringSuffix</c> from the interactive
/// Settings → Charset submenu (or programmatically). The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
/// </para>
/// </summary>
public static class CharsetSuffixes
{
    // ─────────────────────────────────────────────────────────────────────────
    // Consistent length across all charsets (UTF-16 char count, NOT UTF-8 byte count):
    //   *Short =  40 char (5-char base × 8 repetitions)  → StringSmall / StringAscii tier
    //   *Long  = 280 char (Short × 7)                    → StringMedium / StringAscii tier
    //
    // Same length across charsets isolates the workload variable to UTF-8 byte content
    // (1-byte ASCII vs 2-byte Latin1 / Cyrillic vs 3-byte CJK vs mixed) — wire-size and
    // encode/decode cost differences are pure charset effects, not length effects.
    //
    // Const-concat for compile-time evaluation (usable as attribute / DataRow source).
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Empty suffix — baseline string property values stay short, hitting the
    /// FixStrAscii / short-string fast-path. Stress-test for short-string code paths.
    /// </summary>
    public const string Latin1FixAscii = "";

    // ── Pure ASCII (every byte < 0x80) ──
    // Tier: StringAscii (167) — byte→char SIMD widening, zero UTF-8 decode.
    // UTF-8 byte count: 40 byte (Short), 280 byte (Long) — 1:1 char:byte.
    private const string AsciiBase = " quic"; // 5 char ASCII
    public const string AsciiShort = AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase; // 40 char
    public const string AsciiLong = AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort; // 280 char

    // ── Latin1 (Hungarian proxy) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 56 byte Short (5 char base = 7 byte UTF-8: space + á (2 byte) + r + v + í (2 byte)),
    // 392 byte Long. The base contains only á and í as multi-byte characters (no ő/ű), interspersed
    // with single-byte ASCII.
    private const string Latin1Base = " árví"; // 5 char (space + á + r + v + í) — multi-byte mix
    public const string Latin1Short = Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base; // 40 char
    public const string Latin1Long = Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short; // 280 char

    // ── CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 104 byte Short (5 char base = 13 byte UTF-8: 1 ASCII space + 4×3-byte CJK),
    // 728 byte Long. Homogeneous 3-byte runs — primary win region for SIMD multi-byte transcoder.
    private const string CjkBmpBase = " 你好世界"; // 5 char (space + 4 Chinese)
    public const string CjkBmpShort = CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase; // 40 char
    public const string CjkBmpLong = CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort; // 280 char

    // ── Cyrillic (Russian / Ukrainian) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 72 byte Short (5 char base = 9 byte UTF-8: 1 ASCII + 4×2-byte Cyrillic),
    // 504 byte Long. Homogeneous 2-byte runs — different shape than Latin1 interspersed.
    private const string CyrillicBase = " Прив"; // 5 char (space + 4 Cyrillic)
    public const string CyrillicShort = CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase; // 40 char
    public const string CyrillicLong = CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort; // 280 char

    // ── Mixed (multi-codepage in one payload) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 80 byte Short (5 char base = 10 byte UTF-8: 1 ASCII + 1×2-byte Hungarian
    // + 1×3-byte CJK + 2×2-byte Cyrillic), 560 byte Long. No surrogate pairs (keeps UTF-16
    // length predictable); cross-tier transcoder coverage in one payload.
    private const string MixedBase = " á你Пй"; // 5 char (space + Hungarian + Chinese + 2× Cyrillic)
    public const string MixedShort = MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase; // 40 char
    public const string MixedLong = MixedShort + MixedShort + MixedShort + MixedShort + MixedShort + MixedShort + MixedShort; // 280 char
}

// ============================================================================================
// Cross-family shared state. The charset suffix is a global benchmark configuration — settable
// once via the interactive Menu, applied uniformly to every family's data construction. Lives in
// a non-generic helper so it ISN'T per-closed-generic (which would cause the Menu setter to affect
// only one family). The BenchmarkTestDataProvider.LongStringSuffix forwarding
// property preserves the existing Menu.cs API surface.
// ============================================================================================

/// <summary>
/// Non-generic holder for the benchmark's string-augmentation state and logic: the active suffix
/// (<see cref="LongStringSuffix"/>) and the reflection walk that appends it to short string
/// properties (<see cref="EnsureAllStringsBypassFixStr"/>).
/// </summary>
internal static class BenchmarkStringSupport
{
    /// <summary>
    /// Length threshold in UTF-16 chars: values longer than this are returned unchanged by
    /// <see cref="ToLongString"/>; values at or below it get the suffix appended.
    /// </summary>
    internal const int FixStrMaxLength = 31;

    /// <summary>Suffix appended to short string values. Defaults to <c>CharsetSuffixes.Latin1Long</c>.</summary>
    internal static string LongStringSuffix = CharsetSuffixes.Latin1Long;

    /// <summary>
    /// Reference-identity comparer for the visited set, so each object instance is processed once
    /// regardless of any <c>Equals</c>/<c>GetHashCode</c> overrides on the visited types.
    /// </summary>
    private sealed class ReferenceComparer : IEqualityComparer<object>
    {
        public static readonly ReferenceComparer Instance = new();

        public new bool Equals(object? x, object? y) => ReferenceEquals(x, y);

        public int GetHashCode(object obj) => RuntimeHelpers.GetHashCode(obj);
    }

    /// <summary>
    /// Walks the object graph reachable from <paramref name="root"/> (iteratively, with an explicit
    /// stack) and rewrites every public, readable and writable instance <c>string</c> property
    /// through <see cref="ToLongString"/>.
    /// </summary>
    /// <param name="root">Graph entry point; <c>null</c> is a no-op.</param>
    /// <remarks>
    /// Enumerables (other than strings) are expanded into their non-null items and are not
    /// themselves reflected over. Value-typed properties are not descended into. Indexer
    /// properties are skipped because they cannot be read without index arguments.
    /// </remarks>
    internal static void EnsureAllStringsBypassFixStr(object? root)
    {
        if (root is null) return;

        var visited = new HashSet<object>(ReferenceComparer.Instance);
        var pending = new Stack<object>();
        pending.Push(root);

        while (pending.Count > 0)
        {
            var current = pending.Pop();

            // Strings are immutable leaves (e.g. items of a string collection): nothing to rewrite
            // in place and no properties worth reflecting over.
            if (current is string) continue;

            // Shared references and cycles: handle each instance exactly once.
            if (!visited.Add(current)) continue;

            if (current is IEnumerable sequence)
            {
                foreach (var element in sequence)
                {
                    if (element is not null) pending.Push(element);
                }

                continue;
            }

            foreach (var property in current.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public))
            {
                if (!property.CanRead) continue;

                // Indexers need arguments; GetValue without them throws TargetParameterCountException.
                if (property.GetIndexParameters().Length > 0) continue;

                var propertyType = property.PropertyType;

                if (propertyType == typeof(string))
                {
                    if (!property.CanWrite) continue;

                    var text = (string?)property.GetValue(current);
                    property.SetValue(current, ToLongString(text));
                    continue;
                }

                // Enums are value types too, so this single check covers both.
                if (propertyType.IsValueType) continue;

                var child = property.GetValue(current);
                if (child is not null) pending.Push(child);
            }
        }
    }

    /// <summary>
    /// Produces the augmented value for a string property.
    /// </summary>
    /// <param name="value">Current property value; may be <c>null</c> or empty.</param>
    /// <returns>
    /// <c>"Benchmark_String_Value"</c> + <see cref="LongStringSuffix"/> for a null/empty input;
    /// the input unchanged when it is already longer than <see cref="FixStrMaxLength"/>;
    /// otherwise the input with <see cref="LongStringSuffix"/> appended.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static string ToLongString(string? value)
    {
        if (string.IsNullOrEmpty(value)) return "Benchmark_String_Value" + LongStringSuffix;
        if (value.Length > FixStrMaxLength) return value;
        return value + LongStringSuffix;
    }
}

// ============================================================================================
// Generic test-data provider.
// One closing-generic alias per family — see BenchmarkTestDataProvider
// (the _All_True family, MSTEST-compatible name) and BenchmarkTestDataProvider_All_False
// (the _All_False family, Phase 1 benchmark target). The five cell-creator methods +
// ClearDeepLevelRefs are written once on the generic base, using the constrained
// TestDataFactory<TOrder, ...> for per-family element creation.
// ============================================================================================

/// <summary>
/// Generic builder of the benchmark test-data cells (Small, Medium, Large, Repeated Strings,
/// Deep Nested) for one model family, selected by the ten type arguments.
/// </summary>
// NOTE(review): the constraint base types below are written without type arguments — confirm
// against their declarations in case any of them is itself generic.
public abstract class BenchmarkTestDataProvider<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>
    where TOrder : TestOrderBase, new()
    where TItem : TestOrderItemBase, new()
    where TPallet : TestPalletBase, new()
    where TMeasurement : TestMeasurementBase, new()
    where TPoint : TestMeasurementPointBase, new()
    where TTag : SharedTagBase, new()
    where TUser : SharedUserBase, new()
    where TCategory : SharedCategoryBase, new()
    where TMetadata : MetadataInfoBase, new()
    where TPreferences : UserPreferencesBase, new()
{
    // IId-ref share reported on every cell built by this provider.
    private const int CellIIdRefPercent = 20;

    /// <summary>
    /// Active long-string suffix appended to short string properties during benchmark data construction.
    /// Forwards to <c>BenchmarkStringSupport.LongStringSuffix</c> (a non-generic shared field) so
    /// the setter is family-agnostic — both <c>BenchmarkTestDataProvider.LongStringSuffix = …</c> and
    /// <c>BenchmarkTestDataProvider_All_False.LongStringSuffix = …</c> route to the same backing value.
    /// Without this forwarding, a per-closed-generic static field on the base would store the suffix
    /// independently per family — the Menu setter would only affect whichever alias it addressed.
    /// </summary>
    public static string LongStringSuffix
    {
        get => BenchmarkStringSupport.LongStringSuffix;
        set => BenchmarkStringSupport.LongStringSuffix = value;
    }

    // Shortcut alias for the matching factory closing-generic. Saves typing the 10-param cluster
    // on every Create* call inside this class.
    // NOTE(review): assumes TestDataFactory takes the same ten type parameters in the same order
    // as this class — confirm against its declaration.
    private static class Factory
    {
        public static void ResetIdCounter()
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.ResetIdCounter();

        public static TTag CreateTag(string? name = null)
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateTag(name);

        public static TUser CreateUser(string? username = null)
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateUser(username);

        public static TCategory CreateCategory(string? name = null)
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateCategory(name);

        public static TMetadata CreateMetadata(string? key = null, bool withChild = false)
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateMetadata(key, withChild);

        public static TOrder CreateOrder(
            int itemCount,
            int palletsPerItem,
            int measurementsPerPallet,
            int pointsPerMeasurement,
            TTag? sharedTag = null,
            TUser? sharedUser = null,
            TMetadata? sharedMetadata = null,
            TPreferences? sharedPreferences = null,
            TCategory? sharedCategory = null)
            => TestDataFactory<TOrder, TItem, TPallet, TMeasurement, TPoint, TTag, TUser, TCategory, TMetadata, TPreferences>.CreateOrder(
                itemCount, palletsPerItem, measurementsPerPallet, pointsPerMeasurement,
                sharedTag, sharedUser, sharedMetadata, sharedPreferences, sharedCategory);
    }

    /// <summary>
    /// Builds all five cells, in the order Small, Medium, Large, Repeated Strings, Deep Nested.
    /// </summary>
    /// <param name="resetId">When <c>true</c>, the factory id counter is reset before each cell is built.</param>
    /// <returns>A new list with the five cells.</returns>
    public static List<TestDataSet> CreateTestDataSets(bool resetId = true)
    {
        return new List<TestDataSet>
        {
            CreateSmallTestData(resetId),
            CreateMediumTestData(resetId),
            CreateLargeTestData(resetId),
            CreateRepeatedStringsTestData(resetId),
            CreateDeepNestedTestData(resetId)
        };
    }

    // Common tail of every cell creator: lengthen short strings, trim deep-level shared refs,
    // then wrap the order in a named cell.
    private static TestDataSet<TOrder> FinishCell(string name, TOrder order)
    {
        BenchmarkStringSupport.EnsureAllStringsBypassFixStr(order);
        ClearDeepLevelRefs(order);
        return new TestDataSet<TOrder>(name, order, iidRefPercent: CellIIdRefPercent);
    }

    // Creates a preferences object with the given values and attaches it to the shared user.
    private static TPreferences CreateSharedPreferences(
        TUser user, string theme, string language, bool notificationsEnabled, string emailDigestFrequency)
    {
        var preferences = new TPreferences
        {
            Theme = theme,
            Language = language,
            NotificationsEnabled = notificationsEnabled,
            EmailDigestFrequency = emailDigestFrequency
        };
        user.Preferences = preferences;
        return preferences;
    }

    private static TestDataSet CreateSmallTestData(bool resetId = true)
    {
        if (resetId) Factory.ResetIdCounter();

        var sharedTag = Factory.CreateTag("SharedTag");
        var sharedUser = Factory.CreateUser("shareduser");

        var order = Factory.CreateOrder(
            itemCount: 2,
            palletsPerItem: 2,
            measurementsPerPallet: 2,
            pointsPerMeasurement: 2,
            sharedTag: sharedTag,
            sharedUser: sharedUser);

        return FinishCell("Small (2x2x2x2)", order);
    }

    private static TestDataSet CreateMediumTestData(bool resetId = true)
    {
        if (resetId) Factory.ResetIdCounter();

        var sharedTag = Factory.CreateTag("SharedTag");
        var sharedUser = Factory.CreateUser("shareduser");
        var sharedMeta = Factory.CreateMetadata("shared", withChild: true);
        var sharedPreferences = CreateSharedPreferences(sharedUser, "dark", "hungarian", true, "weekly");

        var order = Factory.CreateOrder(
            itemCount: 3,
            palletsPerItem: 3,
            measurementsPerPallet: 3,
            pointsPerMeasurement: 4,
            sharedTag: sharedTag,
            sharedUser: sharedUser,
            sharedMetadata: sharedMeta,
            sharedPreferences: sharedPreferences);

        return FinishCell("Medium (3x3x3x4)", order);
    }

    private static TestDataSet CreateLargeTestData(bool resetId = true)
    {
        if (resetId) Factory.ResetIdCounter();

        var sharedTag = Factory.CreateTag("SharedTag");
        var sharedUser = Factory.CreateUser("shareduser");
        var sharedPreferences = CreateSharedPreferences(sharedUser, "light", "german", false, "daily");

        var order = Factory.CreateOrder(
            itemCount: 5,
            palletsPerItem: 5,
            measurementsPerPallet: 5,
            pointsPerMeasurement: 10,
            sharedTag: sharedTag,
            sharedUser: sharedUser,
            sharedPreferences: sharedPreferences);

        return FinishCell("Large (5x5x5x10)", order);
    }

    private static TestDataSet CreateRepeatedStringsTestData(bool resetId = true)
    {
        if (resetId) Factory.ResetIdCounter();

        var sharedTag = Factory.CreateTag("RepeatedTag");
        var sharedUser = Factory.CreateUser("repeateduser");
        var sharedPreferences = CreateSharedPreferences(sharedUser, "dark", "hungarian", true, "weekly");

        var order = Factory.CreateOrder(
            itemCount: 10,
            palletsPerItem: 2,
            measurementsPerPallet: 2,
            pointsPerMeasurement: 2,
            sharedTag: sharedTag,
            sharedUser: sharedUser,
            sharedPreferences: sharedPreferences);

        // Repeated string fields — ProductName on items + PalletCode on pallets. Both are common
        // across the hierarchy, exercising string-interning deduplication on the Default preset
        // (which has UseStringInterning = All). Targeting ~20% repeated-string share overall.
        // Baselines are short ASCII (≤ FixStrMaxLength) so EnsureAllStringsBypassFixStr appends the
        // active CharsetSuffix — the resulting payload's UTF-8 content profile is governed entirely
        // by the selected charset (not contaminated by hard-coded Hungarian baseline values).
        foreach (var item in order.Items)
        {
            item.Status = TestStatus.Processing;
            item.ProductName = "ProductName";

            foreach (var pallet in item.Pallets)
            {
                pallet.PalletCode = "PalletCode";
            }
        }

        return FinishCell("Repeated Strings (10 items)", order);
    }

    private static TestDataSet CreateDeepNestedTestData(bool resetId = true)
    {
        if (resetId) Factory.ResetIdCounter();

        var sharedTag = Factory.CreateTag("DeepTag");
        var sharedUser = Factory.CreateUser("deepuser");
        var sharedCategory = Factory.CreateCategory("DeepCategory");
        var sharedPreferences = CreateSharedPreferences(sharedUser, "light", "french", false, "monthly");

        var order = Factory.CreateOrder(
            itemCount: 2,
            palletsPerItem: 4,
            measurementsPerPallet: 4,
            pointsPerMeasurement: 8,
            sharedTag: sharedTag,
            sharedUser: sharedUser,
            sharedPreferences: sharedPreferences,
            sharedCategory: sharedCategory);

        return FinishCell("Deep Nested (2x4x4x8)", order);
    }

    private static void ClearDeepLevelRefs(TOrder order)
    {
        // Keep shared IId refs at the pallet level (Tag + Inspector) — these contribute the bulk of
        // the ~20% IId-ref share that the test data targets. Only Category is cleared at this level
        // (one-of-three clears keep the share moderate). The deeper measurement / point levels are
        // cleared entirely so deep-tree ref noise does not skew the share upward beyond ~20%.
        foreach (var item in order.Items)
        {
            foreach (var pallet in item.Pallets)
            {
                // pallet.Tag and pallet.Inspector are intentionally left in place (see above).
                pallet.Category = null;

                foreach (var measurement in pallet.Measurements)
                {
                    measurement.Tag = null;
                    measurement.Operator = null;

                    foreach (var point in measurement.Points)
                    {
                        point.Tag = null;
                        point.Verifier = null;
                    }
                }
            }
        }
    }
}

// ============================================================================================
// Closing-generic aliases for the provider. Same pattern as the factory: a bare-name class for
// MSTEST backward compatibility (kept on _All_True), and a _All_False suffix variant for the
// Phase 1 benchmark target. The static LongStringSuffix forwarding property lives on the
// generic base above — accessible identically through either alias (BenchmarkTestDataProvider.LongStringSuffix
// or BenchmarkTestDataProvider_All_False.LongStringSuffix), both routing to the same
// BenchmarkStringSupport.LongStringSuffix shared field. Symmetric API surface across
// families — no per-alias asymmetry.
// ============================================================================================

/// <summary>
/// _All_True family provider — preserves the bare-name API surface
/// (<c>BenchmarkTestDataProvider.CreateTestDataSets()</c>) that the SGen-vs-runtime compatibility
/// test depends on. <c>LongStringSuffix</c> is inherited from the generic base.
/// </summary>
public sealed class BenchmarkTestDataProvider : BenchmarkTestDataProvider<
    TestOrder_All_True,
    TestOrderItem_All_True,
    TestPallet_All_True,
    TestMeasurement_All_True,
    TestMeasurementPoint_All_True,
    SharedTag_All_True,
    SharedUser_All_True,
    SharedCategory_All_True,
    MetadataInfo_All_True,
    UserPreferences_All_True>
{
}

// _All_False family provider — Phase 1 benchmark target.
/// <summary>
/// Inherits the generic cell-creator methods unchanged; with <c>TOrder</c> closed over
/// <c>TestOrder_All_False</c>, the cell methods build <c>TestOrder_All_False</c> graphs.
/// </summary>
public sealed class BenchmarkTestDataProvider_All_False : BenchmarkTestDataProvider<
    TestOrder_All_False,
    TestOrderItem_All_False,
    TestPallet_All_False,
    TestMeasurement_All_False,
    TestMeasurementPoint_All_False,
    SharedTag_All_False,
    SharedUser_All_False,
    SharedCategory_All_False,
    MetadataInfo_All_False,
    UserPreferences_All_False>
{
}

// ============================================================================================
// TestDataSet — abstract metadata base + generic-ordered concrete. Orchestration code iterates
// over the base type (Name/DisplayName/TypeName/IIdRefPercent only); concrete consumers
// (CreateSerializers, Output binary-output dump) downcast to TestDataSet<TOrder> to access the
// typed Order.
// ============================================================================================

/// <summary>
/// Metadata base of a benchmark cell: its name, IId-ref share, and a type-keyed registry of the
/// variant graphs built for it.
/// </summary>
public abstract class TestDataSet
{
    /// <summary>Cell name as passed to the constructor.</summary>
    public string Name { get; }

    /// <summary>
    /// Percentage of IId shared references in the data (0-100).
    /// Higher values mean more deduplication benefit for Default mode.
    /// </summary>
    public int IIdRefPercent { get; }

    // Type-keyed variant registry. Phase 2 multi-variant dispatch: AcBinary's options preset
    // decides which variant graph it serializes (FastMode → _All_False, Default → _All_True),
    // while MemPack/MsgPack canonically use one (typically _All_True). The cells build all
    // known variants upfront and register them here so CreateSerializers can hand each benchmark
    // its matching graph instance.
    private readonly Dictionary<Type, object> _variants = new();

    /// <summary>Initializes the cell metadata.</summary>
    /// <param name="name">Cell name.</param>
    /// <param name="iidRefPercent">Percentage of IId shared references (0-100).</param>
    protected TestDataSet(string name, int iidRefPercent)
    {
        Name = name;
        IIdRefPercent = iidRefPercent;
    }

    /// <summary>Short type name describing the cell's primary order graph.</summary>
    public abstract string TypeName { get; }

    /// <summary>
    /// Gets display name including IId ref percentage if set.
    /// </summary>
    public string DisplayName => IIdRefPercent > 0 ? $"{Name} [{IIdRefPercent}% IId refs]" : Name;

    /// <summary>
    /// Register a variant graph for this cell. Called by builders. Idempotent on the same type
    /// (last-write-wins, no error) so an alias's primary registration is harmless even if
    /// cross-registration adds the same variant later.
    /// </summary>
    /// <typeparam name="T">Registry key; the static type of <paramref name="variant"/>.</typeparam>
    /// <param name="variant">Graph instance to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="variant"/> is <c>null</c>.</exception>
    public void RegisterVariant<T>(T variant) where T : class
        => _variants[typeof(T)] = variant ?? throw new ArgumentNullException(nameof(variant));

    /// <summary>
    /// Get a registered variant by type.
    /// </summary>
    /// <typeparam name="T">Variant type used as the registry key.</typeparam>
    /// <returns>The instance registered for <typeparamref name="T"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// No variant is registered for <typeparamref name="T"/> — fail-fast surfaces a mismatch
    /// between the variant a benchmark expects and what the cell-builder populated.
    /// </exception>
    public T GetOrder<T>() where T : class
    {
        if (_variants.TryGetValue(typeof(T), out var registered))
        {
            return (T)registered;
        }

        throw new InvalidOperationException($"Variant '{typeof(T).Name}' not registered for cell '{Name}' (registered: {string.Join(", ", _variants.Keys.Select(k => k.Name))})");
    }

    /// <summary>
    /// Check whether a variant is registered. Use to gate optional benchmarks that may not have
    /// their variant prepared in every cell.
    /// </summary>
    /// <typeparam name="T">Variant type used as the registry key.</typeparam>
    public bool HasOrder<T>() where T : class => _variants.ContainsKey(typeof(T));
}

/// <summary>
/// Concrete cell carrying a typed primary order graph, which it also registers as a variant
/// under <typeparamref name="TOrder"/>.
/// </summary>
/// <typeparam name="TOrder">Type of the primary order graph.</typeparam>
public sealed class TestDataSet<TOrder> : TestDataSet where TOrder : class
{
    /// <summary>Primary order graph of this cell.</summary>
    public TOrder Order { get; }

    /// <summary>Creates the cell and registers <paramref name="order"/> as its primary variant.</summary>
    /// <param name="name">Cell name.</param>
    /// <param name="order">Primary order graph.</param>
    /// <param name="iidRefPercent">Percentage of IId shared references (0-100); defaults to 0.</param>
    /// <exception cref="ArgumentNullException"><paramref name="order"/> is <c>null</c>.</exception>
    public TestDataSet(string name, TOrder order, int iidRefPercent = 0)
        : base(name, iidRefPercent)
    {
        // Fail here rather than later inside TypeName, which dereferences Order.
        Order = order ?? throw new ArgumentNullException(nameof(order));
        RegisterVariant(order); // primary registers itself
    }

    /// <summary>Runtime type name of <see cref="Order"/>.</summary>
    public override string TypeName => Order.GetType().Name;
}