using AyCode.Core.Serializers.Binaries; using System.Collections; using System.Reflection; using System.Runtime.CompilerServices; namespace AyCode.Core.Tests.TestModels; ///

/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkStringSupport.ToLongString</c>. The benchmark applies the configured suffix to every
/// short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection in
/// <c>BenchmarkStringSupport.EnsureAllStringsBypassFixStr</c>), producing long-string benchmark payloads
/// with a controlled UTF-8 content profile.
/// <para>
/// Switch presets by assigning one of these constants to the active long-string suffix setting
/// (<c>BenchmarkStringSupport.LongStringSuffix</c>), either from the interactive
/// Settings → Charset submenu or programmatically. The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
/// </para>
/// </summary>

public static class CharsetSuffixes
{
    // ─────────────────────────────────────────────────────────────────────────
    // Consistent length across all charsets (UTF-16 char count, NOT UTF-8 byte count):
    //   *Short =  40 char (5-char base × 8 repetitions) → StringSmall / StringAscii tier
    //   *Long  = 280 char (Short × 7)                    → StringMedium / StringAscii tier
    //
    // Same length across charsets isolates the workload variable to UTF-8 byte content
    // (1-byte ASCII vs 2-byte Latin1 / Cyrillic vs 3-byte CJK vs mixed) — wire-size and
    // encode/decode cost differences are pure charset effects, not length effects.
    //
    // Const-concat for compile-time evaluation (usable as attribute / DataRow source).
    // Every public member must therefore stay a `const string`.
    // ─────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Empty suffix — baseline string property values stay short, hitting the
    /// FixStrAscii / short-string fast-path. Stress-test for short-string code paths.
    /// </summary>
    public const string AsciiFix = "";

    // ── Pure ASCII (every byte < 0x80) ──
    // Tier: StringAscii (167) — byte→char SIMD widening, zero UTF-8 decode.
    // UTF-8 byte count: 40 byte (Short), 280 byte (Long) — 1:1 char:byte.
    private const string AsciiBase = " quic"; // 5 char ASCII
    public const string AsciiShort = AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase + AsciiBase; // 40 char
    public const string AsciiLong = AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort + AsciiShort; // 280 char

    // ── Latin1 (Hungarian proxy) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // The base uses only á and í (both 2-byte in UTF-8); it contains no Latin-2 ő/ű.
    // UTF-8 byte count: 5 char base = 7 byte (space 1 + á 2 + r 1 + v 1 + í 2),
    // so 56 byte Short (7 × 8) and 392 byte Long (56 × 7).
    private const string Latin1Base = " árví"; // 5 char (space + á + r + v + í) — multi-byte mix
    public const string Latin1Fix = Latin1Base; // 5 char (FixStr-lean profile)
    public const string Latin1Short = Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base + Latin1Base; // 40 char
    public const string Latin1Long = Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short + Latin1Short; // 280 char

    // ── CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 13 byte (1 ASCII space + 4 × 3-byte CJK),
    // so 104 byte Short and 728 byte Long.
    // Homogeneous 3-byte runs — primary win region for SIMD multi-byte transcoder.
    private const string CjkBmpBase = " 你好世界"; // 5 char (space + 4 Chinese)
    public const string CjkBmpShort = CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase + CjkBmpBase; // 40 char
    public const string CjkBmpLong = CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort + CjkBmpShort; // 280 char

    // ── Cyrillic (Russian / Ukrainian) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 9 byte (1 ASCII space + 4 × 2-byte Cyrillic),
    // so 72 byte Short and 504 byte Long.
    // Homogeneous 2-byte runs — different shape than Latin1 interspersed.
    private const string CyrillicBase = " Прив"; // 5 char (space + 4 Cyrillic)
    public const string CyrillicShort = CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase + CyrillicBase; // 40 char
    public const string CyrillicLong = CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort + CyrillicShort; // 280 char

    // ── Mixed (multi-codepage in one payload) ──
    // Tier: StringSmall (91) Short / StringMedium (94) Long.
    // UTF-8 byte count: 5 char base = 10 byte (1 ASCII space + 1 × 2-byte Hungarian
    // + 1 × 3-byte CJK + 2 × 2-byte Cyrillic), so 80 byte Short and 560 byte Long.
    // No surrogate pairs (keeps UTF-16 length predictable); cross-tier transcoder
    // coverage in one payload.
    private const string MixedBase = " á你Пй"; // 5 char (space + Hungarian + Chinese + 2× Cyrillic)
    public const string MixedShort = MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase + MixedBase; // 40 char
    public const string MixedLong = MixedShort + MixedShort + MixedShort + MixedShort + MixedShort + MixedShort + MixedShort; // 280 char
}

// ============================================================================================
// Cross-family shared state.
The charset suffix is a global benchmark configuration — settable // once via the interactive Menu, applied uniformly to every family's data construction. Lives in // a non-generic helper so it ISN'T per-closed-generic (which would cause the Menu setter to affect // only one family). The forwarding // property preserves the existing Menu.cs API surface. // ============================================================================================ internal static class BenchmarkStringSupport { internal const int FixStrMaxLength = 31; internal static string LongStringSuffix = CharsetSuffixes.Latin1Long; private sealed class ReferenceComparer : IEqualityComparer