using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Tests.TestModels;
/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkStringSupport.ToLongString</c>. The benchmark applies the configured suffix to every
/// short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection in
/// <c>BenchmarkStringSupport.EnsureAllStringsBypassFixStr</c>), producing long-string benchmark payloads
/// with a controlled UTF-8 content profile.
/// <para>
/// Switch by assigning one of these constants to the active suffix
/// (<c>BenchmarkStringSupport.LongStringSuffix</c>) from the interactive
/// Settings → Charset submenu (or programmatically). The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
/// </para>
/// </summary>
public static class CharsetSuffixes
{
    // ═════════════════════════════════════════════════════════════════════════
    // Length contract — measured in UTF-16 chars, never in UTF-8 bytes:
    //   *Short : 40 chars  = 8 × the 5-char base → StringSmall / StringAscii tier
    //   *Long  : 280 chars = 7 × *Short          → StringMedium / StringAscii tier
    //
    // Every charset shares these two lengths, so the only thing that varies between
    // presets is the UTF-8 byte content (1-byte ASCII, 2-byte Latin / Cyrillic,
    // 3-byte CJK, or a mix). Wire-size and encode/decode deltas between presets are
    // therefore charset effects rather than length effects.
    //
    // All values are produced by constant concatenation, which keeps them compile-time
    // constants (usable as attribute arguments / DataRow sources).
    // ═════════════════════════════════════════════════════════════════════════

    /// <summary>
    /// Empty suffix: string property values keep their short baseline form and stay on the
    /// FixStrAscii / short-string fast-path. Use it to stress the short-string code paths.
    /// </summary>
    public const string AsciiFix = "";

    // ── Pure ASCII: every byte is below 0x80 ──
    // Tier: StringAscii (167) — byte→char SIMD widening, no UTF-8 decode needed.
    // UTF-8 size: 5 bytes per base → 40 bytes (Short), 280 bytes (Long); chars and bytes map 1:1.
    private const string AsciiBase = " quic"; // 5 ASCII chars

    public const string AsciiShort =
        (AsciiBase + AsciiBase) + (AsciiBase + AsciiBase) +
        (AsciiBase + AsciiBase) + (AsciiBase + AsciiBase); // 8 × base = 40 chars

    public const string AsciiLong =
        (AsciiShort + AsciiShort) + (AsciiShort + AsciiShort) +
        (AsciiShort + AsciiShort) + AsciiShort; // 7 × Short = 280 chars

    // ── Latin1 (Hungarian proxy) ──
    // Tier: StringSmall (91) for Short, StringMedium (94) for Long.
    // UTF-8 size: the base is 7 bytes (space 1 + á 2 + r 1 + v 1 + í 2)
    //   → 56 bytes (Short), 392 bytes (Long).
    // The base interleaves 1-byte and 2-byte characters; it contains only á and í
    // (the Latin-2 letters ő/ű are not part of it).
    private const string Latin1Base = " árví"; // 5 chars: space, á, r, v, í

    public const string Latin1Fix = Latin1Base; // 5 chars (FixStr-lean profile)

    public const string Latin1Short =
        (Latin1Base + Latin1Base) + (Latin1Base + Latin1Base) +
        (Latin1Base + Latin1Base) + (Latin1Base + Latin1Base); // 8 × base = 40 chars

    public const string Latin1Long =
        (Latin1Short + Latin1Short) + (Latin1Short + Latin1Short) +
        (Latin1Short + Latin1Short) + Latin1Short; // 7 × Short = 280 chars

    // ── CJK BMP (Chinese / Japanese / Korean, Basic Multilingual Plane) ──
    // Tier: StringSmall (91) for Short, StringMedium (94) for Long.
    // UTF-8 size: the base is 13 bytes (1 ASCII space + 4 × 3-byte CJK)
    //   → 104 bytes (Short), 728 bytes (Long).
    // Homogeneous 3-byte runs — the primary win region for a SIMD multi-byte transcoder.
    private const string CjkBmpBase = " 你好世界"; // 5 chars: space + 4 Chinese characters

    public const string CjkBmpShort =
        (CjkBmpBase + CjkBmpBase) + (CjkBmpBase + CjkBmpBase) +
        (CjkBmpBase + CjkBmpBase) + (CjkBmpBase + CjkBmpBase); // 8 × base = 40 chars

    public const string CjkBmpLong =
        (CjkBmpShort + CjkBmpShort) + (CjkBmpShort + CjkBmpShort) +
        (CjkBmpShort + CjkBmpShort) + CjkBmpShort; // 7 × Short = 280 chars

    // ── Cyrillic (Russian / Ukrainian) ──
    // Tier: StringSmall (91) for Short, StringMedium (94) for Long.
    // UTF-8 size: the base is 9 bytes (1 ASCII space + 4 × 2-byte Cyrillic)
    //   → 72 bytes (Short), 504 bytes (Long).
    // Homogeneous 2-byte runs — a different shape from the interspersed Latin1 preset.
    private const string CyrillicBase = " Прив"; // 5 chars: space + 4 Cyrillic letters

    public const string CyrillicShort =
        (CyrillicBase + CyrillicBase) + (CyrillicBase + CyrillicBase) +
        (CyrillicBase + CyrillicBase) + (CyrillicBase + CyrillicBase); // 8 × base = 40 chars

    public const string CyrillicLong =
        (CyrillicShort + CyrillicShort) + (CyrillicShort + CyrillicShort) +
        (CyrillicShort + CyrillicShort) + CyrillicShort; // 7 × Short = 280 chars

    // ── Mixed (several code pages inside one payload) ──
    // Tier: StringSmall (91) for Short, StringMedium (94) for Long.
    // UTF-8 size: the base is 10 bytes (1 ASCII space + 1 × 2-byte Hungarian
    //   + 1 × 3-byte CJK + 2 × 2-byte Cyrillic) → 80 bytes (Short), 560 bytes (Long).
    // No surrogate pairs, so the UTF-16 length stays predictable while a single payload
    // still covers 1-, 2- and 3-byte sequences.
    private const string MixedBase = " á你Пй"; // 5 chars: space, Hungarian á, Chinese 你, Cyrillic П + й

    public const string MixedShort =
        (MixedBase + MixedBase) + (MixedBase + MixedBase) +
        (MixedBase + MixedBase) + (MixedBase + MixedBase); // 8 × base = 40 chars

    public const string MixedLong =
        (MixedShort + MixedShort) + (MixedShort + MixedShort) +
        (MixedShort + MixedShort) + MixedShort; // 7 × Short = 280 chars
}
// ============================================================================================
// Cross-family shared state. The charset suffix is a global benchmark configuration — settable
// once via the interactive Menu, applied uniformly to every family's data construction. Lives in
// a non-generic helper so it ISN'T per-closed-generic (which would cause the Menu setter to affect
// only one family). The forwarding property preserves the existing Menu.cs API surface.
// ============================================================================================
internal static class BenchmarkStringSupport
{
internal const int FixStrMaxLength = 31;
internal static string LongStringSuffix = CharsetSuffixes.Latin1Long;
private sealed class ReferenceComparer : IEqualityComparer