using AyCode.Core.Serializers.Binaries;
using System.Collections;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace AyCode.Core.Tests.TestModels;
/// <summary>
/// Charset suffix presets for the per-property string augmentation in
/// <c>BenchmarkTestDataProvider.ToLongString</c>. The benchmark applies the configured suffix
/// to every short (≤ <c>FixStrMaxLength</c>) string property across the test data graph (via reflection
/// in <c>BenchmarkTestDataProvider.EnsureAllStringsBypassFixStr</c>), producing long-string
/// benchmark payloads with a controlled UTF-8 content profile.
/// </summary>
/// <remarks>
/// Switch presets by assigning one to <see cref="BenchmarkTestDataProvider.LongStringSuffix"/> from the
/// interactive Settings → Charset submenu (or programmatically). The active charset is recorded in the .LLM
/// markdown output header so per-charset bench files are self-documenting.
/// </remarks>
public static class CharsetSuffixes
{
/// <summary>
/// Empty suffix — short Hungarian baseline strings (e.g. "SharedTag_All_True") stay short, hitting
/// the FixStr fast-path. Stress-test for FixStr / short-string code paths.
/// </summary>
/// <remarks>
/// The baseline property values remain Hungarian; only the suffix is empty. Despite the "FixAscii"
/// name, this option does NOT change baseline values to ASCII — it suppresses the suffix that would
/// otherwise push every property past the FixStr boundary.
/// </remarks>
public const string Latin1FixAscii = "";
/// <summary>
/// Short Latin1 mixed (Hungarian, ~24 char) — typical European i18n payload, short
/// multi-byte runs. Below the 32-char FixStr boundary on the suffix alone, but combined with
/// baseline values pushes every property past it.
/// </summary>
public const string Latin1Short = " árvíztűrő tükörfúrógép";
/// <summary>
/// Long Latin1 mixed (~47 char) — exceeds the 32-char FixStr boundary on the suffix alone,
/// exercising the StringSmall+ tier path with Latin1 mixed content (Hungarian accented letters).
/// </summary>
public const string Latin1Long = " árvíztűrő tükörfúrógép a magyar betűzés tesztje";
/// <summary>
/// CJK BMP (Chinese / Japanese / Korean Basic Multilingual Plane) — long homogeneous
/// 3-byte UTF-8 runs. Primary win region for V4N2 Phase 3 SIMD multi-byte transcoder work.
/// </summary>
public const string CjkBmp = " 你好世界 こんにちは 안녕하세요";
/// <summary>
/// Cyrillic (Russian / Ukrainian / etc.) — long homogeneous 2-byte runs, different shape
/// than Hungarian mixed (where 2-byte chars are short interspersed runs).
/// </summary>
public const string Cyrillic = " Привет мир дорогой друг";
/// <summary>
/// Mixed full-spectrum (Hungarian + CJK + Cyrillic + emoji surrogate pairs) — multi-tier
/// coverage in one payload. Stresses surrogate-pair handling in the UTF-8 transcoder.
/// </summary>
public const string Mixed = " árvíz 你好 Привет 😀";
}
public static class BenchmarkTestDataProvider
{
/// <summary>
/// Upper length bound (in characters) for a string to still count as "short"; the
/// <c>CharsetSuffixes</c> documentation describes 32 characters as the FixStr boundary.
/// NOTE(review): presumably mirrors the serializer's own FixStr limit — confirm against the serializer.
/// </summary>
private const int FixStrMaxLength = 31;
/// <summary>
/// Active long-string suffix appended to short string properties during benchmark data construction.
/// Defaults to <see cref="CharsetSuffixes.Latin1Long"/> (~47-char Latin1 mixed) — backward-compatible
/// in spirit with the prior fixed default (Latin1 mixed family, ~32 char). Assign another
/// <see cref="CharsetSuffixes"/> preset to measure other UTF-8 content profiles.
/// </summary>
public static string LongStringSuffix = CharsetSuffixes.Latin1Long;
private sealed class ReferenceComparer : IEqualityComparer