using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Text clean-up helpers that spell out numbers and a few symbols as Hungarian words.
/// </summary>
public static class TextHelper
{
    /// <summary>
    /// Symbols that are replaced by a spoken Hungarian word. Kept as an ordered array
    /// (not a dictionary) so the replacements always run in the order listed here.
    /// Further entries such as "," -> " vessző " or " - " -> " mínusz " can be appended.
    /// </summary>
    private static readonly (string Symbol, string Spoken)[] SpecialCharacterMap =
    {
        ("/", " per "),
        ("@", " kukac "),
        ("#", " kettőskereszt "),
        ("&", " és "),
        (" = ", " egyenlő "),
    };

    /// <summary>Word used for the digits 1-9; index 2 is the attributive form "két".</summary>
    private static readonly string[] Units =
        { "", "egy", "két", "három", "négy", "öt", "hat", "hét", "nyolc", "kilenc" };

    /// <summary>Tens when no unit digit follows (10, 20, 30, ...).</summary>
    private static readonly string[] RoundTens =
        { "", "tíz", "húsz", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };

    /// <summary>Tens when a unit digit follows; only 10 and 20 change shape ("tizen", "huszon").</summary>
    private static readonly string[] CompoundTens =
        { "", "tizen", "huszon", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };

    /// <summary>Name of each group of three digits, indexed by the power of 1000.</summary>
    private static readonly string[] ScaleNames =
        { "", "ezer", "millió", "milliárd", "billió", "billiárd", "trillió" };

    /// <summary>Denominator word indexed by the number of fractional digits (1-9).</summary>
    private static readonly string[] FractionUnits =
    {
        "", "tized", "század", "ezred", "tízezred", "százezred",
        "milliomod", "tízmilliomod", "százmilliomod", "milliárdod",
    };

    // [0-9] instead of \d: \d also matches non-ASCII Unicode digits, which the numeric parser rejects.
    private static readonly Regex DatePattern = new(@"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b");
    private static readonly Regex BracketedPattern = new(@"\[[^\]]*\]");
    private static readonly Regex NumberPattern = new(@"\b(?<whole>[0-9]+)(?:\.(?<fraction>[0-9]+))?\b");

    /// <summary>
    /// Removes bracketed passages, spells out decimal and integer numbers in Hungarian and
    /// replaces the symbols listed in <see cref="SpecialCharacterMap"/> with words.
    /// </summary>
    /// <param name="text">The text to convert. May be null or empty.</param>
    /// <returns>
    /// The converted text; null or empty input is returned unchanged. Dates in the form
    /// 2024.05.06 are kept as written. Numbers that are too long to be named (more than
    /// 9 fractional digits or a value beyond the 64-bit range) are left as digits.
    /// </returns>
    public static string ReplaceNumbersAndSpecialCharacters(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // Swap dates for placeholders so the number pass below does not touch them.
        // Only dates are protected; e-mail addresses and URLs are converted like any other text.
        var protectedParts = new Dictionary<string, string>();
        text = DatePattern.Replace(text, match =>
        {
            string key = $"__DATE__{protectedParts.Count}__";
            protectedParts[key] = match.Value;
            return key;
        });

        // Remove anything between [] including the brackets themselves.
        text = BracketedPattern.Replace(text, "");

        // One pass handles both decimals and integers; the optional fraction is tried first,
        // so "3.14" is read as a decimal rather than as two integers.
        text = NumberPattern.Replace(text, SpellNumberMatch);

        foreach (var (symbol, spoken) in SpecialCharacterMap)
        {
            text = text.Replace(symbol, spoken);
        }

        foreach (var kvp in protectedParts)
        {
            text = text.Replace(kvp.Key, kvp.Value);
        }

        return text;
    }

    /// <summary>
    /// Spells out an integer in Hungarian, e.g. 22 -> "huszonkettő", 2001 -> "kétezer-egy".
    /// </summary>
    /// <param name="number">The value to spell out; negative values are prefixed with "mínusz ".</param>
    /// <returns>The Hungarian number word; "nulla" for zero.</returns>
    public static string NumberToHungarian(int number)
    {
        if (number < 0)
        {
            // Widen before negating so int.MinValue does not overflow.
            return "mínusz " + SpellOutNonNegative(-(long)number);
        }

        return SpellOutNonNegative(number);
    }

    /// <summary>Removes every tab character from <paramref name="text"/>.</summary>
    /// <param name="text">The text to clean. May be null or empty.</param>
    /// <returns>The text without tabs; null or empty input is returned unchanged.</returns>
    public static string RemoveTabs(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return text.Replace("\t", "");
    }

    /// <summary>Converts one match of <see cref="NumberPattern"/> into Hungarian words.</summary>
    private static string SpellNumberMatch(Match match)
    {
        if (!TryParseDigits(match.Groups["whole"].Value, out long whole))
        {
            return match.Value;
        }

        Group fraction = match.Groups["fraction"];
        if (!fraction.Success)
        {
            return SpellOutNonNegative(whole);
        }

        string fractionDigits = fraction.Value;
        if (fractionDigits.Length >= FractionUnits.Length
            || !TryParseDigits(fractionDigits, out long fractionValue))
        {
            return match.Value;
        }

        return $"{SpellOutNonNegative(whole)} egész {SpellOutNonNegative(fractionValue)} {FractionUnits[fractionDigits.Length]}";
    }

    /// <summary>Parses a run of ASCII digits; returns false when it does not fit into a long.</summary>
    private static bool TryParseDigits(string digits, out long value)
    {
        return long.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out value);
    }

    /// <summary>Spells out a non-negative value by naming each group of three digits.</summary>
    private static string SpellOutNonNegative(long value)
    {
        if (value == 0)
        {
            return "nulla";
        }

        var groups = new List<string>();
        long remaining = value;
        for (int scale = 0; remaining > 0; scale++, remaining /= 1000)
        {
            int group = (int)(remaining % 1000);
            if (group != 0)
            {
                groups.Insert(0, SpellGroup(group, scale));
            }
        }

        // Hungarian orthography: written as one word up to 2000, hyphenated by groups above it.
        return string.Join(value > 2000 ? "-" : "", groups);
    }

    /// <summary>Names one three-digit group (1-999) together with its scale word.</summary>
    private static string SpellGroup(int group, int scale)
    {
        if (scale == 0)
        {
            return SpellBelowThousand(group, attributive: false);
        }

        if (scale == 1 && group == 1)
        {
            // 1000 is plain "ezer", not "egyezer".
            return "ezer";
        }

        return SpellBelowThousand(group, attributive: true) + ScaleNames[scale];
    }

    /// <summary>
    /// Spells out 1-999. A final 2 is "kettő" when the word stands alone and "két"
    /// when a scale word follows (<paramref name="attributive"/> is true).
    /// </summary>
    private static string SpellBelowThousand(int value, bool attributive)
    {
        var result = new StringBuilder();
        int hundreds = value / 100;
        int tens = value % 100 / 10;
        int ones = value % 10;

        if (hundreds > 0)
        {
            if (hundreds > 1)
            {
                result.Append(Units[hundreds]);
            }

            result.Append("száz");
        }

        if (tens > 0)
        {
            result.Append(ones == 0 ? RoundTens[tens] : CompoundTens[tens]);
        }

        if (ones > 0)
        {
            result.Append(ones == 2 && !attributive ? "kettő" : Units[ones]);
        }

        return result.ToString();
    }
}