SeemGen/Helpers/TextHelper.cs

148 lines
4.5 KiB
C#

using System;
using System.Text.RegularExpressions;
using System.Text;
using System.Collections.Generic;
/// <summary>
/// Text normalisation helpers: spells out numbers and selected symbols in Hungarian
/// and strips unwanted characters.
/// </summary>
public static class TextHelper
{
    // Special character replacement map (literal substring -> spoken Hungarian form).
    // No replacement value contains another key, so the replacement order does not matter.
    private static readonly Dictionary<string, string> SpecialCharacterMap = new()
    {
        { "/", " per " },
        { "@", " kukac " },
        { "#", " kettőskereszt " },
        { "&", " és " },
        //{ ",", " vessző " },
        { " = ", " egyenlő " }, // Example, you can add more
        //{ " - ", " mínusz " } // Example, you can add more
    };

    // The patterns use [0-9] rather than \d on purpose: in .NET, \d also matches non-ASCII
    // Unicode digits, which the numeric parsers reject with a FormatException.
    private static readonly Regex DateRegex = new(@"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b", RegexOptions.Compiled);
    private static readonly Regex BracketRegex = new(@"\[[^\]]*\]", RegexOptions.Compiled);
    private static readonly Regex DecimalRegex = new(@"\b[0-9]+\.[0-9]+\b", RegexOptions.Compiled);
    private static readonly Regex IntegerRegex = new(@"\b[0-9]+\b", RegexOptions.Compiled);

    // Citation forms of the digits; index 0 is unused because zero is handled separately.
    private static readonly string[] Units = { "", "egy", "kettő", "három", "négy", "öt", "hat", "hét", "nyolc", "kilenc" };

    // Round tens (10, 20, 30, ...).
    private static readonly string[] Tens = { "", "tíz", "húsz", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };

    // Scale words in descending order (Hungarian uses the long scale).
    private static readonly long[] ScaleValues =
    {
        1_000_000_000_000_000_000L, 1_000_000_000_000_000L, 1_000_000_000_000L, 1_000_000_000L, 1_000_000L, 1_000L
    };

    private static readonly string[] ScaleNames = { "trillió", "billiárd", "billió", "milliárd", "millió", "ezer" };

    // Denominator names indexed by (number of fractional digits - 1).
    private static readonly string[] FractionNames = { "tized", "század", "ezred", "tízezred", "százezred", "milliomod" };

    /// <summary>
    /// Rewrites <paramref name="text"/> so that numbers and the characters listed in
    /// <c>SpecialCharacterMap</c> are spelled out in Hungarian.
    /// </summary>
    /// <remarks>
    /// Steps, in order: dates of the form <c>2024.05.06</c> are shielded with placeholders,
    /// anything between square brackets is removed (brackets included), decimal numbers are
    /// spelled out, then integers, then special characters, and finally the shielded dates
    /// are restored verbatim.
    /// </remarks>
    /// <param name="text">Input text. <c>null</c> or empty input is returned unchanged.</param>
    /// <returns>The transformed text.</returns>
    public static string ReplaceNumbersAndSpecialCharacters(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // Parts that must survive untouched (currently only dates).
        // Email and URL protection is disabled; to re-enable it, shield matches of
        //   \b[\w\.-]+@[\w\.-]+\.\w+\b   (email)   and   https?://[^\s]+   (URL)
        // in the same way as the dates below, using __EMAIL__n__ / __URL__n__ placeholders.
        var protectedParts = new Dictionary<string, string>();

        // Protect dates like 2024.05.06. The digit inside the placeholder is surrounded by
        // underscores (word characters), so the \b-anchored number patterns cannot match it.
        text = DateRegex.Replace(text, match =>
        {
            string key = $"__DATE__{protectedParts.Count}__";
            protectedParts[key] = match.Value;
            return key;
        });

        // Remove anything between [] including the brackets themselves.
        text = BracketRegex.Replace(text, string.Empty);

        // Decimals must be handled BEFORE integers, otherwise "3.14" would be split in two.
        text = DecimalRegex.Replace(text, match =>
        {
            int dot = match.Value.IndexOf('.');
            return DecimalToHungarian(match.Value.Substring(0, dot), match.Value.Substring(dot + 1));
        });

        // Then the remaining integers.
        text = IntegerRegex.Replace(text, match => DigitsToHungarian(match.Value, attributive: false));

        // Replace special characters from the dictionary.
        foreach (var kvp in SpecialCharacterMap)
        {
            text = text.Replace(kvp.Key, kvp.Value);
        }

        // Replace dots surrounded by spaces (optional, disabled):
        //text = Regex.Replace(text, @" (?=\.)|(?<=\.) ", " pont ");

        // Restore protected parts.
        foreach (var kvp in protectedParts)
        {
            text = text.Replace(kvp.Key, kvp.Value);
        }

        return text;
    }

    /// <summary>
    /// Spells out an integer in Hungarian, e.g. 11 -> "tizenegy", 22 -> "huszonkettő",
    /// 2000 -> "kétezer".
    /// </summary>
    /// <remarks>
    /// Words are concatenated without the orthographic hyphen that written Hungarian uses
    /// between three-digit groups above 2000.
    /// </remarks>
    /// <param name="number">Any 32-bit integer; negative values are prefixed with "mínusz ".</param>
    /// <returns>The Hungarian words for <paramref name="number"/>.</returns>
    public static string NumberToHungarian(int number)
    {
        if (number < 0)
        {
            // Widen before negating so that int.MinValue does not overflow.
            return "mínusz " + ToWords(-(long)number, attributive: false);
        }

        return ToWords(number, attributive: false);
    }

    /// <summary>Removes every tab character from <paramref name="text"/>.</summary>
    /// <param name="text">Input text. <c>null</c> or empty input is returned unchanged.</param>
    /// <returns>The text without tab characters.</returns>
    public static string RemoveTabs(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return text.Replace("\t", ""); // Simple replace — remove all tab characters
    }

    // Spells out "<whole> egész <fraction> <denominator>" for a decimal given as two digit strings.
    private static string DecimalToHungarian(string integerDigits, string fractionDigits)
    {
        // Both parts are followed by a noun ("egész", "tized", ...), hence the attributive "két".
        string whole = DigitsToHungarian(integerDigits, attributive: true);

        if (fractionDigits.Length > FractionNames.Length)
        {
            // No denominator name is defined for this precision: read the digits one by one.
            return $"{whole} egész {SpellDigits(fractionDigits)}";
        }

        string fraction = DigitsToHungarian(fractionDigits, attributive: true);
        return $"{whole} egész {fraction} {FractionNames[fractionDigits.Length - 1]}";
    }

    // Converts a run of ASCII digits to words; runs too long for a 64-bit integer are read digit by digit.
    private static string DigitsToHungarian(string digits, bool attributive)
    {
        bool parsed = long.TryParse(
            digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out long value);

        return parsed ? ToWords(value, attributive) : SpellDigits(digits);
    }

    // Reads a digit string one digit at a time, separated by spaces ("105" -> "egy nulla öt").
    private static string SpellDigits(string digits)
    {
        var result = new StringBuilder();
        foreach (char c in digits)
        {
            if (result.Length > 0)
            {
                result.Append(' ');
            }

            int digit = c - '0';
            result.Append(digit == 0 ? "nulla" : Units[digit]);
        }

        return result.ToString();
    }

    // Spells out a non-negative value. When attributive is true a trailing 2 is rendered
    // as "két" (the form used before a noun) instead of "kettő".
    private static string ToWords(long value, bool attributive)
    {
        if (value == 0)
        {
            return "nulla";
        }

        var result = new StringBuilder();
        for (int i = 0; i < ScaleValues.Length; i++)
        {
            long count = value / ScaleValues[i];
            if (count == 0)
            {
                continue;
            }

            // 1000 is plain "ezer"; every other multiple carries its multiplier ("egymillió", "kétezer").
            if (!(ScaleValues[i] == 1_000L && count == 1))
            {
                AppendBelowThousand(result, (int)count, attributive: true);
            }

            result.Append(ScaleNames[i]);
            value %= ScaleValues[i];
        }

        if (value > 0)
        {
            AppendBelowThousand(result, (int)value, attributive);
        }

        return result.ToString();
    }

    // Appends the words for a value in 1..999.
    private static void AppendBelowThousand(StringBuilder result, int value, bool attributive)
    {
        int hundreds = value / 100;
        int tens = value % 100 / 10;
        int ones = value % 10;

        if (hundreds > 0)
        {
            // 100 is plain "száz"; 200 is "kétszáz".
            if (hundreds > 1)
            {
                result.Append(hundreds == 2 ? "két" : Units[hundreds]);
            }

            result.Append("száz");
        }

        if (tens > 0)
        {
            // 10 and 20 change form when a unit follows: "tizen-" / "huszon-".
            if (ones > 0 && tens == 1)
            {
                result.Append("tizen");
            }
            else if (ones > 0 && tens == 2)
            {
                result.Append("huszon");
            }
            else
            {
                result.Append(Tens[tens]);
            }
        }

        if (ones > 0)
        {
            result.Append(ones == 2 && attributive ? "két" : Units[ones]);
        }
    }
}