// 148 lines, 4.5 KiB, C#
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Text clean-up helpers that spell out numbers and a few symbols in Hungarian
/// and strip unwanted characters.
/// </summary>
public static class TextHelper
{
    // Special character replacement map
    private static readonly Dictionary<string, string> SpecialCharacterMap = new()
    {
        { "/", " per " },
        { "@", " kukac " },
        { "#", " kettőskereszt " },
        { "&", " és " },
        //{ ",", " vessző " },
        { " = ", " egyenlő " }, // Example, you can add more
        //{ " - ", " mínusz " } // Example, you can add more
    };

    // The patterns use [0-9] rather than \d on purpose: in .NET, \d also matches
    // non-ASCII Unicode digits, which the numeric parsers below do not accept.
    private static readonly Regex DatePattern =
        new(@"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b", RegexOptions.Compiled);

    private static readonly Regex BracketPattern =
        new(@"\[[^\]]*\]", RegexOptions.Compiled);

    private static readonly Regex DecimalPattern =
        new(@"\b[0-9]+\.[0-9]+\b", RegexOptions.Compiled);

    private static readonly Regex IntegerPattern =
        new(@"\b[0-9]+\b", RegexOptions.Compiled);

    // Name of the fraction unit, indexed by (number of fractional digits - 1).
    private static readonly string[] FractionNames =
    {
        "tized", "század", "ezred", "tízezred", "százezred", "milliomod",
        "tízmilliomod", "százmilliomod", "milliárdod",
    };

    private static readonly string[] Units =
        { "", "egy", "kettő", "három", "négy", "öt", "hat", "hét", "nyolc", "kilenc" };

    private static readonly string[] Tens =
        { "", "tíz", "húsz", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };

    // Name of each three-digit group, indexed by its power of 1000.
    private static readonly string[] Scales =
        { "", "ezer", "millió", "milliárd", "billió", "billiárd", "trillió" };

    /// <summary>
    /// Spells out the numbers in <paramref name="text"/> in Hungarian, replaces the
    /// symbols listed in <see cref="SpecialCharacterMap"/> with words, and removes
    /// every <c>[...]</c> section together with its brackets.
    /// </summary>
    /// <param name="text">The text to process.</param>
    /// <returns>
    /// The processed text. A null or empty <paramref name="text"/> is returned unchanged.
    /// </returns>
    /// <remarks>
    /// Dates written as <c>yyyy.MM.dd</c> are kept verbatim. Numbers that are too long
    /// to be spelled (more than nine fractional digits, or a value outside the range of
    /// <see cref="long"/>) are also kept verbatim instead of throwing. Email addresses
    /// and URLs are not protected, so any '@' and '/' in them is replaced as well.
    /// </remarks>
    public static string ReplaceNumbersAndSpecialCharacters(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        // Placeholder -> original text for everything that must survive untouched.
        var protectedParts = new Dictionary<string, string>();

        // The placeholder is wrapped in underscores, which are word characters, so the
        // counter inside it is never picked up by the \b-anchored number patterns.
        string Protect(string tag, string value)
        {
            string key = $"__{tag}__{protectedParts.Count}__";
            protectedParts[key] = value;
            return key;
        }

        // Protect dates like 2024.05.06
        text = DatePattern.Replace(text, match => Protect("DATE", match.Value));

        // Remove anything between [] including the brackets themselves
        text = BracketPattern.Replace(text, "");

        // Decimals must be handled BEFORE integers, otherwise both halves would be
        // spelled as independent integers. An unsupported decimal is protected so the
        // integer pass below does not split it either.
        text = DecimalPattern.Replace(
            text,
            match => TrySpellDecimal(match.Value, out string words) ? words : Protect("NUM", match.Value));

        // Then replace integers
        text = IntegerPattern.Replace(
            text,
            match => TryParseDigits(match.Value, out long value) ? SpellMagnitude(value) : match.Value);

        // Replace special characters from dictionary
        foreach (var entry in SpecialCharacterMap)
        {
            text = text.Replace(entry.Key, entry.Value);
        }

        // Restore protected parts
        foreach (var entry in protectedParts)
        {
            text = text.Replace(entry.Key, entry.Value);
        }

        return text;
    }

    /// <summary>
    /// Converts an integer to its Hungarian name, written as a single unhyphenated word.
    /// </summary>
    /// <param name="number">The value to spell; the whole <see cref="int"/> range is accepted.</param>
    /// <returns>
    /// The Hungarian name, e.g. "nulla", "kettő", "tizenkettő", "kétszáz", "huszonkétezer".
    /// Negative values are prefixed with "mínusz ".
    /// </returns>
    public static string NumberToHungarian(int number)
    {
        // Widen first so that negating int.MinValue cannot overflow.
        long wide = number;
        return wide < 0 ? "mínusz " + SpellMagnitude(-wide) : SpellMagnitude(wide);
    }

    /// <summary>
    /// Removes every tab character from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to clean.</param>
    /// <returns>
    /// The text without tabs. A null or empty <paramref name="text"/> is returned unchanged.
    /// </returns>
    public static string RemoveTabs(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return text.Replace("\t", "");
    }

    // Spells a decimal literal such as "3.14"; fails when either half is too long to spell.
    private static bool TrySpellDecimal(string literal, out string words)
    {
        words = literal;

        string[] halves = literal.Split('.');
        string fractionDigits = halves[1];

        if (fractionDigits.Length > FractionNames.Length
            || !TryParseDigits(halves[0], out long whole)
            || !TryParseDigits(fractionDigits, out long fraction))
        {
            return false;
        }

        // The unit follows the number of written digits, so "0.05" is "öt század".
        string unit = FractionNames[fractionDigits.Length - 1];
        words = $"{SpellMagnitude(whole)} egész {SpellMagnitude(fraction)} {unit}";
        return true;
    }

    // Parses a run of ASCII digits; fails instead of throwing when it does not fit in a long.
    private static bool TryParseDigits(string digits, out long value)
    {
        return long.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out value);
    }

    // Spells a non-negative value by walking its three-digit groups from the most significant.
    private static string SpellMagnitude(long value)
    {
        if (value == 0)
        {
            return "nulla";
        }

        // Least significant group first; the index is the power of 1000.
        var groups = new List<int>();
        for (long rest = value; rest > 0; rest /= 1000)
        {
            groups.Add((int)(rest % 1000));
        }

        var result = new StringBuilder();
        for (int power = groups.Count - 1; power >= 0; power--)
        {
            int group = groups[power];
            if (group == 0)
            {
                continue;
            }

            if (power == 0)
            {
                result.Append(SpellBelowThousand(group, false));
            }
            else if (power == 1 && group == 1)
            {
                // One thousand is plain "ezer", not "egyezer".
                result.Append("ezer");
            }
            else
            {
                result.Append(SpellBelowThousand(group, true));
                result.Append(Scales[power]);
            }
        }

        return result.ToString();
    }

    // Spells 1..999; asMultiplier is true when a scale word (ezer, millió, ...) follows.
    private static string SpellBelowThousand(int value, bool asMultiplier)
    {
        int hundreds = value / 100;
        int tens = value % 100 / 10;
        int ones = value % 10;

        var result = new StringBuilder();

        if (hundreds > 0)
        {
            if (hundreds == 2)
            {
                // 2 takes its short form in front of "száz".
                result.Append("két");
            }
            else if (hundreds > 1)
            {
                result.Append(Units[hundreds]);
            }

            result.Append("száz");
        }

        if (tens > 0)
        {
            // 10 and 20 change form when a unit follows: tizenegy, huszonegy.
            if (tens == 1 && ones > 0)
            {
                result.Append("tizen");
            }
            else if (tens == 2 && ones > 0)
            {
                result.Append("huszon");
            }
            else
            {
                result.Append(Tens[tens]);
            }
        }

        if (ones > 0)
        {
            // "két" only in front of a scale word (tizenkétezer); "kettő" when it ends the number.
            result.Append(ones == 2 && asMultiplier ? "két" : Units[ones]);
        }

        return result.ToString();
    }
}
|