// File: SeemGen/Helpers/TextHelper.cs (C#)

using System;
using System.Text.RegularExpressions;
using System.Text;
using System.Collections.Generic;
/// <summary>
/// Text normalization helpers: spells out numbers and selected special characters
/// in Hungarian or English, strips Markdown code fences from AI responses, and
/// removes tab characters.
/// </summary>
public static class TextHelper
{
    // Special character replacement maps. Keys never occur inside any value, so the
    // order in which the replacements are applied does not matter.
    private static readonly Dictionary<string, string> HungarianSpecialCharacterMap = new()
    {
        { "/", " per " },
        { "@", " kukac " },
        { "#", " kettőskereszt " },
        { "&", " és " },
        { " = ", " egyenlő " },
    };

    private static readonly Dictionary<string, string> EnglishSpecialCharacterMap = new()
    {
        { "/", " slash " },
        { "@", " at " },
        { "#", " hashtag " },
        { "&", " and " },
        { " = ", " equals " },
    };

    // Patterns use [0-9] rather than \d: in .NET \d also matches non-ASCII Unicode
    // digits, which the integer parser rejects.
    private static readonly Regex DateRegex = new(@"\b[0-9]{4}\.[0-9]{2}\.[0-9]{2}\b", RegexOptions.Compiled);
    private static readonly Regex BracketRegex = new(@"\[[^\]]*\]", RegexOptions.Compiled);
    private static readonly Regex DecimalRegex = new(@"\b[0-9]+\.[0-9]+\b", RegexOptions.Compiled);
    private static readonly Regex IntegerRegex = new(@"\b[0-9]+\b", RegexOptions.Compiled);

    // Hungarian vocabulary. Index 2 of the units is "két" (the form used in "kétszáz", "kétezer").
    private static readonly string[] HungarianUnits = { "", "egy", "két", "három", "négy", "öt", "hat", "hét", "nyolc", "kilenc" };
    // Tens when no unit digit follows (10 -> "tíz", 20 -> "húsz").
    private static readonly string[] HungarianTens = { "", "tíz", "húsz", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };
    // Tens when a unit digit follows (11 -> "tizen" + "egy", 21 -> "huszon" + "egy").
    private static readonly string[] HungarianTensCompound = { "", "tizen", "huszon", "harminc", "negyven", "ötven", "hatvan", "hetven", "nyolcvan", "kilencven" };
    // Names used when digits are read out one by one.
    private static readonly string[] HungarianDigits = { "nulla", "egy", "kettő", "három", "négy", "öt", "hat", "hét", "nyolc", "kilenc" };
    // Denominator names indexed by (number of fraction digits - 1).
    private static readonly string[] HungarianFractionNames = { "tized", "század", "ezred", "tízezred", "százezred", "milliomod" };

    // English vocabulary: 0..19 and the tens from 20 upwards.
    private static readonly string[] EnglishSmall =
    {
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
        "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    };
    private static readonly string[] EnglishTens = { "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" };

    /// <summary>
    /// Replaces numbers and selected special characters in <paramref name="text"/> with words.
    /// Dates of the form yyyy.MM.dd are left untouched and anything in square brackets
    /// (including the brackets) is removed.
    /// </summary>
    /// <param name="text">Input text. Null or empty input is returned unchanged.</param>
    /// <param name="language">"Hungarian" selects Hungarian output; any other value selects English.</param>
    /// <returns>The text with numbers and mapped special characters spelled out.</returns>
    public static string ReplaceNumbersAndSpecialCharacters(string text, string language)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        bool hungarian = language == "Hungarian";

        // Protect dates like 2024.05.06 behind placeholders. The placeholder's digit is
        // surrounded by underscores (word characters), so the number patterns skip it.
        var protectedParts = new Dictionary<string, string>();
        text = DateRegex.Replace(text, match =>
        {
            string key = $"__DATE__{protectedParts.Count}__";
            protectedParts[key] = match.Value;
            return key;
        });

        // Remove anything between [] including the brackets themselves.
        text = BracketRegex.Replace(text, string.Empty);

        // Decimals must be handled BEFORE integers, otherwise "3.14" is split into "3" and "14".
        text = DecimalRegex.Replace(text, match => SpellDecimal(match.Value, hungarian));
        text = IntegerRegex.Replace(text, match => SpellInteger(match.Value, hungarian));

        foreach (var kvp in hungarian ? HungarianSpecialCharacterMap : EnglishSpecialCharacterMap)
        {
            text = text.Replace(kvp.Key, kvp.Value);
        }

        // Restore protected parts.
        foreach (var kvp in protectedParts)
        {
            text = text.Replace(kvp.Key, kvp.Value);
        }

        return text;
    }

    /// <summary>
    /// Spells <paramref name="number"/> in Hungarian as a single compound word
    /// (for example 2024 -> "kétezerhuszonnégy"). Negative values are prefixed with "mínusz ".
    /// </summary>
    /// <param name="number">Any 32-bit integer.</param>
    /// <returns>The Hungarian wording; "nulla" for zero.</returns>
    public static string NumberToHungarian(int number)
    {
        if (number == 0)
        {
            return "nulla";
        }

        // Work on a long so that int.MinValue can be negated safely.
        long magnitude = Math.Abs((long)number);
        var result = new StringBuilder();
        if (number < 0)
        {
            result.Append("mínusz ");
        }

        int billions = (int)(magnitude / 1_000_000_000);
        int millions = (int)(magnitude / 1_000_000 % 1000);
        int thousands = (int)(magnitude / 1000 % 1000);
        int rest = (int)(magnitude % 1000);

        if (billions > 0)
        {
            result.Append(HungarianBelowThousand(billions)).Append("milliárd");
        }

        if (millions > 0)
        {
            result.Append(HungarianBelowThousand(millions)).Append("millió");
        }

        if (thousands > 0)
        {
            // Exactly one thousand is plain "ezer", without a leading "egy".
            if (thousands > 1)
            {
                result.Append(HungarianBelowThousand(thousands));
            }

            result.Append("ezer");
        }

        if (rest > 0)
        {
            result.Append(HungarianBelowThousand(rest));
        }

        return result.ToString();
    }

    /// <summary>
    /// Spells <paramref name="number"/> in English with space-separated words
    /// (for example 2024 -> "two thousand twenty-four"). Negative values are prefixed with "minus".
    /// </summary>
    /// <param name="number">Any 32-bit integer.</param>
    /// <returns>The English wording; "zero" for zero.</returns>
    public static string NumberToEnglish(int number)
    {
        if (number == 0)
        {
            return "zero";
        }

        // Work on a long so that int.MinValue can be negated safely.
        long magnitude = Math.Abs((long)number);
        var words = new List<string>();
        if (number < 0)
        {
            words.Add("minus");
        }

        AppendEnglishGroup(words, (int)(magnitude / 1_000_000_000), "billion");
        AppendEnglishGroup(words, (int)(magnitude / 1_000_000 % 1000), "million");
        AppendEnglishGroup(words, (int)(magnitude / 1000 % 1000), "thousand");
        AppendEnglishGroup(words, (int)(magnitude % 1000), string.Empty);

        return string.Join(" ", words);
    }

    /// <summary>
    /// Strips a leading Markdown code fence (optionally tagged "json" or "html") and the
    /// matching trailing fence from an AI response.
    /// </summary>
    /// <param name="aiResponse">Raw response text. Null, empty or unfenced input is returned unchanged.</param>
    /// <returns>The response without the surrounding fences.</returns>
    public static string FixJsonWithoutAI(string aiResponse)
    {
        const string fence = "```";

        if (string.IsNullOrEmpty(aiResponse) || !aiResponse.StartsWith(fence, StringComparison.Ordinal))
        {
            return aiResponse;
        }

        string body = aiResponse.Substring(fence.Length);
        if (body.StartsWith("json", StringComparison.Ordinal))
        {
            body = body.Substring(4);
        }

        if (body.StartsWith("html", StringComparison.Ordinal))
        {
            body = body.Substring(4);
        }

        // Only drop the closing fence when it is really there; tolerate trailing
        // whitespace after it instead of blindly cutting the last three characters.
        string trimmed = body.TrimEnd();
        if (trimmed.EndsWith(fence, StringComparison.Ordinal))
        {
            body = trimmed.Substring(0, trimmed.Length - fence.Length);
        }

        return body;
    }

    /// <summary>Removes every tab character from <paramref name="text"/>.</summary>
    /// <param name="text">Input text. Null or empty input is returned unchanged.</param>
    /// <returns>The text without tab characters.</returns>
    public static string RemoveTabs(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return text.Replace("\t", "");
    }

    // Spells a decimal literal such as "3.14" (ASCII digits on both sides of one dot).
    private static string SpellDecimal(string value, bool hungarian)
    {
        int dot = value.IndexOf('.');
        string integerDigits = value.Substring(0, dot);
        string fractionDigits = value.Substring(dot + 1);
        string integerWords = SpellInteger(integerDigits, hungarian);

        if (!hungarian)
        {
            // Fraction digits are read one by one so leading zeros survive ("1.05" -> "one point zero five").
            return $"{integerWords} point {SpellDigits(fractionDigits, false)}";
        }

        if (fractionDigits.Length <= HungarianFractionNames.Length && TryParseDigits(fractionDigits, out int fraction))
        {
            // Hungarian names the denominator, so the fraction is read as a whole number.
            return $"{integerWords} egész {NumberToHungarian(fraction)} {HungarianFractionNames[fractionDigits.Length - 1]}";
        }

        // No denominator name available for this many digits: read the digits individually.
        return $"{integerWords} egész {SpellDigits(fractionDigits, true)}";
    }

    // Spells a run of ASCII digits as a number, or digit by digit when it does not fit in an int.
    private static string SpellInteger(string digits, bool hungarian)
    {
        if (TryParseDigits(digits, out int value))
        {
            return hungarian ? NumberToHungarian(value) : NumberToEnglish(value);
        }

        return SpellDigits(digits, hungarian);
    }

    // Reads ASCII digits one at a time, separated by single spaces.
    private static string SpellDigits(string digits, bool hungarian)
    {
        string[] names = hungarian ? HungarianDigits : EnglishSmall;
        var spoken = new StringBuilder();
        foreach (char digit in digits)
        {
            if (spoken.Length > 0)
            {
                spoken.Append(' ');
            }

            spoken.Append(names[digit - '0']);
        }

        return spoken.ToString();
    }

    // Culture-independent parse of a digits-only string; false when the value overflows int.
    private static bool TryParseDigits(string digits, out int value) =>
        int.TryParse(
            digits,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    // Hungarian wording for 1..999, written as one compound word.
    private static string HungarianBelowThousand(int value)
    {
        var word = new StringBuilder();

        int hundreds = value / 100;
        if (hundreds > 0)
        {
            // Exactly one hundred is plain "száz", without a leading "egy".
            if (hundreds > 1)
            {
                word.Append(HungarianUnits[hundreds]);
            }

            word.Append("száz");
        }

        int tensDigit = value % 100 / 10;
        int unitDigit = value % 10;
        if (tensDigit > 0)
        {
            word.Append(unitDigit == 0 ? HungarianTens[tensDigit] : HungarianTensCompound[tensDigit]);
        }

        if (unitDigit > 0)
        {
            word.Append(HungarianUnits[unitDigit]);
        }

        return word.ToString();
    }

    // Appends the English words for one three-digit group (0..999) followed by its scale word.
    private static void AppendEnglishGroup(List<string> words, int group, string scale)
    {
        if (group == 0)
        {
            return;
        }

        int hundreds = group / 100;
        int rest = group % 100;

        if (hundreds > 0)
        {
            words.Add(EnglishSmall[hundreds]);
            words.Add("hundred");
        }

        if (rest >= 20)
        {
            int unitDigit = rest % 10;
            words.Add(unitDigit == 0
                ? EnglishTens[rest / 10]
                : $"{EnglishTens[rest / 10]}-{EnglishSmall[unitDigit]}");
        }
        else if (rest > 0)
        {
            words.Add(EnglishSmall[rest]);
        }

        if (scale.Length > 0)
        {
            words.Add(scale);
        }
    }
}