using FruitBank.Common.Dtos;
using FruitBank.Common.Entities;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Nop.Core.Domain.Catalog;
using Nop.Plugin.Misc.FruitBankPlugin.Domains.DataLayer;
using Nop.Plugin.Misc.FruitBankPlugin.Helpers;
using Nop.Plugin.Misc.FruitBankPlugin.Services;
using Nop.Services.Catalog;
using Nop.Services.Security;
using Nop.Web.Framework;
using Nop.Web.Framework.Controllers;
using Nop.Web.Framework.Mvc.Filters;
using PDFtoImage;
using SkiaSharp;

namespace Nop.Plugin.Misc.FruitBank.Controllers
{
    /// <summary>
    /// Admin controller that accepts an uploaded image or PDF, sends it to the AI text-extraction
    /// service, and maps the extracted partner and product names onto database records.
    /// </summary>
    [AuthorizeAdmin]
    [Area(AreaNames.ADMIN)]
    [AutoValidateAntiforgeryToken]
    public class FileManagerController : BasePluginController
    {
        private const string PdfExtension = ".pdf";

        /// <summary>Answer the matching prompts ask the AI to give when nothing fits.</summary>
        private const string NoMatchToken = "NONE";

        private const string HistoricalMatchSystemPrompt =
            "You are a product name matching specialist for FruitBank, a wholesale fruit and vegetable company.\n\n" +
            "Your task: Match a product name from a shipping document to the most similar product in our historical catalog.\n\n" +
            "MATCHING RULES:\n" +
            "1. Match based on ALL details including:\n" +
            " - Product type (apples, bananas, oranges)\n" +
            " - Variety (Golden Delicious, Cavendish, Valencia)\n" +
            " - Quality grade (Class I, Class II, Extra, Premium)\n" +
            " - Size markers (60+, 70+, 80+, Large, Small)\n" +
            " - Packaging type if mentioned (Carton, Box, Loose)\n" +
            "2. Consider language variations:\n" +
            " - Spanish: Manzanas = Apples, Plátanos = Bananas, Naranjas = Oranges\n" +
            " - Hungarian: Alma = Apples, Banán = Bananas, Narancs = Oranges\n" +
            " - Plural/singular: 'Bananas' = 'Banana'\n" +
            "3. Match as specifically as possible:\n" +
            " - 'APPLES CLASS I 70+' should match 'APPLES CLASS I 70+' (not just 'APPLES')\n" +
            " - 'ORANGES 60+' is different from 'ORANGES 70+'\n" +
            " - 'TOMATOES EXTRA' is different from 'TOMATOES CLASS I'\n" +
            "4. Abbreviations to recognize:\n" +
            " - 'GOLDEN DEL' = 'GOLDEN DELICIOUS'\n" +
            " - 'CAT I' = 'CLASS I' = 'CATEGORY I'\n" +
            " - 'CAT II' = 'CLASS II' = 'CATEGORY II'\n" +
            " - 'BIO' = 'ORGANIC'\n\n" +
            "OUTPUT:\n" +
            "Return ONLY the exact product name from the catalog that best matches ALL the details.\n" +
            "If no good match exists (less than 70% similarity including grade/size), return 'NONE'.\n\n" +
            "Examples:\n" +
            "Document: 'GOLDEN DEL APPLES CAT I 70+' | Catalog: ['GOLDEN DELICIOUS APPLES CLASS I 70+', 'GOLDEN DELICIOUS APPLES CLASS II 70+'] → GOLDEN DELICIOUS APPLES CLASS I 70+\n" +
            "Document: 'PLATANOS CAVENDISH 70+' | Catalog: ['BANANAS CAVENDISH 60+', 'BANANAS CAVENDISH 70+', 'BANANAS CAVENDISH 80+'] → BANANAS CAVENDISH 70+\n" +
            "Document: 'MANZANAS ROJAS EXTRA' | Catalog: ['RED APPLES CLASS I', 'RED APPLES EXTRA', 'RED APPLES CLASS II'] → RED APPLES EXTRA\n" +
            "Document: 'SWEET PEPPERS' | Catalog: ['TOMATOES', 'CUCUMBERS', 'CARROTS'] → NONE";

        private const string CatalogMatchSystemPrompt =
            "You are a product name matching specialist for FruitBank wholesale company.\n\n" +
            "Your task: Match a product name from a shipping document to our product catalog.\n\n" +
            "MATCHING RULES:\n" +
            "1. Match based on ALL product details:\n" +
            " - Product type and variety\n" +
            " - Quality grades: 'Extra', 'Class I', 'Class II', 'Premium', 'Category I/II'\n" +
            " - Size markers: '60+', '70+', '80+', 'Large', 'Small', 'Medium'\n" +
            " - Packaging: 'Carton', 'Box', 'Loose', 'Packed' (if it's part of product ID)\n" +
            " - Origin country: 'Spanish', 'Italian', 'Dutch', 'Turkish' (if tracked separately)\n" +
            "2. Language variations:\n" +
            " SPANISH → ENGLISH → HUNGARIAN\n" +
            " - Manzanas → Apples → Alma\n" +
            " - Plátanos → Bananas → Banán\n" +
            " - Naranjas → Oranges → Narancs\n" +
            " - Tomates → Tomatoes → Paradicsom\n" +
            " - Pimientos → Peppers → Paprika\n" +
            " - Uvas → Grapes → Szőlő\n" +
            " - Limones → Lemons → Citrom\n" +
            "3. Quality grade abbreviations:\n" +
            " - 'CAT I' / 'CAT. I' / 'CATEGORY I' = 'CLASS I'\n" +
            " - 'CAT II' / 'CAT. II' / 'CATEGORY II' = 'CLASS II'\n" +
            " - '1' = 'CLASS I', '2' = 'CLASS II'\n" +
            "4. Be specific:\n" +
            " - 'APPLES 70+' is NOT the same as 'APPLES 80+'\n" +
            " - 'TOMATOES CLASS I' is NOT the same as 'TOMATOES CLASS II'\n" +
            " - 'ORANGES SPANISH' may be different from 'ORANGES ITALIAN'\n\n" +
            "OUTPUT:\n" +
            "Return ONLY the exact product name from the catalog that matches ALL the details.\n" +
            "If no close match exists (below 70% similarity), return 'NONE'.\n\n" +
            "Examples:\n" +
            "Document: 'MANZANAS GOLDEN CAT I 70+' | Best match: 'GOLDEN DELICIOUS APPLES CLASS I 70+'\n" +
            "Document: 'BIO BANANEN 80+' | Best match: 'ORGANIC BANANAS 80+' (NOT just 'BANANAS')\n" +
            "Document: 'POMODORI CILIEGINI EXTRA' | Best match: 'CHERRY TOMATOES EXTRA' (NOT 'CHERRY TOMATOES CLASS I')\n" +
            "Document: 'NARANJAS 60+' | Best match: 'ORANGES 60+' (NOT 'ORANGES 70+')\n" +
            "Document: 'RARE EXOTIC FRUIT' | No match: 'NONE'";

        private const string PartnerMatchSystemPrompt =
            "You are a partner matching specialist for FruitBank.\n\n" +
            "Your task: Match a partner name to the correct partner from our database.\n\n" +
            "MATCHING RULES:\n" +
            "1. Ignore minor differences:\n" +
            " - Trailing/leading spaces\n" +
            " - Periods and punctuation\n" +
            " - Case differences (B.V. vs BV vs b.v.)\n" +
            " - Legal entity suffixes (B.V., S.L., S.R.L., Kft., Ltd.)\n" +
            "2. Match based on core company name\n" +
            "3. Be flexible with abbreviations\n\n" +
            "OUTPUT:\n" +
            "Return ONLY the numeric ID of the matching partner.\n" +
            "If no match found, return '0'.\n\n" +
            "Examples:\n" +
            "Input: 'SFI Rotterdam' | Database: 'ID: 42 | Name: SFI Rotterdam B.V.' → 42\n" +
            "Input: 'Frutas Sanchez SL' | Database: 'ID: 15 | Name: FRUTAS SÁNCHEZ S.L.' → 15\n" +
            "Input: 'Van den Berg' | Database: 'ID: 8 | Name: Van den Berg B.V.' → 8\n" +
            "Input: 'Unknown Company' | No match in database → 0";

        private const string PartnerExtractionSystemPrompt =
            "You are a specialized data extraction agent for FruitBank, a Hungarian fruit and vegetable wholesale company.\n\n" +
            "Your task: Extract the SUPPLIER/SENDER company name from shipping documents (CMR, delivery notes, invoices).\n\n" +
            "CRITICAL RULES:\n" +
            "1. FruitBank (Gyümölcsbank Kft.) is the RECEIVER - NEVER return FruitBank as the partner\n" +
            "2. Look for these indicators of the SUPPLIER:\n" +
            " - 'Sender' / 'Expediteur' / 'Feladó' / 'Absender' section\n" +
            " - 'From' / 'De' / 'Kitől' field\n" +
            " - Company name at TOP of document (usually sender)\n" +
            " - Tax ID / VAT number paired with company name\n" +
            " - EORI number holder (if present)\n" +
            "3. The supplier is typically:\n" +
            " - A farm, cooperative, or wholesaler\n" +
            " - Located in Spain, Italy, Netherlands, Poland, Germany, Greece, Turkey, or other EU countries\n" +
            " - NOT FruitBank and NOT the transport company\n\n" +
            "Document structure hints:\n" +
            "- CMR documents: Sender is box 1-2, Receiver is box 3-4\n" +
            "- Invoices: Look for 'Seller' / 'Eladó' / 'Vendedor' (NOT Buyer)\n" +
            "- Delivery notes: Sender/Origin section at top\n\n" +
            "OUTPUT FORMAT:\n" +
            "Return ONLY the exact company name as it appears in the document.\n" +
            "Do not include:\n" +
            "- Tax IDs\n" +
            "- Addresses\n" +
            "- Country codes\n" +
            "- Legal entity types (unless part of official name)\n\n" +
            "Examples:\n" +
            "[CORRECT] FRUTAS SÁNCHEZ S.L.\n" +
            "[CORRECT] Van den Berg B.V.\n" +
            "[CORRECT] Agricola Romana SRL\n" +
            "[WRONG] FruitBank (this is us!)\n" +
            "[WRONG] DHL Supply Chain (transport company)\n" +
            "[WRONG] FRUTAS SÁNCHEZ S.L. - ES12345678 (no tax ID)";

        /// <summary>Upload extensions accepted by <see cref="ExtractTextFromImage"/> (compared lower-cased).</summary>
        private static readonly HashSet<string> AllowedExtensions = new(StringComparer.Ordinal)
        {
            ".jpg", ".jpeg", ".png", ".gif", ".webp", PdfExtension
        };

        /// <summary>Serializer options shared by every request; building them once avoids repeated metadata setup.</summary>
        private static readonly System.Text.Json.JsonSerializerOptions ExtractionJsonOptions = new()
        {
            PropertyNameCaseInsensitive = true, // handles camelCase/PascalCase mismatches
            IncludeFields = true                // the response model may expose fields instead of properties
        };

        /// <summary>Matches a trailing tax ID (two capital letters followed by 8+ digits) and everything after it.</summary>
        private static readonly System.Text.RegularExpressions.Regex TaxIdSuffixPattern = new(
            @"\s*-?\s*[A-Z]{2}\d{8,}.*$",
            System.Text.RegularExpressions.RegexOptions.Compiled);

        private static readonly char[] TrailingNamePunctuation = { '.', ',', ';' };

        private readonly IPermissionService _permissionService;
        private readonly OpenAIApiService _aiApiService;
        private readonly AICalculationService _aiCalculationService;
        private readonly IProductService _productService;
        private readonly FruitBankDbContext _dbContext;
        private readonly PdfToImageService _pdfToImageService;

        public FileManagerController(
            IPermissionService permissionService,
            OpenAIApiService aiApiService,
            AICalculationService aiCalculationService,
            IProductService productService,
            FruitBankDbContext fruitBankDbContext,
            PdfToImageService pdfToImageService)
        {
            _permissionService = permissionService;
            _aiApiService = aiApiService;
            _aiCalculationService = aiCalculationService;
            _productService = productService;
            _dbContext = fruitBankDbContext;
            _pdfToImageService = pdfToImageService;
        }

        /// <summary>
        /// Display the image text extraction page.
        /// </summary>
        /// <returns>The extraction view, or the access-denied view when the admin-panel permission is missing.</returns>
        public async Task<IActionResult> ImageTextExtraction()
        {
            if (!await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL))
            {
                return AccessDeniedView();
            }

            return View("~/Plugins/Misc.FruitBankPlugin/Areas/Admin/Views/Extras/ImageTextExtraction.cshtml");
        }

        /// <summary>
        /// Endpoint to extract text from an uploaded image or PDF and map it to a shipping document.
        /// </summary>
        /// <param name="imageFile">Uploaded JPG, PNG, GIF, WebP or PDF file. For PDFs only the first converted page is analysed.</param>
        /// <param name="customPrompt">Optional prompt forwarded unchanged to the text-extraction service.</param>
        /// <returns>
        /// A JSON object with <c>success</c> and <c>message</c>; on success it also carries the recognised
        /// <c>shippingDocument</c>, <c>fileName</c>, <c>filePath</c>, <c>fileSize</c> and <c>wasConverted</c>.
        /// </returns>
        [HttpPost]
        public async Task<IActionResult> ExtractTextFromImage(IFormFile imageFile, string customPrompt = null)
        {
            if (!await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL))
            {
                return Failure("Access denied");
            }

            if (imageFile == null || imageFile.Length == 0)
            {
                return Failure("No file received");
            }

            // Validate file type - images and PDF
            var extension = Path.GetExtension(imageFile.FileName)?.ToLowerInvariant() ?? string.Empty;
            if (!AllowedExtensions.Contains(extension))
            {
                return Failure("Invalid file type. Please upload JPG, PNG, GIF, WebP, or PDF.");
            }

            var isPdf = extension == PdfExtension;

            try
            {
                var uploadsFolder = Path.Combine(Directory.GetCurrentDirectory(), "wwwroot", "uploads", "ocr");
                Directory.CreateDirectory(uploadsFolder); // no-op when the folder already exists

                var processedFilePath = await SaveUploadAsImageAsync(imageFile, extension, uploadsFolder);
                if (processedFilePath == null)
                {
                    return Failure("Failed to convert PDF or PDF is empty");
                }

                var processedFileName = Path.GetFileName(processedFilePath);

                // Extract text from the processed image using the AI vision service
                string extractedText;
                using (var imageStream = new FileStream(processedFilePath, FileMode.Open, FileAccess.Read))
                {
                    extractedText = await _aiApiService.ExtractTextFromImageAsync(imageStream, processedFileName, customPrompt);
                }

                if (string.IsNullOrEmpty(extractedText))
                {
                    return Failure("Failed to extract text. The API may have returned an empty response.");
                }

                // Stop here when the response cannot be parsed; everything below reads extractedData.
                var deserializedContent = DeserializeExtraction(extractedText);
                if (deserializedContent?.extractedData == null)
                {
                    return Failure("Failed to parse the extracted data. The API response was not in the expected format.");
                }

                var extractedData = deserializedContent.extractedData;

                // Held in memory only to assemble the response; nothing in this action saves it.
                var shippingDocument = new ShippingDocument
                {
                    ShippingItems = new List<ShippingItem>(),
                    DocumentIdNumber = extractedData.documentId,
                    PdfFileName = processedFileName
                };
                Console.WriteLine($"Document number analysis Result: {extractedData.documentId}");

                var partnerAnalysis = await ExtractPartnerName(extractedText);
                var partnerId = await DeterminePartner(partnerAnalysis);
                if (partnerId.HasValue)
                {
                    shippingDocument.PartnerId = partnerId.Value;
                    Console.WriteLine($"Determined Partner ID: {partnerId.Value}");
                }
                else
                {
                    Console.WriteLine("No matching partner found in the database.");
                }

                // Identify products from database
                var allProducts = await _dbContext.ProductDtos.GetAll(true).ToListAsync();
                var historicalProducts = await _dbContext.ShippingItems.GetAll().ToListAsync();

                var deserializedProducts = new ProductReferenceResponse
                {
                    products = extractedData.products ?? new List<ProductReference>()
                };
                Console.WriteLine($"Serialized Products: {deserializedProducts.products.Count}");

                var matchedProducts = await DetermineProducts(allProducts, historicalProducts, deserializedProducts);
                shippingDocument.ShippingItems = matchedProducts;

                if (matchedProducts.Count > 0)
                {
                    Console.WriteLine($"Matched Products Count: {matchedProducts.Count}");
                    foreach (var matchedProduct in matchedProducts)
                    {
                        Console.WriteLine($"Matched Product: {matchedProduct.Name}");
                    }
                }
                else
                {
                    Console.WriteLine("No products matched from the database.");
                }

                // Calculate total pallets from shipping items
                shippingDocument.TotalPallets = shippingDocument.ShippingItems?.Sum(item => item.PalletsOnDocument) ?? 0;

                return Json(new
                {
                    success = true,
                    message = isPdf ? "PDF converted and text extracted successfully" : "Text extracted successfully",
                    shippingDocument = new
                    {
                        documentIdNumber = shippingDocument.DocumentIdNumber,
                        partnerId = shippingDocument.PartnerId,
                        pdfFileName = shippingDocument.PdfFileName,
                        totalPallets = shippingDocument.TotalPallets,
                        shippingItems = shippingDocument.ShippingItems?.Select(item => new
                        {
                            name = item.Name,
                            hungarianName = item.HungarianName,
                            nameOnDocument = item.NameOnDocument,
                            productId = item.ProductId,
                            palletsOnDocument = item.PalletsOnDocument,
                            quantityOnDocument = item.QuantityOnDocument,
                            netWeightOnDocument = item.NetWeightOnDocument,
                            grossWeightOnDocument = item.GrossWeightOnDocument,
                            isMeasurable = item.IsMeasurable
                        }).ToList(),
                        extractedText = extractedData.fullText
                    },
                    fileName = processedFileName,
                    filePath = processedFilePath,
                    fileSize = imageFile.Length,
                    wasConverted = isPdf
                });
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine($"Error in ExtractTextFromImage: {ex}");
                return Failure($"Error processing file: {ex.Message}");
            }
        }

        /// <summary>Builds the standard failure payload (<c>success = false</c> plus a message).</summary>
        private IActionResult Failure(string message)
        {
            return Json(new { success = false, message });
        }

        /// <summary>
        /// Stores the upload in <paramref name="uploadsFolder"/> and returns the path of the image to analyse.
        /// PDFs are written to a temporary file, converted to JPG, and the temporary file is always deleted.
        /// </summary>
        /// <returns>The image path, or <c>null</c> when a PDF produced no pages.</returns>
        private async Task<string> SaveUploadAsImageAsync(IFormFile upload, string extension, string uploadsFolder)
        {
            // The timestamp alone only has one-second resolution, so two uploads in the same second
            // would overwrite each other; the GUID keeps every stored file distinct.
            var uniqueSuffix = $"{DateTime.Now:yyyyMMdd_HHmmss}_{Guid.NewGuid():N}";

            if (extension != PdfExtension)
            {
                var imagePath = Path.Combine(uploadsFolder, $"ocr_image_{uniqueSuffix}{extension}");
                using (var target = new FileStream(imagePath, FileMode.Create))
                {
                    await upload.CopyToAsync(target);
                }

                return imagePath;
            }

            var tempPdfPath = Path.Combine(uploadsFolder, $"temp_pdf_{uniqueSuffix}.pdf");
            try
            {
                using (var target = new FileStream(tempPdfPath, FileMode.Create))
                {
                    await upload.CopyToAsync(target);
                }

                var convertedImages = await _pdfToImageService.ConvertPdfToJpgAsync(tempPdfPath, uploadsFolder);
                if (convertedImages == null || convertedImages.Count == 0)
                {
                    return null;
                }

                // Use the first page
                return convertedImages[0];
            }
            finally
            {
                // Runs on success, on empty conversion and on exceptions alike.
                if (System.IO.File.Exists(tempPdfPath))
                {
                    System.IO.File.Delete(tempPdfPath);
                }
            }
        }

        /// <summary>
        /// Repairs and deserializes the AI response. Parsing problems are logged and reported as <c>null</c>.
        /// </summary>
        private static OpenaiImageResponse DeserializeExtraction(string extractedText)
        {
            var json = TextHelper.FixJsonWithoutAI(extractedText);
            try
            {
                var content = System.Text.Json.JsonSerializer.Deserialize<OpenaiImageResponse>(json, ExtractionJsonOptions);
                if (content == null || content.extractedData == null)
                {
                    Console.Error.WriteLine($"Deserialization returned null. JSON was: {json}");
                }

                return content;
            }
            catch (Exception ex) when (ex is System.Text.Json.JsonException or ArgumentNullException or NotSupportedException)
            {
                Console.Error.WriteLine($"Error deserializing extracted text: {ex}");
                Console.Error.WriteLine($"JSON content: {json}");
                return null;
            }
        }

        /// <summary>
        /// Maps every product read from the document to a <see cref="ShippingItem"/>.
        /// Matching order: exact historical name, AI match against similar historical names,
        /// AI match against a recent + similarly-named slice of the catalog. Products that match
        /// nothing are still returned, with <c>ProductId = null</c>.
        /// </summary>
        /// <param name="allProducts">Product catalog used to resolve matches.</param>
        /// <param name="historicalProducts">Previously stored shipping items, used as name history.</param>
        /// <param name="deserializedProducts">Products extracted from the document.</param>
        /// <returns>One shipping item per extracted product, in document order.</returns>
        private async Task<List<ShippingItem>> DetermineProducts(List<ProductDto> allProducts, List<ShippingItem> historicalProducts, ProductReferenceResponse deserializedProducts)
        {
            var finalMatchedProducts = new List<ShippingItem>();
            if (deserializedProducts?.products == null)
            {
                return finalMatchedProducts;
            }

            // Load all shipping items once
            var allShippingItems = await _dbContext.ShippingItems.GetAll(true).ToListAsync();

            foreach (var deserializedProduct in deserializedProducts.products)
            {
                // Step 1: exact historical match
                var matchedItem = FindExactHistoricalMatch(allProducts, historicalProducts, deserializedProduct);

                // Steps 2 and 3 search by name prefix, so they need a non-empty name.
                if (matchedItem == null && !string.IsNullOrEmpty(deserializedProduct.name))
                {
                    matchedItem = await MatchAgainstHistoryWithAiAsync(historicalProducts, allShippingItems, deserializedProduct)
                                  ?? await MatchAgainstCatalogWithAiAsync(allProducts, deserializedProduct);
                }

                // Step 4: add matched or unmatched item
                if (matchedItem == null)
                {
                    matchedItem = CreateUnmatchedShippingItem(deserializedProduct);
                    Console.WriteLine($"No match found for: {deserializedProduct.name}");
                }

                finalMatchedProducts.Add(matchedItem);
            }

            Console.WriteLine($"Total matched products: {finalMatchedProducts.Count(x => x.ProductId != null)}");
            Console.WriteLine($"Total unmatched products: {finalMatchedProducts.Count(x => x.ProductId == null)}");
            return finalMatchedProducts;
        }

        /// <summary>Step 1: looks for a historical item whose document name equals the product name (case-insensitive).</summary>
        private static ShippingItem FindExactHistoricalMatch(List<ProductDto> allProducts, List<ShippingItem> historicalProducts, ProductReference documentProduct)
        {
            var historicalProduct = historicalProducts.FirstOrDefault(hp =>
                !string.IsNullOrEmpty(hp.NameOnDocument) &&
                hp.NameOnDocument.Equals(documentProduct.name, StringComparison.OrdinalIgnoreCase));
            if (historicalProduct == null)
            {
                return null;
            }

            Console.WriteLine($"Historical product found: {historicalProduct.Name}");
            var productDto = allProducts.FirstOrDefault(p => p.Id == historicalProduct.ProductId);
            if (productDto == null)
            {
                return null;
            }

            Console.WriteLine($"Matched product from historical data: {productDto.Name}");
            return CreateShippingItem(productDto, documentProduct);
        }

        /// <summary>
        /// Step 2: asks the AI to pick the best match among historical items whose document name
        /// contains the first (up to) six characters of the product name.
        /// </summary>
        private async Task<ShippingItem> MatchAgainstHistoryWithAiAsync(List<ShippingItem> historicalProducts, List<ShippingItem> allShippingItems, ProductReference documentProduct)
        {
            var namePrefix = documentProduct.name.Substring(0, Math.Min(6, documentProduct.name.Length));
            var similarNameProducts = historicalProducts
                .Where(p => !string.IsNullOrEmpty(p.NameOnDocument) &&
                            p.NameOnDocument.Contains(namePrefix, StringComparison.OrdinalIgnoreCase))
                .ToList();
            Console.WriteLine($"Similar products found for {documentProduct.name}: {similarNameProducts.Count}");

            if (similarNameProducts.Count == 0)
            {
                return null;
            }

            var userPrompt = "HISTORICAL PRODUCT CATALOG:\n" +
                             string.Join("\n", similarNameProducts.Select(p => $"- {p.NameOnDocument}")) + "\n\n" +
                             "---\n\n" +
                             "PRODUCT NAME FROM DOCUMENT:\n" + documentProduct.name + "\n\n" +
                             "Return the best matching product name from the catalog above (matching ALL details including size/grade), or 'NONE' if no good match exists.";

            var aiAnswer = await _aiApiService.GetSimpleResponseAsync(HistoricalMatchSystemPrompt, userPrompt);
            Console.WriteLine($"AI matched product name for {documentProduct.name}: {aiAnswer}");

            var aiMatchedProductName = CleanProductName(aiAnswer);
            if (!IsAiMatch(aiMatchedProductName))
            {
                return null;
            }

            var matchingShippingItem = allShippingItems.FirstOrDefault(x =>
                string.Equals(x.NameOnDocument, aiMatchedProductName, StringComparison.OrdinalIgnoreCase));
            if (matchingShippingItem?.ProductDto == null)
            {
                return null;
            }

            Console.WriteLine($"AI Matched product from historical: {matchingShippingItem.ProductDto.Name}");
            return CreateShippingItem(matchingShippingItem.ProductDto, documentProduct);
        }

        /// <summary>
        /// Step 3: asks the AI to pick the best match from a hybrid catalog slice: the 50 products with
        /// the highest ids plus up to 30 products whose name contains the first (up to) four characters
        /// of the product name.
        /// </summary>
        private async Task<ShippingItem> MatchAgainstCatalogWithAiAsync(List<ProductDto> allProducts, ProductReference documentProduct)
        {
            // Highest ids first; Id stands in for creation date.
            var recentProducts = allProducts
                .OrderByDescending(p => p.Id)
                .Take(50)
                .ToList();

            var namePrefix = documentProduct.name.Substring(0, Math.Min(4, documentProduct.name.Length));
            var fuzzyMatches = allProducts
                .Where(p => p.Name != null && p.Name.Contains(namePrefix, StringComparison.OrdinalIgnoreCase))
                .Take(30)
                .ToList();

            // Combine and deduplicate by id, keeping first-seen order
            var combinedProducts = recentProducts
                .Concat(fuzzyMatches)
                .DistinctBy(p => p.Id)
                .Take(100)
                .ToList();
            Console.WriteLine($"Hybrid search: {combinedProducts.Count} products ({recentProducts.Count} recent + {fuzzyMatches.Count} fuzzy matched) for: {documentProduct.name}");

            var userPrompt = "PRODUCT CATALOG (recent products + similar names):\n" +
                             string.Join("\n", combinedProducts.Select(p => $"- {p.Name}")) + "\n\n" +
                             "---\n\n" +
                             "PRODUCT NAME FROM DOCUMENT:\n" + documentProduct.name + "\n\n" +
                             "Return the best matching product name from the catalog above that matches ALL details (size, grade, quality), or 'NONE' if no confident match exists.";

            var aiAnswer = await _aiApiService.GetSimpleResponseAsync(CatalogMatchSystemPrompt, userPrompt);
            Console.WriteLine($"AI matched product name from hybrid catalog for {documentProduct.name}: {aiAnswer}");

            var aiMatchedProductName = CleanProductName(aiAnswer);
            if (!IsAiMatch(aiMatchedProductName))
            {
                return null;
            }

            var matchingProduct = combinedProducts.FirstOrDefault(x =>
                string.Equals(x.Name, aiMatchedProductName, StringComparison.OrdinalIgnoreCase));
            if (matchingProduct == null)
            {
                return null;
            }

            Console.WriteLine($"AI Matched product from hybrid catalog: {matchingProduct.Name}");
            return CreateShippingItem(matchingProduct, documentProduct);
        }

        /// <summary>True when a cleaned AI answer names a product, i.e. it is neither empty nor the no-match token.</summary>
        private static bool IsAiMatch(string cleanedAiAnswer)
        {
            return !string.IsNullOrEmpty(cleanedAiAnswer) &&
                   !cleanedAiAnswer.Equals(NoMatchToken, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>Builds the shipping item for a document product that was matched to a catalog product.</summary>
        private static ShippingItem CreateShippingItem(ProductDto productDto, ProductReference deserializedProduct)
        {
            return new ShippingItem
            {
                Name = productDto.Name,
                HungarianName = productDto.Name,
                PalletsOnDocument = 1,
                QuantityOnDocument = deserializedProduct.quantity ?? 0,
                NetWeightOnDocument = deserializedProduct.netWeight ?? 0,
                GrossWeightOnDocument = deserializedProduct.grossWeight ?? 0,
                ProductId = productDto.Id,
                NameOnDocument = deserializedProduct.name,
                IsMeasurable = productDto.IsMeasurable,
                UnitPriceOnDocument = deserializedProduct.unitCost
            };
        }

        /// <summary>Builds the placeholder shipping item for a document product that matched nothing.</summary>
        private static ShippingItem CreateUnmatchedShippingItem(ProductReference deserializedProduct)
        {
            return new ShippingItem
            {
                Name = "",
                HungarianName = "",
                PalletsOnDocument = 1,
                IsMeasurable = false,
                QuantityOnDocument = deserializedProduct.quantity ?? 0,
                NetWeightOnDocument = deserializedProduct.netWeight ?? 0,
                GrossWeightOnDocument = deserializedProduct.grossWeight ?? 0,
                ProductId = null,
                NameOnDocument = deserializedProduct.name
            };
        }

        /// <summary>
        /// Cleans and normalizes a product name returned from the AI: strips known lead-in phrases,
        /// surrounding quotes and trailing punctuation.
        /// </summary>
        /// <returns>The cleaned name, or an empty string for null/blank input.</returns>
        private static string CleanProductName(string rawProductName)
        {
            if (string.IsNullOrWhiteSpace(rawProductName))
            {
                return string.Empty;
            }

            var cleaned = rawProductName.Trim();

            // Remove common prefixes that AI might add. The loop deliberately keeps going after a hit
            // so stacked lead-ins such as "Best match: - NAME" are removed one after another.
            var prefixesToRemove = new[] { "Product name:", "Match:", "Best match:", "The product is", "Answer:", "-" };
            foreach (var prefix in prefixesToRemove)
            {
                if (cleaned.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    cleaned = cleaned.Substring(prefix.Length).Trim();
                }
            }

            // Remove quotes if present
            cleaned = cleaned.Trim('\"', '\'', '«', '»');

            // Remove trailing punctuation
            cleaned = cleaned.TrimEnd('.', ',', ';', ':');

            return cleaned;
        }

        /// <summary>
        /// Sends the extracted text to the AI with a prompt asking for a full Partner / ShippingDocument /
        /// ShippingItem JSON hierarchy and returns the raw answer.
        /// </summary>
        private async Task<string> TestFullResult(string extractedText)
        {
            string fullResultPrompt = $"Role:\r\nYou are an AI data extraction assistant for Fruitbank, a " +
                $"fruit and vegetable wholesale company. Your task is to analyze a " +
                $"provided text (delivery notes, invoices, or order confirmations) and extract structured information about " +
                $"the shipment and its items.\r\n\r\n🎯 Goal:\r\nRead the provided text and extract all shipment " +
                $"details and items according to the data model below.\r\n Generate the complete JSON output following this " +
                $"structure.\r\n\r\n🧩 Data Models:\r\n\r\npublic " +
                $"class Partner\r\n{{\r\n " +
                $"/// \r\n /// Partner entity primary key\r\n /// \r\n " +
                $"public int Id {{ get; set; }}\r\n " +
                $"/// \r\n /// Partner company name\r\n /// \r\n " +
                $"public string Name {{ get; set; }}\r\n " +
                $"/// \r\n /// Partner company TaxId\r\n /// \r\n " +
                $"public string TaxId {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company Certification if exists\r\n /// \r\n " +
                $"public string CertificationNumber {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address PostalCode\r\n /// \r\n " +
                $"public string PostalCode {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address Country\r\n /// \r\n " +
                $"public string Country {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address State if exists\r\n /// \r\n " +
                $"public string State {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address County if exists\r\n /// \r\n " +
                $"public string County {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address City\r\n /// \r\n " +
                $"public string City {{ get; set; }}\r\n /// \r\n " +
                $"/// Partner company address Street\r\n /// \r\n " +
                $"public string Street {{ get; set; }}\r\n\t/// \r\n " +
                $"/// Entities of ShippingDocument\r\n /// \r\n\tpublic List " +
                $"ShippingDocuments {{ get; set; }}\t\r\n}}\r\n\r\npublic class ShippingDocument\r\n{{\r\n " +
                $"/// \r\n /// ShippingItem entity primary key\r\n /// \r\n " +
                $"public int Id {{ get; set; }}\r\n /// \r\n /// Partner entity primary key\r\n " +
                $"/// \r\n public int PartnerId {{ get; set; }}\t\r\n\t/// \r\n " +
                $"/// Entities of ShippingItem\r\n /// \r\n\t" +
                $"public List ShippingItems {{ get; set; }}\r\n /// \r\n " +
                $"/// DocumentIdNumber if exists\r\n /// \r\n public string DocumentIdNumber {{ get; set; }}\r\n " +
                $"/// \r\n /// \r\n /// \r\n public DateTime ShippingDate {{ get; set; }}\r\n " +
                $"/// \r\n /// Shipping pickup Contry of origin\r\n /// \r\n " +
                $"public string Country {{ get; set; }}\r\n\t/// \r\n /// Sum of ShippingItem pallets\r\n " +
                $"/// \r\n public int TotalPallets {{ get; set; }}\r\n\t/// \r\n " +
                $"/// Filename of pdf\r\n /// \r\n\tpublic string PdfFileName {{ get; set; }}\r\n}}\r\n\r\n" +
                $"public class ShippingItem\r\n{{\r\n /// \r\n /// ShippingItem entity primary key\r\n /// " +
                $"\r\n public int Id {{ get; set; }}\r\n /// \r\n /// " +
                $"ShippingDocument entity primary key\r\n /// \r\n " +
                $"public int ShippingDocumentId {{ get; set; }}\r\n /// " +
                $"\r\n /// Name of the fruit or vegitable\r\n /// \r\n " +
                $"public string Name {{ get; set; }}\r\n\t/// \r\n /// Translated Name to Hungarian\r\n " +
                $"/// \r\n public string HungarianName {{ get; set; }}\r\n /// \r\n " +
                $"/// Pallets of fruit or vegitable item\r\n /// \r\n " +
                $"public int PalletsOnDocument {{ get; set; }}\r\n /// \r\n " +
                $"/// Quantity of fruit or vegitable item\r\n /// \r\n " +
                $"public int QuantityOnDocument {{ get; set; }}\r\n /// \r\n " +
                $"/// Net weight in kg. of fruit or vegitable item\r\n /// \r\n " +
                $"public double NetWeightOnDocument {{ get; set; }}\r\n /// \r\n " +
                $"/// Gross weight in kg. of fruit or vegitable item\r\n /// \r\n " +
                $"public double GrossWeightOnDocument {{ get; set; }}\r\n}}\r\n\r\n🧾 Output Requirements\r\n- " +
                $"Output must be a single valid JSON object containing:\r\n- One Partner object\r\n- " +
                $"One ShippingDocument object\r\n- A list of all related ShippingItem objects\r\n\r\n- " +
                $"Primary keys (Partner.Id, ShippingDocument.Id, ShippingItem.Id) should be auto-generated integers " +
                $"(e.g. sequential: 1, 2, 3…).\r\n\r\n- When a field is missing or unclear, return it as an empty " +
                $"string or 0 (depending on type).\r\nDo not omit any fields.\r\n\r\n- " +
                $"All dates must be in ISO 8601 format (yyyy-MM-dd).\r\n\r\n🧭 Instructions to the AI\r\n" +
                $"1. Analyze the provided text carefully.\r\n" +
                $"2. Identify the Partner/Company details of THE OTHER PARTY (other than Fruitbank), " +
                $"document identifiers, and each shipment item.\r\n" +
                $"3. FruitBank is not a partner! Always look for THE OTHER partner on the document. \r\n " +
                $"4. Generate a complete hierarchical JSON of ALL received documents in ONE JSON structure according to the " +
                $"data model above.\r\n5. Do not include any explanations or text outside the JSON output. " +
                $"Only return the structured JSON.\r\n" +
                $"6. A teljes ShippingItem.Name-et tedd bele a ShippingItem.HungarianName-be " +
                $"és a zöldség vagy gyümölcs nevét fordítsd le magyarra!\r\n" +
                $"7. A ShippingDocument-et tedd bele a Partner entitásba!\r\n" +
                $"8. ShippingItem-eket tedd bele a ShippingDocument-be!\r\n" +
                $"9. Do not assume or modify any data, if you don't find a value, return null, if you find a value, keep it unmodified.\r\n" +
                $"10. Magyarázat nélkül válaszolj!";

            var fullresult = await _aiApiService.GetSimpleResponseAsync(fullResultPrompt, extractedText);
            return fullresult;
        }

        /// <summary>
        /// Resolves a partner name to a partner id: first by comparing normalised names, then by asking
        /// the AI to choose an id from the partner list.
        /// </summary>
        /// <param name="partnerAnalysis">Partner name as extracted from the document.</param>
        /// <returns>The partner id, or <c>null</c> when no partner could be determined.</returns>
        private async Task<int?> DeterminePartner(string partnerAnalysis)
        {
            // Clean the input first
            partnerAnalysis = CleanPartnerName(partnerAnalysis);

            if (string.IsNullOrWhiteSpace(partnerAnalysis))
            {
                Console.WriteLine("Partner analysis is empty after cleaning.");
                return null;
            }

            var possiblePartners = await _dbContext.Partners.GetAll().ToListAsync();

            // STEP 1: exact match (no AI call). CleanPartnerName removes trailing punctuation, so the
            // stored name is normalised the same way; otherwise "X B.V." could never equal "X B.V".
            var wantedName = NormalizePartnerName(partnerAnalysis);
            var exactMatch = possiblePartners.FirstOrDefault(p =>
                string.Equals(NormalizePartnerName(p.Name), wantedName, StringComparison.OrdinalIgnoreCase));
            if (exactMatch != null)
            {
                Console.WriteLine($"✓ Exact partner match found: {exactMatch.Name} (ID: {exactMatch.Id})");
                return exactMatch.Id;
            }

            Console.WriteLine($"No exact match found for '{partnerAnalysis}'. Trying AI matching...");

            // STEP 2: let the AI choose an id from the list (handles fuzzy matching)
            var partnerListWithIds = string.Join("\n", possiblePartners.Select(p => $"ID: {p.Id} | Name: {p.Name}"));
            var userPrompt = "PARTNER DATABASE:\n" + partnerListWithIds + "\n\n" +
                             "---\n\n" +
                             "PARTNER TO MATCH:\n" + partnerAnalysis + "\n\n" +
                             "Return ONLY the numeric ID of the matching partner, or '0' if no match found.";

            var aiResponse = await _aiApiService.GetSimpleResponseAsync(PartnerMatchSystemPrompt, userPrompt);
            Console.WriteLine($"AI Partner Match Response: {aiResponse}");

            // The prompt shows the id in quotes ('0'), so tolerate a quoted answer.
            var idText = aiResponse?.Trim().Trim('\'', '"');
            if (!int.TryParse(idText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var partnerId))
            {
                Console.WriteLine($"⚠ AI returned non-numeric response: {aiResponse}");
                return null;
            }

            if (partnerId == 0)
            {
                Console.WriteLine("AI found no matching partner.");
                return null;
            }

            // Verify the ID exists in our list
            var matchedPartner = possiblePartners.FirstOrDefault(p => p.Id == partnerId);
            if (matchedPartner == null)
            {
                Console.WriteLine($"⚠ AI returned invalid partner ID: {partnerId}");
                return null;
            }

            Console.WriteLine($"✓ AI matched partner: {matchedPartner.Name} (ID: {matchedPartner.Id})");
            return partnerId;
        }

        /// <summary>Trims whitespace and trailing punctuation so stored and extracted names compare equal.</summary>
        private static string NormalizePartnerName(string name)
        {
            return name?.Trim().TrimEnd(TrailingNamePunctuation).Trim();
        }

        /// <summary>
        /// Asks the AI for the supplier/sender company name found in the extracted document text.
        /// </summary>
        /// <returns>The cleaned partner name; empty when the AI gave no usable answer.</returns>
        private async Task<string> ExtractPartnerName(string extractedText)
        {
            var availablePartners = await _dbContext.Partners.GetAll().ToListAsync();
            Console.WriteLine($"Available partners count: {availablePartners.Count}");

            var userPrompt = "DOCUMENT TEXT:\n" + extractedText + "\n\n" +
                             "---\n\n" +
                             "INSTRUCTIONS:\n" +
                             "1. Identify the SENDER/SUPPLIER company name\n" +
                             "2. Ignore FruitBank (Gyümölcsbank) - that's the receiver\n" +
                             "3. Ignore transport companies (DHL, Transporeon, etc.)\n" +
                             "4. Return ONLY the company name, nothing else\n\n" +
                             "If uncertain, return the most prominent non-FruitBank company name from the document.";

            var partnerAnalysis = await _aiApiService.GetSimpleResponseAsync(PartnerExtractionSystemPrompt, userPrompt);

            // Clean up the response
            var cleanedPartnerName = CleanPartnerName(partnerAnalysis);
            Console.WriteLine($"Partner analysis Result: {cleanedPartnerName}");
            return cleanedPartnerName;
        }

        /// <summary>
        /// Cleans and normalizes a partner name from an AI response: strips one known lead-in phrase,
        /// surrounding quotes, trailing punctuation and a trailing tax ID.
        /// </summary>
        /// <returns>
        /// The cleaned name, or an empty string when the input is blank, is a "no answer" marker
        /// (NONE, N/A, NOT FOUND), or names FruitBank itself.
        /// </returns>
        private static string CleanPartnerName(string rawPartnerName)
        {
            if (string.IsNullOrWhiteSpace(rawPartnerName))
            {
                return string.Empty;
            }

            var cleaned = rawPartnerName.Trim();

            // Remove common prefixes that AI might add (first hit only)
            var prefixesToRemove = new[]
            {
                "Company name:", "Sender:", "Supplier:", "Partner:",
                "The partner is", "The company is", "Feladó:", "Expediteur:"
            };
            foreach (var prefix in prefixesToRemove)
            {
                if (cleaned.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    cleaned = cleaned.Substring(prefix.Length).Trim();
                    break;
                }
            }

            // Remove quotes if present
            cleaned = cleaned.Trim('\"', '\'', '«', '»');

            // Remove trailing punctuation
            cleaned = cleaned.TrimEnd(TrailingNamePunctuation);

            // Remove tax IDs that might have slipped through
            cleaned = TaxIdSuffixPattern.Replace(cleaned, string.Empty).Trim();

            // If AI returned 'NONE' or similar, return empty
            if (cleaned.Equals("NONE", StringComparison.OrdinalIgnoreCase) ||
                cleaned.Equals("N/A", StringComparison.OrdinalIgnoreCase) ||
                cleaned.Equals("NOT FOUND", StringComparison.OrdinalIgnoreCase))
            {
                return string.Empty;
            }

            // Check if accidentally returned FruitBank
            if (cleaned.Contains("FruitBank", StringComparison.OrdinalIgnoreCase) ||
                cleaned.Contains("Gyümölcsbank", StringComparison.OrdinalIgnoreCase))
            {
                Console.WriteLine($"WARNING: AI returned FruitBank as partner. Returning empty.");
                return string.Empty;
            }

            return cleaned;
        }
    }

    /// <summary>
    /// One product line as extracted from a document. Member names are lower-case and are part of
    /// the public surface of this type, so they are kept as-is.
    /// </summary>
    public class ProductReference
    {
        public string? name { get; set; }
        public int? quantity { get; set; }
        public double? netWeight { get; set; }
        public double? grossWeight { get; set; }
        public int? productId { get; set; }
        public double unitCost { get; set; }
    }

    /// <summary>
    /// Wrapper around the list of products extracted from a document.
    /// </summary>
    public class ProductReferenceResponse
    {
        public List<ProductReference> products { get; set; }
    }
}