908 lines
50 KiB
C#
908 lines
50 KiB
C#
using FruitBank.Common.Dtos;
|
|
using FruitBank.Common.Entities;
|
|
using Microsoft.AspNetCore.Http;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using Nop.Core.Domain.Catalog;
|
|
using Nop.Plugin.Misc.FruitBankPlugin.Domains.DataLayer;
|
|
using Nop.Plugin.Misc.FruitBankPlugin.Helpers;
|
|
using Nop.Plugin.Misc.FruitBankPlugin.Services;
|
|
using Nop.Services.Catalog;
|
|
using Nop.Services.Security;
|
|
using Nop.Web.Framework;
|
|
using Nop.Web.Framework.Controllers;
|
|
using Nop.Web.Framework.Mvc.Filters;
|
|
using PDFtoImage;
|
|
using SkiaSharp;
|
|
|
|
|
|
namespace Nop.Plugin.Misc.FruitBank.Controllers
|
|
{
|
|
[AuthorizeAdmin]
|
|
[Area(AreaNames.ADMIN)]
|
|
[AutoValidateAntiforgeryToken]
|
|
public class FileManagerController : BasePluginController
|
|
{
|
|
private readonly IPermissionService _permissionService;
|
|
private readonly OpenAIApiService _aiApiService;
|
|
private readonly AICalculationService _aiCalculationService;
|
|
private readonly IProductService _productService;
|
|
private readonly FruitBankDbContext _dbContext;
|
|
private readonly PdfToImageService _pdfToImageService;
|
|
|
|
/// <summary>
/// Creates the controller and stores the injected services it works with.
/// </summary>
/// <param name="permissionService">Service used to authorize admin panel access.</param>
/// <param name="aiApiService">OpenAI client used for text extraction and name matching.</param>
/// <param name="aiCalculationService">AI calculation service kept for use by the controller.</param>
/// <param name="productService">Catalog product service kept for use by the controller.</param>
/// <param name="fruitBankDbContext">Data context giving access to partners, products and shipping items.</param>
/// <param name="pdfToImageService">Service that converts uploaded PDF files to JPG images.</param>
public FileManagerController(
    IPermissionService permissionService,
    OpenAIApiService aiApiService,
    AICalculationService aiCalculationService,
    IProductService productService,
    FruitBankDbContext fruitBankDbContext,
    PdfToImageService pdfToImageService)
{
    // Security and catalog services
    this._permissionService = permissionService;
    this._productService = productService;

    // AI-backed services
    this._aiApiService = aiApiService;
    this._aiCalculationService = aiCalculationService;

    // Persistence and file conversion
    this._dbContext = fruitBankDbContext;
    this._pdfToImageService = pdfToImageService;
}
|
|
|
|
/// <summary>
/// Displays the image text extraction page.
/// </summary>
/// <returns>
/// The extraction view when the current user may access the admin panel,
/// otherwise the access-denied view.
/// </returns>
public async Task<IActionResult> ImageTextExtraction()
{
    var mayAccessAdminPanel = await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL);

    if (mayAccessAdminPanel)
    {
        return View("~/Plugins/Misc.FruitBankPlugin/Areas/Admin/Views/Extras/ImageTextExtraction.cshtml");
    }

    return AccessDeniedView();
}
|
|
|
|
/// <summary>
/// Extracts text from an uploaded image or PDF and maps it onto a shipping document preview.
/// </summary>
/// <param name="imageFile">
/// Uploaded JPG, PNG, GIF, WebP or PDF file. For a PDF only the first converted page is analysed.
/// </param>
/// <param name="customPrompt">Optional prompt passed through to the text extraction service.</param>
/// <returns>
/// JSON with a <c>success</c> flag and a <c>message</c>; on success it also carries the recognised
/// shipping document, the stored file name and path, the upload size and whether a PDF was converted.
/// </returns>
[HttpPost]
public async Task<IActionResult> ExtractTextFromImage(IFormFile imageFile, string customPrompt = null)
{
    if (!await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL))
    {
        return Json(new { success = false, message = "Access denied" });
    }

    if (imageFile == null || imageFile.Length == 0)
    {
        return Json(new { success = false, message = "No file received" });
    }

    // Validate file type - images and PDF. A missing file name is treated as an unknown type.
    var extension = Path.GetExtension(imageFile.FileName)?.ToLowerInvariant() ?? string.Empty;
    var allowedExtensions = new[] { ".jpg", ".jpeg", ".png", ".gif", ".webp", ".pdf" };
    if (!allowedExtensions.Contains(extension))
    {
        return Json(new { success = false, message = "Invalid file type. Please upload JPG, PNG, GIF, WebP, or PDF." });
    }

    var isPdf = extension == ".pdf";

    ShippingDocument shippingDocument = new ShippingDocument();
    shippingDocument.ShippingItems = new List<ShippingItem>();

    try
    {
        // Define the uploads folder (CreateDirectory is a no-op when it already exists)
        var uploadsFolder = Path.Combine(Directory.GetCurrentDirectory(), "wwwroot", "uploads", "ocr");
        Directory.CreateDirectory(uploadsFolder);

        // The timestamp alone only has one-second resolution, so two uploads in the same
        // second would overwrite each other; the GUID suffix keeps the names unique.
        var uploadStamp = $"{DateTime.Now:yyyyMMdd_HHmmss}_{Guid.NewGuid():N}";

        string processedFilePath;
        string processedFileName;

        if (isPdf)
        {
            // Save the PDF temporarily and convert it to JPG using our service
            var tempPdfPath = Path.Combine(uploadsFolder, $"temp_pdf_{uploadStamp}.pdf");

            try
            {
                using (var stream = new FileStream(tempPdfPath, FileMode.Create))
                {
                    await imageFile.CopyToAsync(stream);
                }

                var convertedImages = await _pdfToImageService.ConvertPdfToJpgAsync(tempPdfPath, uploadsFolder);

                if (convertedImages == null || convertedImages.Count == 0)
                {
                    return Json(new { success = false, message = "Failed to convert PDF or PDF is empty" });
                }

                // Use the first page
                processedFilePath = convertedImages[0];
                processedFileName = Path.GetFileName(processedFilePath);
            }
            finally
            {
                // Clean up the temp PDF on every path, including a failing conversion
                if (System.IO.File.Exists(tempPdfPath))
                {
                    System.IO.File.Delete(tempPdfPath);
                }
            }
        }
        else
        {
            // Handle regular image files
            processedFileName = $"ocr_image_{uploadStamp}{extension}";
            processedFilePath = Path.Combine(uploadsFolder, processedFileName);

            using (var stream = new FileStream(processedFilePath, FileMode.Create))
            {
                await imageFile.CopyToAsync(stream);
            }
        }

        // Extract text from the processed image using OpenAI Vision API
        string extractedText;
        using (var imageStream = new FileStream(processedFilePath, FileMode.Open, FileAccess.Read))
        {
            extractedText = await _aiApiService.ExtractTextFromImageAsync(
                imageStream,
                processedFileName,
                customPrompt);
        }

        if (string.IsNullOrEmpty(extractedText))
        {
            return Json(new
            {
                success = false,
                message = "Failed to extract text. The API may have returned an empty response."
            });
        }

        var result = TextHelper.FixJsonWithoutAI(extractedText);

        var options = new System.Text.Json.JsonSerializerOptions
        {
            PropertyNameCaseInsensitive = true, // Handles camelCase/PascalCase mismatches
            IncludeFields = true // Allows deserializing fields as well as properties
        };

        OpenaiImageResponse deserializedContent = null;
        try
        {
            deserializedContent = System.Text.Json.JsonSerializer.Deserialize<OpenaiImageResponse>(result, options);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Error deserializing extracted text: {ex}");
            Console.Error.WriteLine($"JSON content: {result}");
        }

        // Everything below reads extractedData, so stop with a clear message instead of
        // running into a NullReferenceException when the response could not be parsed.
        var extractedData = deserializedContent?.extractedData;
        if (extractedData == null)
        {
            Console.Error.WriteLine($"Deserialization returned null. JSON was: {result}");
            return Json(new
            {
                success = false,
                message = "Failed to parse the extracted data returned by the AI service."
            });
        }

        Console.WriteLine($"Document number analysis Result: {extractedData.documentId}");
        shippingDocument.DocumentIdNumber = extractedData.documentId;

        string partnerAnalysis = await ExtractPartnerName(extractedText);

        // DeterminePartner reports "no match" as 0, never as null
        int partnerId = await DeterminePartner(partnerAnalysis);
        if (partnerId > 0)
        {
            shippingDocument.PartnerId = partnerId;
            Console.WriteLine($"Determined Partner ID: {partnerId}");
        }
        else
        {
            Console.WriteLine("No matching partner found in the database.");
        }

        Console.WriteLine($"Product analysis Result: {extractedData.products}");

        // Identify products from database
        var allProducts = await _dbContext.ProductDtos.GetAll(true).ToListAsync();
        var historicalProducts = await _dbContext.ShippingItems.GetAll().ToListAsync();

        // Wrap the extracted products; a document without a product list is treated as empty
        ProductReferenceResponse deserializedProducts = new ProductReferenceResponse();
        deserializedProducts.products = extractedData.products;
        if (deserializedProducts.products == null)
        {
            deserializedProducts.products = new List<ProductReference>();
        }

        Console.WriteLine($"Serialized Products: {deserializedProducts.products.Count}");

        List<ShippingItem> matchedProducts = await DetermineProducts(allProducts, historicalProducts, deserializedProducts);
        shippingDocument.ShippingItems = matchedProducts;

        if (matchedProducts.Count > 0)
        {
            Console.WriteLine($"Matched Products Count: {matchedProducts.Count}");
            foreach (var matchedProduct in matchedProducts)
            {
                Console.WriteLine($"Matched Product: {matchedProduct.Name}");
            }
        }
        else
        {
            Console.WriteLine("No products matched from the database.");
        }

        shippingDocument.PdfFileName = processedFileName;
        shippingDocument.ShippingDocumentToFiles = new List<ShippingDocumentToFiles>();

        // Calculate total pallets from shipping items
        shippingDocument.TotalPallets = shippingDocument.ShippingItems?.Sum(item => item.PalletsOnDocument) ?? 0;

        return Json(new
        {
            success = true,
            message = isPdf
                ? "PDF converted and text extracted successfully"
                : "Text extracted successfully",
            shippingDocument = new
            {
                documentIdNumber = shippingDocument.DocumentIdNumber,
                partnerId = shippingDocument.PartnerId,
                pdfFileName = shippingDocument.PdfFileName,
                totalPallets = shippingDocument.TotalPallets,
                shippingItems = shippingDocument.ShippingItems?.Select(item => new
                {
                    name = item.Name,
                    hungarianName = item.HungarianName,
                    nameOnDocument = item.NameOnDocument,
                    productId = item.ProductId,
                    palletsOnDocument = item.PalletsOnDocument,
                    quantityOnDocument = item.QuantityOnDocument,
                    netWeightOnDocument = item.NetWeightOnDocument,
                    grossWeightOnDocument = item.GrossWeightOnDocument,
                    isMeasurable = item.IsMeasurable
                }).ToList(),
                extractedText = extractedData.fullText
            },
            fileName = processedFileName,
            filePath = processedFilePath,
            fileSize = imageFile.Length,
            wasConverted = isPdf
        });
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine($"Error in ExtractTextFromImage: {ex}");
        return Json(new
        {
            success = false,
            message = $"Error processing file: {ex.Message}"
        });
    }
}
|
|
|
|
/// <summary>
/// Maps every product read from a document to a catalog product and returns one shipping item per product.
/// </summary>
/// <remarks>
/// Matching is attempted in this order: exact match on a historical document name, AI match against
/// historical names sharing the first characters, then AI match against a catalog made of the 50
/// highest-Id products plus products whose name contains the first characters of the document name.
/// Products that cannot be matched are returned with a null <c>ProductId</c>.
/// </remarks>
/// <param name="allProducts">Catalog products used for lookups and for the hybrid AI catalog.</param>
/// <param name="historicalProducts">Previously stored shipping items used for name-based matching.</param>
/// <param name="deserializedProducts">Products extracted from the document.</param>
/// <returns>One shipping item per extracted product, matched or unmatched.</returns>
private async Task<List<ShippingItem>> DetermineProducts(List<ProductDto> allProducts, List<ShippingItem> historicalProducts, ProductReferenceResponse deserializedProducts)
{
    List<ShippingItem> finalMatchedProducts = new List<ShippingItem>();

    // Nothing was extracted: return an empty result instead of failing in the loop below
    if (deserializedProducts?.products == null)
    {
        Console.WriteLine("Total matched products: 0");
        Console.WriteLine("Total unmatched products: 0");
        return finalMatchedProducts;
    }

    // Load all shipping items once
    var allShippingItems = await _dbContext.ShippingItems.GetAll(true).ToListAsync();

    // The system prompts do not depend on the product, so they are built once
    var systemPrompt = "You are a product name matching specialist for FruitBank, a wholesale fruit and vegetable company.\n\n" +
        "Your task: Match a product name from a shipping document to the most similar product in our historical catalog.\n\n" +
        "MATCHING RULES:\n" +
        "1. Match based on ALL details including:\n" +
        " - Product type (apples, bananas, oranges)\n" +
        " - Variety (Golden Delicious, Cavendish, Valencia)\n" +
        " - Quality grade (Class I, Class II, Extra, Premium)\n" +
        " - Size markers (60+, 70+, 80+, Large, Small)\n" +
        " - Packaging type if mentioned (Carton, Box, Loose)\n" +
        "2. Consider language variations:\n" +
        " - Spanish: Manzanas = Apples, Plátanos = Bananas, Naranjas = Oranges\n" +
        " - Hungarian: Alma = Apples, Banán = Bananas, Narancs = Oranges\n" +
        " - Plural/singular: 'Bananas' = 'Banana'\n" +
        "3. Match as specifically as possible:\n" +
        " - 'APPLES CLASS I 70+' should match 'APPLES CLASS I 70+' (not just 'APPLES')\n" +
        " - 'ORANGES 60+' is different from 'ORANGES 70+'\n" +
        " - 'TOMATOES EXTRA' is different from 'TOMATOES CLASS I'\n" +
        "4. Abbreviations to recognize:\n" +
        " - 'GOLDEN DEL' = 'GOLDEN DELICIOUS'\n" +
        " - 'CAT I' = 'CLASS I' = 'CATEGORY I'\n" +
        " - 'CAT II' = 'CLASS II' = 'CATEGORY II'\n" +
        " - 'BIO' = 'ORGANIC'\n\n" +
        "OUTPUT:\n" +
        "Return ONLY the exact product name from the catalog that best matches ALL the details.\n" +
        "If no good match exists (less than 70% similarity including grade/size), return 'NONE'.\n\n" +
        "Examples:\n" +
        "Document: 'GOLDEN DEL APPLES CAT I 70+' | Catalog: ['GOLDEN DELICIOUS APPLES CLASS I 70+', 'GOLDEN DELICIOUS APPLES CLASS II 70+'] → GOLDEN DELICIOUS APPLES CLASS I 70+\n" +
        "Document: 'PLATANOS CAVENDISH 70+' | Catalog: ['BANANAS CAVENDISH 60+', 'BANANAS CAVENDISH 70+', 'BANANAS CAVENDISH 80+'] → BANANAS CAVENDISH 70+\n" +
        "Document: 'MANZANAS ROJAS EXTRA' | Catalog: ['RED APPLES CLASS I', 'RED APPLES EXTRA', 'RED APPLES CLASS II'] → RED APPLES EXTRA\n" +
        "Document: 'SWEET PEPPERS' | Catalog: ['TOMATOES', 'CUCUMBERS', 'CARROTS'] → NONE";

    var systemPrompt2 = "You are a product name matching specialist for FruitBank wholesale company.\n\n" +
        "Your task: Match a product name from a shipping document to our product catalog.\n\n" +
        "MATCHING RULES:\n" +
        "1. Match based on ALL product details:\n" +
        " - Product type and variety\n" +
        " - Quality grades: 'Extra', 'Class I', 'Class II', 'Premium', 'Category I/II'\n" +
        " - Size markers: '60+', '70+', '80+', 'Large', 'Small', 'Medium'\n" +
        " - Packaging: 'Carton', 'Box', 'Loose', 'Packed' (if it's part of product ID)\n" +
        " - Origin country: 'Spanish', 'Italian', 'Dutch', 'Turkish' (if tracked separately)\n" +
        "2. Language variations:\n" +
        " SPANISH → ENGLISH → HUNGARIAN\n" +
        " - Manzanas → Apples → Alma\n" +
        " - Plátanos → Bananas → Banán\n" +
        " - Naranjas → Oranges → Narancs\n" +
        " - Tomates → Tomatoes → Paradicsom\n" +
        " - Pimientos → Peppers → Paprika\n" +
        " - Uvas → Grapes → Szőlő\n" +
        " - Limones → Lemons → Citrom\n" +
        "3. Quality grade abbreviations:\n" +
        " - 'CAT I' / 'CAT. I' / 'CATEGORY I' = 'CLASS I'\n" +
        " - 'CAT II' / 'CAT. II' / 'CATEGORY II' = 'CLASS II'\n" +
        " - '1' = 'CLASS I', '2' = 'CLASS II'\n" +
        "4. Be specific:\n" +
        " - 'APPLES 70+' is NOT the same as 'APPLES 80+'\n" +
        " - 'TOMATOES CLASS I' is NOT the same as 'TOMATOES CLASS II'\n" +
        " - 'ORANGES SPANISH' may be different from 'ORANGES ITALIAN'\n\n" +
        "OUTPUT:\n" +
        "Return ONLY the exact product name from the catalog that matches ALL the details.\n" +
        "If no close match exists (below 70% similarity), return 'NONE'.\n\n" +
        "Examples:\n" +
        "Document: 'MANZANAS GOLDEN CAT I 70+' | Best match: 'GOLDEN DELICIOUS APPLES CLASS I 70+'\n" +
        "Document: 'BIO BANANEN 80+' | Best match: 'ORGANIC BANANAS 80+' (NOT just 'BANANAS')\n" +
        "Document: 'POMODORI CILIEGINI EXTRA' | Best match: 'CHERRY TOMATOES EXTRA' (NOT 'CHERRY TOMATOES CLASS I')\n" +
        "Document: 'NARANJAS 60+' | Best match: 'ORANGES 60+' (NOT 'ORANGES 70+')\n" +
        "Document: 'RARE EXOTIC FRUIT' | No match: 'NONE'";

    // Turns an AI answer into the names worth looking up. The trimmed raw answer comes first so
    // catalog names that legitimately end in punctuation still match; the cleaned form follows to
    // tolerate quotes or labels added by the model. An empty list means "no match".
    List<string> BuildCandidateNames(string aiAnswer)
    {
        var candidates = new List<string>();
        if (string.IsNullOrWhiteSpace(aiAnswer))
        {
            return candidates;
        }

        var trimmed = aiAnswer.Trim();
        var cleaned = CleanProductName(aiAnswer);

        if (string.IsNullOrEmpty(cleaned) || string.Equals(cleaned, "NONE", StringComparison.OrdinalIgnoreCase))
        {
            return candidates;
        }

        candidates.Add(trimmed);
        if (!string.Equals(cleaned, trimmed, StringComparison.Ordinal))
        {
            candidates.Add(cleaned);
        }

        return candidates;
    }

    // Recent products (50 highest Ids) are the same for every document line; built on first use
    List<ProductDto> recentProducts = null;

    foreach (var deserializedProduct in deserializedProducts.products)
    {
        if (deserializedProduct == null)
        {
            continue;
        }

        var documentName = deserializedProduct.name;
        ShippingItem matchedItem = null;

        // Step 1: Try exact historical match
        var historicalProduct = historicalProducts.FirstOrDefault(hp =>
            !string.IsNullOrEmpty(hp.NameOnDocument) &&
            hp.NameOnDocument.Equals(documentName, StringComparison.OrdinalIgnoreCase));

        if (historicalProduct != null)
        {
            Console.WriteLine($"Historical product found: {historicalProduct.Name}");
            var productDto = allProducts.FirstOrDefault(p => p.Id == historicalProduct.ProductId);

            if (productDto != null)
            {
                matchedItem = CreateShippingItem(productDto, deserializedProduct);
                Console.WriteLine($"Matched product from historical data: {productDto.Name}");
            }
        }

        // Steps 2 and 3 need a name to work with
        if (matchedItem == null && !string.IsNullOrEmpty(documentName))
        {
            // Step 2: AI matching against historical names sharing the first (up to 6) characters
            var historicalPrefix = documentName.Substring(0, Math.Min(6, documentName.Length));
            var similarNameProducts = historicalProducts
                .Where(p => !string.IsNullOrEmpty(p.NameOnDocument) &&
                            p.NameOnDocument.Contains(historicalPrefix, StringComparison.OrdinalIgnoreCase))
                .ToList();

            Console.WriteLine($"Similar products found for {documentName}: {similarNameProducts.Count}");

            if (similarNameProducts.Count > 0)
            {
                var userPrompt = "HISTORICAL PRODUCT CATALOG:\n" +
                    string.Join("\n", similarNameProducts.Select(p => $"- {p.NameOnDocument}")) + "\n\n" +
                    "---\n\n" +
                    "PRODUCT NAME FROM DOCUMENT:\n" +
                    documentName + "\n\n" +
                    "Return the best matching product name from the catalog above (matching ALL details including size/grade), or 'NONE' if no good match exists.";

                var aiMatchedProductName = await _aiApiService.GetSimpleResponseAsync(systemPrompt, userPrompt);
                Console.WriteLine($"AI matched product name for {documentName}: {aiMatchedProductName}");

                foreach (var candidate in BuildCandidateNames(aiMatchedProductName))
                {
                    // Only items that actually carry a product can produce a match
                    var matchingShippingItem = allShippingItems.FirstOrDefault(x =>
                        x.NameOnDocument != null &&
                        x.ProductDto != null &&
                        x.NameOnDocument.Trim().Equals(candidate, StringComparison.OrdinalIgnoreCase));

                    if (matchingShippingItem != null)
                    {
                        matchedItem = CreateShippingItem(matchingShippingItem.ProductDto, deserializedProduct);
                        Console.WriteLine($"AI Matched product from historical: {matchingShippingItem.ProductDto.Name}");
                        break;
                    }
                }
            }

            // Step 3: If still no match, try AI with a hybrid product catalog
            if (matchedItem == null)
            {
                // Use Id as proxy for creation date
                recentProducts ??= allProducts
                    .OrderByDescending(p => p.Id)
                    .Take(50)
                    .ToList();

                // Products whose name contains the first (up to 4) characters of the document name
                var fuzzyPrefix = documentName.Substring(0, Math.Min(4, documentName.Length));
                var fuzzyMatches = allProducts
                    .Where(p => p.Name != null &&
                                p.Name.Contains(fuzzyPrefix, StringComparison.OrdinalIgnoreCase))
                    .Take(30)
                    .ToList();

                // Combine and deduplicate by Id, keeping the first occurrence
                var combinedProducts = recentProducts
                    .Concat(fuzzyMatches)
                    .GroupBy(p => p.Id)
                    .Select(g => g.First())
                    .Take(100)
                    .ToList();

                Console.WriteLine($"Hybrid search: {combinedProducts.Count} products ({recentProducts.Count} recent + {fuzzyMatches.Count} fuzzy matched) for: {documentName}");

                var userPrompt2 = "PRODUCT CATALOG (recent products + similar names):\n" +
                    string.Join("\n", combinedProducts.Select(p => $"- {p.Name}")) + "\n\n" +
                    "---\n\n" +
                    "PRODUCT NAME FROM DOCUMENT:\n" +
                    documentName + "\n\n" +
                    "Return the best matching product name from the catalog above that matches ALL details (size, grade, quality), or 'NONE' if no confident match exists.";

                var aiMatchedProductName2 = await _aiApiService.GetSimpleResponseAsync(systemPrompt2, userPrompt2);
                Console.WriteLine($"AI matched product name from hybrid catalog for {documentName}: {aiMatchedProductName2}");

                foreach (var candidate in BuildCandidateNames(aiMatchedProductName2))
                {
                    var matchingProduct = combinedProducts.FirstOrDefault(x =>
                        string.Equals(x.Name?.Trim(), candidate, StringComparison.OrdinalIgnoreCase));

                    if (matchingProduct != null)
                    {
                        matchedItem = CreateShippingItem(matchingProduct, deserializedProduct);
                        Console.WriteLine($"AI Matched product from hybrid catalog: {matchingProduct.Name}");
                        break;
                    }
                }
            }
        }

        // Step 4: Add matched or unmatched item
        if (matchedItem != null)
        {
            finalMatchedProducts.Add(matchedItem);
        }
        else
        {
            // Create unmatched item
            finalMatchedProducts.Add(new ShippingItem
            {
                Name = "",
                HungarianName = "",
                PalletsOnDocument = 1,
                IsMeasurable = false,
                QuantityOnDocument = deserializedProduct.quantity ?? 0,
                NetWeightOnDocument = deserializedProduct.netWeight ?? 0,
                GrossWeightOnDocument = deserializedProduct.grossWeight ?? 0,
                ProductId = null,
                NameOnDocument = documentName
            });
            Console.WriteLine($"No match found for: {documentName}");
        }
    }

    Console.WriteLine($"Total matched products: {finalMatchedProducts.Count(x => x.ProductId != null)}");
    Console.WriteLine($"Total unmatched products: {finalMatchedProducts.Count(x => x.ProductId == null)}");

    return finalMatchedProducts;
}
|
|
|
|
/// <summary>
/// Builds a shipping item for a document line that was matched to a catalog product.
/// </summary>
/// <param name="productDto">Catalog product supplying the name, id and measurable flag.</param>
/// <param name="deserializedProduct">Document line supplying the quantities, weights, unit cost and original name.</param>
/// <returns>A new shipping item with one pallet; missing quantity and weights default to 0.</returns>
private ShippingItem CreateShippingItem(ProductDto productDto, ProductReference deserializedProduct)
{
    var shippingItem = new ShippingItem();

    // Values taken from the catalog product (the Hungarian name reuses the catalog name)
    shippingItem.Name = productDto.Name;
    shippingItem.HungarianName = productDto.Name;
    shippingItem.PalletsOnDocument = 1;

    // Values taken from the document line
    shippingItem.QuantityOnDocument = deserializedProduct.quantity ?? 0;
    shippingItem.NetWeightOnDocument = deserializedProduct.netWeight ?? 0;
    shippingItem.GrossWeightOnDocument = deserializedProduct.grossWeight ?? 0;

    shippingItem.ProductId = productDto.Id;
    shippingItem.NameOnDocument = deserializedProduct.name;
    shippingItem.IsMeasurable = productDto.IsMeasurable;
    shippingItem.UnitPriceOnDocument = deserializedProduct.unitCost;

    return shippingItem;
}
|
|
|
|
/// <summary>
/// Cleans and normalizes a product name returned from AI.
/// </summary>
/// <remarks>
/// Leading labels and list markers are removed repeatedly, so stacked decorations such as
/// <c>- Best match: NAME</c> are fully stripped. Surrounding quotes and trailing punctuation
/// are then removed in alternation until the text stops changing, so <c>"NAME".</c> and
/// <c>'NAME.'</c> both become <c>NAME</c>.
/// </remarks>
/// <param name="rawProductName">Raw AI answer; may be null or blank.</param>
/// <returns>The cleaned name, or an empty string when the input is null or whitespace.</returns>
private string CleanProductName(string rawProductName)
{
    if (string.IsNullOrWhiteSpace(rawProductName))
    {
        return string.Empty;
    }

    var cleaned = rawProductName.Trim();

    // Common prefixes that AI might add
    var prefixesToRemove = new[]
    {
        "Product name:",
        "Match:",
        "Best match:",
        "The product is",
        "Answer:",
        "-"
    };

    // Straight, curly and angle quotes (escaped so the file encoding cannot mangle them)
    var quoteChars = new[]
    {
        '"', '\'',
        '\u2018', '\u2019', // single curly quotes
        '\u201C', '\u201D', // double curly quotes
        '\u00AB', '\u00BB'  // angle quotes
    };

    var trailingPunctuation = new[] { '.', ',', ';', ':' };

    // Keep stripping until no prefix applies; a single pass would miss a prefix
    // that only becomes the start of the text after a later one was removed.
    bool strippedPrefix;
    do
    {
        strippedPrefix = false;
        foreach (var prefix in prefixesToRemove)
        {
            if (cleaned.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                cleaned = cleaned.Substring(prefix.Length).TrimStart();
                strippedPrefix = true;
            }
        }
    }
    while (strippedPrefix && cleaned.Length > 0);

    // Quotes and trailing punctuation can be nested in either order, so repeat until stable.
    int lengthBefore;
    do
    {
        lengthBefore = cleaned.Length;
        cleaned = cleaned.Trim(quoteChars).TrimEnd(trailingPunctuation).Trim();
    }
    while (cleaned.Length != lengthBefore);

    return cleaned;
}
|
|
|
|
/// <summary>
/// Sends the extracted document text to the AI service together with a prompt that asks for the
/// partner, shipping document and shipping items as one JSON structure.
/// </summary>
/// <param name="extractedText">Text previously extracted from the document; sent as the user message.</param>
/// <returns>The raw response of the AI service.</returns>
private async Task<string> TestFullResult(string extractedText)
{
    // System prompt: role, goal, the C# data model the JSON must follow, output rules and instructions.
    // Doubled braces are escapes of the interpolated string and produce single braces at runtime.
    string extractionInstructions = $"Role:\r\nYou are an AI data extraction assistant for Fruitbank, a " +
        $"fruit and vegetable wholesale company. Your task is to analyze a " +
        $"provided text (delivery notes, invoices, or order confirmations) and extract structured information about " +
        $"the shipment and its items.\r\n\r\n🎯 Goal:\r\nRead the provided text and extract all shipment " +
        $"details and items according to the data model below.\r\n Generate the complete JSON output following this " +
        $"structure.\r\n\r\n🧩 Data Models:\r\n\r\npublic " +
        $"class Partner\r\n{{\r\n " +
        $"/// <summary>\r\n /// Partner entity primary key\r\n /// </summary>\r\n " +
        $"public int Id {{ get; set; }}\r\n " +
        $"/// <summary>\r\n /// Partner company name\r\n /// </summary>\r\n " +
        $"public string Name {{ get; set; }}\r\n " +
        $"/// <summary>\r\n /// Partner company TaxId\r\n /// </summary>\r\n " +
        $"public string TaxId {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company Certification if exists\r\n /// </summary>\r\n " +
        $"public string CertificationNumber {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address PostalCode\r\n /// </summary>\r\n " +
        $"public string PostalCode {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address Country\r\n /// </summary>\r\n " +
        $"public string Country {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address State if exists\r\n /// </summary>\r\n " +
        $"public string State {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address County if exists\r\n /// </summary>\r\n " +
        $"public string County {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address City\r\n /// </summary>\r\n " +
        $"public string City {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Partner company address Street\r\n /// </summary>\r\n " +
        $"public string Street {{ get; set; }}\r\n\t/// <summary>\r\n " +
        $"/// Entities of ShippingDocument\r\n /// </summary>\r\n\tpublic List<ShippingDocument> " +
        $"ShippingDocuments {{ get; set; }}\t\r\n}}\r\n\r\npublic class ShippingDocument\r\n{{\r\n " +
        $"/// <summary>\r\n /// ShippingItem entity primary key\r\n /// </summary>\r\n " +
        $"public int Id {{ get; set; }}\r\n /// <summary>\r\n /// Partner entity primary key\r\n " +
        $"/// </summary>\r\n public int PartnerId {{ get; set; }}\t\r\n\t/// <summary>\r\n " +
        $"/// Entities of ShippingItem\r\n /// </summary>\r\n\t" +
        $"public List<ShippingItem> ShippingItems {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// DocumentIdNumber if exists\r\n /// </summary>\r\n public string DocumentIdNumber {{ get; set; }}\r\n " +
        $"/// <summary>\r\n /// \r\n /// </summary>\r\n public DateTime ShippingDate {{ get; set; }}\r\n " +
        $"/// <summary>\r\n /// Shipping pickup Contry of origin\r\n /// </summary>\r\n " +
        $"public string Country {{ get; set; }}\r\n\t/// <summary>\r\n /// Sum of ShippingItem pallets\r\n " +
        $"/// </summary>\r\n public int TotalPallets {{ get; set; }}\r\n\t/// <summary>\r\n " +
        $"/// Filename of pdf\r\n /// </summary>\r\n\tpublic string PdfFileName {{ get; set; }}\r\n}}\r\n\r\n" +
        $"public class ShippingItem\r\n{{\r\n /// <summary>\r\n /// ShippingItem entity primary key\r\n /// " +
        $"</summary>\r\n public int Id {{ get; set; }}\r\n /// <summary>\r\n /// " +
        $"ShippingDocument entity primary key\r\n /// </summary>\r\n " +
        $"public int ShippingDocumentId {{ get; set; }}\r\n /// " +
        $"<summary>\r\n /// Name of the fruit or vegitable\r\n /// </summary>\r\n " +
        $"public string Name {{ get; set; }}\r\n\t/// <summary>\r\n /// Translated Name to Hungarian\r\n " +
        $"/// </summary>\r\n public string HungarianName {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Pallets of fruit or vegitable item\r\n /// </summary>\r\n " +
        $"public int PalletsOnDocument {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Quantity of fruit or vegitable item\r\n /// </summary>\r\n " +
        $"public int QuantityOnDocument {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Net weight in kg. of fruit or vegitable item\r\n /// </summary>\r\n " +
        $"public double NetWeightOnDocument {{ get; set; }}\r\n /// <summary>\r\n " +
        $"/// Gross weight in kg. of fruit or vegitable item\r\n /// </summary>\r\n " +
        $"public double GrossWeightOnDocument {{ get; set; }}\r\n}}\r\n\r\n🧾 Output Requirements\r\n- " +
        $"Output must be a single valid JSON object containing:\r\n- One Partner object\r\n- " +
        $"One ShippingDocument object\r\n- A list of all related ShippingItem objects\r\n\r\n- " +
        $"Primary keys (Partner.Id, ShippingDocument.Id, ShippingItem.Id) should be auto-generated integers " +
        $"(e.g. sequential: 1, 2, 3…).\r\n\r\n- When a field is missing or unclear, return it as an empty " +
        $"string or 0 (depending on type).\r\nDo not omit any fields.\r\n\r\n- " +
        $"All dates must be in ISO 8601 format (yyyy-MM-dd).\r\n\r\n🧭 Instructions to the AI\r\n" +
        $"1. Analyze the provided text carefully.\r\n" +
        $"2. Identify the Partner/Company details of THE OTHER PARTY (other than Fruitbank), " +
        $"document identifiers, and each shipment item.\r\n" +
        $"3. FruitBank is not a partner! Always look for THE OTHER partner on the document. \r\n " +
        $"4. Generate a complete hierarchical JSON of ALL received documents in ONE JSON structure according to the " +
        $"data model above.\r\n5. Do not include any explanations or text outside the JSON output. " +
        $"Only return the structured JSON.\r\n" +
        $"6. A teljes ShippingItem.Name-et tedd bele a ShippingItem.HungarianName-be " +
        $"és a zöldség vagy gyümölcs nevét fordítsd le magyarra!\r\n" +
        $"7. A ShippingDocument-et tedd bele a Partner entitásba!\r\n" +
        $"8. ShippingItem-eket tedd bele a ShippingDocument-be!\r\n" +
        $"9. Do not assume or modify any data, if you don't find a value, return null, if you find a value, keep it unmodified.\r\n" +
        $"10. Magyarázat nélkül válaszolj!";

    return await _aiApiService.GetSimpleResponseAsync(extractionInstructions, extractedText);
}
|
|
|
|
/// <summary>
/// Resolves a partner name found on a document to the Id of a partner stored in the database.
/// </summary>
/// <remarks>
/// The name is cleaned first, then compared case-insensitively against all partner names.
/// When no exact match exists the AI service is asked to pick an Id from the partner list,
/// and that Id is only accepted when it belongs to a loaded partner.
/// </remarks>
/// <param name="partnerAnalysis">Partner name as extracted from the document; may be null or blank.</param>
/// <returns>The matching partner Id, or 0 when no partner could be determined.</returns>
private async Task<int> DeterminePartner(string partnerAnalysis)
{
    // Clean the input first; trimming once here keeps the comparison below allocation-free per partner
    partnerAnalysis = CleanPartnerName(partnerAnalysis)?.Trim();

    if (string.IsNullOrWhiteSpace(partnerAnalysis))
    {
        Console.WriteLine("Partner analysis is empty after cleaning.");
        return 0;
    }

    var possiblePartners = await _dbContext.Partners.GetAll().ToListAsync();

    // STEP 1: Try exact match first (fast, free, no AI needed!)
    // Partners without a name are skipped instead of breaking the lookup.
    var exactMatch = possiblePartners.FirstOrDefault(p =>
        p.Name != null &&
        p.Name.Trim().Equals(partnerAnalysis, StringComparison.OrdinalIgnoreCase));

    if (exactMatch != null)
    {
        Console.WriteLine($"✓ Exact partner match found: {exactMatch.Name} (ID: {exactMatch.Id})");
        return exactMatch.Id;
    }

    Console.WriteLine($"No exact match found for '{partnerAnalysis}'. Trying AI matching...");

    // STEP 2: No exact match? Use AI with IDs (handles fuzzy matching)
    var partnerListWithIds = string.Join("\n", possiblePartners.Select(p => $"ID: {p.Id} | Name: {p.Name}"));

    var systemPrompt = "You are a partner matching specialist for FruitBank.\n\n" +
        "Your task: Match a partner name to the correct partner from our database.\n\n" +
        "MATCHING RULES:\n" +
        "1. Ignore minor differences:\n" +
        " - Trailing/leading spaces\n" +
        " - Periods and punctuation\n" +
        " - Case differences (B.V. vs BV vs b.v.)\n" +
        " - Legal entity suffixes (B.V., S.L., S.R.L., Kft., Ltd.)\n" +
        "2. Match based on core company name\n" +
        "3. Be flexible with abbreviations\n\n" +
        "OUTPUT:\n" +
        "Return ONLY the numeric ID of the matching partner.\n" +
        "If no match found, return '0'.\n\n" +
        "Examples:\n" +
        "Input: 'SFI Rotterdam' | Database: 'ID: 42 | Name: SFI Rotterdam B.V.' → 42\n" +
        "Input: 'Frutas Sanchez SL' | Database: 'ID: 15 | Name: FRUTAS SÁNCHEZ S.L.' → 15\n" +
        "Input: 'Van den Berg' | Database: 'ID: 8 | Name: Van den Berg B.V.' → 8\n" +
        "Input: 'Unknown Company' | No match in database → 0";

    var userPrompt = "PARTNER DATABASE:\n" +
        partnerListWithIds + "\n\n" +
        "---\n\n" +
        "PARTNER TO MATCH:\n" +
        partnerAnalysis + "\n\n" +
        "Return ONLY the numeric ID of the matching partner, or '0' if no match found.";

    var aiResponse = await _aiApiService.GetSimpleResponseAsync(systemPrompt, userPrompt);
    Console.WriteLine($"AI Partner Match Response: {aiResponse}");

    // Parse the ID; a null response fails the parse and is reported as non-numeric
    if (!int.TryParse(aiResponse?.Trim(), out int partnerId))
    {
        Console.WriteLine($"⚠ AI returned non-numeric response: {aiResponse}");
        return 0;
    }

    if (partnerId == 0)
    {
        Console.WriteLine("AI found no matching partner.");
        return 0;
    }

    // Verify the ID exists in our list
    var matchedPartner = possiblePartners.FirstOrDefault(p => p.Id == partnerId);
    if (matchedPartner == null)
    {
        Console.WriteLine($"⚠ AI returned invalid partner ID: {partnerId}");
        return 0;
    }

    Console.WriteLine($"✓ AI matched partner: {matchedPartner.Name} (ID: {matchedPartner.Id})");
    return partnerId;
}
|
|
|
|
/// <summary>
/// Asks the AI service to identify the supplier/sender company named in the text extracted
/// from a shipping document, then normalizes the answer with <see cref="CleanPartnerName"/>.
/// </summary>
/// <param name="extractedText">Raw text extracted from the document; embedded verbatim in the user prompt.</param>
/// <returns>The cleaned partner name returned by <see cref="CleanPartnerName"/>.</returns>
private async Task<string> ExtractPartnerName(string extractedText)
{
    // The partner list is not part of either prompt below, so no per-partner string is built
    // here any more (the previous loop concatenated a list that was never used).
    // NOTE(review): the partners are now fetched only to log their count; consider a count
    // query, or dropping the log line, if this round trip is not wanted.
    var availablePartners = await _dbContext.Partners.GetAll().ToListAsync();
    Console.WriteLine($"Available partners count: {availablePartners.Count}");

    // System prompt: role, rules for telling the supplier apart from the receiver and the
    // carrier, document layout hints, and the expected output format with examples.
    var systemPrompt = "You are a specialized data extraction agent for FruitBank, a Hungarian fruit and vegetable wholesale company.\n\n" +
        "Your task: Extract the SUPPLIER/SENDER company name from shipping documents (CMR, delivery notes, invoices).\n\n" +
        "CRITICAL RULES:\n" +
        "1. FruitBank (Gyümölcsbank Kft.) is the RECEIVER - NEVER return FruitBank as the partner\n" +
        "2. Look for these indicators of the SUPPLIER:\n" +
        " - 'Sender' / 'Expediteur' / 'Feladó' / 'Absender' section\n" +
        " - 'From' / 'De' / 'Kitől' field\n" +
        " - Company name at TOP of document (usually sender)\n" +
        " - Tax ID / VAT number paired with company name\n" +
        " - EORI number holder (if present)\n" +
        "3. The supplier is typically:\n" +
        " - A farm, cooperative, or wholesaler\n" +
        " - Located in Spain, Italy, Netherlands, Poland, Germany, Greece, Turkey, or other EU countries\n" +
        " - NOT FruitBank and NOT the transport company\n\n" +
        "Document structure hints:\n" +
        "- CMR documents: Sender is box 1-2, Receiver is box 3-4\n" +
        "- Invoices: Look for 'Seller' / 'Eladó' / 'Vendedor' (NOT Buyer)\n" +
        "- Delivery notes: Sender/Origin section at top\n\n" +
        "OUTPUT FORMAT:\n" +
        "Return ONLY the exact company name as it appears in the document.\n" +
        "Do not include:\n" +
        "- Tax IDs\n" +
        "- Addresses\n" +
        "- Country codes\n" +
        "- Legal entity types (unless part of official name)\n\n" +
        "Examples:\n" +
        "[CORRECT] FRUTAS SÁNCHEZ S.L.\n" +
        "[CORRECT] Van den Berg B.V.\n" +
        "[CORRECT] Agricola Romana SRL\n" +
        "[WRONG] FruitBank (this is us!)\n" +
        "[WRONG] DHL Supply Chain (transport company)\n" +
        "[WRONG] FRUTAS SÁNCHEZ S.L. - ES12345678 (no tax ID)";

    // User prompt: the document text followed by a short restatement of the instructions.
    var userPrompt = "DOCUMENT TEXT:\n" +
        extractedText + "\n\n" +
        "---\n\n" +
        "INSTRUCTIONS:\n" +
        "1. Identify the SENDER/SUPPLIER company name\n" +
        "2. Ignore FruitBank (Gyümölcsbank) - that's the receiver\n" +
        "3. Ignore transport companies (DHL, Transporeon, etc.)\n" +
        "4. Return ONLY the company name, nothing else\n\n" +
        "If uncertain, return the most prominent non-FruitBank company name from the document.";

    var partnerAnalysis = await _aiApiService.GetSimpleResponseAsync(systemPrompt, userPrompt);

    // Strip labels, quotes, tax IDs and "not found" markers from the raw AI answer.
    var cleanedPartnerName = CleanPartnerName(partnerAnalysis);

    Console.WriteLine($"Partner analysis Result: {cleanedPartnerName}");
    return cleanedPartnerName;
}
|
|
|
|
// Matches an optional dash separator followed by a tax/VAT-style ID (two capital letters and
// 8+ digits) plus everything after it. Built once instead of on every call.
private static readonly System.Text.RegularExpressions.Regex TaxIdSuffixPattern =
    new System.Text.RegularExpressions.Regex(@"\s*-?\s*[A-Z]{2}\d{8,}.*$");

// Labels the AI may put in front of the name; matched case-insensitively, first hit wins.
private static readonly string[] PartnerNamePrefixes =
{
    "Company name:",
    "Sender:",
    "Supplier:",
    "Partner:",
    "The partner is",
    "The company is",
    "Feladó:",
    "Expediteur:"
};

// Quote characters stripped from both ends: straight, typographic (curly) and guillemets.
private static readonly char[] PartnerNameQuoteChars =
{
    '"', '\'', '\u2018', '\u2019', '\u201C', '\u201D', '«', '»'
};

/// <summary>
/// Cleans and normalizes a partner name taken from an AI response.
/// </summary>
/// <param name="rawPartnerName">The raw AI answer; may be null, empty or whitespace.</param>
/// <returns>
/// The name with one leading label, surrounding quotes, trailing punctuation and a trailing
/// tax ID removed. Returns an empty string when the input is blank, is a "NONE" / "N/A" /
/// "NOT FOUND" marker, or mentions FruitBank / Gyümölcsbank.
/// </returns>
private string CleanPartnerName(string rawPartnerName)
{
    if (string.IsNullOrWhiteSpace(rawPartnerName))
    {
        return string.Empty;
    }

    var cleaned = rawPartnerName.Trim();

    // Remove at most one leading label such as "Sender:".
    foreach (var prefix in PartnerNamePrefixes)
    {
        if (cleaned.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        {
            cleaned = cleaned.Substring(prefix.Length).Trim();
            break;
        }
    }

    // Remove surrounding quotes, then sentence-style trailing punctuation.
    cleaned = cleaned.Trim(PartnerNameQuoteChars);
    cleaned = cleaned.TrimEnd('.', ',', ';');

    // Remove a tax ID that slipped through, then any separator left dangling in front of it
    // (e.g. "ACME SRL, IT12345678901" -> "ACME SRL"). A trailing '.' is deliberately not
    // trimmed here so "S.L. - ES12345678" still yields "S.L.".
    cleaned = TaxIdSuffixPattern.Replace(cleaned, string.Empty).Trim();
    cleaned = cleaned.TrimEnd(',', ';').TrimEnd();

    // Treat explicit "nothing found" answers as no partner.
    if (cleaned.Equals("NONE", StringComparison.OrdinalIgnoreCase) ||
        cleaned.Equals("N/A", StringComparison.OrdinalIgnoreCase) ||
        cleaned.Equals("NOT FOUND", StringComparison.OrdinalIgnoreCase))
    {
        return string.Empty;
    }

    // FruitBank is the receiver, never the partner.
    if (cleaned.Contains("FruitBank", StringComparison.OrdinalIgnoreCase) ||
        cleaned.Contains("Gyümölcsbank", StringComparison.OrdinalIgnoreCase))
    {
        Console.WriteLine($"WARNING: AI returned FruitBank as partner. Returning empty.");
        return string.Empty;
    }

    return cleaned;
}
|
|
|
|
//private async Task<string> ExtractDocumentId(string extractedText)
|
|
//{
|
|
// //analyze the text for document number or identifiers
|
|
// return await _aiApiService.GetSimpleResponseAsync("You are an agent of Fruitbank to analyze text extracted from a PDF document, and find the document number or identifier. IMPORTANT: reply only with the number, do not add further explanation.", $"What is the document identifier of this document: {extractedText}");
|
|
//}
|
|
}
|
|
|
|
/// <summary>
/// A single product entry with its quantity, weights, optional product ID and unit cost.
/// NOTE(review): the lower-case property names presumably mirror a serialized payload —
/// confirm before renaming.
/// </summary>
public class ProductReference
{
    /// <summary>Product name; null when not set.</summary>
    public string? name { get; set; }

    /// <summary>Quantity; null when not set.</summary>
    public int? quantity { get; set; }

    /// <summary>Net weight; null when not set.</summary>
    public double? netWeight { get; set; }

    /// <summary>Gross weight; null when not set.</summary>
    public double? grossWeight { get; set; }

    /// <summary>Product identifier; null when not set.</summary>
    public int? productId { get; set; }

    /// <summary>Unit cost; not nullable, so it defaults to 0.</summary>
    public double unitCost { get; set; }
}
|
|
|
|
/// <summary>
/// Wrapper object holding a list of <see cref="ProductReference"/> entries.
/// </summary>
public class ProductReferenceResponse
{
    /// <summary>The product entries; no initializer, so null until assigned.</summary>
    public List<ProductReference> products { get; set; }
}
|
|
|
|
}
|