Small fixes to AI document parsing (IsMeasurable fix)

ShippingItem.UnitCostOnDocument added
other developments
This commit is contained in:
Adam 2025-12-19 01:43:51 +01:00
parent eb40643d62
commit 365c911c11
5 changed files with 1784 additions and 446 deletions

View File

@ -207,7 +207,7 @@ namespace Nop.Plugin.Misc.FruitBank.Controllers
Console.WriteLine($"Product analysis Result: {deserializedContent.extractedData.products}");
//identify products from database
var allProducts = await _dbContext.ProductDtos.GetAll().ToListAsync();
var allProducts = await _dbContext.ProductDtos.GetAll(true).ToListAsync();
var historicalProducts = await _dbContext.ShippingItems.GetAll().ToListAsync();
//create json from product analyzis jsonstring
@ -300,7 +300,7 @@ namespace Nop.Plugin.Misc.FruitBank.Controllers
private async Task<List<ShippingItem>> DetermineProducts(List<ProductDto> allProducts, List<ShippingItem> historicalProducts, ProductReferenceResponse deserializedProducts)
{
List<ShippingItem> finalMatchedProducts = new List<ShippingItem>();
//List<ProductDto> allProductDtos = await _dbContext.ProductDtos.GetAll(true).ToListAsync();
// Load all shipping items once
var allShippingItems = await _dbContext.ShippingItems.GetAll(true).ToListAsync();
@ -316,7 +316,7 @@ namespace Nop.Plugin.Misc.FruitBank.Controllers
if (historicalProduct != null)
{
Console.WriteLine($"Historical product found: {historicalProduct.Name}");
var productDto = await _dbContext.ProductDtos.GetByIdAsync(historicalProduct.ProductId);
var productDto = allProducts.FirstOrDefault(p => p.Id == historicalProduct.ProductId);
if (productDto != null)
{
@ -542,7 +542,9 @@ namespace Nop.Plugin.Misc.FruitBank.Controllers
NetWeightOnDocument = deserializedProduct.netWeight ?? 0,
GrossWeightOnDocument = deserializedProduct.grossWeight ?? 0,
ProductId = productDto.Id,
NameOnDocument = deserializedProduct.name
NameOnDocument = deserializedProduct.name,
IsMeasurable = productDto.IsMeasurable,
UnitPriceOnDocument = deserializedProduct.unitCost
};
}
@ -894,6 +896,7 @@ namespace Nop.Plugin.Misc.FruitBank.Controllers
public double? netWeight { get; set; }
public double? grossWeight { get; set; }
public int? productId { get; set; }
public double unitCost { get; set; }
}
public class ProductReferenceResponse

View File

@ -70,15 +70,69 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
try
{
// Transcribe audio
var transcribedText = await TranscribeAudioFile(audioFile, "hu");
// Build partner names prompt for Whisper vocabulary hints
// Whisper has a 224 character limit, so extract unique KEYWORDS instead of full names
var allCustomers = await _customerService.GetAllCustomersAsync(pageIndex: 0, pageSize: 300);
var companyNames = allCustomers
.Where(c => !string.IsNullOrEmpty(c.Company))
.Select(c => c.Company.Trim())
.Distinct()
.ToList();
// Extract unique keywords from company names
var keywords = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var company in companyNames)
{
// Split by common separators and extract meaningful words
var words = company.Split(new[] { ' ', ',', '.', '-', '/', '(', ')' }, StringSplitOptions.RemoveEmptyEntries);
foreach (var word in words)
{
var cleaned = word.Trim();
// Skip very short words, common abbreviations, and legal terms
if (cleaned.Length < 3) continue;
if (cleaned.Equals("BV", StringComparison.OrdinalIgnoreCase)) continue;
if (cleaned.Equals("Ltd", StringComparison.OrdinalIgnoreCase)) continue;
if (cleaned.Equals("Kft", StringComparison.OrdinalIgnoreCase)) continue;
if (cleaned.Equals("Inc", StringComparison.OrdinalIgnoreCase)) continue;
if (cleaned.Equals("GmbH", StringComparison.OrdinalIgnoreCase)) continue;
if (cleaned.Equals("SRL", StringComparison.OrdinalIgnoreCase)) continue;
keywords.Add(cleaned);
}
}
// Build prompt from keywords, fitting as many as possible in 224 chars
var keywordList = keywords.OrderBy(k => k.Length).ToList();
var promptParts = new List<string>();
int currentLength = 0;
const int maxLength = 220;
foreach (var keyword in keywordList)
{
var toAdd = promptParts.Count == 0 ? keyword : ", " + keyword;
if (currentLength + toAdd.Length > maxLength)
break;
promptParts.Add(keyword);
currentLength += toAdd.Length;
}
var partnerPrompt = string.Join(", ", promptParts);
Console.WriteLine($"[VoiceOrder] Whisper prompt with {promptParts.Count} keywords from {companyNames.Count} partners ({partnerPrompt.Length} chars)");
// Transcribe audio in HUNGARIAN with partner keywords as vocabulary hints
var transcribedText = await TranscribeAudioFile(audioFile, "hu", partnerPrompt);
if (string.IsNullOrEmpty(transcribedText))
{
return Json(new { success = false, message = "Failed to transcribe audio" });
}
Console.WriteLine($"[VoiceOrder] Partner transcription: {transcribedText}");
Console.WriteLine($"[VoiceOrder] Partner transcription (HU): {transcribedText}");
// Search for matching partners
var partners = await SearchPartners(transcribedText);
@ -113,7 +167,7 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
try
{
// Transcribe audio
// Transcribe audio in HUNGARIAN
var transcribedText = await TranscribeAudioFile(audioFile, "hu");
if (string.IsNullOrEmpty(transcribedText))
@ -121,7 +175,7 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
return Json(new { success = false, message = "Failed to transcribe audio" });
}
Console.WriteLine($"[VoiceOrder] Product transcription: {transcribedText}");
Console.WriteLine($"[VoiceOrder] Product transcription (HU): {transcribedText}");
// Parse products and quantities using AI
var parsedProducts = await ParseProductsFromText(transcribedText);
@ -153,12 +207,95 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
}
}
/// <summary>
/// Looks up partners from text the admin typed by hand, skipping the audio transcription step.
/// </summary>
/// <param name="text">Free-text partner search term.</param>
/// <returns>
/// JSON carrying <c>success</c>; on failure a <c>message</c>, on success the echoed
/// <c>transcription</c> and the matching <c>partners</c>.
/// </returns>
[HttpPost]
public async Task<IActionResult> SearchPartnerByText(string text)
{
    var isAuthorized = await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL);
    if (!isAuthorized)
    {
        return Json(new { success = false, message = "Access denied" });
    }

    if (string.IsNullOrWhiteSpace(text))
    {
        return Json(new { success = false, message = "No text provided" });
    }

    try
    {
        Console.WriteLine($"[VoiceOrder] Manual partner search: {text}");

        // Reuses the partner lookup that the voice flow runs on its transcription.
        var matches = await SearchPartners(text);
        var payload = new
        {
            success = true,
            transcription = text,
            partners = matches
        };
        return Json(payload);
    }
    catch (Exception error)
    {
        Console.WriteLine($"[VoiceOrder] Error in SearchPartnerByText: {error.Message}");
        return Json(new { success = false, message = $"Error: {error.Message}" });
    }
}
/// <summary>
/// Parses product text the admin typed by hand, skipping the audio transcription step.
/// </summary>
/// <param name="text">Free-text product list to parse.</param>
/// <returns>
/// JSON carrying <c>success</c>; on failure a <c>message</c> (plus the echoed
/// <c>transcription</c> when parsing yielded nothing), on success the echoed
/// <c>transcription</c> and the enriched <c>products</c>.
/// </returns>
[HttpPost]
public async Task<IActionResult> ParseManualProductText(string text)
{
    var isAuthorized = await _permissionService.AuthorizeAsync(StandardPermission.Security.ACCESS_ADMIN_PANEL);
    if (!isAuthorized)
    {
        return Json(new { success = false, message = "Access denied" });
    }

    if (string.IsNullOrWhiteSpace(text))
    {
        return Json(new { success = false, message = "No text provided" });
    }

    try
    {
        Console.WriteLine($"[VoiceOrder] Manual product input: {text}");

        // Same parsing call the voice flow makes on its transcription.
        var parsedItems = await ParseProductsFromText(text);
        var hasParsedItems = parsedItems != null && parsedItems.Count > 0;
        if (!hasParsedItems)
        {
            var failure = new
            {
                success = false,
                message = "Could not parse products from text",
                transcription = text
            };
            return Json(failure);
        }

        // Attach database product data to each parsed entry.
        var enriched = await EnrichProductData(parsedItems);
        var payload = new
        {
            success = true,
            transcription = text,
            products = enriched
        };
        return Json(payload);
    }
    catch (Exception error)
    {
        Console.WriteLine($"[VoiceOrder] Error in ParseManualProductText: {error.Message}");
        return Json(new { success = false, message = $"Error: {error.Message}" });
    }
}
#region Helper Methods
/// <summary>
/// Transcribe audio file using OpenAI Whisper
/// </summary>
private async Task<string> TranscribeAudioFile(IFormFile audioFile, string language)
private async Task<string> TranscribeAudioFile(IFormFile audioFile, string language, string customPrompt = null)
{
var fileName = $"voice_order_{DateTime.Now:yyyyMMdd_HHmmss}.webm";
var uploadsFolder = Path.Combine(Directory.GetCurrentDirectory(), "wwwroot", "uploads", "voice");
@ -180,7 +317,7 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
string transcribedText;
using (var audioStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
{
transcribedText = await _aiApiService.TranscribeAudioAsync(audioStream, fileName, language);
transcribedText = await _aiApiService.TranscribeAudioAsync(audioStream, fileName, language, customPrompt);
}
// Clean up temporary file
@ -195,16 +332,15 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
/// <summary>
/// Search for partners matching the transcribed text
/// Uses string-based search first, then semantic AI matching if needed
/// Uses both string-based and AI semantic search for best results
/// </summary>
private async Task<List<object>> SearchPartners(string searchTerm)
{
const int maxResults = 10;
const int minResultsForAI = 3; // If we get fewer than this, use AI semantic search
Console.WriteLine($"[VoiceOrder] Searching partners for: {searchTerm}");
// Step 1: Try string-based search
// Step 1: String-based search (fast, catches exact matches)
var customersByCompany = await _customerService.GetAllCustomersAsync(
company: searchTerm,
pageIndex: 0,
@ -220,32 +356,29 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
pageIndex: 0,
pageSize: maxResults);
// Combine and deduplicate
var allCustomers = customersByCompany
// Combine string search results
var stringResults = customersByCompany
.Union(customersByName)
.Union(customersByLastName)
.DistinctBy(c => c.Id)
.Take(maxResults)
.ToList();
Console.WriteLine($"[VoiceOrder] String-based search found {allCustomers.Count} partners");
Console.WriteLine($"[VoiceOrder] String-based search found {stringResults.Count} partners");
// Step 2: If we don't have enough results, use AI semantic matching
if (allCustomers.Count < minResultsForAI)
{
Console.WriteLine("[VoiceOrder] Using AI semantic matching for partners");
// Step 2: ALWAYS use AI semantic search for better results
Console.WriteLine("[VoiceOrder] Using AI semantic matching for partners");
var aiMatches = await SemanticPartnerSearch(searchTerm);
Console.WriteLine($"[VoiceOrder] AI semantic search found {aiMatches.Count} partners");
var aiMatches = await SemanticPartnerSearch(searchTerm);
// Step 3: Merge results - string matches first (exact), then AI matches
var allCustomers = stringResults
.Union(aiMatches)
.DistinctBy(c => c.Id)
.Take(maxResults)
.ToList();
// Merge AI matches with string matches, remove duplicates
allCustomers = allCustomers
.Union(aiMatches)
.DistinctBy(c => c.Id)
.Take(maxResults)
.ToList();
Console.WriteLine($"[VoiceOrder] After AI matching: {allCustomers.Count} partners");
}
Console.WriteLine($"[VoiceOrder] Total unique partners: {allCustomers.Count}");
// Format results
var result = new List<object>();
@ -278,10 +411,10 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
{
try
{
// Get all customers with company names (limit to reasonable number)
// Get all customers with company names (increased limit)
var allCustomersWithCompany = await _customerService.GetAllCustomersAsync(
pageIndex: 0,
pageSize: 500); // Reasonable limit for AI processing
pageSize: 1000); // Increased from 500 to catch more companies
// Filter to only those with company names
var customersWithCompany = allCustomersWithCompany
@ -294,28 +427,35 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Areas.Admin.Controllers
return new List<Nop.Core.Domain.Customers.Customer>();
}
Console.WriteLine($"[VoiceOrder] AI searching through {customersWithCompany.Count} companies");
// Build company list for AI
var companyList = string.Join("\n", customersWithCompany
.Select((c, index) => $"{index}|{c.Company}"));
var systemPrompt = @"You are a company name matcher for a B2B system.
Given a spoken company name and a list of company names, find the 5 best semantic matches.
Given a spoken company name and a list of company names, find the 5 best matches.
RULES:
1. Consider phonetic similarity (how it sounds)
2. Consider abbreviations (e.g., 'SFI' matches 'SFI Rotterdam B.V.')
3. Consider partial matches (e.g., 'Rotterdam' matches 'SFI Rotterdam B.V.')
4. Consider common misspellings or mishearings
5. Return ONLY valid JSON array with indices, no explanations
CRITICAL MATCHING RULES (in priority order):
1. EXACT MATCH: If the search term appears exactly in a company name, prioritize it
2. SUBSTRING MATCH: If the search term is contained within a company name (e.g., 'Junket' in 'Junket Silver Kft.')
3. WORD MATCH: If all words from search term appear in company name (any order)
4. PARTIAL MATCH: If significant words overlap (e.g., 'Silver' matches 'Junket Silver')
5. PHONETIC SIMILARITY: How it sounds when spoken
6. ABBREVIATIONS: 'SFI' matches 'SFI Rotterdam B.V.'
INPUT FORMAT:
Search term: [spoken company name]
Companies: [index]|[company name] (one per line)
EXAMPLES:
Search: 'Junket Silver'
Should match: 'Junket Silver Kft.' (substring match - VERY HIGH PRIORITY)
Search: 'Rotterdam'
Should match: 'SFI Rotterdam B.V.' (substring match)
Return ONLY a JSON array with the top 5 indices, ordered by best match first.
If fewer than 5 matches exist, return fewer indices.
OUTPUT FORMAT (JSON only):
[0, 15, 42, 103, 256]
Return the top 5 indices that best match the search term. If fewer than 5 good matches exist, return fewer indices.";
[0, 15, 42, 103, 256]";
var userPrompt = $@"Search term: {searchTerm}
Companies:
@ -364,28 +504,51 @@ Companies:
/// </summary>
private async Task<List<ParsedProduct>> ParseProductsFromText(string text)
{
var systemPrompt = @"You are a product parser for a Hungarian fruit wholesale company.
var systemPrompt = @"You are a product parser for a Hungarian fruit and vegetable wholesale company.
Parse the product names and quantities from the user's speech.
RULES:
1. Extract product names and quantities
2. Normalize product names to singular, lowercase (e.g., 'narancsok' 'narancs')
3. Convert quantity units to standard format (kg, db, láda)
4. Handle Hungarian number words ('száz' = 100, 'ötven' = 50, etc.)
5. Return ONLY valid JSON array, no explanations
CRITICAL RULES:
1. Extract product names and quantities from ANY produce item (fruits, vegetables, herbs, etc.)
2. Normalize product names to singular, lowercase (e.g., 'narancsok' 'narancs', 'áfonyák' 'áfonya')
3. Handle Hungarian number words ('száz' = 100, 'ötven' = 50, 'húsz' = 20, 'tíz' = 10, 'öt' = 5, 'egy' = 1, etc.)
4. FIX COMMON TRANSCRIPTION ERRORS:
- 'datója' 'datolya' (dates)
- 'szűlő' 'szőlő' (grapes)
- 'mondarin' 'mandarin' (mandarin)
- 'paprika' is correct (pepper/paprika)
- 'fokhagyma' is correct (garlic)
- Any obvious typo correct it
5. Return ONLY valid JSON array, no explanations or empty arrays
6. DO NOT include units - only product name and quantity as a number
7. ALWAYS return at least one product if you can parse anything from the input
OUTPUT FORMAT (JSON only):
[
{""product"": ""narancs"", ""quantity"": 100, ""unit"": ""kg""},
{""product"": ""alma"", ""quantity"": 50, ""unit"": ""kg""}
{""product"": ""narancs"", ""quantity"": 100},
{""product"": ""alma"", ""quantity"": 50}
]
EXAMPLES:
Input: 'narancs száz kilogram és alma ötven kiló'
Output: [{""product"":""narancs"",""quantity"":100,""unit"":""kg""},{""product"":""alma"",""quantity"":50,""unit"":""kg""}]
Output: [{""product"":""narancs"",""quantity"":100},{""product"":""alma"",""quantity"":50}]
Input: 'Kérek 200 kg narancsot meg 150 kg almát'
Output: [{""product"":""narancs"",""quantity"":200,""unit"":""kg""},{""product"":""alma"",""quantity"":150,""unit"":""kg""}]";
Input: 'fokhagyma, 1'
Output: [{""product"":""fokhagyma"",""quantity"":1}]
Input: 'paprika, öt rekesz'
Output: [{""product"":""paprika"",""quantity"":5}]
Input: 'mondarin öt rekesz' (typo in 'mandarin')
Output: [{""product"":""mandarin"",""quantity"":5}]
Input: 'menta, 1'
Output: [{""product"":""menta"",""quantity"":1}]
Input: 'datója tíz láda' (WRONG transcription)
Output: [{""product"":""datolya"",""quantity"":10}]
Input: 'szűlő ötven kiló' (WRONG transcription)
Output: [{""product"":""szőlő"",""quantity"":50}]";
var userPrompt = $"Parse this: {text}";
@ -417,18 +580,22 @@ Output: [{""product"":""narancs"",""quantity"":200,""unit"":""kg""},{""product""
/// <summary>
/// Enrich parsed products with actual product data from database
/// Returns ALL matching products so admin can select the exact one
/// </summary>
private async Task<List<object>> EnrichProductData(List<ParsedProduct> parsedProducts)
{
var enrichedProducts = new List<object>();
// OPTIMIZATION: Load all ProductDtos once instead of querying one by one
var helperProductDtos = await _dbContext.ProductDtos.GetAll(true).ToListAsync();
foreach (var parsed in parsedProducts)
{
// Search for matching product in database
// Search for ALL matching products in database
var products = await _productService.SearchProductsAsync(
keywords: parsed.Product,
pageIndex: 0,
pageSize: 5);
pageSize: 20); // Get up to 20 matches
if (!products.Any())
{
@ -436,37 +603,56 @@ Output: [{""product"":""narancs"",""quantity"":200,""unit"":""kg""},{""product""
continue;
}
// Take the best match (first result)
var product = products.First();
var productDto = await _dbContext.ProductDtos.GetByIdAsync(product.Id);
Console.WriteLine($"[VoiceOrder] Found {products.Count()} products matching '{parsed.Product}'");
if (productDto == null)
// Add ALL matching products for admin to choose from
foreach (var product in products)
{
Console.WriteLine($"[VoiceOrder] ProductDto not found for product ID: {product.Id}");
continue;
var productDto = helperProductDtos.FirstOrDefault(x => x.Id == product.Id);
if (productDto == null)
{
Console.WriteLine($"[VoiceOrder] ProductDto not found for product ID: {product.Id}");
continue;
}
// Check if enough stock
var availableQuantity = product.StockQuantity + productDto.IncomingQuantity;
if (availableQuantity <= 0)
{
Console.WriteLine($"[VoiceOrder] Product {product.Name} has no stock - skipping");
continue;
}
// Validate requested quantity against available stock
var requestedQuantity = parsed.Quantity;
var finalQuantity = requestedQuantity;
var isQuantityReduced = false;
if (requestedQuantity > availableQuantity)
{
Console.WriteLine($"[VoiceOrder] WARNING: Product {product.Name} - Requested {requestedQuantity} but only {availableQuantity} available. Capping to available.");
finalQuantity = availableQuantity;
isQuantityReduced = true;
}
// Add to enriched list with validated quantity
enrichedProducts.Add(new
{
id = product.Id,
name = product.Name,
sku = product.Sku,
quantity = finalQuantity, // Use validated quantity (capped to available)
requestedQuantity = requestedQuantity, // Original requested amount
price = product.Price,
stockQuantity = availableQuantity,
searchTerm = parsed.Product, // Track what was searched for
isQuantityReduced = isQuantityReduced // Flag if we had to reduce
});
}
// Check if enough stock
var availableQuantity = product.StockQuantity + productDto.IncomingQuantity;
if (availableQuantity <= 0)
{
Console.WriteLine($"[VoiceOrder] Product {product.Name} has no stock");
continue;
}
// Add to enriched list
enrichedProducts.Add(new
{
id = product.Id,
name = product.Name,
sku = product.Sku,
quantity = parsed.Quantity,
unit = parsed.Unit,
price = product.Price,
stockQuantity = availableQuantity
});
}
Console.WriteLine($"[VoiceOrder] Total enriched products to display: {enrichedProducts.Count}");
return enrichedProducts;
}
@ -476,9 +662,11 @@ Output: [{""product"":""narancs"",""quantity"":200,""unit"":""kg""},{""product""
/// <summary>
/// Shape of one entry in the list returned by ParseProductsFromText.
/// </summary>
private class ParsedProduct
{
    /// <summary>Product name; bound to the JSON property "product".</summary>
    [System.Text.Json.Serialization.JsonPropertyName("product")] public string Product { get; set; }

    /// <summary>Requested amount; bound to the JSON property "quantity".</summary>
    [System.Text.Json.Serialization.JsonPropertyName("quantity")] public int Quantity { get; set; }

    /// <summary>Unit text; carries no explicit JSON property name.</summary>
    public string Unit { get; set; }
}
#endregion

View File

@ -365,6 +365,13 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Controllers
_logger.Detail($"AddShippingDocument invoked; id: {shippingDocument.Id}");
await ctx.ShippingDocuments.InsertAsync(shippingDocument);
// Copy the unit price read from the document onto the cost of each referenced product.
// A document may arrive without an item list; enumerating null would throw after the
// document itself has already been inserted, so skip the loop in that case.
if (shippingDocument.ShippingItems != null)
{
    foreach (var item in shippingDocument.ShippingItems)
    {
        var product = await ctx.Products.GetByIdAsync(item.ProductId);

        // Presumably GetByIdAsync yields null when ProductId matches no product
        // (e.g. an unmatched item) — skip instead of dereferencing null.
        if (product == null)
        {
            continue;
        }

        product.ProductCost = Convert.ToDecimal(item.UnitPriceOnDocument);
        // NOTE(review): no explicit update/save call for product is visible here —
        // confirm that ctx persists this change.
    }
}
return await ctx.ShippingDocuments.GetByIdAsync(shippingDocument.Id, shippingDocument.Shipping != null || shippingDocument.Partner != null);
}

View File

@ -53,8 +53,9 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Services
/// <param name="audioStream">The audio file stream</param>
/// <param name="fileName">The original filename (used to determine format)</param>
/// <param name="language">Optional language code (e.g., "en", "hu"). If null, auto-detects.</param>
/// <param name="customPrompt">Optional vocabulary hints to help Whisper recognize specific words/names</param>
/// <returns>The transcribed text</returns>
public async Task<string?> TranscribeAudioAsync(Stream audioStream, string fileName, string? language = null)
public async Task<string?> TranscribeAudioAsync(Stream audioStream, string fileName, string? language = null, string? customPrompt = null)
{
try
{
@ -88,6 +89,22 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Services
form.Add(new StringContent(language), "language");
}
// Add prompt with vocabulary hints for better transcription
// Use custom prompt if provided, otherwise use default produce vocabulary
var prompt = customPrompt ??
"Gyümölcsök és zöldségek: narancs, mandarin, citrom, alma, körte, szőlő, datolya, áfonya, " +
"paprika, fokhagyma, hagyma, paradicsom, uborka, saláta, menta, bazsalikom, " +
"mennyiségek: rekesz, láda, kilogram, darab";
// Truncate prompt if too long (Whisper has a limit)
if (prompt.Length > 224)
{
prompt = prompt.Substring(0, 224);
Console.WriteLine($"[Whisper] Prompt truncated to 224 characters");
}
form.Add(new StringContent(prompt), "prompt");
// Optional: Add response format (json is default, can also be text, srt, verbose_json, or vtt)
form.Add(new StringContent("json"), "response_format");
@ -806,7 +823,8 @@ namespace Nop.Plugin.Misc.FruitBankPlugin.Services
"'name' (string), " +
"'quantity' (int - the number of cartons, boxes or packages), " +
"'netWeight' (double - the net kilograms), " +
"'grossWeight' (double - the gross kilograms).\r \n \n" +
"'grossWeight' (double - the gross kilograms)," +
"'unitCost (double - the unit price of the product on the document)'.\r \n \n" +
"";
string systemPrompt = "You are an AI assistant of FRUITBANK that extracts text and structured data from images. " +