// Mango.Nop.Plugins/Nop.Plugin.Misc.AIPlugin/Services/OpenAIApiService.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using AyCode.Utils.Extensions;
using Nop.Plugin.Misc.FruitBank.Controllers;
using Nop.Plugin.Misc.FruitBankPlugin.Models;
using Nop.Services.Configuration;
#nullable enable
namespace Nop.Plugin.Misc.FruitBankPlugin.Services
{
public class OpenAIApiService : IAIAPIService
{
private readonly ISettingService _settingService;
private readonly FruitBankSettings _fruitBankSettings;
private readonly HttpClient _httpClient;
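// Callbacks used to push streamed chat chunks, completion and error notifications back to the registered consumer (see RegisterCallback)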
private static Action<string, string>? _callback;
private static Action<string>? _onComplete;
private static Action<string, string>? _onError;
private const string OpenAiEndpoint = "https://api.openai.com/v1/chat/completions";
private const string OpenAiImageEndpoint = "https://api.openai.com/v1/images/generations";
private const string OpenAiFileEndpoint = "https://api.openai.com/v1/files";
private const string OpenAiAudioEndpoint = "https://api.openai.com/v1/audio/transcriptions";
private const string BaseUrl = "https://api.openai.com/v1";
private string? _assistantId;
private string? _vectorStoreId;
public OpenAIApiService(ISettingService settingService, HttpClient httpClient)
{
_settingService = settingService;
_fruitBankSettings = _settingService.LoadSetting<FruitBankSettings>();
_httpClient = httpClient;
// Assign (rather than Add) so constructing the service against a shared HttpClient does not accumulate duplicate Authorization headers
_httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", GetApiKey());
}
public string GetApiKey() => _fruitBankSettings.OpenAIApiKey;
public string GetModelName() => _fruitBankSettings.OpenAIModelName;
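/// <summary>
/// Register the callbacks used during streamed chat responses: a per-chunk callback (sessionId, accumulated text),
/// a completion callback (sessionId) and an error callback (sessionId, error message).
/// </summary>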
public void RegisterCallback(Action<string, string> callback, Action<string> onCompleteCallback, Action<string, string> onErrorCallback)
{
_callback = callback;
_onComplete = onCompleteCallback;
_onError = onErrorCallback;
}
#region === AUDIO TRANSCRIPTION ===
/// <summary>
/// Transcribe audio file to text using OpenAI Whisper API
/// </summary>
/// <param name="audioStream">The audio file stream</param>
/// <param name="fileName">The original filename (used to determine format)</param>
/// <param name="language">Optional language code (e.g., "en", "hu"). If null, auto-detects.</param>
/// <returns>The transcribed text</returns>
public async Task<string?> TranscribeAudioAsync(Stream audioStream, string fileName, string? language = null)
{
try
{
using var form = new MultipartFormDataContent();
// Add the audio file
var audioContent = new StreamContent(audioStream);
// Determine content type based on file extension
var extension = Path.GetExtension(fileName).ToLowerInvariant();
audioContent.Headers.ContentType = extension switch
{
".mp3" => new MediaTypeHeaderValue("audio/mpeg"),
".mp4" => new MediaTypeHeaderValue("audio/mp4"),
".mpeg" => new MediaTypeHeaderValue("audio/mpeg"),
".mpga" => new MediaTypeHeaderValue("audio/mpeg"),
".m4a" => new MediaTypeHeaderValue("audio/m4a"),
".wav" => new MediaTypeHeaderValue("audio/wav"),
".webm" => new MediaTypeHeaderValue("audio/webm"),
_ => new MediaTypeHeaderValue("application/octet-stream")
};
form.Add(audioContent, "file", fileName);
// Add model
form.Add(new StringContent("whisper-1"), "model");
// Add language if specified
if (!string.IsNullOrEmpty(language))
{
form.Add(new StringContent(language), "language");
}
// Optional: Add response format (json is default, can also be text, srt, verbose_json, or vtt)
form.Add(new StringContent("json"), "response_format");
var response = await _httpClient.PostAsync(OpenAiAudioEndpoint, form);
if (!response.IsSuccessStatusCode)
{
var error = await response.Content.ReadAsStringAsync();
Console.WriteLine($"Audio transcription failed: {error}");
return null;
}
await using var contentStream = await response.Content.ReadAsStreamAsync();
using var json = await JsonDocument.ParseAsync(contentStream);
var transcription = json.RootElement.GetProperty("text").GetString();
Console.WriteLine($"Audio transcription successful. Length: {transcription?.Length ?? 0} characters");
return transcription;
}
catch (Exception ex)
{
Console.Error.WriteLine($"Error transcribing audio: {ex}");
return null;
}
}
#endregion
#region === CHAT (TEXT INPUT) ===
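/// <summary>
/// Get a single, non-streamed chat completion for the given system/user (and optional assistant) messages.
/// Mini/nano models are called with a low temperature; other models (e.g. gpt-5) use the reasoning-effort/verbosity request shape.
/// Logs token usage and returns the completion text, or null on failure.
/// </summary>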
public async Task<string?> GetSimpleResponseAsync(string systemMessage, string userMessage, string? assistantMessage = null)
{
var modelName = GetModelName();
StringContent requestContent;
if (modelName is "gpt-4.1-mini" or "gpt-4o-mini" or "gpt-4.1-nano" or "gpt-5-nano")
{
var requestBody = new OpenAIGpt4MiniAIChatRequest
{
Model = modelName,
Temperature = 0.2,
Messages = string.IsNullOrEmpty(assistantMessage)
?
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "user", Content = userMessage }
]
:
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "assistant", Content = assistantMessage },
new AIChatMessage { Role = "user", Content = userMessage }
],
Stream = false
};
var requestJson = JsonSerializer.Serialize(requestBody, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");
}
else
{
var requestBody = new OpenAIGpt5AIChatRequest
{
Model = modelName,
Temperature = 1,
Messages = string.IsNullOrEmpty(assistantMessage)
?
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "user", Content = userMessage }
]
:
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "assistant", Content = assistantMessage },
new AIChatMessage { Role = "user", Content = userMessage }
],
ReasoningEffort = "minimal",
Verbosity = "high",
Stream = false
};
var requestJson = JsonSerializer.Serialize(requestBody, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");
}
try
{
using var response = await _httpClient.PostAsync(OpenAiEndpoint, requestContent);
response.EnsureSuccessStatusCode();
await using var responseStream = await response.Content.ReadAsStreamAsync();
using var document = await JsonDocument.ParseAsync(responseStream);
var inputTokens = document.RootElement.GetProperty("usage").GetProperty("prompt_tokens").GetInt32();
var outputTokens = document.RootElement.GetProperty("usage").GetProperty("completion_tokens").GetInt32();
Console.WriteLine($"USAGE STATS - Tokens: {inputTokens} + {outputTokens} = {inputTokens + outputTokens}");
return document.RootElement
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString() ?? "No response";
}
catch (Exception ex)
{
Console.Error.WriteLine($"{ex}");
return null;
}
}
#endregion
#region === CHAT (STREAMING) ===
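/// <summary>
/// Stream a chat completion over server-sent events, forwarding the accumulated text to the registered callback
/// after each chunk and signalling completion or errors for the given session. Returns the full response text.
/// </summary>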
public async Task<string> GetStreamedResponseAsync(string sessionId, string systemMessage, string userMessage, string? assistantMessage = null)
{
var modelName = GetModelName();
StringContent requestContent;
if (modelName is "gpt-4.1-mini" or "gpt-4o-mini" or "gpt-4.1-nano" or "gpt-5-nano")
{
var requestBody = new OpenAIGpt4MiniAIChatRequest
{
Model = modelName,
Temperature = 0.2,
Messages = string.IsNullOrEmpty(assistantMessage)
?
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "user", Content = userMessage }
]
:
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "assistant", Content = assistantMessage },
new AIChatMessage { Role = "user", Content = userMessage }
],
Stream = true
};
var requestJson = JsonSerializer.Serialize(requestBody, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");
}
else
{
var requestBody = new OpenAIGpt5AIChatRequest
{
Model = modelName,
Temperature = 1,
Messages = string.IsNullOrEmpty(assistantMessage)
?
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "user", Content = userMessage }
]
:
[
new AIChatMessage { Role = "system", Content = systemMessage },
new AIChatMessage { Role = "assistant", Content = assistantMessage },
new AIChatMessage { Role = "user", Content = userMessage }
],
Stream = true
};
var requestJson = JsonSerializer.Serialize(requestBody, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");
}
using var httpRequest = new HttpRequestMessage(HttpMethod.Post, OpenAiEndpoint);
// Bug fix: send the serialized request body instead of an empty string
httpRequest.Content = requestContent;
using var response = await _httpClient.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead);
response.EnsureSuccessStatusCode();
var stringBuilder = new StringBuilder();
var doneReceived = false;
await using var responseStream = await response.Content.ReadAsStreamAsync();
using var reader = new StreamReader(responseStream);
try
{
while (!reader.EndOfStream)
{
var line = await reader.ReadLineAsync();
if (string.IsNullOrWhiteSpace(line) || !line.StartsWith("data: ")) continue;
var jsonResponse = line[6..];
if (jsonResponse == "[DONE]")
{
doneReceived = true;
_onComplete?.Invoke(sessionId);
break;
}
try
{
using var jsonDoc = JsonDocument.Parse(jsonResponse);
if (jsonDoc.RootElement.TryGetProperty("choices", out var choices) &&
choices[0].TryGetProperty("delta", out var delta) &&
delta.TryGetProperty("content", out var contentElement))
{
var content = contentElement.GetString();
if (!string.IsNullOrEmpty(content))
{
stringBuilder.Append(content);
_callback?.Invoke(sessionId, stringBuilder.ToString());
}
}
}
catch (JsonException ex)
{
_onError?.Invoke(sessionId, $"Malformed JSON: {ex.Message}");
break;
}
}
if (reader.EndOfStream && !doneReceived)
{
_onError?.Invoke(sessionId, "Unexpected end of stream");
}
}
catch (Exception ex)
{
_onError?.Invoke(sessionId, $"Exception: {ex.Message}");
}
return stringBuilder.ToString();
}
#endregion
#region === IMAGE GENERATION ===
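/// <summary>
/// Generate a single 1024x1024 image with gpt-image-1 and return it as a base64 PNG data URL, or null on failure.
/// </summary>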
public async Task<string?> GenerateImageAsync(string prompt)
{
var request = new HttpRequestMessage(HttpMethod.Post, OpenAiImageEndpoint);
var requestBody = new
{
model = "gpt-image-1",
prompt = prompt,
n = 1,
size = "1024x1024"
};
request.Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json");
var response = await _httpClient.SendAsync(request);
if (!response.IsSuccessStatusCode)
{
var error = await response.Content.ReadAsStringAsync();
Console.WriteLine($"Image generation failed: {error}");
return null;
}
await using var content = await response.Content.ReadAsStreamAsync();
using var json = await JsonDocument.ParseAsync(content);
var base64Image = json.RootElement
.GetProperty("data")[0]
.GetProperty("b64_json")
.GetString();
return $"data:image/png;base64,{base64Image}";
}
#endregion
#region === PDF ANALYSIS (NEW) ===
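/// <summary>
/// Lazily resolve (or create) the vector store and assistant used by the Assistants v2 file-analysis flow.
/// </summary>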
private async Task EnsureAssistantAndVectorStoreAsync()
{
// Find or create vector store
_vectorStoreId ??= await FindOrCreateVectorStoreAsync("pdf-analysis-store");
// Find or create assistant
_assistantId ??= await FindOrCreateAssistantAsync("PDF and Image Analyzer Assistant");
}
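/// <summary>
/// Delete every vector store returned by the list endpoint. Intended for testing/cleanup only.
/// </summary>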
public async Task CleanupAllVectorStoresAsync()
{
Console.WriteLine("Cleaning up all existing vector stores...");
var listRequest = new HttpRequestMessage(HttpMethod.Get, $"{BaseUrl}/vector_stores");
listRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
var response = await _httpClient.SendAsync(listRequest);
if (response.IsSuccessStatusCode)
{
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var vectorStores = json.RootElement.GetProperty("data");
foreach (var vectorStore in vectorStores.EnumerateArray())
{
var id = vectorStore.GetProperty("id").GetString();
var name = vectorStore.TryGetProperty("name", out var nameElement) && nameElement.ValueKind != JsonValueKind.Null
? nameElement.GetString()
: "Unnamed";
var deleteRequest = new HttpRequestMessage(HttpMethod.Delete, $"{BaseUrl}/vector_stores/{id}");
deleteRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
await _httpClient.SendAsync(deleteRequest);
Console.WriteLine($"Deleted vector store: {name} ({id})");
}
Console.WriteLine("Vector store cleanup complete!");
}
}
//TEMPORARY: Cleanup all assistants (for testing purposes) - A.
public async Task CleanupAllAssistantsAsync()
{
Console.WriteLine("Cleaning up all existing assistants...");
var listRequest = new HttpRequestMessage(HttpMethod.Get, $"{BaseUrl}/assistants");
listRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
var response = await _httpClient.SendAsync(listRequest);
if (response.IsSuccessStatusCode)
{
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var assistants = json.RootElement.GetProperty("data");
foreach (var assistant in assistants.EnumerateArray())
{
var id = assistant.GetProperty("id").GetString();
var name = assistant.GetProperty("name").GetString();
var deleteRequest = new HttpRequestMessage(HttpMethod.Delete, $"{BaseUrl}/assistants/{id}");
deleteRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
await _httpClient.SendAsync(deleteRequest);
Console.WriteLine($"Deleted assistant: {name} ({id})");
}
Console.WriteLine("Cleanup complete!");
}
// Reset local cache
_assistantId = null;
}
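/// <summary>
/// Analyze an uploaded PDF or image via the Assistants v2 API: upload the file, attach it to the vector store
/// (documents only), create a thread and user message, run the assistant, poll until the run completes and
/// return the assistant's reply.
/// </summary>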
public async Task<string?> AnalyzePdfAsync(Stream file, string fileName, string userPrompt)
{
await EnsureAssistantAndVectorStoreAsync();
var fileId = await UploadFileAsync(file, fileName);
var isImage = IsImageFile(fileName);
if (!isImage)
{
await AttachFileToVectorStoreAsync(fileId);
}
var threadId = await CreateThreadAsync();
if (isImage)
{
await AddUserMessageWithImageAsync(threadId, userPrompt, fileId);
}
else
{
await AddUserMessageAsync(threadId, userPrompt);
}
var runId = await CreateRunAsync(threadId);
await WaitForRunCompletionAsync(threadId, runId);
return await GetAssistantResponseAsync(threadId);
}
private static bool IsImageFile(string fileName)
{
var extension = Path.GetExtension(fileName).ToLowerInvariant();
return extension == ".jpg" || extension == ".jpeg" || extension == ".png" || extension == ".gif" || extension == ".webp";
}
private async Task<string> UploadFileAsync(Stream file, string fileName)
{
using var form = new MultipartFormDataContent();
var fileContent = new StreamContent(file);
// Determine MIME type based on file extension
var extension = Path.GetExtension(fileName).ToLowerInvariant();
fileContent.Headers.ContentType = extension switch
{
".pdf" => new MediaTypeHeaderValue("application/pdf"),
".jpg" or ".jpeg" => new MediaTypeHeaderValue("image/jpeg"),
".png" => new MediaTypeHeaderValue("image/png"),
".gif" => new MediaTypeHeaderValue("image/gif"),
".webp" => new MediaTypeHeaderValue("image/webp"),
_ => new MediaTypeHeaderValue("application/octet-stream")
};
form.Add(fileContent, "file", fileName);
form.Add(new StringContent("assistants"), "purpose");
var response = await _httpClient.PostAsync($"{BaseUrl}/files", form);
await EnsureSuccessAsync(response, "upload file");
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
return json.RootElement.GetProperty("id").GetString()!;
}
private async Task AttachFileToVectorStoreAsync(string fileId)
{
var body = new { file_id = fileId };
var request = CreateAssistantRequest(
HttpMethod.Post,
$"{BaseUrl}/vector_stores/{_vectorStoreId}/files",
body
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "attach file to vector store");
}
private async Task<string> CreateThreadAsync()
{
var request = CreateAssistantRequest(
HttpMethod.Post,
$"{BaseUrl}/threads",
new { }
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "create thread");
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
return json.RootElement.GetProperty("id").GetString()!;
}
private async Task AddUserMessageAsync(string threadId, string userPrompt)
{
var body = new
{
role = "user",
content = userPrompt
};
var request = CreateAssistantRequest(
HttpMethod.Post,
$"{BaseUrl}/threads/{threadId}/messages",
body
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "add user message");
}
private async Task AddUserMessageWithImageAsync(string threadId, string userPrompt, string fileId)
{
var body = new
{
role = "user",
content = new object[]
{
new { type = "text", text = userPrompt },
new { type = "image_file", image_file = new { file_id = fileId } }
}
};
var request = CreateAssistantRequest(
HttpMethod.Post,
$"{BaseUrl}/threads/{threadId}/messages",
body
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "add user message with image");
}
private async Task<string> CreateRunAsync(string threadId)
{
var body = new
{
assistant_id = _assistantId,
tool_resources = new
{
file_search = new
{
vector_store_ids = new[] { _vectorStoreId }
}
}
};
var request = CreateAssistantRequest(
HttpMethod.Post,
$"{BaseUrl}/threads/{threadId}/runs",
body
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "create run");
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
return json.RootElement.GetProperty("id").GetString()!;
}
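/// <summary>
/// Poll the run status once per second until it completes, failing fast on any terminal non-success status
/// and timing out after 60 attempts.
/// </summary>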
private async Task WaitForRunCompletionAsync(string threadId, string runId)
{
const int pollIntervalMs = 1000;
const int maxAttempts = 60; // 1 minute timeout
var attempts = 0;
while (attempts < maxAttempts)
{
var request = CreateAssistantRequest(
HttpMethod.Get,
$"{BaseUrl}/threads/{threadId}/runs/{runId}"
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "check run status");
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var status = json.RootElement.GetProperty("status").GetString()!;
if (status == "completed")
return;
if (status != "in_progress" && status != "queued")
throw new Exception($"Run failed with status: {status}");
await Task.Delay(pollIntervalMs);
attempts++;
}
throw new TimeoutException("Run did not complete within the expected time");
}
private async Task<string?> GetAssistantResponseAsync(string threadId)
{
var request = CreateAssistantRequest(
HttpMethod.Get,
$"{BaseUrl}/threads/{threadId}/messages"
);
var response = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(response, "retrieve messages");
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var messages = json.RootElement.GetProperty("data");
if (messages.GetArrayLength() == 0)
return "No response";
var firstMessage = messages[0]
.GetProperty("content")[0]
.GetProperty("text")
.GetProperty("value")
.GetString();
return firstMessage ?? "No response";
}
private static HttpRequestMessage CreateAssistantRequest(HttpMethod method, string url, object? body = null)
{
var request = new HttpRequestMessage(method, url);
request.Headers.Add("OpenAI-Beta", "assistants=v2");
if (body != null)
{
var json = JsonSerializer.Serialize(body, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
request.Content = new StringContent(json, Encoding.UTF8, "application/json");
}
return request;
}
private static async Task EnsureSuccessAsync(HttpResponseMessage response, string operation)
{
if (!response.IsSuccessStatusCode)
{
var errorBody = await response.Content.ReadAsStringAsync();
Console.WriteLine($"Error Status: {response.StatusCode}");
Console.WriteLine($"Error Body: {errorBody}");
throw new Exception($"Failed to {operation}: {errorBody}");
}
}
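/// <summary>
/// Return the id of the vector store with the given name, creating it if it does not exist yet.
/// </summary>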
private async Task<string> FindOrCreateVectorStoreAsync(string name)
{
// List existing vector stores
var listRequest = new HttpRequestMessage(HttpMethod.Get, $"{BaseUrl}/vector_stores");
listRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
var response = await _httpClient.SendAsync(listRequest);
if (response.IsSuccessStatusCode)
{
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var stores = json.RootElement.GetProperty("data");
//var getString = stores.EnumerateArray().FirstOrDefault(store => store.GetProperty("name").GetString() == name)?.GetProperty("id").GetString();
//if (!getString.IsNullOrWhiteSpace()) return getString;
foreach (var store in stores.EnumerateArray().Where(store => store.GetProperty("name").GetString() == name))
{
return store.GetProperty("id").GetString()!;
}
}
// Create new if not found
var createBody = new { name = name };
var createRequest = CreateAssistantRequest(HttpMethod.Post, $"{BaseUrl}/vector_stores", createBody);
var createResponse = await _httpClient.SendAsync(createRequest);
await EnsureSuccessAsync(createResponse, "create vector store");
using var createJson = await JsonDocument.ParseAsync(await createResponse.Content.ReadAsStreamAsync());
return createJson.RootElement.GetProperty("id").GetString()!;
}
private async Task<string> FindOrCreateAssistantAsync(string name)
{
// List existing assistants
var listRequest = new HttpRequestMessage(HttpMethod.Get, $"{BaseUrl}/assistants");
listRequest.Headers.Add("OpenAI-Beta", "assistants=v2");
var response = await _httpClient.SendAsync(listRequest);
if (response.IsSuccessStatusCode)
{
using var json = await JsonDocument.ParseAsync(await response.Content.ReadAsStreamAsync());
var assistants = json.RootElement.GetProperty("data");
foreach (var assistant in assistants.EnumerateArray().Where(assistant => assistant.GetProperty("name").GetString() == name))
{
return assistant.GetProperty("id").GetString()!;
}
}
// Create new if not found
var assistantBody = new
{
name = name,
instructions = "You are an assistant that analyzes uploaded files. When you receive an image, analyze and describe what you see in the image in detail. When you receive a PDF or text document, use the file_search tool to find and analyze relevant information. Always respond directly according to the user's instructions about the current file they upload.",
model = "gpt-4o",
tools = new[] { new { type = "file_search" } }
};
var request = CreateAssistantRequest(HttpMethod.Post, $"{BaseUrl}/assistants", assistantBody);
var createResponse = await _httpClient.SendAsync(request);
await EnsureSuccessAsync(createResponse, "create assistant");
using var createJson = await JsonDocument.ParseAsync(await createResponse.Content.ReadAsStreamAsync());
return createJson.RootElement.GetProperty("id").GetString()!;
}
#endregion
#region === IMAGE TEXT EXTRACTION ===
/// <summary>
/// Extract text from an image using OpenAI Vision API
/// </summary>
/// <param name="imageStream">The image file stream</param>
/// <param name="fileName">The original filename</param>
/// <param name="customPrompt">Optional custom prompt for text extraction</param>
/// <returns>Extracted and structured text from the image</returns>
public async Task<string?> ExtractTextFromImageAsync(Stream imageStream, string fileName, string? customPrompt = null)
{
try
{
// Read image bytes from stream
byte[] imgBytes;
using (var memoryStream = new MemoryStream())
{
await imageStream.CopyToAsync(memoryStream);
imgBytes = memoryStream.ToArray();
}
string base64 = Convert.ToBase64String(imgBytes);
// Determine image format
var extension = Path.GetExtension(fileName).ToLowerInvariant();
var mimeType = extension switch
{
".jpg" or ".jpeg" => "image/jpeg",
".png" => "image/png",
".gif" => "image/gif",
".webp" => "image/webp",
_ => "image/jpeg"
};
var prompt = customPrompt ?? "You have 4 tasks:\n\n" +
"TASK 1: Extract all text from this image, without modifying or translating it.\n\n" +
"TASK 2: Determine the document identifier, such as an order confirmation number or invoice number, that identifies the document.\n\n" +
"TASK 3: Extract partner information (name and tax ID number). IMPORTANT: You work for Fruitbank, so Fruitbank is not the requested partner; extract the company information of the party OTHER THAN Fruitbank.\n\n" +
"TASK 4: Extract product information.\n\n" +
"Perform the tasks carefully, and format your response as a JSON object named 'extractedData' with the following fields: " +
"'fullText' (string - the unmodified full text), " +
"'documentId' (string), " +
"'partner' (an object with fields 'name' and 'taxId'), " +
"'products' (an array of objects with the following fields: " +
"'name' (string), " +
"'quantity' (int - the number of cartons, boxes or packages), " +
"'netWeight' (double - the net kilograms), " +
"'grossWeight' (double - the gross kilograms)).";
string systemPrompt = "You are an AI assistant of FRUITBANK that extracts text and structured data from images. " +
"Carefully analyze the image content to extract all relevant information accurately. " +
"Provide the extracted data in a well-formatted JSON structure as specified.";
var payload = new
{
model = GetModelName(), // Use the configured model
messages = new object[]
{
new {
role = "system",
// Bug fix: the system message should carry the system prompt, not the user prompt
content = new object[]
{
new { type = "text", text = systemPrompt },
}
},
new {
role = "user",
content = new object[]
{
new { type = "text", text = prompt },
new { type = "image_url", image_url = new { url = $"data:{mimeType};base64,{base64}" } }
}
}
}
};
var json = JsonSerializer.Serialize(payload, new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
});
var response = await _httpClient.PostAsync(
OpenAiEndpoint,
new StringContent(json, Encoding.UTF8, "application/json"));
if (!response.IsSuccessStatusCode)
{
var error = await response.Content.ReadAsStringAsync();
Console.WriteLine($"Image text extraction failed: {error}");
return null;
}
var resultJson = await response.Content.ReadAsStringAsync();
using var doc = JsonDocument.Parse(resultJson);
var inputTokens = doc.RootElement.GetProperty("usage").GetProperty("prompt_tokens").GetInt32();
var outputTokens = doc.RootElement.GetProperty("usage").GetProperty("completion_tokens").GetInt32();
Console.WriteLine($"USAGE STATS - Image OCR Tokens: {inputTokens} + {outputTokens} = {inputTokens + outputTokens}");
var text = doc.RootElement
.GetProperty("choices")[0]
.GetProperty("message")
.GetProperty("content")
.GetString();
return text;
}
catch (Exception ex)
{
Console.Error.WriteLine($"Error extracting text from image: {ex}");
return null;
}
}
#endregion
}
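// DTOs mirroring the 'extractedData' JSON shape requested in ExtractTextFromImageAsync's prompt.
// Property names are intentionally camelCase so they bind to the raw JSON without a naming policy.
// A minimal deserialization sketch (assuming the model returned bare JSON without markdown fences):
//   var parsed = JsonSerializer.Deserialize<OpenaiImageResponse>(jsonText);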
public class OpenaiImageResponse
{
public ExtractedData extractedData { get; set; } = new ExtractedData(); // Changed to property with { get; set; }
}
public class ExtractedData
{
public string? fullText { get; set; } = string.Empty;
public string? documentId { get; set; } = string.Empty;
public PartnerInfo? partner { get; set; } = new PartnerInfo();
public List<ProductReference>? products { get; set; } = new List<ProductReference>();
}
public class PartnerInfo
{
public string? name { get; set; } = string.Empty;
public string? taxId { get; set; } = string.Empty;
}
}