SeemGen/Components/Pages/Pdf-form-extractor.razor

112 lines
3.3 KiB
Plaintext

@page "/pdf-form-extractor"
@using System.IO
@using BLAIzor.Models
@using BLAIzor.Services
@using Microsoft.AspNetCore.Components.Forms
@using System.Text
@using UglyToad.PdfPig
@inject ContentEditorAIService ContentEditorAIService
@* Page body: PDF upload, the raw extracted text, and the AI-derived field groups. *@
<h3>PDF Form Extractor</h3>
@* The accept filter narrows the browser's file picker to PDFs. HandleFileSelected still
   validates the content type itself, because accept is only a hint to the browser. *@
<InputFile OnChange="HandleFileSelected" accept=".pdf,application/pdf" />
@* Shown only once text has been extracted; the text area is two-way bound to PdfText. *@
@if (!string.IsNullOrEmpty(PdfText))
{
    <h5>Extracted Text:</h5>
    <InputTextArea @bind-Value="@PdfText" style="width: 100%"></InputTextArea>
}
@* Shown only when at least one field group is available. *@
@if (ExtractedFields.Any())
{
    <h5>Extracted Fields:</h5>
    <ul>
        @* One summary line per group: name, number of fields, and the Repeatable value. *@
        @foreach (var field in ExtractedFields)
        {
            <li><strong>@field.GroupName:</strong> @field.Fields.Count (@field.Repeatable)</li>
        }
    </ul>
    <DynamicEditFormFromDescription Groups="@ExtractedFields" isEditing=true></DynamicEditFormFromDescription>
}
@code {
// Text extracted from the most recently uploaded PDF; the markup shows it in a bound text area once non-empty.
private string PdfText = string.Empty;
// Field groups produced by CallAiForFieldExtraction; the markup lists them and passes them to the dynamic edit form.
private List<FormFieldGroup> ExtractedFields = new();
/// <summary>
/// Handles a selection made in the <c>InputFile</c> control: spools the uploaded PDF to a
/// temporary file, extracts its text into <c>PdfText</c>, then asks the AI service for the
/// grouped form fields and stores them in <c>ExtractedFields</c>.
/// </summary>
/// <param name="e">Change event describing the file(s) the user selected.</param>
/// <remarks>
/// Non-PDF selections (judged by content type) and selections that do not contain exactly
/// one file are ignored. Exceptions from reading, parsing, or the AI call propagate to the caller.
/// </remarks>
private async Task HandleFileSelected(InputFileChangeEventArgs e)
{
    // InputFileChangeEventArgs.File throws unless exactly one file was supplied,
    // so check the count before touching it.
    if (e.FileCount != 1)
    {
        return;
    }

    var file = e.File;
    if (file?.ContentType is null
        || !file.ContentType.Contains("pdf", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // The browser-supplied file name is untrusted (it may contain path separators or clash
    // with an existing file), so the temporary file gets a server-generated random name.
    var tempPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

    try
    {
        // NOTE: OpenReadStream() without arguments applies the framework's default size cap
        // (512,000 bytes per the Blazor docs); larger uploads throw here.
        await using (var upload = file.OpenReadStream())
        await using (var spool = File.Create(tempPath))
        {
            await upload.CopyToAsync(spool);
        }

        PdfText = ExtractPdfText(tempPath);
    }
    finally
    {
        // Best-effort cleanup so uploads do not accumulate in the temp directory; a cleanup
        // failure must not mask an exception raised by the upload or the extraction.
        try
        {
            File.Delete(tempPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            Console.WriteLine($"Could not delete temporary PDF file '{tempPath}': {ex.Message}");
        }
    }

    // Ask the AI service to turn the plain text into structured field groups.
    ExtractedFields = await CallAiForFieldExtraction(PdfText);
}
/// <summary>
/// Opens the PDF at <paramref name="filePath"/> and returns the text of all its pages,
/// each page's text followed by a line break, in page order.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <returns>The concatenated page texts; an empty string when the document has no pages.</returns>
private string ExtractPdfText(string filePath)
{
    using var pdf = PdfDocument.Open(filePath);

    // Terminate every page with a newline, then join; the sequence is fully consumed by
    // string.Concat before the document is disposed at the end of the method.
    var pageTexts = pdf.GetPages().Select(pdfPage => pdfPage.Text + Environment.NewLine);
    return string.Concat(pageTexts);
}
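// Legacy flat-field variant, superseded by the grouped CallAiForFieldExtraction below (left commented out, presumably for reference).
// private async Task<List<FormField>> CallAiForFieldExtraction(string plainText)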
// {
// var jsonResult = await ContentEditorService.AnalyzeFormFieldsFromText(plainText);
// Console.Write($"Pdf form result: {jsonResult}");
// try
// {
// var fields = System.Text.Json.JsonSerializer.Deserialize<List<FormField>>(jsonResult, new System.Text.Json.JsonSerializerOptions
// {
// PropertyNameCaseInsensitive = true
// });
// return fields ?? new List<FormField>();
// }
// catch
// {
// // fallback if something goes wrong
// return new List<FormField>();
// }
// }
/// <summary>
/// Sends the extracted PDF text to the AI service and deserializes the JSON it returns
/// into a list of form-field groups.
/// </summary>
/// <param name="plainText">Plain text extracted from the PDF.</param>
/// <returns>
/// The deserialized groups, or an empty list when the JSON deserializes to null or
/// deserialization throws (the error message is written to the console).
/// </returns>
private async Task<List<FormFieldGroup>> CallAiForFieldExtraction(string plainText)
{
    var aiJson = await ContentEditorAIService.AnalyzeGroupedFormFieldsFromText(plainText);
    Console.WriteLine($"📄 PDF form AI response: {aiJson}");

    // Property names are matched case-insensitively when binding the JSON.
    var serializerOptions = new System.Text.Json.JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    };

    // Start from the fallback value; it is only replaced by a successful, non-null parse.
    var fieldGroups = new List<FormFieldGroup>();
    try
    {
        var parsed = System.Text.Json.JsonSerializer.Deserialize<List<FormFieldGroup>>(aiJson, serializerOptions);
        if (parsed != null)
        {
            fieldGroups = parsed;
        }
    }
    catch (Exception parseError)
    {
        Console.WriteLine($"❌ Error parsing AI JSON: {parseError.Message}");
    }

    return fieldGroups;
}
}