112 lines
3.3 KiB
Plaintext
112 lines
3.3 KiB
Plaintext
@page "/pdf-form-extractor"
|
|
@using System.IO
|
|
@using BLAIzor.Models
|
|
@using BLAIzor.Services
|
|
@using Microsoft.AspNetCore.Components.Forms
|
|
@using System.Text
|
|
@using UglyToad.PdfPig
|
|
@inject ContentEditorAIService ContentEditorAIService
|
|
|
|
<h3>PDF Form Extractor</h3>
|
|
|
|
<InputFile OnChange="HandleFileSelected" />
|
|
|
|
@if (!string.IsNullOrEmpty(PdfText))
|
|
{
|
|
<h5>Extracted Text:</h5>
|
|
<InputTextArea @bind-Value="@PdfText" style="width: 100%"></InputTextArea>
|
|
}
|
|
|
|
@if (ExtractedFields.Any())
|
|
{
|
|
<h5>Extracted Fields:</h5>
|
|
<ul>
|
|
@foreach (var field in ExtractedFields)
|
|
{
|
|
<li><strong>@field.GroupName:</strong> @field.Fields.Count (@field.Repeatable)</li>
|
|
}
|
|
</ul>
|
|
|
|
<DynamicEditFormFromDescription Groups="@ExtractedFields" isEditing=true></DynamicEditFormFromDescription>
|
|
}
|
|
|
|
@code {
|
|
private string PdfText = string.Empty;
|
|
private List<FormFieldGroup> ExtractedFields = new();
|
|
|
|
/// <summary>
/// Handles the <c>InputFile</c> change event: copies the selected PDF to a
/// temporary file, extracts its text into <c>PdfText</c>, and asks the AI
/// service for structured field groups, stored in <c>ExtractedFields</c>.
/// </summary>
/// <param name="e">Change event arguments carrying the selected browser file.</param>
/// <returns>A task that completes once text and field extraction have finished.</returns>
private async Task HandleFileSelected(InputFileChangeEventArgs e)
{
    // Upper bound for an uploaded PDF. OpenReadStream() defaults to roughly
    // 500 KB, which rejects most real-world PDFs; adjust this cap as needed.
    const long maxPdfSizeBytes = 10L * 1024 * 1024;

    // e.File throws when the event carries zero or multiple files (for example
    // when the user cancels the picker), so check the count before reading it.
    if (e.FileCount != 1)
    {
        return;
    }

    var file = e.File;
    if (file is null || !file.ContentType.Contains("pdf", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // The file name comes from the browser and must not be trusted: it could
    // contain path segments or collide with another upload. Use a random name.
    var tempPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.pdf");

    try
    {
        await using (var stream = file.OpenReadStream(maxPdfSizeBytes))
        await using (var fileStream = File.Create(tempPath))
        {
            await stream.CopyToAsync(fileStream);
        }

        PdfText = ExtractPdfText(tempPath);
    }
    finally
    {
        // Always remove the temporary copy, even if the upload or parsing failed.
        // File.Delete is a no-op when the file was never created.
        File.Delete(tempPath);
    }

    // Ask the AI service to analyze the text and extract structured fields.
    ExtractedFields = await CallAiForFieldExtraction(PdfText);
}
|
|
|
|
/// <summary>
/// Opens the PDF at <paramref name="filePath"/> and concatenates the text of
/// every page, one page per line.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <returns>The text of all pages, each followed by a line terminator.</returns>
private string ExtractPdfText(string filePath)
{
    var pageText = new StringBuilder();

    // Using declaration: the document is disposed when the method exits.
    using var pdf = PdfDocument.Open(filePath);
    foreach (var pdfPage in pdf.GetPages())
    {
        pageText.AppendLine(pdfPage.Text);
    }

    return pageText.ToString();
}
|
|
|
|
// private async Task<List<FormField>> CallAiForFieldExtraction(string plainText)
|
|
// {
|
|
|
|
// var jsonResult = await ContentEditorService.AnalyzeFormFieldsFromText(plainText);
|
|
// Console.Write($"Pdf form result: {jsonResult}");
|
|
// try
|
|
// {
|
|
// var fields = System.Text.Json.JsonSerializer.Deserialize<List<FormField>>(jsonResult, new System.Text.Json.JsonSerializerOptions
|
|
// {
|
|
// PropertyNameCaseInsensitive = true
|
|
// });
|
|
|
|
// return fields ?? new List<FormField>();
|
|
// }
|
|
// catch
|
|
// {
|
|
// // fallback if something goes wrong
|
|
// return new List<FormField>();
|
|
// }
|
|
// }
|
|
|
|
/// <summary>
/// Passes the given text to
/// <c>ContentEditorAIService.AnalyzeGroupedFormFieldsFromText</c> and
/// deserializes the returned JSON into a list of form field groups.
/// </summary>
/// <param name="plainText">Text to be analyzed by the AI service.</param>
/// <returns>
/// The deserialized groups, or an empty list when deserialization yields
/// <c>null</c> or throws.
/// </returns>
private async Task<List<FormFieldGroup>> CallAiForFieldExtraction(string plainText)
{
    var aiJson = await ContentEditorAIService.AnalyzeGroupedFormFieldsFromText(plainText);
    Console.WriteLine($"📄 PDF form AI response: {aiJson}");

    try
    {
        // Match JSON property names to model properties regardless of casing.
        var serializerOptions = new System.Text.Json.JsonSerializerOptions();
        serializerOptions.PropertyNameCaseInsensitive = true;

        var parsedGroups = System.Text.Json.JsonSerializer.Deserialize<List<FormFieldGroup>>(aiJson, serializerOptions);
        if (parsedGroups is null)
        {
            return new List<FormFieldGroup>();
        }

        return parsedGroups;
    }
    catch (Exception parseError)
    {
        // Best effort: report the failure and fall back to an empty result.
        Console.WriteLine($"❌ Error parsing AI JSON: {parseError.Message}");
        return new List<FormFieldGroup>();
    }
}
|
|
|
|
|
|
} |