// Source: SeemGen/Services/WebsiteContentLoaderService.cs
// (Repository listing metadata at capture time: 143 lines, 5.3 KiB, C#)

using BLAIzor.Data;
using BLAIzor.Models;
using Microsoft.EntityFrameworkCore;
using Qdrant.Client.Grpc;
using System.Linq;
namespace BLAIzor.Services
{
/// <summary>
/// Loads content groups (with their items and chunks) from the relational database
/// and pairs every chunk with the vector point returned by a caller-supplied Qdrant
/// fetch delegate.
/// </summary>
/// <remarks>
/// Each call creates its own DI scope and resolves <see cref="ApplicationDbContext"/>
/// from it; the scope is disposed when the call returns.
/// </remarks>
public class WebsiteContentLoaderService
{
    private readonly IServiceScopeFactory _scopeFactory;

    /// <summary>Creates the loader.</summary>
    /// <param name="scopeFactory">Factory used to create one DI scope per load call.</param>
    public WebsiteContentLoaderService(IServiceScopeFactory scopeFactory)
    {
        _scopeFactory = scopeFactory;
    }

    /// <summary>
    /// Loads the first content group of the given type for a site and resolves the
    /// vector point of each of its chunks.
    /// </summary>
    /// <param name="siteInfoId">Id of the site whose content group is loaded.</param>
    /// <param name="contentGroupType">Value compared against the group's <c>Type</c>.</param>
    /// <param name="fetchVectorsFromQdrant">
    /// Callback that receives the site id and the point ids to retrieve and returns the
    /// matching vector payloads.
    /// </param>
    /// <returns>
    /// The group with its items, chunks and resolved vector points, or <c>null</c> when
    /// no group matches.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="fetchVectorsFromQdrant"/> is <c>null</c>.
    /// </exception>
    public async Task<ContentGroupModel?> LoadAsync(
        int siteInfoId,
        string contentGroupType,
        Func<int, PointId[], Task<List<WebPageContent>>> fetchVectorsFromQdrant)
    {
        ArgumentNullException.ThrowIfNull(fetchVectorsFromQdrant);

        using var scope = _scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<ApplicationDbContext>();

        var contentGroup = await db.ContentGroups
            .Include(cg => cg.Items)
                .ThenInclude(ci => ci.Chunks)
            .FirstOrDefaultAsync(cg => cg.SiteInfoId == siteInfoId && cg.Type == contentGroupType);

        if (contentGroup is null)
        {
            return null;
        }

        // Request points by QdrantPointId, the same key the results are matched on below
        // and the same key LoadAllAsync uses. (Previously this method requested points by
        // ChunkIndex, which does not correspond to the UId the results are matched against.)
        var allPointIds = ToPointIds(
            contentGroup.Items
                .SelectMany(item => item.Chunks)
                .Select(chunk => chunk.QdrantPointId));

        var vectorData = await fetchVectorsFromQdrant(siteInfoId, allPointIds);
        var vectorsByUId = IndexByUId(vectorData);

        var contentItems = contentGroup.Items
            .Select(ci => new ContentItemModel
            {
                ContentItem = ci,
                Chunks = ci.Chunks.ToList(),
                VectorPoints = ResolveVectorPoints(
                    ci.Chunks.Select(chunk => chunk.QdrantPointId),
                    vectorsByUId)
            })
            .ToList();

        return new ContentGroupModel
        {
            SiteInfoId = siteInfoId,
            ContentGroup = contentGroup,
            ContentItems = contentItems
        };
    }

    /// <summary>
    /// Loads every content group of a site and resolves the vector point of each chunk
    /// across all groups.
    /// </summary>
    /// <param name="siteInfo">Site whose content groups are loaded (matched on <c>Id</c>).</param>
    /// <param name="fetchVectorsFromQdrant">
    /// Callback that receives the site and the point ids to retrieve and returns the
    /// matching vector payloads.
    /// </param>
    /// <returns>
    /// The site's groups plus a flattened list of all their items with resolved vector
    /// points, or <c>null</c> when the site has no content groups.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="siteInfo"/> or <paramref name="fetchVectorsFromQdrant"/> is <c>null</c>.
    /// </exception>
    public async Task<WebsiteContentModel?> LoadAllAsync(
        SiteInfo siteInfo,
        Func<SiteInfo, PointId[], Task<List<WebPageContent>>> fetchVectorsFromQdrant)
    {
        ArgumentNullException.ThrowIfNull(siteInfo);
        ArgumentNullException.ThrowIfNull(fetchVectorsFromQdrant);

        using var scope = _scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<ApplicationDbContext>();

        var contentGroups = await db.ContentGroups
            .Include(cg => cg.Items)
                .ThenInclude(ci => ci.Chunks)
            .Where(cg => cg.SiteInfoId == siteInfo.Id)
            .ToListAsync();

        if (contentGroups.Count == 0)
        {
            return null;
        }

        // Collect the point ids of every chunk in every group so Qdrant is queried once.
        var allPointIds = ToPointIds(
            contentGroups
                .SelectMany(group => group.Items)
                .SelectMany(item => item.Chunks)
                .Select(chunk => chunk.QdrantPointId));

        var vectorData = await fetchVectorsFromQdrant(siteInfo, allPointIds);
        var vectorsByUId = IndexByUId(vectorData);

        var allContentItems = contentGroups
            .SelectMany(group => group.Items)
            .Select(ci => new ContentItemModel
            {
                ContentItem = ci,
                Chunks = ci.Chunks.ToList(),
                VectorPoints = ResolveVectorPoints(
                    ci.Chunks.Select(chunk => chunk.QdrantPointId),
                    vectorsByUId)
            })
            .ToList();

        return new WebsiteContentModel
        {
            SiteInfoId = siteInfo.Id,
            ContentGroups = contentGroups,
            ContentItems = allContentItems
        };
    }

    /// <summary>
    /// Converts stored point-id strings into Qdrant <see cref="PointId"/> values.
    /// Null, blank, or non-GUID strings are skipped instead of throwing, so one bad
    /// row does not abort the whole load.
    /// </summary>
    private static PointId[] ToPointIds(IEnumerable<string?> qdrantPointIds)
    {
        var ids = new List<PointId>();
        foreach (var raw in qdrantPointIds)
        {
            if (Guid.TryParse(raw?.Trim(), out var guid))
            {
                ids.Add(new PointId(guid));
            }
        }

        return ids.ToArray();
    }

    /// <summary>
    /// Indexes fetched vector payloads by their trimmed <c>UId</c>, case-insensitively.
    /// When several payloads share a UId the first one wins, matching a first-match scan.
    /// Payloads without a usable UId are ignored; a null input yields an empty index.
    /// </summary>
    private static Dictionary<string, WebPageContent> IndexByUId(IEnumerable<WebPageContent>? vectorData)
    {
        var index = new Dictionary<string, WebPageContent>(StringComparer.OrdinalIgnoreCase);
        if (vectorData is null)
        {
            return index;
        }

        foreach (var point in vectorData)
        {
            var key = point?.UId?.Trim();
            if (!string.IsNullOrEmpty(key))
            {
                index.TryAdd(key, point!);
            }
        }

        return index;
    }

    /// <summary>
    /// Returns, in chunk order, the vector payload for each point id that has one.
    /// Ids that are null/blank or have no fetched payload contribute nothing.
    /// </summary>
    private static List<WebPageContent> ResolveVectorPoints(
        IEnumerable<string?> qdrantPointIds,
        Dictionary<string, WebPageContent> vectorsByUId)
    {
        var resolved = new List<WebPageContent>();
        foreach (var raw in qdrantPointIds)
        {
            var key = raw?.Trim();
            if (!string.IsNullOrEmpty(key) && vectorsByUId.TryGetValue(key, out var point))
            {
                resolved.Add(point);
            }
        }

        return resolved;
    }
}
}