SeemGen/Services/QDrantService.cs

678 lines
24 KiB
C#

using BLAIzor.Models;
using Google.Protobuf;
using Google.Protobuf.Collections;
using Newtonsoft.Json;
using Qdrant.Client;
using Qdrant.Client.Grpc;
using System.Numerics;
using System.Text;
using static Qdrant.Client.Grpc.PointsUpdateOperation.Types;
namespace BLAIzor.Services
{
public class QDrantService
{
// Application configuration used to look up the "QDrant" and "AiSettings" sections.
// NOTE(review): the field is static but is overwritten by every constructor call,
// so all instances share whichever configuration was passed in last.
public static IConfiguration? _configuration;
// Base URL (port 6333) used by the HttpClient-based calls in this class.
private string qdrantUrl = "https://fe7d5c9e-8cd1-4ad9-af5a-af2bf3b93219.europe-west3-0.gcp.cloud.qdrant.io:6333";
// Host name handed to QdrantClient; the methods in this class connect to it on port 6334 with HTTPS enabled.
private readonly string _qdrantHost = "fe7d5c9e-8cd1-4ad9-af5a-af2bf3b93219.europe-west3-0.gcp.cloud.qdrant.io";
// API key most recently read via GetApiKey(); the methods that talk to Qdrant re-read it before each call.
private string _apiKey = "";
/// <summary>
/// Creates the service and stores <paramref name="configuration"/> in the static
/// <c>_configuration</c> field that the settings lookups read from.
/// </summary>
/// <param name="configuration">Application configuration; may be <c>null</c>.</param>
public QDrantService(IConfiguration? configuration) => _configuration = configuration;
/// <summary>
/// Reads the <c>AiSettings:EmbeddingService</c> configuration value.
/// </summary>
/// <returns>The configured value, or an empty string when it (or the configuration) is missing.</returns>
private string GetAiEmbeddingSettings()
{
    var aiSection = _configuration?.GetSection("AiSettings");
    var embeddingService = aiSection?.GetValue<string>("EmbeddingService");
    return embeddingService ?? string.Empty;
}
/// <summary>
/// Reads the Qdrant API key from the <c>QDrant:ApiKey</c> configuration value.
/// </summary>
/// <returns>
/// The configured key, or an empty string when no configuration was supplied or the
/// key is not present. Never returns <c>null</c>.
/// </returns>
public string GetApiKey()
{
    // GetSection never returns null (a missing section yields an empty section), so the only
    // null sources are the configuration object itself and the value lookup. The previous
    // null-forgiving operator let a missing key leak out as null from a non-nullable return.
    return _configuration?.GetSection("QDrant").GetValue<string>("ApiKey") ?? string.Empty;
}
/// <summary>
/// Counts the points stored in a Qdrant collection.
/// </summary>
/// <param name="collectionName">Name of the collection to count.</param>
/// <returns>The point count; the request is made with <c>exact: true</c>.</returns>
/// <exception cref="OverflowException">The count does not fit in an <see cref="int"/>.</exception>
public async Task<int> GetCollectionCount(string collectionName)
{
    _apiKey = GetApiKey();
    // QdrantClient is IDisposable; dispose it on exit instead of leaking one client per call.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    var pointCount = await client.CountAsync(
        collectionName: collectionName,
        exact: true
    );
    return Convert.ToInt32(pointCount);
}
/// <summary>
/// Looks up the collection named <c>Site{siteId}</c> via <c>GetCollectionByNameAsync</c>.
/// </summary>
/// <param name="siteId">Site identifier appended to the <c>Site</c> prefix.</param>
/// <returns>Whatever <c>GetCollectionByNameAsync</c> returns for that collection name.</returns>
public async Task<string> GetCollectionBySiteIdAsync(int siteId)
    => await GetCollectionByNameAsync($"Site{siteId}");
/// <summary>
/// Looks up the collection named <c>Template{templateId}</c> via <c>GetCollectionByNameAsync</c>.
/// </summary>
/// <param name="templateId">Template identifier appended to the <c>Template</c> prefix.</param>
/// <returns>Whatever <c>GetCollectionByNameAsync</c> returns for that collection name.</returns>
public async Task<string> GetCollectionByTemplateIdAsync(int templateId)
    => await GetCollectionByNameAsync($"Template{templateId}");
/// <summary>
/// Checks whether a collection with the given name exists on the Qdrant server.
/// </summary>
/// <param name="collectionName">Name of the collection to look for.</param>
/// <returns><c>true</c> when the collection exists; otherwise <c>false</c>.</returns>
public async Task<bool> CollectionExistsAsync(string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    return await client.CollectionExistsAsync(collectionName);
}
/// <summary>
/// Returns the number of points in a collection, formatted as a string.
/// </summary>
/// <remarks>
/// Despite its name the method does not return the collection itself, only the
/// <c>PointsCount</c> of its collection info. The collection info is also written to the console.
/// </remarks>
/// <param name="collectionName">Name of the collection to inspect.</param>
/// <returns>
/// The point count as text, or an empty string when the collection does not exist
/// or no collection info is returned.
/// </returns>
public async Task<string> GetCollectionByNameAsync(string collectionName)
{
    _apiKey = GetApiKey();
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // Use the client we already hold for the existence check; the previous code went through
    // the public wrapper, which built (and leaked) a second client for a single lookup.
    if (!await client.CollectionExistsAsync(collectionName))
    {
        Console.WriteLine($"Failed to get collection");
        return string.Empty;
    }

    var collectionInfo = await client.GetCollectionInfoAsync(collectionName);
    Console.Write(collectionInfo);
    return collectionInfo is null
        ? string.Empty
        : collectionInfo.PointsCount.ToString();
}
/// <summary>
/// Creates a Qdrant collection by sending an HTTP PUT to <c>{qdrantUrl}/collections/{collectionName}</c>.
/// </summary>
/// <remarks>
/// The vector size is 1024 when the configured embedding service is <c>"local"</c> and 1536
/// otherwise; the distance metric is always <c>Cosine</c>.
/// NOTE(review): these sizes must match the embedding model actually in use — confirm when
/// the embedding provider changes.
/// </remarks>
/// <param name="collectionName">Name of the collection to create.</param>
/// <returns><c>true</c> when the server answered with a success status code; otherwise <c>false</c>.</returns>
public async Task<bool> CreateQdrantCollectionAsync(string collectionName)
{
    _apiKey = GetApiKey();

    // HttpClient, the request content and the response are all IDisposable; release them on exit.
    using var httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiKey}");

    int vectorSize = GetAiEmbeddingSettings() == "local" ? 1024 : 1536;
    var createCollectionPayload = new
    {
        vectors = new { size = vectorSize, distance = "Cosine" }
    };

    using var content = new StringContent(
        JsonConvert.SerializeObject(createCollectionPayload),
        Encoding.UTF8,
        "application/json");
    using var response = await httpClient.PutAsync($"{qdrantUrl}/collections/{collectionName}", content);

    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine($"Failed to create collection: {response.StatusCode}");
        return false;
    }

    // Await the body: concatenating the un-awaited Task printed the Task's type name
    // instead of the server's reply.
    var responseBody = await response.Content.ReadAsStringAsync();
    Console.WriteLine("Collection created successfully!" + responseBody);
    return true;
}
/// <summary>
/// Retrieves the given points (with payload and vectors) from the site's vector collection
/// and maps them to <see cref="WebPageContent"/> items.
/// </summary>
/// <remarks>
/// A single placeholder "404" item is returned instead when the site has no collection name,
/// the collection does not exist, or none of the requested points are found.
/// </remarks>
/// <param name="site">Site whose <c>VectorCollectionName</c> and <c>Id</c> are used.</param>
/// <param name="pointIds">Ids of the points to retrieve.</param>
/// <returns>The mapped points, or a list holding one 404 placeholder.</returns>
public async Task<List<WebPageContent>> GetPointsFromQdrantAsyncByPointIds(SiteInfo site, PointId[] pointIds)
{
    _apiKey = GetApiKey();
    List<WebPageContent> pageContent = new();

    // Builds the placeholder returned whenever there is nothing to show (previously copy-pasted three times).
    WebPageContent CreateNotFoundPage() => new WebPageContent(Guid.Empty,
        Guid.Empty.ToString(),
        "404",
        site.Id,
        "ErrorPage",
        "A 404 error page for non existing content",
        "The page doesn't exist",
        null,
        DateTime.Now
    );

    if (string.IsNullOrEmpty(site.VectorCollectionName))
    {
        pageContent.Add(CreateNotFoundPage());
        return pageContent;
    }

    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    if (!await client.CollectionExistsAsync(site.VectorCollectionName))
    {
        pageContent.Add(CreateNotFoundPage());
        return pageContent;
    }

    var result = await client.RetrieveAsync(
        collectionName: site.VectorCollectionName,
        ids: pointIds,
        withPayload: true,
        withVectors: true
    );

    if (result.Count == 0)
    {
        pageContent.Add(CreateNotFoundPage());
        return pageContent;
    }

    foreach (var retrievedPoint in result)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();

        // Point ids are either numeric or UUIDs; pick whichever is set for the log line.
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();
        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        // NOTE(review): every payload key below is required; a missing key throws KeyNotFoundException.
        pageContent.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return pageContent;
}
/// <summary>
/// Retrieves the given points (with payload and vectors) from a collection and maps them
/// to <see cref="WebPageContent"/> items.
/// </summary>
/// <param name="collectionName">Name of the collection to read from.</param>
/// <param name="pointIds">Ids of the points to retrieve.</param>
/// <returns>
/// The mapped points; an empty list when the collection does not exist or no point matches.
/// </returns>
public async Task<List<WebPageContent>> GetPointsFromQdrantAsyncByIntegerPointIds(string collectionName, PointId[] pointIds)
{
    _apiKey = GetApiKey();
    List<WebPageContent> pageContent = new();

    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // The existence check used to be computed and then ignored, so a missing collection
    // still reached RetrieveAsync. Honor it and return the (empty) result instead.
    if (!await client.CollectionExistsAsync(collectionName))
    {
        Console.WriteLine($"Collection {collectionName} does not exist");
        return pageContent;
    }

    var result = await client.RetrieveAsync(
        collectionName: collectionName,
        ids: pointIds,
        withPayload: true,
        withVectors: true
    );

    foreach (var retrievedPoint in result)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();

        // Point ids are either numeric or UUIDs; pick whichever is set for the log line.
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();
        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        // NOTE(review): every payload key below is required; a missing key throws KeyNotFoundException.
        pageContent.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return pageContent;
}
/// <summary>
/// Retrieves a single numeric-id point (with payload and vectors) from the collection
/// named <c>Site{siteId}</c> and maps it to <see cref="WebPageContent"/>.
/// </summary>
/// <param name="siteId">Site identifier used to build the collection name.</param>
/// <param name="pointId">Numeric point id; must be non-negative.</param>
/// <returns>A list with the mapped point(s) returned by the server; empty when nothing matched.</returns>
/// <exception cref="OverflowException"><paramref name="pointId"/> is negative.</exception>
public async Task<List<WebPageContent>> GetPointFromQdrantAsyncByPointId(int siteId, int pointId)
{
    _apiKey = GetApiKey();
    List<WebPageContent> contentList = new();

    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    var result = await client.RetrieveAsync(
        collectionName: $"Site{siteId}",
        id: Convert.ToUInt64(pointId),
        withPayload: true,
        withVectors: true
    );

    foreach (var retrievedPoint in result)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();

        // Point ids are either numeric or UUIDs; pick whichever is set for the log line.
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();
        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        // NOTE(review): every payload key below is required; a missing key throws KeyNotFoundException.
        contentList.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return contentList;
}
/// <summary>
/// Fetches one point from a collection with an HTTP GET to
/// <c>{qdrantUrl}/collections/{collectionName}/points/{snippetId}</c>.
/// </summary>
/// <param name="snippetId">Numeric id of the point to fetch.</param>
/// <param name="collectionName">Name of the collection that holds the point.</param>
/// <returns>The raw response body on success; an empty string on a non-success status code.</returns>
public async Task<string> GetSnippetAsync(int snippetId, string collectionName)
{
    _apiKey = GetApiKey();

    // Both the client and the response are IDisposable; release them when the method exits.
    using var httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiKey}");

    using var response = await httpClient.GetAsync($"{qdrantUrl}/collections/{collectionName}/points/{snippetId}");
    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine($"Failed to query snippet: {response.StatusCode}");
        return string.Empty;
    }

    return await response.Content.ReadAsStringAsync();
}
/// <summary>
/// Runs a vector search and returns the numeric id of the best-matching point.
/// </summary>
/// <param name="queryVector">Embedding to search with.</param>
/// <param name="limit">
/// Not forwarded to the search: only the top hit is returned, so exactly one result is requested.
/// The parameter is kept so existing call sites continue to compile.
/// </param>
/// <param name="collectionName">Collection to search; defaults to <c>html_snippets</c>.</param>
/// <returns>
/// The numeric id of the top hit, or <c>0</c> when the collection does not exist or the search
/// returns nothing. NOTE(review): <c>0</c> is therefore indistinguishable from a real point id 0.
/// </returns>
public async Task<int> QuerySnippetAsync(float[] queryVector, int limit = 1, string collectionName = "html_snippets")
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    if (!await client.CollectionExistsAsync(collectionName))
    {
        Console.WriteLine($"Failed to query snippet: no collection");
        return 0;
    }

    IReadOnlyList<ScoredPoint> response = await client.SearchAsync(
        collectionName: collectionName,
        vector: queryVector,
        limit: 1
    );

    if (response.Count == 0)
    {
        Console.WriteLine($"Failed to query snippet: {response.Count}");
        return 0;
    }

    // The list is known to be non-empty here, so index it directly instead of FirstOrDefault + '!'.
    return Convert.ToInt32(response[0].Id.Num);
}
/// <summary>
/// Runs a vector search against a collection and maps the hits (with payload and vectors)
/// to <see cref="WebPageContent"/> items.
/// </summary>
/// <param name="collectionName">Collection to search.</param>
/// <param name="queryVector">Embedding to search with.</param>
/// <param name="limit">
/// NOTE(review): currently NOT forwarded — the search always requests 3 hits. Left unchanged
/// because honoring the default of 1 would reduce the results existing callers receive;
/// decide on the intended value before wiring it through.
/// </param>
/// <returns>The mapped hits; an empty list when nothing matched.</returns>
public async Task<List<WebPageContent>> QueryContentAsync(string collectionName, float[] queryVector, int limit = 1)
{
    _apiKey = GetApiKey();
    List<WebPageContent> pageContent = new();

    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(
        host: _qdrantHost,
        https: true,
        apiKey: _apiKey
    );

    var response = await client.SearchAsync(
        collectionName: collectionName,
        vector: queryVector,
        payloadSelector: true,
        vectorsSelector: true,
        limit: 3
    );

    if (response.Count == 0)
    {
        Console.Write("None found");
        return pageContent;
    }

    foreach (var retrievedPoint in response)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();

        // Point ids are either numeric or UUIDs; pick whichever is set for the log line.
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();
        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        // NOTE(review): every payload key below is required; a missing key throws KeyNotFoundException.
        pageContent.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return pageContent;
}
/// <summary>
/// Upserts a single point into a collection.
/// </summary>
/// <param name="id">Id of the point.</param>
/// <param name="vectors">Embedding stored with the point.</param>
/// <param name="payload">Payload fields stored with the point.</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertPointAsync(PointId id, float[] vectors, MapField<string, Value> payload, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // Build the point once (the earlier throwaway `new PointStruct()` was immediately overwritten).
    var pointStruct = new PointStruct
    {
        Id = id,
        Vectors = vectors,
        Payload = { payload }
    };
    Console.WriteLine($"{pointStruct.Id} val bekerült.");

    var result = await client.UpsertAsync(
        collectionName: collectionName,
        points: new List<PointStruct> { pointStruct }
    );
    Console.Write("QDrantUpsert: " + result.Status);
}
/// <summary>
/// Upserts several points that use integer ids (per the original note: used for HTML snippets for now).
/// </summary>
/// <remarks>
/// The three lists are read in parallel by index: element <c>i</c> of each list describes point <c>i</c>.
/// <paramref name="vectors"/> and <paramref name="payloads"/> must be at least as long as
/// <paramref name="ids"/>, otherwise the list indexer throws.
/// </remarks>
/// <param name="ids">Integer point ids; each is cast to <c>ulong</c>.</param>
/// <param name="vectors">Embedding for each point.</param>
/// <param name="payloads">Payload fields for each point.</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertManyAsync(List<int> ids, List<float[]> vectors, List<MapField<string, Value>> payloads, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var pointStructList = new List<PointStruct>(ids.Count);
    for (int i = 0; i < ids.Count; i++)
    {
        var point = new PointStruct
        {
            Id = (ulong)ids[i],
            Vectors = vectors[i],
            Payload = { payloads[i] }
        };
        pointStructList.Add(point);

        // Logging must not abort the insert: a payload without a "name" entry used to throw
        // KeyNotFoundException here before anything was sent.
        var nameText = point.Payload.TryGetValue("name", out var nameValue)
            ? nameValue.ToString()
            : string.Empty;
        Console.WriteLine($"{point.Id} val bekerül {nameText}");
    }

    Console.WriteLine(pointStructList.Count);
    await client.UpsertAsync(
        collectionName: collectionName,
        points: pointStructList
    );
}
/// <summary>
/// Upserts a batch of <see cref="WebPageContent"/> chunks, using each chunk's own point id.
/// </summary>
/// <remarks>
/// Each chunk is stored with the payload keys <c>uid</c>, <c>type</c>, <c>siteId</c>, <c>name</c>,
/// <c>description</c>, <c>content</c> and <c>lastUpdated</c> (round-trip "o" format).
/// </remarks>
/// <param name="chunks">Chunks to store.</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertManyAsync(List<WebPageContent> chunks, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var pointStructList = new List<PointStruct>(chunks.Count);
    foreach (var chunk in chunks)
    {
        // Flatten the chunk into the payload map stored alongside the vector.
        var payload = new MapField<string, Value>
        {
            { "uid", new Value { StringValue = chunk.UId } },
            { "type", new Value { StringValue = chunk.Type } },
            { "siteId", new Value { IntegerValue = chunk.SiteId } },
            { "name", new Value { StringValue = chunk.Name } },
            { "description", new Value { StringValue = chunk.Description } },
            { "content", new Value { StringValue = chunk.Content } },
            { "lastUpdated", new Value { StringValue = chunk.LastUpdated.ToString("o") } } // ISO 8601 round-trip format
        };

        var point = new PointStruct
        {
            Id = chunk.Id,
            Vectors = chunk.Vectors,
            Payload = { payload }
        };
        pointStructList.Add(point);
        Console.WriteLine($"{point.Id} val bekerül {point.Payload["name"]}");
    }

    Console.WriteLine(pointStructList.Count);
    await client.UpsertAsync(
        collectionName: collectionName,
        points: pointStructList
    );
}
/// <summary>
/// Deletes a single numeric-id point from a collection and logs the resulting status.
/// </summary>
/// <param name="pointId">Numeric id of the point to delete (cast to <c>ulong</c>).</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointAsync(int pointId, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // Pass the caller's collection name: the call used to target the literal placeholder
    // "{collection_name}", so the requested collection was never touched.
    var result = await client.DeleteAsync(collectionName: collectionName, ids: [(ulong)pointId]);
    Console.WriteLine(result.Status);
}
/// <summary>
/// Deletes the points with the given numeric ids from a collection and logs the resulting status.
/// </summary>
/// <param name="pointIds">Numeric ids of the points to delete.</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointsAsync(ulong[] pointIds, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    var result = await client.DeleteAsync(collectionName: collectionName, ids: pointIds);
    Console.WriteLine(result.Status);
}
/// <summary>
/// Deletes the points with the given GUID ids from a collection and logs the resulting status.
/// </summary>
/// <param name="pointIds">GUID ids of the points to delete.</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointsAsync(Guid[] pointIds, string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    var result = await client.DeleteAsync(collectionName: collectionName, ids: pointIds);
    Console.WriteLine(result.Status);
}
/// <summary>
/// Deletes an entire collection from the Qdrant server.
/// </summary>
/// <param name="collectionName">Name of the collection to delete.</param>
public async Task DeleteCollectionAsync(string collectionName)
{
    _apiKey = GetApiKey();
    // Dispose the client on exit; it was previously created per call and never released.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);
    await client.DeleteCollectionAsync(collectionName);
}
}
/// <summary>
/// One entry of a query result: a point id, its version and its score.
/// </summary>
/// <remarks>
/// NOTE(review): the lower-case member names presumably mirror JSON field names of a Qdrant
/// HTTP response — confirm against the code that deserializes this type before renaming.
/// </remarks>
public class PointResult
{
public int id { get; set; }
public int version { get; set; }
public double score { get; set; }
}
/// <summary>
/// Envelope holding a list of <see cref="PointResult"/> entries plus a status string and a time value.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the shape of a Qdrant HTTP search response — confirm against the
/// code that deserializes this type; the unit of <c>time</c> is not visible here.
/// </remarks>
public class QDrantQueryResult
{
public List<PointResult> result { get; set; }
public string status { get; set; }
public double time { get; set; }
}
/// <summary>
/// A stored point whose payload is an <see cref="HtmlSnippet"/>, with its vector held as doubles.
/// </summary>
public class PointData
{
    public int id { get; set; }
    public HtmlSnippet payload { get; set; }
    public List<double> vector { get; set; }

    /// <summary>
    /// Converts <see cref="vector"/> to single precision.
    /// </summary>
    /// <returns>A new array with each component cast to <c>float</c>, or <c>null</c> when <see cref="vector"/> is <c>null</c>.</returns>
    public float[] GetFloatVector()
    {
        if (vector == null)
        {
            return null;
        }

        var floats = new float[vector.Count];
        for (int index = 0; index < floats.Length; index++)
        {
            floats[index] = (float)vector[index];
        }
        return floats;
    }
}
/// <summary>
/// Envelope holding a single <see cref="PointData"/> plus a status string and a time value.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the shape of a Qdrant HTTP "get point" response for HTML snippets —
/// confirm against the code that deserializes this type.
/// </remarks>
public class QDrantGetPointResult
{
public PointData result { get; set; }
public string status { get; set; }
public double time { get; set; }
}
/// <summary>
/// Envelope holding a single <see cref="ContentPointData"/> plus a status string and a time value.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the shape of a Qdrant HTTP "get point" response for page content —
/// confirm against the code that deserializes this type.
/// </remarks>
public class QDrantGetContentPointResult
{
public ContentPointData result { get; set; }
public string status { get; set; }
public double time { get; set; }
}
/// <summary>
/// A stored point whose payload is a <see cref="ContentPayload"/>, with its vector held as doubles.
/// </summary>
public class ContentPointData
{
    public int id { get; set; }
    public ContentPayload payload { get; set; }
    public List<double> vector { get; set; }

    /// <summary>
    /// Converts <see cref="vector"/> to single precision.
    /// </summary>
    /// <returns>A new array with each component cast to <c>float</c>, or <c>null</c> when <see cref="vector"/> is <c>null</c>.</returns>
    public float[] GetFloatVector()
    {
        if (vector == null)
        {
            return null;
        }

        var floats = new float[vector.Count];
        for (int index = 0; index < floats.Length; index++)
        {
            floats[index] = (float)vector[index];
        }
        return floats;
    }
}
/// <summary>
/// Payload fields carried by a content point: identifiers, type, owning site, name,
/// description, content text and last-updated timestamp.
/// </summary>
/// <remarks>
/// The member names <c>uid</c>, <c>type</c>, <c>siteId</c>, <c>name</c>, <c>description</c>,
/// <c>content</c> and <c>lastUpdated</c> match the payload keys that QDrantService writes and reads.
/// NOTE(review): <c>id</c> has no matching payload key in that code — confirm where it is populated.
/// </remarks>
public class ContentPayload
{
public int id { get; set; }
public string uid { get; set; }
public string type { get; set; }
public int siteId { get; set; }
public string name { get; set; }
public string description { get; set; }
public string content { get; set; }
public DateTime lastUpdated { get; set; }
}
}