678 lines
24 KiB
C#
678 lines
24 KiB
C#
using BLAIzor.Models;
|
|
using Google.Protobuf;
|
|
using Google.Protobuf.Collections;
|
|
using Newtonsoft.Json;
|
|
using Qdrant.Client;
|
|
using Qdrant.Client.Grpc;
|
|
using System.Numerics;
|
|
using System.Text;
|
|
using static Qdrant.Client.Grpc.PointsUpdateOperation.Types;
|
|
|
|
namespace BLAIzor.Services
|
|
{
|
|
public class QDrantService
|
|
{
|
|
/// <summary>
/// Application configuration used for all lookups in this service.
/// The field is static, so constructing a new instance overwrites it for every instance.
/// </summary>
public static IConfiguration? _configuration;

// Base URL of the Qdrant cluster's REST endpoint; used by the HttpClient-based calls.
private string qdrantUrl = "https://fe7d5c9e-8cd1-4ad9-af5a-af2bf3b93219.europe-west3-0.gcp.cloud.qdrant.io:6333";

// Host name of the same cluster; passed to QdrantClient together with port 6334.
private readonly string _qdrantHost = "fe7d5c9e-8cd1-4ad9-af5a-af2bf3b93219.europe-west3-0.gcp.cloud.qdrant.io";

// Last API key read from configuration; refreshed at the start of each operation.
private string _apiKey = "";

/// <summary>
/// Creates the service and stores <paramref name="configuration"/> in the static configuration field.
/// </summary>
/// <param name="configuration">Configuration to read the "QDrant" and "AiSettings" sections from; may be null.</param>
public QDrantService(IConfiguration? configuration) => _configuration = configuration;
|
|
|
|
/// <summary>
/// Reads the "EmbeddingService" value from the "AiSettings" configuration section.
/// </summary>
/// <returns>The configured value, or an empty string when configuration or the value is missing.</returns>
private string GetAiEmbeddingSettings()
{
    var embeddingService = _configuration?.GetSection("AiSettings")?.GetValue<string>("EmbeddingService");
    return embeddingService ?? string.Empty;
}
|
|
|
|
/// <summary>
/// Reads the Qdrant API key from the "ApiKey" value of the "QDrant" configuration section.
/// </summary>
/// <returns>
/// The configured key, or an empty string when no configuration is available or the key is not set.
/// Never returns null.
/// </returns>
public string GetApiKey()
{
    // GetSection never returns null (a missing section yields an empty section), so the only
    // null sources are the configuration itself and the value lookup; both fall back to "".
    return _configuration?.GetSection("QDrant").GetValue<string>("ApiKey") ?? string.Empty;
}
|
|
|
|
/// <summary>
/// Returns the exact number of points stored in a collection.
/// </summary>
/// <param name="collectionName">Name of the collection to count.</param>
/// <returns>The point count converted to <see cref="int"/>.</returns>
public async Task<int> GetCollectionCount(string collectionName)
{
    _apiKey = GetApiKey();

    // QdrantClient is IDisposable; dispose it so each call does not leak its connection.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var count = await client.CountAsync(
        collectionName: collectionName,
        exact: true
    );

    return Convert.ToInt32(count);
}
|
|
|
|
/// <summary>
/// Looks up the collection named "Site" followed by <paramref name="siteId"/>.
/// </summary>
/// <param name="siteId">Site identifier appended to the "Site" prefix.</param>
/// <returns>Whatever <c>GetCollectionByNameAsync</c> returns for that name.</returns>
public async Task<string> GetCollectionBySiteIdAsync(int siteId)
{
    string collectionName = $"Site{siteId}";
    return await GetCollectionByNameAsync(collectionName);
}
|
|
|
|
/// <summary>
/// Looks up the collection named "Template" followed by <paramref name="templateId"/>.
/// </summary>
/// <param name="templateId">Template identifier appended to the "Template" prefix.</param>
/// <returns>Whatever <c>GetCollectionByNameAsync</c> returns for that name.</returns>
public async Task<string> GetCollectionByTemplateIdAsync(int templateId)
{
    string collectionName = $"Template{templateId}";
    return await GetCollectionByNameAsync(collectionName);
}
|
|
|
|
/// <summary>
/// Checks whether a collection with the given name exists on the Qdrant server.
/// </summary>
/// <param name="collectionName">Name of the collection to look for.</param>
/// <returns><c>true</c> when the server reports that the collection exists.</returns>
public async Task<bool> CollectionExistsAsync(string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client once the awaited call has completed.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    return await client.CollectionExistsAsync(collectionName);
}
|
|
|
|
/// <summary>
/// Fetches collection info and returns the collection's point count as text.
/// </summary>
/// <param name="collectionName">Name of the collection to inspect.</param>
/// <returns>
/// The point count as a string, or an empty string when the collection does not exist
/// or no info was returned.
/// </returns>
public async Task<string> GetCollectionByNameAsync(string collectionName)
{
    _apiKey = GetApiKey();

    // One disposable client serves both the existence check and the info request.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    if (!await client.CollectionExistsAsync(collectionName))
    {
        Console.WriteLine("Failed to get collection");
        return string.Empty;
    }

    var info = await client.GetCollectionInfoAsync(collectionName);
    Console.Write(info);

    return info?.PointsCount.ToString() ?? string.Empty;
}
|
|
|
|
/// <summary>
/// Creates a collection through the Qdrant REST API using cosine distance.
/// </summary>
/// <remarks>
/// The vector size is 1024 when the configured embedding service is "local" and 1536 otherwise;
/// adjust these if the embedding model changes.
/// </remarks>
/// <param name="collectionName">Name of the collection to create.</param>
/// <returns><c>true</c> when the server answered with a success status code; otherwise <c>false</c>.</returns>
public async Task<bool> CreateQdrantCollectionAsync(string collectionName)
{
    _apiKey = GetApiKey();

    using var httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiKey}");

    int vectorSize = GetAiEmbeddingSettings() == "local" ? 1024 : 1536;
    var createCollectionPayload = new
    {
        vectors = new { size = vectorSize, distance = "Cosine" }
    };

    using var content = new StringContent(
        JsonConvert.SerializeObject(createCollectionPayload),
        Encoding.UTF8,
        "application/json");

    // Escape the name so characters such as '/' or '?' cannot alter the request path.
    var requestUri = $"{qdrantUrl}/collections/{Uri.EscapeDataString(collectionName)}";
    using var response = await httpClient.PutAsync(requestUri, content);

    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine($"Failed to create collection: {response.StatusCode}");
        return false;
    }

    // Await the body; concatenating the un-awaited Task would only log the Task's type name.
    var responseBody = await response.Content.ReadAsStringAsync();
    Console.WriteLine("Collection created successfully!" + responseBody);
    return true;
}
|
|
|
|
/// <summary>
/// Retrieves the given points (payload and vectors) from the site's vector collection.
/// </summary>
/// <param name="site">Site whose <c>VectorCollectionName</c> identifies the collection and whose <c>Id</c> is stamped on the placeholder.</param>
/// <param name="pointIds">Ids of the points to retrieve.</param>
/// <returns>
/// One <see cref="WebPageContent"/> per retrieved point. When the site has no collection name,
/// the collection does not exist, or no point was found, the list contains a single "404" placeholder.
/// </returns>
public async Task<List<WebPageContent>> GetPointsFromQdrantAsyncByPointIds(SiteInfo site, PointId[] pointIds)
{
    _apiKey = GetApiKey();

    // Single source for the placeholder that every "nothing to show" path returns.
    List<WebPageContent> NotFound() => new()
    {
        new WebPageContent(Guid.Empty,
            Guid.Empty.ToString(),
            "404",
            site.Id,
            "ErrorPage",
            "A 404 error page for non existing content",
            "The page doesn't exist",
            null,
            DateTime.Now
        )
    };

    // Maps one retrieved point to the application model and logs its id and name.
    static WebPageContent ToPageContent(RetrievedPoint point)
    {
        float[] vectorArray = point.Vectors.Vector.Data.ToArray();
        string idValue = point.Id.HasNum ? point.Id.Num.ToString() : point.Id.Uuid.ToString();

        Console.WriteLine($"POINTID: {idValue}, {point.Payload["name"]}");

        return new WebPageContent(point.Id,
            point.Payload["uid"].StringValue,
            point.Payload["type"].StringValue,
            Convert.ToInt32(point.Payload["siteId"].IntegerValue),
            point.Payload["name"].StringValue,
            point.Payload["description"].StringValue,
            point.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(point.Payload["lastUpdated"].StringValue)
        );
    }

    if (string.IsNullOrEmpty(site.VectorCollectionName))
    {
        return NotFound();
    }

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    if (!await client.CollectionExistsAsync(site.VectorCollectionName))
    {
        return NotFound();
    }

    var result = await client.RetrieveAsync(
        collectionName: site.VectorCollectionName,
        ids: pointIds,
        withPayload: true,
        withVectors: true
    );

    if (result.Count == 0)
    {
        return NotFound();
    }

    List<WebPageContent> pageContent = new(result.Count);
    foreach (var retrievedPoint in result)
    {
        pageContent.Add(ToPageContent(retrievedPoint));
    }

    return pageContent;
}
|
|
|
|
/// <summary>
/// Retrieves the given points (payload and vectors) from a named collection.
/// </summary>
/// <param name="collectionName">Collection to read from.</param>
/// <param name="pointIds">Ids of the points to retrieve.</param>
/// <returns>
/// One <see cref="WebPageContent"/> per retrieved point; an empty list when the collection
/// does not exist or nothing was found.
/// </returns>
public async Task<List<WebPageContent>> GetPointsFromQdrantAsyncByIntegerPointIds(string collectionName, PointId[] pointIds)
{
    _apiKey = GetApiKey();

    List<WebPageContent> pageContent = new();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // The existence check was previously computed and ignored; use it so a missing
    // collection yields an empty result instead of a failed retrieve call.
    bool doesExist = await client.CollectionExistsAsync(collectionName);
    if (!doesExist)
    {
        return pageContent;
    }

    var result = await client.RetrieveAsync(
        collectionName: collectionName,
        ids: pointIds,
        withPayload: true,
        withVectors: true
    );

    foreach (var retrievedPoint in result)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();

        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        pageContent.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return pageContent;
}
|
|
|
|
/// <summary>
/// Retrieves a single point, by numeric id, from the collection named "Site" + <paramref name="siteId"/>.
/// </summary>
/// <param name="siteId">Site identifier used to build the collection name.</param>
/// <param name="pointId">Numeric point id; converted with <see cref="Convert.ToUInt64(int)"/>.</param>
/// <returns>A list with one <see cref="WebPageContent"/> per point returned by the server.</returns>
public async Task<List<WebPageContent>> GetPointFromQdrantAsyncByPointId(int siteId, int pointId)
{
    _apiKey = GetApiKey();

    List<WebPageContent> contentList = new();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var result = await client.RetrieveAsync(
        collectionName: $"Site{siteId.ToString()}",
        id: Convert.ToUInt64(pointId),
        withPayload: true,
        withVectors: true
    );

    foreach (var retrievedPoint in result)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();

        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        contentList.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return contentList;
}
|
|
|
|
/// <summary>
/// Fetches a single point from a collection through the Qdrant REST API.
/// </summary>
/// <param name="snippetId">Numeric id of the point to fetch.</param>
/// <param name="collectionName">Collection that holds the point.</param>
/// <returns>The raw response body on success; an empty string when the server returns a non-success status.</returns>
public async Task<string> GetSnippetAsync(int snippetId, string collectionName)
{
    _apiKey = GetApiKey();

    using var httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiKey}");

    // Escape the name so characters such as '/' or '?' cannot alter the request path.
    var requestUri = $"{qdrantUrl}/collections/{Uri.EscapeDataString(collectionName)}/points/{snippetId}";
    using var response = await httpClient.GetAsync(requestUri);

    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine($"Failed to query snippet: {response.StatusCode}");
        return string.Empty;
    }

    return await response.Content.ReadAsStringAsync();
}
|
|
|
|
/// <summary>
/// Runs a vector search and returns the numeric id of the best-matching point.
/// </summary>
/// <param name="queryVector">Embedding to search with.</param>
/// <param name="limit">
/// Kept for caller compatibility. Only the top hit is returned, so a single result is always requested.
/// </param>
/// <param name="collectionName">Collection to search; defaults to "html_snippets".</param>
/// <returns>The numeric id of the top hit, or 0 when the collection does not exist or nothing matched.</returns>
public async Task<int> QuerySnippetAsync(float[] queryVector, int limit = 1, string collectionName = "html_snippets")
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    if (!await client.CollectionExistsAsync(collectionName))
    {
        Console.WriteLine($"Failed to query snippet: no collection");
        return 0;
    }

    IReadOnlyList<ScoredPoint> hits = await client.SearchAsync(
        collectionName: collectionName,
        vector: queryVector,
        limit: 1
    );

    if (hits.Count == 0)
    {
        Console.WriteLine($"Failed to query snippet: {hits.Count}");
        return 0;
    }

    return Convert.ToInt32(hits[0].Id.Num);
}
|
|
|
|
/// <summary>
/// Runs a vector search in a collection and maps the hits (payload and vectors) to page content.
/// </summary>
/// <param name="collectionName">Collection to search.</param>
/// <param name="queryVector">Embedding to search with.</param>
/// <param name="limit">
/// Currently not forwarded to the search; see the review note in the body.
/// </param>
/// <returns>One <see cref="WebPageContent"/> per hit; an empty list when nothing matched.</returns>
public async Task<List<WebPageContent>> QueryContentAsync(string collectionName, float[] queryVector, int limit = 1)
{
    _apiKey = GetApiKey();

    List<WebPageContent> pageContent = new();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(
        host: _qdrantHost,
        https: true,
        apiKey: _apiKey
    );

    // NOTE(review): the search has always requested 3 hits regardless of `limit` (default 1).
    // Forwarding `limit` would change what existing callers receive, so the value is kept as is;
    // confirm with callers before honoring the parameter.
    var response = await client.SearchAsync(
        collectionName: collectionName,
        vector: queryVector,
        payloadSelector: true,
        vectorsSelector: true,
        limit: 3
    );

    if (response.Count == 0)
    {
        Console.Write("None found");
        return pageContent;
    }

    foreach (var retrievedPoint in response)
    {
        float[] vectorArray = retrievedPoint.Vectors.Vector.Data.ToArray();
        string idValue = retrievedPoint.Id.HasNum
            ? retrievedPoint.Id.Num.ToString()
            : retrievedPoint.Id.Uuid.ToString();

        Console.WriteLine($"POINTID: {idValue}, {retrievedPoint.Payload["name"]}");

        pageContent.Add(new WebPageContent(retrievedPoint.Id,
            retrievedPoint.Payload["uid"].StringValue,
            retrievedPoint.Payload["type"].StringValue,
            Convert.ToInt32(retrievedPoint.Payload["siteId"].IntegerValue),
            retrievedPoint.Payload["name"].StringValue,
            retrievedPoint.Payload["description"].StringValue,
            retrievedPoint.Payload["content"].StringValue,
            vectorArray,
            Convert.ToDateTime(retrievedPoint.Payload["lastUpdated"].StringValue)
        ));
    }

    return pageContent;
}
|
|
|
|
/// <summary>
/// Upserts a single point (id, vector and payload) into a collection.
/// </summary>
/// <param name="id">Id of the point.</param>
/// <param name="vectors">Embedding stored with the point.</param>
/// <param name="payload">Payload fields stored with the point.</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertPointAsync(PointId id, float[] vectors, MapField<string, Value> payload, string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var pointStruct = new PointStruct
    {
        Id = id,
        Vectors = vectors,
        Payload = { payload }
    };
    // Logged before the request is sent, as in the original flow.
    Console.WriteLine($"{pointStruct.Id} val bekerült.");

    var result = await client.UpsertAsync(
        collectionName: collectionName,
        points: new List<PointStruct> { pointStruct }
    );

    Console.Write("QDrantUpsert: " + result.Status);
}
|
|
|
|
/// <summary>
/// Upserts a batch of points that use integer ids (used for HTML snippets).
/// </summary>
/// <param name="ids">Point ids; element <c>i</c> pairs with <c>vectors[i]</c> and <c>payloads[i]</c>.</param>
/// <param name="vectors">Embeddings, one per id.</param>
/// <param name="payloads">Payloads, one per id; each is expected to contain a "name" entry (it is logged).</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertManyAsync(List<int> ids, List<float[]> vectors, List<MapField<string, Value>> payloads, string collectionName)
{
    _apiKey = GetApiKey();

    var pointStructList = new List<PointStruct>(ids.Count);
    for (int i = 0; i < ids.Count; i++)
    {
        var point = new PointStruct
        {
            Id = (ulong)ids[i],
            Vectors = vectors[i],
            Payload = { payloads[i] }
        };
        pointStructList.Add(point);
        Console.WriteLine($"{point.Id} val bekerül {point.Payload["name"]}");
    }

    Console.WriteLine(pointStructList.Count);

    // Nothing to send: skip the round trip instead of issuing an empty upsert.
    if (pointStructList.Count == 0)
    {
        return;
    }

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    await client.UpsertAsync(
        collectionName: collectionName,
        points: pointStructList
    );
}
|
|
|
|
/// <summary>
/// Upserts a batch of <see cref="WebPageContent"/> chunks, using each chunk's own id.
/// </summary>
/// <remarks>
/// Each chunk is stored with the payload keys "uid", "type", "siteId", "name", "description",
/// "content" and "lastUpdated" (round-trip "o" format).
/// </remarks>
/// <param name="chunks">Chunks to store.</param>
/// <param name="collectionName">Target collection.</param>
public async Task QDrantInsertManyAsync(List<WebPageContent> chunks, string collectionName)
{
    _apiKey = GetApiKey();

    var pointStructList = new List<PointStruct>(chunks.Count);
    foreach (var chunk in chunks)
    {
        var payload = new MapField<string, Value>
        {
            { "uid", new Value { StringValue = chunk.UId } },
            { "type", new Value { StringValue = chunk.Type } },
            { "siteId", new Value { IntegerValue = chunk.SiteId } },
            { "name", new Value { StringValue = chunk.Name } },
            { "description", new Value { StringValue = chunk.Description } },
            { "content", new Value { StringValue = chunk.Content } },
            { "lastUpdated", new Value { StringValue = chunk.LastUpdated.ToString("o") } }
        };

        var point = new PointStruct
        {
            Id = chunk.Id,
            Vectors = chunk.Vectors,
            Payload = { payload }
        };
        pointStructList.Add(point);
        Console.WriteLine($"{point.Id} val bekerül {point.Payload["name"]}");
    }

    Console.WriteLine(pointStructList.Count);

    // Nothing to send: skip the round trip instead of issuing an empty upsert.
    if (pointStructList.Count == 0)
    {
        return;
    }

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    await client.UpsertAsync(
        collectionName: collectionName,
        points: pointStructList
    );
}
|
|
|
|
/// <summary>
/// Deletes a single point, identified by a numeric id, from a collection.
/// </summary>
/// <param name="pointId">Numeric id of the point to delete.</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointAsync(int pointId, string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    // Use the caller's collection; the previous code sent the literal "{collection_name}".
    var result = await client.DeleteAsync(collectionName: collectionName, ids: [(ulong)pointId]);
    Console.WriteLine(result.Status);
}
|
|
|
|
/// <summary>
/// Deletes the points with the given numeric ids from a collection.
/// </summary>
/// <param name="pointIds">Numeric ids of the points to delete.</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointsAsync(ulong[] pointIds, string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var result = await client.DeleteAsync(collectionName: collectionName, ids: pointIds);
    Console.WriteLine(result.Status);
}
|
|
/// <summary>
/// Deletes the points with the given GUID ids from a collection.
/// </summary>
/// <param name="pointIds">GUID ids of the points to delete.</param>
/// <param name="collectionName">Collection to delete from.</param>
public async Task DeletePointsAsync(Guid[] pointIds, string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    var result = await client.DeleteAsync(collectionName: collectionName, ids: pointIds);
    Console.WriteLine(result.Status);
}
|
|
|
|
/// <summary>
/// Deletes an entire collection from the Qdrant server.
/// </summary>
/// <param name="collectionName">Name of the collection to delete.</param>
public async Task DeleteCollectionAsync(string collectionName)
{
    _apiKey = GetApiKey();

    // Dispose the per-call client when the method finishes.
    using var client = new QdrantClient(_qdrantHost, 6334, true, _apiKey);

    await client.DeleteCollectionAsync(collectionName);
}
|
|
|
|
}
|
|
|
|
|
|
/// <summary>
/// One scored entry of a query result. Lower-case member names are kept as declared;
/// presumably they mirror the JSON field names of the Qdrant REST response (confirm against the API).
/// </summary>
public class PointResult
{
    /// <summary>Numeric id of the point.</summary>
    public int id { get; set; }

    /// <summary>Version number reported for the point.</summary>
    public int version { get; set; }

    /// <summary>Score reported for the point.</summary>
    public double score { get; set; }
}
|
|
|
|
/// <summary>
/// Envelope for a query response: a list of scored points plus status and timing.
/// Presumably mirrors the Qdrant REST response shape (confirm against the API).
/// </summary>
public class QDrantQueryResult
{
    /// <summary>Scored points contained in the response.</summary>
    public List<PointResult> result { get; set; }

    /// <summary>Status text of the response.</summary>
    public string status { get; set; }

    /// <summary>Time value of the response; unit not visible here.</summary>
    public double time { get; set; }
}
|
|
|
|
/// <summary>
/// A single point whose payload is an <see cref="HtmlSnippet"/>, with its vector stored as doubles.
/// </summary>
public class PointData
{
    /// <summary>Numeric id of the point.</summary>
    public int id { get; set; }

    /// <summary>Snippet stored as the point's payload.</summary>
    public HtmlSnippet payload { get; set; }

    /// <summary>Vector components as doubles.</summary>
    public List<double> vector { get; set; }

    /// <summary>
    /// Converts <see cref="vector"/> to a <see cref="float"/> array.
    /// </summary>
    /// <returns>The narrowed components, or null when <see cref="vector"/> is null.</returns>
    public float[] GetFloatVector()
    {
        if (vector == null)
        {
            return null;
        }

        var narrowed = new float[vector.Count];
        for (int i = 0; i < narrowed.Length; i++)
        {
            narrowed[i] = (float)vector[i];
        }

        return narrowed;
    }
}
|
|
|
|
|
|
/// <summary>
/// Envelope for a single-point response carrying a <see cref="PointData"/>, plus status and timing.
/// Presumably mirrors the Qdrant REST response shape (confirm against the API).
/// </summary>
public class QDrantGetPointResult
{
    /// <summary>The returned point.</summary>
    public PointData result { get; set; }

    /// <summary>Status text of the response.</summary>
    public string status { get; set; }

    /// <summary>Time value of the response; unit not visible here.</summary>
    public double time { get; set; }
}
|
|
|
|
/// <summary>
/// Envelope for a single-point response carrying a <see cref="ContentPointData"/>, plus status and timing.
/// Presumably mirrors the Qdrant REST response shape (confirm against the API).
/// </summary>
public class QDrantGetContentPointResult
{
    /// <summary>The returned content point.</summary>
    public ContentPointData result { get; set; }

    /// <summary>Status text of the response.</summary>
    public string status { get; set; }

    /// <summary>Time value of the response; unit not visible here.</summary>
    public double time { get; set; }
}
|
|
|
|
/// <summary>
/// A single point whose payload is a <see cref="ContentPayload"/>, with its vector stored as doubles.
/// </summary>
public class ContentPointData
{
    /// <summary>Numeric id of the point.</summary>
    public int id { get; set; }

    /// <summary>Content stored as the point's payload.</summary>
    public ContentPayload payload { get; set; }

    /// <summary>Vector components as doubles.</summary>
    public List<double> vector { get; set; }

    /// <summary>
    /// Converts <see cref="vector"/> to a <see cref="float"/> array.
    /// </summary>
    /// <returns>The narrowed components, or null when <see cref="vector"/> is null.</returns>
    public float[] GetFloatVector()
    {
        if (vector == null)
        {
            return null;
        }

        var narrowed = new float[vector.Count];
        for (int i = 0; i < narrowed.Length; i++)
        {
            narrowed[i] = (float)vector[i];
        }

        return narrowed;
    }
}
|
|
|
|
/// <summary>
/// Payload fields of a content point. Apart from <see cref="id"/>, the member names match the
/// payload keys this file writes when upserting page content.
/// </summary>
public class ContentPayload
{
    /// <summary>Numeric id carried in the payload.</summary>
    public int id { get; set; }

    /// <summary>Value of the "uid" payload key.</summary>
    public string uid { get; set; }

    /// <summary>Value of the "type" payload key.</summary>
    public string type { get; set; }

    /// <summary>Value of the "siteId" payload key.</summary>
    public int siteId { get; set; }

    /// <summary>Value of the "name" payload key.</summary>
    public string name { get; set; }

    /// <summary>Value of the "description" payload key.</summary>
    public string description { get; set; }

    /// <summary>Value of the "content" payload key.</summary>
    public string content { get; set; }

    /// <summary>Value of the "lastUpdated" payload key.</summary>
    public DateTime lastUpdated { get; set; }
}
|
|
}
|