using BLAIzor.Interfaces;
using Microsoft.Extensions.Configuration;
using System.Net.Http;
using System.Text;
using System.Text.Json;

namespace BLAIzor.Services
{
    public class BrightDataService : IBrightDataService
    {
        private readonly ISimpleLogger _logger;
        private readonly IHttpClientFactory _httpClientFactory;
        private string _apiToken = string.Empty;
        private readonly IConfiguration? _configuration;

        public BrightDataService(ISimpleLogger logger, IHttpClientFactory httpClientFactory, IConfiguration configuration)
        {
            _logger = logger;
            _httpClientFactory = httpClientFactory;
            _configuration = configuration;
        }

        // Returns the scraper provider name configured under ScraperSettings:Provider, or an empty string.
        private string GetScraperSettings() =>
            _configuration?.GetSection("ScraperSettings")?.GetValue<string>("Provider") ?? string.Empty;

        public string GetApiKey()
        {
            if (_configuration == null)
            {
                return string.Empty;
            }

            // GetSection never returns null, so use Exists() to detect a missing "ScraperSettings" section.
            var scraperSettings = _configuration.GetSection("ScraperSettings");
            if (!scraperSettings.Exists())
            {
                return string.Empty;
            }

            return scraperSettings.GetValue<string>("ApiKey") ?? string.Empty;
        }

        public async Task<string?> ScrapeFacebookPostsAsync(string pageUrl, int numPosts = 10)
        {
            if (string.IsNullOrWhiteSpace(pageUrl))
                return null;

            _apiToken = GetApiKey();

            try
            {
                var client = _httpClientFactory.CreateClient();
                client.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiToken}");

                // Trigger a Bright Data dataset collection run (the dataset_id for Facebook posts is hardcoded).
                var url = "https://api.brightdata.com/datasets/v3/trigger?dataset_id=gd_lkaxegm826bjpoo9m5&include_errors=true&limit_multiple_results=20";

                var payload = new[]
                {
                    new
                    {
                        url = pageUrl,
                        num_of_posts = numPosts,
                        posts_to_not_include = Array.Empty<string>(),
                        start_date = "",
                        end_date = ""
                    }
                };

                var json = JsonSerializer.Serialize(payload);
                var response = await client.PostAsync(url, new StringContent(json, Encoding.UTF8, "application/json"));
                response.EnsureSuccessStatusCode();

                // The trigger endpoint responds with a small JSON body, e.g. {"snapshot_id":"s_mec12qv422avgbv9jl"}.
                var scrapeId = await response.Content.ReadAsStringAsync();

                if (string.IsNullOrWhiteSpace(scrapeId))
                {
                    await _logger.ErrorAsync("Failed to initiate scraping for Facebook posts.");
                    return null;
                }
                else
                {
                    // The collection runs asynchronously on Bright Data's side, so extract the snapshot id
                    // and poll the progress endpoint until the snapshot is ready.
                    using (var doc = JsonDocument.Parse(scrapeId))
                    {
                        scrapeId = doc.RootElement.GetProperty("snapshot_id").GetString() ?? string.Empty;
                    }

                    var checkUrl = $"https://api.brightdata.com/datasets/v3/progress/{scrapeId}";
                    var statusResponse = await client.GetAsync(checkUrl);
                    var responseString = await statusResponse.Content.ReadAsStringAsync();

                    int attempt = 0;
                    // Poll until the progress endpoint reports the snapshot as ready, or give up after 60 attempts (~5 minutes).
                    while (responseString.Contains("status") && !responseString.Contains("ready"))
                    {
                        if (attempt >= 60)
                        {
                            await _logger.ErrorAsync($"Failed to get scraping status for Facebook page: {pageUrl} after multiple attempts.");
                            return null;
                        }

                        // Wait before retrying.
                        await Task.Delay(5000); // 5 seconds
                        statusResponse = await client.GetAsync(checkUrl);
                        responseString = await statusResponse.Content.ReadAsStringAsync();
                        attempt++;
                    }

                    // Now fetch the snapshot contents.
                    var snapshotUrl = $"https://api.brightdata.com/datasets/v3/snapshot/{scrapeId}?format=json";
                    var snapshotResponse = await client.GetAsync(snapshotUrl);
                    var snapshotString = await snapshotResponse.Content.ReadAsStringAsync();

                    return snapshotString;
                }
            }
            catch (Exception ex)
            {
                await _logger.ErrorAsync($"Error scraping Facebook: {ex.Message}");
                return null;
            }
        }
    }
}
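A minimal wiring sketch (not part of this file), assuming a standard ASP.NET Core / Blazor Server host with the Web SDK's implicit usings; SimpleLogger, the page URL, and the Program.cs shape below are illustrative assumptions, not code from this repository:

    // Hypothetical Program.cs (sketch; SimpleLogger is an assumed implementation of ISimpleLogger)
    using BLAIzor.Interfaces;
    using BLAIzor.Services;

    var builder = WebApplication.CreateBuilder(args);

    builder.Services.AddHttpClient();                                     // provides IHttpClientFactory
    builder.Services.AddScoped<ISimpleLogger, SimpleLogger>();            // assumed concrete logger
    builder.Services.AddScoped<IBrightDataService, BrightDataService>();  // IConfiguration is supplied by the host

    var app = builder.Build();

    // Smoke test: resolve the service and trigger a scrape. In the real app the service
    // would normally be injected into a component or another service instead.
    using (var scope = app.Services.CreateScope())
    {
        var scraper = scope.ServiceProvider.GetRequiredService<IBrightDataService>();
        var postsJson = await scraper.ScrapeFacebookPostsAsync("https://www.facebook.com/SomePage", 10);
        Console.WriteLine(postsJson ?? "No data returned.");
    }

The ApiKey read by GetApiKey() is expected under the ScraperSettings section of configuration (e.g. appsettings.json), matching the keys already referenced in the service.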