using BLAIzor.Interfaces;
using Microsoft.Extensions.Configuration;
using System.Net.Http;
using System.Text;
using System.Text.Json;

namespace BLAIzor.Services
{
    public class BrightDataService : IBrightDataService
    {
        private readonly ISimpleLogger _logger;
        private readonly IHttpClientFactory _httpClientFactory;
        private readonly IConfiguration _configuration;
        private string _apiToken = string.Empty;

        public BrightDataService(ISimpleLogger logger, IHttpClientFactory httpClientFactory, IConfiguration configuration)
        {
            _logger = logger;
            _httpClientFactory = httpClientFactory;
            _configuration = configuration;
        }

        // Returns the configured scraper provider name from "ScraperSettings", if any.
        private string GetScraperSettings() =>
            _configuration.GetSection("ScraperSettings").GetValue<string>("Provider") ?? string.Empty;

        // Returns the Bright Data API token from "ScraperSettings", or an empty string if missing.
        public string GetApiKey() =>
            _configuration.GetSection("ScraperSettings").GetValue<string>("ApiKey") ?? string.Empty;

        public async Task<string?> ScrapeFacebookPostsAsync(string pageUrl, int numPosts = 10)
        {
            if (string.IsNullOrWhiteSpace(pageUrl))
                return null;

            _apiToken = GetApiKey();

            try
            {
                var client = _httpClientFactory.CreateClient();
                client.DefaultRequestHeaders.Add("Authorization", $"Bearer {_apiToken}");

                // Trigger a scrape job against the Facebook posts dataset.
                var url = "https://api.brightdata.com/datasets/v3/trigger?dataset_id=gd_lkaxegm826bjpoo9m5&include_errors=true&limit_multiple_results=20";
                var payload = new[]
                {
                    new
                    {
                        url = pageUrl,
                        num_of_posts = numPosts,
                        posts_to_not_include = Array.Empty<string>(),
                        start_date = "",
                        end_date = ""
                    }
                };

                var json = JsonSerializer.Serialize(payload);
                var response = await client.PostAsync(url, new StringContent(json, Encoding.UTF8, "application/json"));
                response.EnsureSuccessStatusCode();

                // The trigger endpoint responds with e.g. {"snapshot_id":"s_mec12qv422avgbv9jl"}.
                var triggerBody = await response.Content.ReadAsStringAsync();
                if (string.IsNullOrWhiteSpace(triggerBody))
                {
                    await _logger.ErrorAsync("Failed to initiate scraping for Facebook posts.");
                    return null;
                }

                // Extract the snapshot id from the trigger response.
                using var triggerDoc = JsonDocument.Parse(triggerBody);
                if (!triggerDoc.RootElement.TryGetProperty("snapshot_id", out var idElement) ||
                    string.IsNullOrWhiteSpace(idElement.GetString()))
                {
                    await _logger.ErrorAsync("Trigger response did not contain a snapshot_id.");
                    return null;
                }
                var scrapeId = idElement.GetString();

                // Poll the progress endpoint until the snapshot is ready, or give up after 60 attempts.
                var checkUrl = $"https://api.brightdata.com/datasets/v3/progress/{scrapeId}";
                var statusResponse = await client.GetAsync(checkUrl);
                var responseString = await statusResponse.Content.ReadAsStringAsync();

                int attempt = 0;
                while (responseString.Contains("status") && !responseString.Contains("ready"))
                {
                    if (attempt >= 60)
                    {
                        await _logger.ErrorAsync($"Failed to get scraping status for Facebook page: {pageUrl} after multiple attempts.");
                        return null;
                    }

                    await Task.Delay(5000); // Wait 5 seconds before retrying.
                    statusResponse = await client.GetAsync(checkUrl);
                    responseString = await statusResponse.Content.ReadAsStringAsync();
                    attempt++;
                }

                // Fetch the finished snapshot as JSON and return it to the caller.
                var snapshotUrl = $"https://api.brightdata.com/datasets/v3/snapshot/{scrapeId}?format=json";
                var snapshotResponse = await client.GetAsync(snapshotUrl);
                return await snapshotResponse.Content.ReadAsStringAsync();
            }
            catch (Exception ex)
            {
                await _logger.ErrorAsync($"Error scraping Facebook: {ex.Message}");
                return null;
            }
        }
    }
}
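
// Usage sketch (an assumption, not part of this file): the service needs IHttpClientFactory,
// an ISimpleLogger implementation, and a "ScraperSettings" section in configuration, so a
// Blazor host might register it roughly like this in Program.cs (names are illustrative):
//
//   builder.Services.AddHttpClient();
//   builder.Services.AddScoped<IBrightDataService, BrightDataService>();
//
// with appsettings.json containing something along the lines of:
//
//   "ScraperSettings": { "Provider": "BrightData", "ApiKey": "<your-api-token>" }
//
// A caller would then await ScrapeFacebookPostsAsync("https://www.facebook.com/somepage", 10)
// and receive the snapshot JSON as a string, or null if the scrape could not be completed.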