Files
rssotto-csharp-client/ModVersionChecker/checkers/ScrapeChecker.cs
2025-09-04 10:14:30 +02:00

94 lines
3.3 KiB
C#

using System.Text.RegularExpressions;
using ModVersionChecker.data.model;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
namespace ModVersionChecker
{
/// <summary>
/// Version checker that downloads a page (plain HTTP or headless Chrome via Selenium)
/// and extracts the version string from it with a regular expression.
/// </summary>
public class ScrapeChecker : IVersionChecker
{
    // One client for the whole process: creating an HttpClient per request and never
    // disposing it leaks handlers/sockets. Default headers are configured once.
    private static readonly HttpClient SharedHttpClient = CreateHttpClient();

    // Removes whitespace between adjacent tags so user-supplied patterns do not have
    // to account for page formatting.
    private static readonly Regex InterTagWhitespace = new Regex(@">\s+<", RegexOptions.Compiled);

    // Fixed pause after navigation so client-side scripts can populate the page.
    private const int SeleniumRenderDelayMs = 2000;

    /// <summary>
    /// Fetches the page at the configured URL and returns the first capture group of the
    /// configured regex.
    /// </summary>
    /// <param name="paramsDict">
    /// Per-item parameters. "url" is required; "regex" and "mode" fall back to
    /// <paramref name="source"/>'s defaults when missing or empty.
    /// </param>
    /// <param name="source">Source definition supplying default parameter values.</param>
    /// <returns>The text captured by group 1 of the regex.</returns>
    /// <exception cref="ArgumentException">"url" or "regex" is missing or empty.</exception>
    /// <exception cref="InvalidOperationException">
    /// The regex did not match the page, or it defines no capture group.
    /// </exception>
    public async Task<string> GetLatestVersion(Dictionary<string, string> paramsDict, SourceDef source)
    {
        if (!paramsDict.TryGetValue("url", out var url) || string.IsNullOrEmpty(url))
        {
            throw new ArgumentException("URL required");
        }

        // Validate before fetching: an empty pattern always "matches" with no capture
        // group, which previously surfaced as a misleading "no match" after a full download.
        var regex = GetValueOrDefault(paramsDict, "regex", source);
        if (string.IsNullOrEmpty(regex))
        {
            throw new ArgumentException("Regex required");
        }

        var mode = GetValueOrDefault(paramsDict, "mode", source);
        var response = mode == "selenium"
            ? await SeleniumFetch(url)
            : await DefaultFetch(url);

        response = InterTagWhitespace.Replace(response, "><");

        var match = Regex.Match(response, regex);
        if (!match.Success || match.Groups.Count < 2)
        {
            throw new InvalidOperationException("No match with regex in response");
        }

        return match.Groups[1].Value;
    }

    /// <summary>
    /// Returns the non-empty value for <paramref name="key"/> from <paramref name="dict"/>,
    /// otherwise the entry in <c>source.Defaults</c> if present, otherwise an empty string.
    /// </summary>
    private static string GetValueOrDefault(Dictionary<string, string> dict, string key, SourceDef source)
    {
        if (dict.TryGetValue(key, out var value) && !string.IsNullOrEmpty(value))
        {
            return value;
        }

        if (source.Defaults != null && source.Defaults.ContainsKey(key))
        {
            return source.Defaults[key];
        }

        return "";
    }

    /// <summary>Builds the shared HTTP client with browser-like request headers.</summary>
    private static HttpClient CreateHttpClient()
    {
        var client = new HttpClient();
        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
        client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.5");
        return client;
    }

    /// <summary>Downloads <paramref name="url"/> as a string using the shared HTTP client.</summary>
    private static Task<string> DefaultFetch(string url)
    {
        return SharedHttpClient.GetStringAsync(url);
    }

    /// <summary>
    /// Loads <paramref name="url"/> in headless Chrome and returns the page source after a
    /// fixed delay.
    /// </summary>
    private static async Task<string> SeleniumFetch(string url)
    {
        var service = ChromeDriverService.CreateDefaultService();
        service.HideCommandPromptWindow = true;

        var options = new ChromeOptions();
        options.AddArgument("--headless");
        options.AddArgument("--disable-gpu");
        options.AddArgument("--no-sandbox");
        options.AddArgument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124");

        using var driver = new ChromeDriver(service, options);
        try
        {
            // Synchronous call; the delay afterwards gives scripts time to run.
            driver.Navigate().GoToUrl(url);
            await Task.Delay(SeleniumRenderDelayMs);
            return driver.PageSource;
        }
        finally
        {
            // End the browser session even when navigation or the read fails.
            driver.Quit();
        }
    }
}
}