[CHANGE] WIP YouTubeClient & HTML parsing

This commit is contained in:
max
2025-08-25 15:27:16 +02:00
parent 9789c5f535
commit a2489fea8d
7 changed files with 219 additions and 1 deletions

View File

@@ -0,0 +1,39 @@
using System.Text.RegularExpressions;
using DotBased.Monads;
using HtmlAgilityPack;
namespace Manager.YouTube.Parsers;
/// <summary>
/// Helpers for extracting embedded data from raw HTML markup.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Searches the <c>&lt;script&gt;</c> elements of <paramref name="html"/> for a call of the form
    /// <c>functionName(...);</c> and returns the trimmed text found between the parentheses.
    /// </summary>
    /// <param name="html">The HTML markup to search. Must not be null, empty or whitespace.</param>
    /// <param name="functionName">
    /// The name of the script function whose argument should be extracted.
    /// Must not be null, empty or whitespace. The name is matched literally.
    /// </param>
    /// <returns>
    /// The captured argument text on success; otherwise a failure result describing whether the
    /// input was invalid, no script mentioned <paramref name="functionName"/>, or no script
    /// contained a matching call.
    /// </returns>
    /// <remarks>
    /// The capture stops at the first <c>)</c> after the opening parenthesis, so an argument that
    /// itself contains a closing parenthesis (for example inside a JSON string value) will not be
    /// captured in full.
    /// </remarks>
    public static Result<string> GetJsonFromScriptFunction(string html, string functionName)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        if (string.IsNullOrWhiteSpace(functionName))
        {
            return ResultError.Fail("No function names provided!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        var regexPattern = $@"{Regex.Escape(functionName)}\(([^)]+)\);";
        var functionNameSeen = false;

        // Filter the script nodes in C# instead of interpolating functionName into an XPath
        // literal: a name containing a quote would otherwise produce an invalid/altered query,
        // and a query without matches would yield null and crash on dereference.
        foreach (var scriptNode in htmlDocument.DocumentNode.Descendants("script"))
        {
            var scriptText = scriptNode.InnerText;
            if (string.IsNullOrWhiteSpace(scriptText) || !scriptText.Contains(functionName))
            {
                continue;
            }

            functionNameSeen = true;

            // Keep scanning when this script only mentions the name (e.g. its declaration);
            // the actual call may live in a later script element.
            var match = Regex.Match(scriptText, regexPattern);
            if (match.Success)
            {
                var jsonString = match.Groups[1].Value.Trim();
                return jsonString;
            }
        }

        if (!functionNameSeen)
        {
            return ResultError.Fail($"Could not find {functionName} in html script nodes!");
        }

        return ResultError.Fail($"Unable to parse {functionName} JSON!");
    }
}