[CHANGE] WIP YouTubeClient & HTML parsing
This commit is contained in:
39
Manager.YouTube/Parsers/HtmlParser.cs
Normal file
39
Manager.YouTube/Parsers/HtmlParser.cs
Normal file
@@ -0,0 +1,39 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using DotBased.Monads;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Manager.YouTube.Parsers;
|
||||
|
||||
/// <summary>
/// Helpers for pulling embedded data out of raw HTML documents.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Finds the first <c>&lt;script&gt;</c> element whose text contains <paramref name="functionName"/>
    /// and returns the raw argument text of the first <c>functionName(...);</c> call inside it.
    /// </summary>
    /// <param name="html">The HTML document to search. Must not be null, empty or whitespace.</param>
    /// <param name="functionName">The name of the script function whose argument should be extracted.</param>
    /// <returns>
    /// A successful result holding the trimmed argument text, or a failed result when the input is
    /// invalid, no script element mentions the function, or the call could not be matched.
    /// </returns>
    /// <remarks>
    /// The argument is captured up to the first <c>)</c> character, so payloads that themselves
    /// contain a closing parenthesis will not be matched by the current pattern.
    /// </remarks>
    public static Result<string> GetJsonFromScriptFunction(string html, string functionName)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        if (string.IsNullOrWhiteSpace(functionName))
        {
            return ResultError.Fail("No function names provided!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // Scan the script elements directly instead of interpolating the function name into an
        // XPath string literal: a name containing a quote would produce an invalid expression,
        // and a missing match would hand back a null node that was dereferenced afterwards.
        var scriptText = string.Empty;
        foreach (var scriptNode in htmlDocument.DocumentNode.Descendants("script"))
        {
            var candidate = scriptNode.InnerText;
            if (!string.IsNullOrEmpty(candidate) && candidate.Contains(functionName, System.StringComparison.Ordinal))
            {
                scriptText = candidate;
                break;
            }
        }

        // functionName is non-blank, so any matching script text is non-empty;
        // an empty value therefore means that no script element matched.
        if (scriptText.Length == 0)
        {
            return ResultError.Fail($"Could not find {functionName} in html script nodes!");
        }

        // Escape the name so characters such as '.' or '$' are matched literally.
        var regexPattern = $@"{Regex.Escape(functionName)}\(([^)]+)\);";
        var match = Regex.Match(scriptText, regexPattern);

        if (match.Success)
        {
            var jsonString = match.Groups[1].Value.Trim();
            return jsonString;
        }

        return ResultError.Fail($"Unable to parse {functionName} JSON!");
    }
}
|
Reference in New Issue
Block a user