39 lines
1.2 KiB
C#
39 lines
1.2 KiB
C#
using System.Text.RegularExpressions;
|
|
using DotBased.Monads;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace Manager.YouTube.Parsers;
|
|
|
|
/// <summary>
/// Helpers for pulling embedded data out of raw HTML documents.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Extracts the argument text passed to a JavaScript function call of the form
    /// <c>functionName(...);</c> found inside a <c>&lt;script&gt;</c> element of the given HTML.
    /// </summary>
    /// <param name="html">The HTML document to search. Must not be null, empty or whitespace.</param>
    /// <param name="functionName">
    /// The name of the function whose call argument should be extracted.
    /// Must not be null, empty or whitespace.
    /// </param>
    /// <returns>
    /// On success, the trimmed text found between the parentheses of the first matching call.
    /// Otherwise a failure result describing which step failed: invalid input, no script
    /// mentioning <paramref name="functionName"/>, or no call matching the expected shape.
    /// </returns>
    /// <remarks>
    /// The captured argument ends at the first <c>)</c> character, and that character must be
    /// immediately followed by <c>;</c>. Arguments that themselves contain <c>)</c> are therefore
    /// not matched by this method.
    /// </remarks>
    public static Result<string> GetJsonFromScriptFunction(string html, string functionName)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        if (string.IsNullOrWhiteSpace(functionName))
        {
            return ResultError.Fail("No function names provided!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // The function name is escaped so that characters such as '.' or '$' are matched literally.
        var regexPattern = $@"{Regex.Escape(functionName)}\(([^)]+)\);";
        var functionNameSeen = false;

        // Script nodes are enumerated and filtered here rather than through an XPath
        // "contains(., '...')" query: interpolating the name into a quoted XPath literal
        // breaks when the name contains a quote, and a query without hits yields null.
        foreach (var scriptNode in htmlDocument.DocumentNode.Descendants("script"))
        {
            var scriptText = scriptNode.InnerText;

            // string.Contains(string) performs an ordinal, case-sensitive comparison.
            if (string.IsNullOrWhiteSpace(scriptText) || !scriptText.Contains(functionName))
            {
                continue;
            }

            functionNameSeen = true;

            var match = Regex.Match(scriptText, regexPattern);
            if (match.Success)
            {
                return match.Groups[1].Value.Trim();
            }

            // This script only mentions the name without a matching call;
            // keep looking in the remaining script nodes.
        }

        if (!functionNameSeen)
        {
            return ResultError.Fail($"Could not find {functionName} in html script nodes!");
        }

        return ResultError.Fail($"Unable to parse {functionName} JSON!");
    }
}