64 lines
1.8 KiB
C#
64 lines
1.8 KiB
C#
using DotBased.Monads;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace Manager.YouTube.Parsers;
|
|
|
|
/// <summary>
/// Extracts embedded JSON object literals from the script nodes of an HTML page.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Locates the first <c>&lt;script&gt;</c> node whose text contains <c>ytcfg.set({</c> and
    /// extracts two JSON objects from it: the argument following <c>ytcfg.set(</c> and the
    /// argument following <c>setMessage(</c>.
    /// </summary>
    /// <param name="html">Raw HTML of the page to inspect.</param>
    /// <returns>
    /// A successful result holding <c>(ytcfg.set JSON, setMessage JSON)</c>, or a failed result
    /// when <paramref name="html"/> is blank, no matching script node exists, or either JSON
    /// object cannot be extracted as a complete, brace-balanced object.
    /// </returns>
    public static Result<(string, string)> GetStateJson(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string setFunction = "ytcfg.set({";
        const string configMarker = "ytcfg.set(";
        const string messageMarker = "setMessage(";

        // SelectSingleNode yields null when nothing matches, so the node must be
        // null-checked before its text is read.
        var scriptNode = htmlDocument.DocumentNode.SelectSingleNode($"//script[contains(., '{setFunction}')]");
        var scriptText = scriptNode?.InnerText;
        if (string.IsNullOrWhiteSpace(scriptText))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var json = ExtractJson(scriptText, configMarker);
        if (string.IsNullOrWhiteSpace(json))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        // Reported separately so a missing setMessage payload is not blamed on ytcfg.set.
        var jsonText = ExtractJson(scriptText, messageMarker);
        if (string.IsNullOrWhiteSpace(jsonText))
        {
            return ResultError.Fail($"Could not find {messageMarker} in html script nodes!");
        }

        return (json, jsonText);
    }

    /// <summary>
    /// Returns the first brace-balanced <c>{ ... }</c> object that follows the first
    /// occurrence of <paramref name="marker"/> in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to scan.</param>
    /// <param name="marker">Literal text (compared ordinally) that precedes the object.</param>
    /// <returns>
    /// The object text including its outer braces, or <c>null</c> when the marker is absent,
    /// no <c>{</c> follows it, or the braces never balance before the end of the input.
    /// </returns>
    private static string? ExtractJson(string input, string marker)
    {
        var markerIndex = input.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip ahead to the first '{' after the marker.
        var start = input.IndexOf('{', markerIndex + marker.Length);
        if (start < 0)
        {
            return null;
        }

        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = start; i < input.Length; i++)
        {
            var current = input[i];

            if (inString)
            {
                // Braces inside a double-quoted string are data, not structure; only an
                // unescaped quote ends the string.
                if (escaped)
                {
                    escaped = false;
                }
                else if (current == '\\')
                {
                    escaped = true;
                }
                else if (current == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (current)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return input[start..(i + 1)];
                    }

                    break;
            }
        }

        // Ran off the end without closing the outer object: treat as not found rather
        // than handing back a truncated fragment.
        return null;
    }
}