Files
YouTube-Manager/Manager.YouTube/Parsers/HtmlParser.cs
2025-09-04 18:05:56 +02:00

64 lines
1.8 KiB
C#

using DotBased.Monads;
using HtmlAgilityPack;
namespace Manager.YouTube.Parsers;
/// <summary>
/// Helpers for pulling embedded JSON payloads out of an HTML page's script nodes.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Locates the first script node containing <c>ytcfg.set({</c> and extracts the object
    /// literals passed to <c>ytcfg.set(</c> and <c>setMessage(</c> from that node's text.
    /// </summary>
    /// <param name="html">The raw HTML document to search.</param>
    /// <returns>
    /// On success, a tuple whose first item is the <c>ytcfg.set(</c> object text and whose second
    /// item is the <c>setMessage(</c> object text. A failed result when <paramref name="html"/> is
    /// empty, when no matching script node exists, or when either object cannot be extracted.
    /// </returns>
    public static Result<(string, string)> GetStateJson(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string setFunction = "ytcfg.set({";
        const string messageFunction = "setMessage(";

        // SelectSingleNode returns null when nothing matches, so the node must be null-checked
        // before its text is read.
        var scriptNode = htmlDocument.DocumentNode.SelectSingleNode($"//script[contains(., '{setFunction}')]");
        var scriptText = scriptNode?.InnerText;
        if (string.IsNullOrWhiteSpace(scriptText))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var json = ExtractJson(scriptText, "ytcfg.set(");
        if (string.IsNullOrWhiteSpace(json))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        // Both payloads are read from the same script node; report the message payload
        // separately so the failure names the marker that was actually missing.
        var jsonText = ExtractJson(scriptText, messageFunction);
        if (string.IsNullOrWhiteSpace(jsonText))
        {
            return ResultError.Fail($"Could not find {messageFunction} in html script nodes!");
        }

        return (json, jsonText);
    }

    /// <summary>
    /// Returns the first brace-balanced <c>{...}</c> span that follows <paramref name="marker"/>
    /// in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to scan.</param>
    /// <param name="marker">Literal text (ordinal match) that precedes the object.</param>
    /// <returns>
    /// The object text including its outer braces, or <c>null</c> when the marker is absent,
    /// no <c>{</c> follows it, or the braces never balance before the end of the input.
    /// </returns>
    private static string? ExtractJson(string input, string marker)
    {
        var markerIndex = input.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip anything between the marker and the first opening brace.
        var start = input.IndexOf('{', markerIndex + marker.Length);
        if (start < 0)
        {
            return null;
        }

        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = start; i < input.Length; i++)
        {
            var current = input[i];

            if (inString)
            {
                // Braces inside a double-quoted string are data, not structure; only an
                // unescaped quote ends the string.
                if (escaped)
                {
                    escaped = false;
                }
                else if (current == '\\')
                {
                    escaped = true;
                }
                else if (current == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (current)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return input[start..(i + 1)];
                    }

                    break;
            }
        }

        // Ran off the end without closing the outer object: a truncated fragment is not
        // usable JSON, so report failure instead of returning it.
        return null;
    }
}