Files
YouTube-Manager/Manager.YouTube/Parsers/HtmlParser.cs

117 lines
4.0 KiB
C#

using System.Text.Json.Nodes;
using DotBased.Monads;
using HtmlAgilityPack;
using Manager.YouTube.Models.Parser;
namespace Manager.YouTube.Parsers;
/// <summary>
/// Extracts the JSON payloads that are embedded in inline <c>&lt;script&gt;</c> elements of an HTML page.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Extracts the object literal passed to <c>ytcfg.set(</c> and checks the page for the premium logo marker.
    /// </summary>
    /// <param name="html">Raw HTML of the page.</param>
    /// <returns>
    /// On success a tuple of the extracted JSON text and a flag that is <c>true</c> when the HTML contains
    /// <c>logo-type="YOUTUBE_PREMIUM_LOGO"</c> (case-insensitive). A failed result when <paramref name="html"/>
    /// is blank, no matching script element exists, or no balanced JSON object could be extracted.
    /// </returns>
    public static Result<(string, bool)> GetStateJson(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string setFunction = "ytcfg.set({";
        var scriptText = FindScriptText(htmlDocument, setFunction);
        if (scriptText is null)
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var json = ExtractJson(scriptText, "ytcfg.set(");
        if (string.IsNullOrWhiteSpace(json))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var isPremiumUser = html.Contains("logo-type=\"YOUTUBE_PREMIUM_LOGO\"", StringComparison.OrdinalIgnoreCase);
        return (json, isPremiumUser);
    }

    /// <summary>
    /// Extracts and parses the <c>ytInitialPlayerResponse</c> and <c>ytInitialData</c> objects from a page.
    /// </summary>
    /// <param name="html">Raw HTML of the page.</param>
    /// <returns>
    /// On success a <see cref="YouTubeVideoData"/> holding both parsed JSON objects. A failed result when
    /// <paramref name="html"/> is blank, either script element is missing, either JSON object cannot be
    /// extracted, or parsing/conversion to a JSON object throws.
    /// </returns>
    public static Result<YouTubeVideoData> GetVideoDataFromHtml(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string initialYoutubeData = "var ytInitialPlayerResponse = {";
        var playerScriptText = FindScriptText(htmlDocument, initialYoutubeData);
        if (playerScriptText is null)
        {
            return ResultError.Fail($"Could not find {initialYoutubeData} in html script nodes!");
        }

        var initialPlayerDataString = ExtractJson(playerScriptText, "var ytInitialPlayerResponse = ");
        if (string.IsNullOrWhiteSpace(initialPlayerDataString))
        {
            return ResultError.Fail("Failed to extract initial player data from JSON.");
        }

        const string initialData = "var ytInitialData = {";
        var initialDataScriptText = FindScriptText(htmlDocument, initialData);
        if (initialDataScriptText is null)
        {
            return ResultError.Fail($"Could not find {initialData} in html script nodes!");
        }

        var initialDataJsonString = ExtractJson(initialDataScriptText, "var ytInitialData = ");
        if (string.IsNullOrWhiteSpace(initialDataJsonString))
        {
            return ResultError.Fail("Failed to extract initial data from JSON.");
        }

        try
        {
            // Parsing lives inside the try so malformed JSON becomes a Result error instead of an exception.
            var parsedPlayerInitialData = JsonNode.Parse(initialPlayerDataString);
            var parsedInitialData = JsonNode.Parse(initialDataJsonString);

            return new YouTubeVideoData
            {
                YouTubePlayerData = parsedPlayerInitialData?.AsObject(),
                YouTubeInitialData = parsedInitialData?.AsObject()
            };
        }
        catch (Exception e)
        {
            return ResultError.Error(e, "Could not parse youtube player data.");
        }
    }

    /// <summary>
    /// Returns the inner text of the first <c>&lt;script&gt;</c> element whose content contains
    /// <paramref name="needle"/>, or <c>null</c> when no such element exists or its text is blank.
    /// </summary>
    /// <param name="document">The loaded HTML document to search.</param>
    /// <param name="needle">Literal text the script must contain; it is embedded in a single-quoted XPath string.</param>
    private static string? FindScriptText(HtmlDocument document, string needle)
    {
        // SelectSingleNode yields null when nothing matches, so the node must be null-checked before use.
        var scriptNode = document.DocumentNode.SelectSingleNode($"//script[contains(., '{needle}')]");
        var text = scriptNode?.InnerText;
        return string.IsNullOrWhiteSpace(text) ? null : text;
    }

    /// <summary>
    /// Returns the first brace-balanced <c>{ ... }</c> object that follows <paramref name="marker"/> in
    /// <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to scan (typically the content of a script element).</param>
    /// <param name="marker">Text that must precede the object; scanning starts right after it.</param>
    /// <returns>
    /// The object text including its outer braces, or <c>null</c> when the marker is absent, no opening brace
    /// follows it, or the braces never balance.
    /// </returns>
    private static string? ExtractJson(string input, string marker)
    {
        var markerIndex = input.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip until first '{'
        var start = input.IndexOf('{', markerIndex + marker.Length);
        if (start < 0)
        {
            return null;
        }

        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = start; i < input.Length; i++)
        {
            var current = input[i];

            if (inString)
            {
                // Braces inside double-quoted string values must not affect nesting depth.
                if (escaped)
                {
                    escaped = false;
                }
                else if (current == '\\')
                {
                    escaped = true;
                }
                else if (current == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (current)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return input[start..(i + 1)];
                    }

                    break;
            }
        }

        // Ran out of input before the outer object closed: report failure rather than a truncated fragment.
        return null;
    }
}