117 lines
4.0 KiB
C#
117 lines
4.0 KiB
C#
using System.Text.Json.Nodes;
|
|
using DotBased.Monads;
|
|
using HtmlAgilityPack;
|
|
using Manager.YouTube.Models.Parser;
|
|
|
|
namespace Manager.YouTube.Parsers;
|
|
|
|
/// <summary>
/// Extracts the JSON payloads that YouTube embeds in the inline
/// <c>&lt;script&gt;</c> elements of its HTML pages.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Extracts the object literal passed to <c>ytcfg.set(...)</c> and checks
    /// whether the page contains the YouTube Premium logo marker.
    /// </summary>
    /// <param name="html">The raw HTML of the page.</param>
    /// <returns>
    /// On success, a tuple of the extracted JSON text and a flag that is
    /// <c>true</c> when <paramref name="html"/> contains
    /// <c>logo-type="YOUTUBE_PREMIUM_LOGO"</c> (compared case-insensitively).
    /// A failed result when the HTML is empty, no matching script element
    /// exists, or no balanced JSON object could be extracted from it.
    /// </returns>
    public static Result<(string, bool)> GetStateJson(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string setFunction = "ytcfg.set({";
        var scriptText = FindScriptText(htmlDocument, setFunction);
        if (string.IsNullOrWhiteSpace(scriptText))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var json = ExtractJson(scriptText, "ytcfg.set(");
        if (string.IsNullOrWhiteSpace(json))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var isPremiumUser = html.Contains("logo-type=\"YOUTUBE_PREMIUM_LOGO\"", StringComparison.OrdinalIgnoreCase);

        return (json, isPremiumUser);
    }

    /// <summary>
    /// Extracts and parses the <c>ytInitialPlayerResponse</c> and
    /// <c>ytInitialData</c> objects embedded in a page.
    /// </summary>
    /// <param name="html">The raw HTML of the page.</param>
    /// <returns>
    /// On success, a <see cref="YouTubeVideoData"/> holding both parsed JSON
    /// objects. A failed result when the HTML is empty, when either script
    /// element or its JSON object cannot be found, or when either payload
    /// cannot be parsed as a JSON object.
    /// </returns>
    public static Result<YouTubeVideoData> GetVideoDataFromHtml(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string initialYoutubeData = "var ytInitialPlayerResponse = {";
        var playerScriptText = FindScriptText(htmlDocument, initialYoutubeData);
        if (string.IsNullOrWhiteSpace(playerScriptText))
        {
            return ResultError.Fail($"Could not find {initialYoutubeData} in html script nodes!");
        }

        var initialPlayerDataString = ExtractJson(playerScriptText, "var ytInitialPlayerResponse = ");
        if (string.IsNullOrWhiteSpace(initialPlayerDataString))
        {
            return ResultError.Fail("Failed to extract initial player data from JSON.");
        }

        const string initialData = "var ytInitialData = {";
        var initialDataScriptText = FindScriptText(htmlDocument, initialData);
        if (string.IsNullOrWhiteSpace(initialDataScriptText))
        {
            return ResultError.Fail($"Could not find {initialData} in html script nodes!");
        }

        var initialDataJsonString = ExtractJson(initialDataScriptText, "var ytInitialData = ");
        if (string.IsNullOrWhiteSpace(initialDataJsonString))
        {
            return ResultError.Fail("Failed to extract initial data from JSON.");
        }

        try
        {
            // Parsing lives inside the try block so that malformed JSON is
            // reported through the Result instead of escaping as an exception.
            return new YouTubeVideoData
            {
                YouTubePlayerData = JsonNode.Parse(initialPlayerDataString)?.AsObject(),
                YouTubeInitialData = JsonNode.Parse(initialDataJsonString)?.AsObject()
            };
        }
        catch (Exception e)
        {
            return ResultError.Error(e, "Could not parse youtube player data.");
        }
    }

    /// <summary>
    /// Returns the inner text of the first <c>&lt;script&gt;</c> element whose
    /// content contains <paramref name="marker"/>.
    /// </summary>
    /// <param name="document">The loaded HTML document to search.</param>
    /// <param name="marker">
    /// Text the script must contain. It is embedded in a single-quoted XPath
    /// string literal, so it must not itself contain a single quote.
    /// </param>
    /// <returns>The script text, or <c>null</c> when no element matches.</returns>
    private static string? FindScriptText(HtmlDocument document, string marker)
    {
        // SelectSingleNode yields null when nothing matches, hence the
        // null-conditional access instead of a direct dereference.
        return document.DocumentNode
            .SelectSingleNode($"//script[contains(., '{marker}')]")
            ?.InnerText;
    }

    /// <summary>
    /// Extracts the first brace-balanced JSON object that follows
    /// <paramref name="marker"/> in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The text to scan.</param>
    /// <param name="marker">The text that precedes the JSON object.</param>
    /// <returns>
    /// The object text including its outer braces, or <c>null</c> when the
    /// marker is absent, no <c>{</c> follows it, or the braces never balance.
    /// </returns>
    private static string? ExtractJson(string input, string marker)
    {
        var markerIndex = input.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip ahead to the first '{' after the marker.
        var start = input.IndexOf('{', markerIndex + marker.Length);
        if (start < 0)
        {
            return null;
        }

        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = start; i < input.Length; i++)
        {
            var current = input[i];

            if (inString)
            {
                // Braces inside a string literal are data, not structure; only
                // an unescaped double quote terminates the literal.
                if (escaped)
                {
                    escaped = false;
                }
                else if (current == '\\')
                {
                    escaped = true;
                }
                else if (current == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (current)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return input[start..(i + 1)];
                    }

                    break;
            }
        }

        // The input ended before the outermost object was closed.
        return null;
    }
}