using System.Text.Json.Nodes;
using DotBased.Monads;
using HtmlAgilityPack;
using Manager.YouTube.Models.Parser;

namespace Manager.YouTube.Parsers;

/// <summary>
/// Helpers that pull embedded JSON payloads out of the inline &lt;script&gt; elements of an HTML page.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Extracts the object literal passed to <c>ytcfg.set(</c> from the given page and reports
    /// whether the page contains the premium logo marker.
    /// </summary>
    /// <param name="html">Raw HTML of the page.</param>
    /// <returns>
    /// On success a tuple of the extracted JSON text and a flag that is <c>true</c> when
    /// <paramref name="html"/> contains <c>logo-type="YOUTUBE_PREMIUM_LOGO"</c> (case-insensitive).
    /// A failed result when the input is blank, no matching script exists, or no balanced
    /// object could be extracted.
    /// </returns>
    public static Result<(string, bool)> GetStateJson(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string setFunction = "ytcfg.set({";

        var scriptText = FindScriptText(htmlDocument, setFunction);
        if (string.IsNullOrWhiteSpace(scriptText))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var json = ExtractJson(scriptText, "ytcfg.set(");
        if (string.IsNullOrWhiteSpace(json))
        {
            return ResultError.Fail($"Could not find {setFunction} in html script nodes!");
        }

        var isPremiumUser = html.Contains("logo-type=\"YOUTUBE_PREMIUM_LOGO\"", StringComparison.OrdinalIgnoreCase);
        return (json, isPremiumUser);
    }

    /// <summary>
    /// Extracts and parses the <c>ytInitialPlayerResponse</c> and <c>ytInitialData</c> objects
    /// embedded in the page's script elements.
    /// </summary>
    /// <param name="html">Raw HTML of the page.</param>
    /// <returns>
    /// A <see cref="YouTubeVideoData"/> holding both parsed JSON nodes, or a failed result when the
    /// input is blank, a script is missing, an object cannot be extracted, or the JSON is malformed.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the declared return type is the non-generic <c>Result</c> while the success path
    /// returns a <see cref="YouTubeVideoData"/>; this relies on a conversion declared outside this
    /// file - confirm that is intended.
    /// </remarks>
    public static Result GetVideoDataFromHtml(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        const string initialYoutubeData = "var ytInitialPlayerResponse = {";
        var playerScriptText = FindScriptText(htmlDocument, initialYoutubeData);
        if (string.IsNullOrWhiteSpace(playerScriptText))
        {
            return ResultError.Fail($"Could not find {initialYoutubeData} in html script nodes!");
        }

        var initialPlayerDataString = ExtractJson(playerScriptText, "var ytInitialPlayerResponse = ");
        if (string.IsNullOrWhiteSpace(initialPlayerDataString))
        {
            return ResultError.Fail("Failed to extract initial player data from JSON.");
        }

        const string initialData = "var ytInitialData = {";
        var initialDataScriptText = FindScriptText(htmlDocument, initialData);
        if (string.IsNullOrWhiteSpace(initialDataScriptText))
        {
            return ResultError.Fail($"Could not find {initialData} in html script nodes!");
        }

        var initialDataJsonString = ExtractJson(initialDataScriptText, "var ytInitialData = ");
        if (string.IsNullOrWhiteSpace(initialDataJsonString))
        {
            return ResultError.Fail("Failed to extract initial data from JSON.");
        }

        try
        {
            // Parsing happens inside the try so malformed JSON becomes a result error
            // instead of an exception thrown at the caller.
            return new YouTubeVideoData
            {
                YouTubePlayerData = JsonNode.Parse(initialPlayerDataString),
                YouTubeInitialData = JsonNode.Parse(initialDataJsonString)
            };
        }
        catch (System.Text.Json.JsonException e)
        {
            return ResultError.Error(e, "Could not parse youtube player data.");
        }
    }

    /// <summary>
    /// Returns the inner text of the first script element whose content contains
    /// <paramref name="needle"/>, or <c>null</c> when no such element exists.
    /// </summary>
    /// <param name="document">Parsed HTML document to search.</param>
    /// <param name="needle">
    /// Text the script must contain. It is embedded in a single-quoted XPath literal,
    /// so it must not contain a single quote.
    /// </param>
    private static string? FindScriptText(HtmlDocument document, string needle)
    {
        // SelectSingleNode yields null when nothing matches, hence the null-conditional access.
        var node = document.DocumentNode.SelectSingleNode($"//script[contains(., '{needle}')]");
        return node?.InnerText;
    }

    /// <summary>
    /// Returns the first brace-balanced <c>{ ... }</c> span that starts at or after the end of
    /// <paramref name="marker"/> in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to scan.</param>
    /// <param name="marker">Text that must precede the object; matched ordinally.</param>
    /// <returns>
    /// The object text including its outer braces, or <c>null</c> when the marker is absent,
    /// no opening brace follows it, or the braces never balance.
    /// </returns>
    private static string? ExtractJson(string input, string marker)
    {
        var markerIndex = input.IndexOf(marker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        // Skip until the first '{' after the marker.
        var start = input.IndexOf('{', markerIndex + marker.Length);
        if (start < 0)
        {
            return null;
        }

        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = start; i < input.Length; i++)
        {
            var current = input[i];

            if (inString)
            {
                // Braces inside a double-quoted string are data, not structure;
                // a backslash escapes the character that follows it.
                if (escaped)
                {
                    escaped = false;
                }
                else if (current == '\\')
                {
                    escaped = true;
                }
                else if (current == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (current)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return input[start..(i + 1)];
                    }

                    break;
            }
        }

        // Input ended before the outer object was closed: report failure rather than a truncated span.
        return null;
    }
}