[CHANGE] WIP YouTubeClient & HTML parsing

This commit is contained in:
max
2025-08-25 15:27:16 +02:00
parent 9789c5f535
commit a2489fea8d
7 changed files with 219 additions and 1 deletions

View File

@@ -6,4 +6,9 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="DotBased" Version="1.0.0" />
<PackageReference Include="HtmlAgilityPack" Version="1.12.2" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,18 @@
using System.Text.Json.Serialization;
namespace Manager.YouTube.Models.Innertube;
/// <summary>
/// Typed view over a JSON object whose keys of interest are
/// <c>INNERTUBE_API_KEY</c>, <c>SIGNIN_URL</c> and <c>SBOX_SETTINGS</c>.
/// Every other key is retained in <see cref="AdditionalData"/>.
/// </summary>
public class ClientState
{
    /// <summary>Catch-all for JSON members that have no dedicated property on this type.</summary>
    [JsonExtensionData]
    public Dictionary<string, object> AdditionalData { get; set; } = new();

    /// <summary>Value of the <c>INNERTUBE_API_KEY</c> member, or <c>null</c> when absent.</summary>
    [JsonPropertyName("INNERTUBE_API_KEY")]
    public string? InnertubeApiKey { get; set; }

    /// <summary>Value of the <c>SIGNIN_URL</c> member, or <c>null</c> when absent.</summary>
    [JsonPropertyName("SIGNIN_URL")]
    public string? SigninUrl { get; set; }

    /// <summary>Nested object found under <c>SBOX_SETTINGS</c>, or <c>null</c> when absent.</summary>
    [JsonPropertyName("SBOX_SETTINGS")]
    public SBoxSettings? SBoxSettings { get; set; }
}

View File

@@ -0,0 +1,12 @@
using System.Text.Json.Serialization;
namespace Manager.YouTube.Models.Innertube;
/// <summary>
/// Typed view over a JSON object exposing its <c>VISITOR_DATA</c> member.
/// Every other key is retained in <see cref="AdditionalData"/>.
/// </summary>
public class SBoxSettings
{
    /// <summary>Catch-all for JSON members that have no dedicated property on this type.</summary>
    [JsonExtensionData]
    public Dictionary<string, object> AdditionalData { get; set; } = new();

    /// <summary>Value of the <c>VISITOR_DATA</c> member, or <c>null</c> when absent.</summary>
    [JsonPropertyName("VISITOR_DATA")]
    public string? VisitorData { get; set; }
}

View File

@@ -0,0 +1,59 @@
using System.Text.Json;
using DotBased.Monads;
using Manager.YouTube.Models.Innertube;
using Manager.YouTube.Parsers;
using Manager.YouTube.Util;
namespace Manager.YouTube;
/// <summary>
/// HTTP helpers that operate on a <see cref="YouTubeClient"/>.
/// </summary>
public static class NetworkService
{
    /// <summary>
    /// Sends a GET request to <c>https://www.youtube.com/</c> using the client's
    /// <see cref="HttpClient"/>, extracts the argument of the <c>ytcfg.set(...)</c> call
    /// from the returned HTML and deserializes it into a <see cref="ClientState"/>.
    /// </summary>
    /// <param name="client">Client supplying the HTTP client, user agent and SAPISID cookie.</param>
    /// <returns>
    /// The parsed <see cref="ClientState"/> on success; otherwise a failed result describing
    /// whether the request, the HTML extraction or the JSON parsing went wrong.
    /// </returns>
    public static async Task<Result<ClientState>> GetClientStateAsync(YouTubeClient client)
    {
        // Check for a usable HTTP client first so no request object is built needlessly.
        var http = client.GetHttpClient();
        if (http == null)
        {
            return ResultError.Fail("Unable to get http client!");
        }

        // NOTE(review): the trailing slash is part of both the hashed origin and the Origin
        // header value - confirm this is what the server expects.
        var origin = "https://www.youtube.com/";
        using var httpRequest = new HttpRequestMessage
        {
            Method = HttpMethod.Get,
            RequestUri = new Uri(origin)
        };
        httpRequest.Headers.IfModifiedSince = new DateTimeOffset(DateTime.UtcNow);

        // UserAgent is nullable on the client; only add the header when one is configured.
        if (!string.IsNullOrWhiteSpace(client.UserAgent))
        {
            httpRequest.Headers.UserAgent.ParseAdd(client.UserAgent);
        }

        // Read the cookie once: every access to SapisidCookie walks the cookie container.
        var sapisidCookie = client.SapisidCookie;
        if (sapisidCookie != null)
        {
            httpRequest.Headers.Authorization = AuthenticationUtilities.GetSapisidHashHeader(sapisidCookie.Value, origin);
            httpRequest.Headers.Add("Origin", origin);
        }

        HttpResponseMessage response;
        try
        {
            response = await http.SendAsync(httpRequest);
        }
        catch (HttpRequestException e)
        {
            // Transport failures are reported through the Result like every other failure here.
            return ResultError.Error(e, "Error while requesting client state!");
        }

        using (response)
        {
            if (!response.IsSuccessStatusCode)
            {
                var responseResult = await response.Content.ReadAsStringAsync();
                return Result<ClientState>.Fail(ResultError.Fail(responseResult));
            }

            var responseHtml = await response.Content.ReadAsStringAsync();
            var clientStateResult = HtmlParser.GetJsonFromScriptFunction(responseHtml, "ytcfg.set");
            if (!clientStateResult.IsSuccess)
            {
                if (clientStateResult.Error is not null)
                {
                    return clientStateResult.Error;
                }

                // A failed result without error details must not reach the deserializer.
                return ResultError.Fail("Unable to find client state in response!");
            }

            ClientState? clientState;
            try
            {
                clientState = JsonSerializer.Deserialize<ClientState>(clientStateResult.Value);
            }
            catch (Exception e)
            {
                return ResultError.Error(e, "Error while parsing JSON!");
            }

            // Deserialize yields null for a JSON "null" literal.
            return clientState == null ? ResultError.Fail("Unable to parse client state!") : clientState;
        }
    }
}

View File

@@ -0,0 +1,39 @@
using System.Text.RegularExpressions;
using DotBased.Monads;
using HtmlAgilityPack;
namespace Manager.YouTube.Parsers;
/// <summary>
/// Helpers for pulling data out of HTML documents.
/// </summary>
public static class HtmlParser
{
    /// <summary>
    /// Finds a <c>functionName(...);</c> call inside the document's <c>&lt;script&gt;</c>
    /// elements and returns the trimmed text between its parentheses.
    /// </summary>
    /// <param name="html">HTML document to search.</param>
    /// <param name="functionName">Name of the function whose call argument is wanted, e.g. <c>ytcfg.set</c>.</param>
    /// <returns>
    /// The argument text on success; a failed result when the input is blank, when no script
    /// mentions <paramref name="functionName"/>, or when no well-formed call could be extracted.
    /// </returns>
    public static Result<string> GetJsonFromScriptFunction(string html, string functionName)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return ResultError.Fail("html cannot be empty!");
        }
        if (string.IsNullOrWhiteSpace(functionName))
        {
            return ResultError.Fail("No function names provided!");
        }

        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(html);

        // Select every script node and filter in code: this avoids splicing functionName into
        // an XPath string literal (which breaks on a quote) and copes with no node matching.
        var scriptNodes = htmlDocument.DocumentNode.SelectNodes("//script");
        var functionNameSeen = false;
        if (scriptNodes is not null)
        {
            foreach (var scriptNode in scriptNodes)
            {
                var scriptText = scriptNode.InnerText;
                if (string.IsNullOrWhiteSpace(scriptText) || !scriptText.Contains(functionName, StringComparison.Ordinal))
                {
                    continue;
                }

                functionNameSeen = true;
                if (TryExtractCallArgument(scriptText, functionName, out var jsonString))
                {
                    return jsonString;
                }
            }
        }

        if (!functionNameSeen)
        {
            return ResultError.Fail($"Could not find {functionName} in html script nodes!");
        }

        return ResultError.Fail($"Unable to parse {functionName} JSON!");
    }

    /// <summary>
    /// Looks for the first <c>functionName(...);</c> call in <paramref name="script"/> whose
    /// parentheses balance and whose argument is not blank.
    /// </summary>
    private static bool TryExtractCallArgument(string script, string functionName, out string argument)
    {
        argument = string.Empty;
        var callPrefix = functionName + "(";
        var searchFrom = 0;

        while (searchFrom < script.Length)
        {
            var callIndex = script.IndexOf(callPrefix, searchFrom, StringComparison.Ordinal);
            if (callIndex < 0)
            {
                return false;
            }

            var argumentStart = callIndex + callPrefix.Length;
            var closeIndex = FindClosingParenthesis(script, argumentStart);

            // Only accept a call that is terminated as a statement: ");".
            if (closeIndex > argumentStart && closeIndex + 1 < script.Length && script[closeIndex + 1] == ';')
            {
                var candidate = script[argumentStart..closeIndex].Trim();
                if (candidate.Length > 0)
                {
                    argument = candidate;
                    return true;
                }
            }

            // This occurrence was not usable; keep looking for a later one.
            searchFrom = callIndex + 1;
        }

        return false;
    }

    /// <summary>
    /// Returns the index of the parenthesis closing the call whose argument starts at
    /// <paramref name="startIndex"/>, or -1 when none is found. Parentheses inside single- or
    /// double-quoted string literals are ignored and nested parentheses are balanced.
    /// </summary>
    private static int FindClosingParenthesis(string text, int startIndex)
    {
        var depth = 1;
        var activeQuote = '\0';

        for (var i = startIndex; i < text.Length; i++)
        {
            var current = text[i];

            if (activeQuote != '\0')
            {
                if (current == '\\')
                {
                    // Skip the escaped character so an escaped quote does not end the literal.
                    i++;
                }
                else if (current == activeQuote)
                {
                    activeQuote = '\0';
                }
                continue;
            }

            switch (current)
            {
                case '"':
                case '\'':
                    activeQuote = current;
                    break;
                case '(':
                    depth++;
                    break;
                case ')':
                    depth--;
                    if (depth == 0)
                    {
                        return i;
                    }
                    break;
            }
        }

        return -1;
    }
}

View File

@@ -0,0 +1,37 @@
using System.Globalization;
using System.Net.Http.Headers;
using System.Security.Cryptography;
using System.Text;
namespace Manager.YouTube.Util;
/// <summary>
/// Builds the <c>SAPISIDHASH</c> authorization header from a SAPISID cookie value.
/// </summary>
public static class AuthenticationUtilities
{
    private const string HeaderScheme = "SAPISIDHASH";

    // Dave Thomas @ https://stackoverflow.com/a/32065323/9948300
    /// <summary>
    /// Creates an authorization header whose parameter is
    /// <c>{unixSeconds}_{sha1Hex("{unixSeconds} {sapisid} {origin}")}</c>.
    /// </summary>
    /// <param name="sapisid">Value of the SAPISID cookie.</param>
    /// <param name="origin">Origin string that is mixed into the hash.</param>
    /// <returns>The header value, or <c>null</c> when either argument is null, empty or whitespace.</returns>
    public static AuthenticationHeaderValue? GetSapisidHashHeader(string sapisid, string origin)
    {
        if (string.IsNullOrWhiteSpace(sapisid) || string.IsNullOrWhiteSpace(origin))
        {
            return null;
        }

        // The same timestamp must appear in the hashed text and in the header prefix.
        var time = GetTime();
        var sha1 = HashString($"{time} {sapisid} {origin}");
        return new AuthenticationHeaderValue(HeaderScheme, $"{time}_{sha1}");
    }

    /// <summary>Returns the SHA-1 digest of <paramref name="stringData"/> as lowercase hex.</summary>
    private static string HashString(string stringData)
    {
        // UTF-8 yields the same bytes as ASCII for ASCII input, but does not silently turn
        // other characters into '?'.
        var hashData = SHA1.HashData(Encoding.UTF8.GetBytes(stringData));
        return Convert.ToHexString(hashData).ToLowerInvariant();
    }

    /// <summary>Returns the current Unix time in whole seconds as an invariant-culture string.</summary>
    private static string GetTime()
    {
        return DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString(CultureInfo.InvariantCulture);
    }
}

View File

@@ -1,6 +1,54 @@
using System.Net;
using DotBased.Logging;
using Manager.YouTube.Models.Innertube;
namespace Manager.YouTube;
/// <summary>
/// Bundles a cookie container, a user agent and an <see cref="HttpClient"/> wired to that
/// cookie container. Dispose the instance to release the underlying HTTP client.
/// </summary>
public sealed class YouTubeClient : IDisposable
{
    // Id and AccountName have no assignment in this class yet; they default to an empty
    // string so the non-nullable declarations hold at runtime.
    public string Id { get; private set; } = string.Empty;
    public string AccountName { get; private set; } = string.Empty;

    /// <summary>User agent string supplied at construction.</summary>
    public string? UserAgent { get; private set; }

    /// <summary>Cookie container shared with the internal HTTP client handler.</summary>
    public CookieContainer CookieContainer { get; }

    /// <summary>Last client state stored by <c>GetStateAsync</c>; <c>null</c> until then.</summary>
    public ClientState? ClientState { get; private set; }

    /// <summary>The cookie named <c>SAPISID</c> from <see cref="CookieContainer"/>, or <c>null</c> when absent.</summary>
    public Cookie? SapisidCookie => CookieContainer.GetAllCookies()["SAPISID"];

    /// <summary>Returns the current HTTP client, or <c>null</c> once this instance has been disposed.</summary>
    public HttpClient? GetHttpClient() => _httpClient;

    private readonly ILogger? _logger;
    private HttpClient? _httpClient;

    /// <summary>
    /// Creates the client and immediately builds its HTTP client.
    /// </summary>
    /// <param name="cookieContainer">Cookies sent with every request made through this client.</param>
    /// <param name="userAgent">User agent string exposed through <see cref="UserAgent"/>.</param>
    /// <param name="logger">Optional logger; logging is skipped when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="cookieContainer"/> is <c>null</c>.</exception>
    public YouTubeClient(CookieContainer cookieContainer, string userAgent, ILogger? logger = null)
    {
        // Fail before any state is assigned instead of deep inside the handler setup.
        ArgumentNullException.ThrowIfNull(cookieContainer);

        CookieContainer = cookieContainer;
        _logger = logger;
        UserAgent = userAgent;
        SetupClient();
    }

    /// <summary>Releases the HTTP client owned by this instance.</summary>
    public void Dispose()
    {
        _httpClient?.Dispose();
        _httpClient = null;
    }

    /// <summary>Disposes any previous HTTP client and builds a new one bound to <see cref="CookieContainer"/>.</summary>
    private void SetupClient()
    {
        _logger?.Information("Building http client...");
        _httpClient?.Dispose();
        var clientHandler = new HttpClientHandler
        {
            AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip,
            UseCookies = true,
            CookieContainer = CookieContainer
        };
        _httpClient = new HttpClient(clientHandler);
    }

    /// <summary>
    /// Fetches the client state through <see cref="NetworkService.GetClientStateAsync"/> and
    /// stores it in <see cref="ClientState"/>; logs a warning and leaves the state untouched on failure.
    /// </summary>
    private async Task GetStateAsync()
    {
        var state = await NetworkService.GetClientStateAsync(this);
        if (!state.IsSuccess)
        {
            _logger?.Warning($"Error getting client state: {state.Error}");
            return;
        }

        ClientState = state.Value;
        // ClientState is declared nullable, so read the key null-safely for the log entry.
        _logger?.Information("Client state retrieved. With API key: {InnertubeApiKey}", ClientState?.InnertubeApiKey);
    }
}