using System.Net;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using FictionArchive.Common.Enums;
using FictionArchive.Service.NovelService.Constants;
using FictionArchive.Service.NovelService.Models.Enums;
using FictionArchive.Service.NovelService.Models.SourceAdapters;

namespace FictionArchive.Service.NovelService.Services.SourceAdapters.Novelpia;

/// <summary>
/// Source adapter for novelpia.com. Novel metadata is scraped from the novel's HTML page,
/// the chapter list is paged from the episode-list endpoint, and chapter bodies are read
/// from the JSON returned by the viewer-data endpoint.
/// </summary>
/// <remarks>
/// NOTE(review): several HTML-matching regex literals in this class look truncated (they begin
/// mid-tag, and the chapter image pattern is empty although its capture group 1 is read).
/// They are reproduced unchanged here — confirm them against the site's markup.
/// </remarks>
public class NovelpiaAdapter : ISourceAdapter
{
    private readonly HttpClient _httpClient;
    private readonly ILogger _logger;

    private const string NovelIdRegex = @"novelpia.com\/novel\/(\d+)";
    private const string ChapterIdRegex = @"novelpia.com\/viewer\/(\d+)";

    // Both endpoints are relative paths.
    // NOTE(review): presumably the injected HttpClient has its BaseAddress set to the site root — verify at the registration site.
    private const string EpisodeListEndpoint = "/proc/episode_list";
    private const string ChapterDownloadEndpoint = "/proc/viewer_data/";

    private const string SourceKey = "novelpia";
    private const string SourceName = "Novelpia";
    private const string SourceUrl = "https://novelpia.com";

    // A chapter response containing this marker is treated as a failed download.
    private const string ChapterDownloadFailedMessage = "본인인증";

    // Pause before each episode-list request.
    private const int EpisodePageDelayMs = 500;

    /// <summary>Creates the adapter.</summary>
    /// <param name="httpClient">Client used for every request made by this adapter.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public NovelpiaAdapter(HttpClient httpClient, ILogger logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>Describes this source (name, key and base URL). A new instance is returned on every read.</summary>
    public SourceDescriptor SourceDescriptor => new SourceDescriptor()
    {
        Name = SourceName,
        Key = SourceKey,
        Url = SourceUrl
    };

    /// <summary>Reports whether <paramref name="url"/> looks like a Novelpia novel URL.</summary>
    /// <param name="url">Candidate URL.</param>
    /// <returns><c>true</c> when the URL contains an https novel link with a numeric id.</returns>
    public Task<bool> CanProcessNovel(string url)
    {
        // Nothing is awaited here, so return a completed task instead of building an async state machine.
        return Task.FromResult(Regex.IsMatch(url, @"https://novelpia.com/novel/(\d+)"));
    }

    /// <summary>
    /// Downloads the novel page and the paged episode list and assembles the novel's metadata:
    /// name, description, author, cover image, status, tags and chapter list.
    /// </summary>
    /// <param name="novelUrl">URL of the novel page; must contain the numeric novel id.</param>
    /// <returns>The populated <see cref="NovelMetadata"/>.</returns>
    /// <exception cref="FormatException">The URL does not contain a novel id.</exception>
    /// <exception cref="HttpRequestException">A page or image request failed.</exception>
    public async Task<NovelMetadata> GetMetadata(string novelUrl)
    {
        // Title, author and description come from the novel page; chapters come from the episode-list endpoint.
        uint novelId = ParseId(novelUrl, NovelIdRegex, "novel");

        var novel = new NovelMetadata()
        {
            Url = novelUrl,
            RawLanguage = Language.Kr,
            ExternalId = novelId.ToString(),
            SystemTags = new List<string>(),
            SourceTags = new List<string>(),
            Chapters = new List<ChapterMetadata>(),
            SourceDescriptor = SourceDescriptor
        };

        // Novel metadata
        var novelData = await _httpClient.GetStringAsync(novelUrl);

        // NOTE(review): the three patterns below appear truncated (opening tags missing) — confirm.
        var novelNameMatch = Regex.Match(novelData, @"
(.+)<\/div>");
        var authorMatch = Regex.Match(novelData, @"(?s)\s*(.*?)\s*<\/a>");
        var descriptionMatch = Regex.Match(novelData, @"(?s)\s*(.*?)\s*<\/div>");

        novel.Name = novelNameMatch.Groups[1].Value;
        novel.Description = descriptionMatch.Groups[1].Value;
        // NOTE(review): name and URL both read capture group 2; the URL probably belongs to a
        // different group, but the visible pattern does not show which — left unchanged.
        novel.AuthorName = authorMatch.Groups[2].Value;
        novel.AuthorUrl = authorMatch.Groups[2].Value;

        // Cover image URL: try the href form first, then fall back to the src form.
        var coverImageUrl = Regex
            .Match(novelData, @"href=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""")
            .Groups[1].Value;
        if (string.IsNullOrEmpty(coverImageUrl))
        {
            coverImageUrl = Regex
                .Match(novelData, @"src=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""")
                .Groups[1].Value;
        }

        novel.CoverImage = new ImageData()
        {
            Url = coverImageUrl,
            Data = await GetImageData(coverImageUrl),
        };

        // Badges: "19" marks the novel NSFW, "완결" marks it completed.
        var badgeSet = Regex.Match(novelData, @"(?s)(.*?)<\/p>");
        var badgeMatches = Regex.Matches(badgeSet.Groups[1].Value, @"]*>(.*?)<\/span>");
        var sawCompletedBadge = false;
        foreach (Match badge in badgeMatches)
        {
            var innerText = badge.Groups[1].Value;
            if (innerText == "19")
            {
                novel.SystemTags.Add(SystemTags.Nsfw);
            }

            if (innerText == "완결")
            {
                sawCompletedBadge = true;
            }
        }

        // Decide the status once, after all badges were seen, so that a badge following the
        // "completed" badge can no longer reset the status to in-progress.
        // With no badges at all the status is left untouched, as before.
        if (badgeMatches.Count > 0)
        {
            novel.RawStatus = sawCompletedBadge ? NovelStatus.Completed : NovelStatus.InProgress;
        }

        // Novel tags, de-duplicated while keeping first-seen order.
        var tagSetMatch = Regex.Match(novelData, @"(?s)(.*?)<\/p>");
        var tagMatches = Regex.Matches(tagSetMatch.Groups[1].Value, @"]*>#(.*?)<\/span>");
        var seenTags = new HashSet<string>();
        foreach (Match tagMatch in tagMatches)
        {
            var tagText = tagMatch.Groups[1].Value;
            if (seenTags.Add(tagText))
            {
                novel.SourceTags.Add(tagText);
            }
        }

        // Chapters
        novel.Chapters = await FetchChaptersAsync(novelId);
        return novel;
    }

    /// <summary>
    /// Downloads one chapter: concatenates the HTML-decoded text fragments of the viewer JSON
    /// and fetches the images referenced by those fragments.
    /// </summary>
    /// <param name="chapterUrl">Viewer URL of the chapter; must contain the numeric chapter id.</param>
    /// <returns>The chapter text together with the downloaded image data.</returns>
    /// <exception cref="FormatException">The URL does not contain a chapter id.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response was empty or contained the download-failed marker.
    /// </exception>
    public async Task<ChapterFetchResult> GetRawChapter(string chapterUrl)
    {
        uint chapterId = ParseId(chapterUrl, ChapterIdRegex, "chapter");

        using var response = await _httpClient.PostAsync(ChapterDownloadEndpoint + chapterId, null);
        var responseContent = await response.Content.ReadAsStringAsync();

        if (string.IsNullOrEmpty(responseContent) || responseContent.Contains(ChapterDownloadFailedMessage))
        {
            throw new InvalidOperationException(
                $"Novelpia did not return content for chapter {chapterId} ({chapterUrl}).");
        }

        var fetchResult = new ChapterFetchResult()
        {
            ImageData = new List<ImageData>()
        };
        var builder = new StringBuilder();

        using var doc = JsonDocument.Parse(responseContent);

        // The "s" array holds the chapter fragments; each fragment carries its markup in "text".
        foreach (JsonElement item in doc.RootElement.GetProperty("s").EnumerateArray())
        {
            var text = item.GetProperty("text").GetString();

            // Skip fragments without text and the cover wrapper fragment.
            if (text is null || text.Contains("cover-wrapper"))
            {
                continue;
            }

            // NOTE(review): this pattern is empty in the source although capture group 1 is read
            // below; it looks like the original literal was lost — restore it before relying on images.
            var imageMatch = Regex.Match(text, @"");
            if (imageMatch.Success)
            {
                var url = imageMatch.Groups[1].Value;
                fetchResult.ImageData.Add(new ImageData()
                {
                    Url = url,
                    Data = await GetImageData(url)
                });
            }

            // Fragments styled with "opacity: 0" are left out of the text
            // (an image found in such a fragment has already been recorded above).
            if (text.Contains("opacity: 0"))
            {
                continue;
            }

            builder.Append(WebUtility.HtmlDecode(text));
        }

        fetchResult.Text = builder.ToString();
        return fetchResult;
    }

    /// <summary>Extracts the numeric id captured by group 1 of <paramref name="pattern"/> from a URL.</summary>
    /// <exception cref="FormatException">The URL does not match the pattern.</exception>
    private static uint ParseId(string url, string pattern, string kind)
    {
        var match = Regex.Match(url, pattern);
        if (!match.Success)
        {
            // Same exception type uint.Parse would raise on the empty group, but with a usable message.
            throw new FormatException($"Could not find a Novelpia {kind} id in URL '{url}'.");
        }

        return uint.Parse(match.Groups[1].Value);
    }

    /// <summary>
    /// Pages through the episode list until a page is empty or starts with an already-seen chapter.
    /// </summary>
    private async Task<List<ChapterMetadata>> FetchChaptersAsync(uint novelId)
    {
        var chapters = new List<ChapterMetadata>();
        var seenChapterIds = new HashSet<uint>();
        uint chapterOrder = 0;

        for (uint page = 0; ; page++)
        {
            await Task.Delay(EpisodePageDelayMs);
            _logger.LogInformation("Next chapter batch");

            using var form = new FormUrlEncodedContent(new Dictionary<string, string>
            {
                {"novel_no", novelId.ToString()},
                {"sort", "DOWN"},
                {"page", page.ToString()}
            });
            using var response = await _httpClient.PostAsync(EpisodeListEndpoint, form);

            // Fail loudly on an error response so it cannot be mistaken for the end of the list.
            response.EnsureSuccessStatusCode();
            var responseContent = await response.Content.ReadAsStringAsync();

            // NOTE(review): the tail of this pattern looks truncated (a lazy group at the very end
            // captures a single character) — confirm.
            var capturedChapters = Regex.Matches(responseContent, @"id=""bookmark_(\d+)"">(.+?)");

            // Stop on an empty page (previously an index-out-of-range crash) or when the page
            // starts with a chapter that was already collected.
            if (capturedChapters.Count == 0
                || seenChapterIds.Contains(uint.Parse(capturedChapters[0].Groups[1].Value)))
            {
                break;
            }

            foreach (Match chapter in capturedChapters)
            {
                string chapterId = chapter.Groups[1].Value;
                string chapterName = chapter.Groups[2].Value;
                chapters.Add(new ChapterMetadata
                {
                    Revision = 0,
                    Order = chapterOrder,
                    Url = $"https://novelpia.com/viewer/{chapterId}",
                    Name = chapterName
                });
                seenChapterIds.Add(uint.Parse(chapterId));
                chapterOrder++;
            }
        }

        return chapters;
    }

    /// <summary>Downloads an image; scheme-less URLs are prefixed with <c>https:</c> first.</summary>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    private async Task<byte[]> GetImageData(string url)
    {
        if (!url.StartsWith("http", StringComparison.Ordinal))
        {
            url = "https:" + url;
        }

        using var image = await _httpClient.GetAsync(url);
        if (!image.IsSuccessStatusCode)
        {
            _logger.LogError("Attempting to fetch image with url {imgUrl} returned status code {code}.", url, image.StatusCode);
            throw new HttpRequestException(
                $"Fetching image '{url}' failed with status code {(int)image.StatusCode}.");
        }

        return await image.Content.ReadAsByteArrayAsync();
    }
}