// Source file: FictionArchive/FictionArchive.Service.NovelService/Services/SourceAdapters/Novelpia/NovelpiaAdapter.cs
// Repository viewer listing: 246 lines, 8.7 KiB, C#

using System.Net;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using FictionArchive.Common.Enums;
using FictionArchive.Service.NovelService.Constants;
using FictionArchive.Service.NovelService.Models.Enums;
using FictionArchive.Service.NovelService.Models.SourceAdapters;
namespace FictionArchive.Service.NovelService.Services.SourceAdapters.Novelpia;
/// <summary>
/// Source adapter for novelpia.com. Novel metadata is scraped from the novel page's HTML with
/// regular expressions, the chapter list is read from the paged episode-list endpoint, and
/// chapter bodies are read from the JSON returned by the viewer-data endpoint.
/// </summary>
public class NovelpiaAdapter : ISourceAdapter
{
    private readonly HttpClient _httpClient;
    private readonly ILogger _logger;

    // Patterns used to pull the numeric id out of a novel / chapter URL.
    private const string NovelIdRegex = @"novelpia\.com\/novel\/(\d+)";
    private const string ChapterIdRegex = @"novelpia\.com\/viewer\/(\d+)";

    // Pattern a URL must match for this adapter to claim it.
    private const string SupportedNovelUrlRegex = @"https://novelpia\.com/novel/(\d+)";

    // Relative endpoints. NOTE(review): these presumably rely on the injected HttpClient having
    // its BaseAddress configured for novelpia.com - confirm in the DI registration.
    private const string EpisodeListEndpoint = "/proc/episode_list";
    private const string ChapterDownloadEndpoint = "/proc/viewer_data/";

    private const string SourceKey = "novelpia";
    private const string SourceName = "Novelpia";
    private const string SourceUrl = "https://novelpia.com";

    // Marker text (Korean, roughly "identity verification"). A chapter response containing it
    // is treated as a failed download.
    private const string ChapterDownloadFailedMessage = "본인인증";

    // Badge texts inspected on the novel page: the age badge and the "completed" badge.
    private const string AdultBadgeText = "19";
    private const string CompletedBadgeText = "완결";

    /// <summary>Creates the adapter.</summary>
    /// <param name="httpClient">HTTP client used for every request to the site.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public NovelpiaAdapter(HttpClient httpClient, ILogger<NovelpiaAdapter> logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>Describes this source (display name, key and base URL).</summary>
    public SourceDescriptor SourceDescriptor => new SourceDescriptor()
    {
        Name = SourceName,
        Key = SourceKey,
        Url = SourceUrl
    };

    /// <summary>Reports whether <paramref name="url"/> looks like a Novelpia novel page URL.</summary>
    /// <param name="url">Candidate novel URL.</param>
    /// <returns>A completed task holding <c>true</c> when the URL matches the novel-page pattern.</returns>
    public Task<bool> CanProcessNovel(string url)
    {
        // Nothing asynchronous happens here, so return an already-completed task.
        return Task.FromResult(Regex.IsMatch(url, SupportedNovelUrlRegex));
    }

    /// <summary>
    /// Downloads the novel page and the paged episode list and builds the novel's metadata:
    /// title, author, description, cover image, status/NSFW badges, tags and chapter list.
    /// </summary>
    /// <param name="novelUrl">URL of the novel page; must contain <c>novelpia.com/novel/&lt;id&gt;</c>.</param>
    /// <returns>The populated <see cref="NovelMetadata"/>.</returns>
    /// <exception cref="FormatException">No novel id could be extracted from <paramref name="novelUrl"/>.</exception>
    public async Task<NovelMetadata> GetMetadata(string novelUrl)
    {
        var novelIdMatch = Regex.Match(novelUrl, NovelIdRegex);
        if (!novelIdMatch.Success)
        {
            throw new FormatException($"Could not extract a Novelpia novel id from URL '{novelUrl}'.");
        }

        uint novelId = uint.Parse(novelIdMatch.Groups[1].Value);

        NovelMetadata novel = new NovelMetadata()
        {
            Url = novelUrl,
            RawLanguage = Language.Kr,
            ExternalId = novelId.ToString(),
            SystemTags = new List<string>(),
            SourceTags = new List<string>(),
            Chapters = new List<ChapterMetadata>(),
            SourceDescriptor = SourceDescriptor
        };

        // Page markup this scraper relies on:
        //   title:       <div class="ep-info-line epnew-novel-title">TITLE</div>
        //   author:      <a class="writer-name" href="/user/ID">NAME</a>
        //   description: <div class="synopsis">TEXT</div>
        var novelData = await _httpClient.GetStringAsync(novelUrl);
        // Lazy capture so the title stops at the first closing </div> on the line.
        var novelNameMatch = Regex.Match(novelData, @"<div class=""ep-info-line epnew-novel-title"">(.+?)<\/div>");
        var authorMatch = Regex.Match(novelData, @"(?s)<a\s+class=""writer-name""\s+href=""([^""]+)"">\s*(.*?)\s*<\/a>");
        var descriptionMatch = Regex.Match(novelData, @"(?s)<div\s+class=""synopsis"">\s*(.*?)\s*<\/div>");

        novel.Name = novelNameMatch.Groups[1].Value;
        novel.Description = descriptionMatch.Groups[1].Value;
        // Group 1 is the href, group 2 is the visible author name.
        novel.AuthorName = authorMatch.Groups[2].Value;
        novel.AuthorUrl = ToAbsoluteUrl(authorMatch.Groups[1].Value);

        // Cover image URL: look for an href first, then fall back to a src attribute.
        var coverMatch = Regex.Match(novelData, @"href=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""");
        string coverImageUrl = coverMatch.Groups[1].Value;
        if (string.IsNullOrEmpty(coverImageUrl))
        {
            coverMatch = Regex.Match(novelData, @"src=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""");
            coverImageUrl = coverMatch.Groups[1].Value;
        }

        novel.CoverImage = new ImageData()
        {
            Url = coverImageUrl,
            Data = await GetImageData(coverImageUrl),
        };

        ApplyBadges(novel, novelData);

        foreach (string tag in ExtractSourceTags(novelData))
        {
            novel.SourceTags.Add(tag);
        }

        novel.Chapters = await GetChapterList(novelId);
        return novel;
    }

    /// <summary>
    /// Downloads a chapter through the viewer-data endpoint and assembles its text and images.
    /// </summary>
    /// <param name="chapterUrl">Chapter URL; must contain <c>novelpia.com/viewer/&lt;id&gt;</c>.</param>
    /// <returns>The HTML-decoded chapter text plus the data of every image referenced in it.</returns>
    /// <exception cref="FormatException">No chapter id could be extracted from <paramref name="chapterUrl"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response was empty or contained the download-failed marker text.
    /// </exception>
    public async Task<ChapterFetchResult> GetRawChapter(string chapterUrl)
    {
        var chapterIdMatch = Regex.Match(chapterUrl, ChapterIdRegex);
        if (!chapterIdMatch.Success)
        {
            throw new FormatException($"Could not extract a Novelpia chapter id from URL '{chapterUrl}'.");
        }

        uint chapterId = uint.Parse(chapterIdMatch.Groups[1].Value);
        var endpoint = ChapterDownloadEndpoint + chapterId;

        using var result = await _httpClient.PostAsync(endpoint, null);
        var responseContent = await result.Content.ReadAsStringAsync();
        if (string.IsNullOrEmpty(responseContent) || responseContent.Contains(ChapterDownloadFailedMessage))
        {
            throw new InvalidOperationException(
                $"Downloading chapter {chapterId} failed: the response was empty or contained the failure marker.");
        }

        var fetchResult = new ChapterFetchResult()
        {
            ImageData = new List<ImageData>()
        };
        StringBuilder builder = new StringBuilder();

        using var doc = JsonDocument.Parse(responseContent);
        // The chapter body is the "s" array; each element carries one fragment in "text".
        JsonElement fragments = doc.RootElement.GetProperty("s");
        foreach (JsonElement item in fragments.EnumerateArray())
        {
            string? text = item.GetProperty("text").GetString();
            if (text is null)
            {
                // A JSON null fragment has nothing to append and would make Regex.Match throw.
                continue;
            }

            // Fragments containing "cover-wrapper" are dropped entirely (no text, no image).
            if (text.Contains("cover-wrapper"))
            {
                continue;
            }

            var imageMatch = Regex.Match(text, @"<img.+?src=\""(.+?)\"".+?>");
            if (imageMatch.Success)
            {
                var url = imageMatch.Groups[1].Value;
                fetchResult.ImageData.Add(new ImageData()
                {
                    Url = url,
                    Data = await GetImageData(url)
                });
            }

            // Fragments styled with "opacity: 0" are left out of the text (their image, if
            // any, has already been collected above).
            if (text.Contains("opacity: 0"))
            {
                continue;
            }

            builder.Append(WebUtility.HtmlDecode(text));
        }

        fetchResult.Text = builder.ToString();
        return fetchResult;
    }

    /// <summary>Turns a site-relative or protocol-relative href into an absolute URL.</summary>
    private static string ToAbsoluteUrl(string href)
    {
        if (href.StartsWith("//", StringComparison.Ordinal))
        {
            return "https:" + href;
        }

        if (href.StartsWith("/", StringComparison.Ordinal))
        {
            return SourceUrl + href;
        }

        return href;
    }

    /// <summary>Reads the "in-badge" spans and applies the NSFW tag and completion status.</summary>
    private static void ApplyBadges(NovelMetadata novel, string novelData)
    {
        var badgeSet = Regex.Match(novelData, @"(?s)<p\s+class=""in-badge"">(.*?)<\/p>");
        var badgeMatches = Regex.Matches(badgeSet.Groups[1].Value, @"<span[^>]*>(.*?)<\/span>");

        bool isCompleted = false;
        foreach (Match badge in badgeMatches)
        {
            var innerText = badge.Groups[1].Value;
            if (innerText == AdultBadgeText)
            {
                novel.SystemTags.Add(SystemTags.Nsfw);
            }

            if (innerText == CompletedBadgeText)
            {
                isCompleted = true;
            }
        }

        // Decide the status once, after all badges were seen, so a badge that follows the
        // "completed" badge cannot reset it. With no badges the status is left untouched.
        if (badgeMatches.Count > 0)
        {
            novel.RawStatus = isCompleted ? NovelStatus.Completed : NovelStatus.InProgress;
        }
    }

    /// <summary>Collects the distinct "#tag" spans from the page's "writer-tag" paragraph.</summary>
    private static HashSet<string> ExtractSourceTags(string novelData)
    {
        HashSet<string> tags = new HashSet<string>();
        var tagSetMatch = Regex.Match(novelData, @"(?s)<p\s+class=""writer-tag"">(.*?)<\/p>");
        var tagMatches = Regex.Matches(tagSetMatch.Groups[1].Value, @"<span[^>]*>#(.*?)<\/span>");
        foreach (Match tagMatch in tagMatches)
        {
            tags.Add(tagMatch.Groups[1].Value);
        }

        return tags;
    }

    /// <summary>
    /// Pages through the episode-list endpoint, starting at page 0, until a page is empty or
    /// starts with a chapter that was already collected.
    /// </summary>
    private async Task<List<ChapterMetadata>> GetChapterList(uint novelId)
    {
        uint page = 0;
        uint chapterOrder = 0;
        List<ChapterMetadata> chapters = new List<ChapterMetadata>();
        HashSet<uint> seenChapterIds = new HashSet<uint>();

        while (true)
        {
            // Fixed pause before every request to the episode-list endpoint.
            await Task.Delay(500);
            _logger.LogInformation("Next chapter batch");

            using var response = await _httpClient.PostAsync(EpisodeListEndpoint, new FormUrlEncodedContent(new Dictionary<string, string>
            {
                {"novel_no", novelId.ToString()},
                {"sort", "DOWN"},
                {"page", page.ToString()}
            }));
            var responseContent = await response.Content.ReadAsStringAsync();
            var capturedChapters = Regex.Matches(responseContent, @"id=""bookmark_(\d+)""></i>(.+?)</b>");

            // An empty page means there is nothing (more) to read; indexing [0] would throw.
            if (capturedChapters.Count == 0)
            {
                break;
            }

            // A page whose first chapter was already collected is treated as the end of the list.
            if (seenChapterIds.Contains(uint.Parse(capturedChapters[0].Groups[1].Value)))
            {
                break;
            }

            foreach (Match chapter in capturedChapters)
            {
                string chapterId = chapter.Groups[1].Value;
                string chapterName = chapter.Groups[2].Value;
                chapters.Add(new ChapterMetadata
                {
                    Revision = 0,
                    Order = chapterOrder,
                    Url = $"https://novelpia.com/viewer/{chapterId}",
                    Name = chapterName
                });
                seenChapterIds.Add(uint.Parse(chapterId));
                chapterOrder++;
            }

            page++;
        }

        return chapters;
    }

    /// <summary>Downloads an image and returns its bytes.</summary>
    /// <param name="url">Absolute or protocol-relative ("//host/path") image URL.</param>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    private async Task<byte[]> GetImageData(string url)
    {
        // URLs without a scheme get "https:" prepended.
        if (!url.StartsWith("http", StringComparison.Ordinal))
        {
            url = "https:" + url;
        }

        using var image = await _httpClient.GetAsync(url);
        if (!image.IsSuccessStatusCode)
        {
            _logger.LogError("Attempting to fetch image with url {imgUrl} returned status code {code}.", url, image.StatusCode);
            throw new HttpRequestException($"Fetching image '{url}' returned status code {(int)image.StatusCode}.");
        }

        return await image.Content.ReadAsByteArrayAsync();
    }
}