246 lines
8.7 KiB
C#
246 lines
8.7 KiB
C#
using System.Net;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.RegularExpressions;
|
|
using FictionArchive.Common.Enums;
|
|
using FictionArchive.Service.NovelService.Constants;
|
|
using FictionArchive.Service.NovelService.Models.Enums;
|
|
using FictionArchive.Service.NovelService.Models.SourceAdapters;
|
|
|
|
namespace FictionArchive.Service.NovelService.Services.SourceAdapters.Novelpia;
|
|
|
|
/// <summary>
/// <see cref="ISourceAdapter"/> implementation that scrapes novel metadata, chapter lists
/// and chapter bodies from Novelpia by pattern-matching the returned HTML/JSON.
/// </summary>
public class NovelpiaAdapter : ISourceAdapter
{
    private readonly HttpClient _httpClient;
    private readonly ILogger _logger;

    // The dots are escaped so that only the literal host "novelpia.com" matches.
    private const string NovelIdRegex = @"novelpia\.com/novel/(\d+)";
    private const string ChapterIdRegex = @"novelpia\.com/viewer/(\d+)";
    private const string ProcessableNovelUrlRegex = @"https://novelpia\.com/novel/(\d+)";

    // Group 1 is the chapter id, group 2 the chapter title.
    private const string ChapterEntryRegex = @"id=""bookmark_(\d+)""></i>(.+?)</b>";

    // Relative endpoints: assumes the injected HttpClient has a BaseAddress configured -- TODO confirm.
    private const string EpisodeListEndpoint = "/proc/episode_list";
    private const string ChapterDownloadEndpoint = "/proc/viewer_data/";

    private const string SourceKey = "novelpia";
    private const string SourceName = "Novelpia";
    private const string SourceUrl = "https://novelpia.com";

    // A viewer response containing this text (Korean, roughly "identity verification")
    // is treated as a failed chapter download.
    private const string ChapterDownloadFailedMessage = "본인인증";

    // Badge texts inspected on the novel page.
    private const string AdultBadge = "19";
    private const string CompletedBadge = "완결";

    /// <summary>Creates the adapter.</summary>
    /// <param name="httpClient">Client used for every request issued by this adapter.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    public NovelpiaAdapter(HttpClient httpClient, ILogger<NovelpiaAdapter> logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>Describes this source (name, key and base URL); a new instance is returned on every read.</summary>
    public SourceDescriptor SourceDescriptor => new SourceDescriptor()
    {
        Name = SourceName,
        Key = SourceKey,
        Url = SourceUrl
    };

    /// <summary>Reports whether <paramref name="url"/> contains an https Novelpia novel URL.</summary>
    /// <param name="url">Candidate URL; <c>null</c> is reported as not processable.</param>
    /// <returns>A completed task holding <c>true</c> when the URL can be handled by this adapter.</returns>
    public Task<bool> CanProcessNovel(string url)
    {
        // No asynchronous work is needed, so a completed task is returned directly.
        return Task.FromResult(url is not null && Regex.IsMatch(url, ProcessableNovelUrlRegex));
    }

    /// <summary>
    /// Downloads the novel page and the paged episode list and assembles the novel's metadata:
    /// title, synopsis, author, cover image, badges (NSFW / completion status), tags and chapters.
    /// </summary>
    /// <param name="novelUrl">URL containing <c>novelpia.com/novel/{id}</c>.</param>
    /// <returns>The populated metadata. The cover image is left unset when none is found on the page.</returns>
    /// <exception cref="FormatException">The URL does not contain a Novelpia novel id.</exception>
    /// <exception cref="HttpRequestException">A page or image request failed.</exception>
    public async Task<NovelMetadata> GetMetadata(string novelUrl)
    {
        var novelIdMatch = Regex.Match(novelUrl, NovelIdRegex);
        if (!novelIdMatch.Success)
        {
            throw new FormatException($"'{novelUrl}' does not contain a Novelpia novel id.");
        }

        uint novelId = uint.Parse(novelIdMatch.Groups[1].Value);

        NovelMetadata novel = new NovelMetadata()
        {
            Url = novelUrl,
            RawLanguage = Language.Kr,
            ExternalId = novelId.ToString(),
            SystemTags = new List<string>(),
            SourceTags = new List<string>(),
            Chapters = new List<ChapterMetadata>(),
            SourceDescriptor = SourceDescriptor
        };

        var novelData = await _httpClient.GetStringAsync(novelUrl);

        // Title, e.g. <div class="ep-info-line epnew-novel-title">...</div>.
        // The capture is lazy so it stops at the first closing tag on the line.
        var novelNameMatch = Regex.Match(novelData, @"<div class=""ep-info-line epnew-novel-title"">(.+?)<\/div>");
        // Author, e.g. <a class="writer-name" href="/user/579482">...</a>: group 1 = href, group 2 = name.
        var authorMatch = Regex.Match(novelData, @"(?s)<a\s+class=""writer-name""\s+href=""([^""]+)"">\s*(.*?)\s*<\/a>");
        var descriptionMatch = Regex.Match(novelData, @"(?s)<div\s+class=""synopsis"">\s*(.*?)\s*<\/div>");

        novel.Name = novelNameMatch.Groups[1].Value;
        novel.Description = descriptionMatch.Groups[1].Value;
        novel.AuthorName = authorMatch.Groups[2].Value;
        novel.AuthorUrl = authorMatch.Groups[1].Value;

        string coverImageUrl = FindCoverImageUrl(novelData);
        if (string.IsNullOrEmpty(coverImageUrl))
        {
            // Without this guard the empty URL would be turned into "https:" and the request would fail.
            _logger.LogWarning("No cover image found for novel {novelUrl}.", novelUrl);
        }
        else
        {
            novel.CoverImage = new ImageData()
            {
                Url = coverImageUrl,
                Data = await GetImageData(coverImageUrl),
            };
        }

        ApplyBadges(novel, novelData);
        ApplyTags(novel, novelData);
        novel.Chapters = await GetChapterList(novelId);

        return novel;
    }

    /// <summary>
    /// Downloads one chapter through the viewer endpoint and returns its HTML-decoded text
    /// together with the images referenced by it.
    /// </summary>
    /// <param name="chapterUrl">URL containing <c>novelpia.com/viewer/{id}</c>.</param>
    /// <returns>The chapter text and the downloaded images.</returns>
    /// <exception cref="FormatException">The URL does not contain a Novelpia chapter id.</exception>
    /// <exception cref="InvalidOperationException">The response was empty or contained the failure marker.</exception>
    /// <exception cref="HttpRequestException">An image referenced by the chapter could not be downloaded.</exception>
    public async Task<ChapterFetchResult> GetRawChapter(string chapterUrl)
    {
        var chapterIdMatch = Regex.Match(chapterUrl, ChapterIdRegex);
        if (!chapterIdMatch.Success)
        {
            throw new FormatException($"'{chapterUrl}' does not contain a Novelpia chapter id.");
        }

        var chapterId = uint.Parse(chapterIdMatch.Groups[1].Value);
        var endpoint = ChapterDownloadEndpoint + chapterId;
        using var result = await _httpClient.PostAsync(endpoint, null);
        var responseContent = await result.Content.ReadAsStringAsync();

        if (string.IsNullOrEmpty(responseContent) || responseContent.Contains(ChapterDownloadFailedMessage))
        {
            throw new InvalidOperationException(
                $"Chapter {chapterId} could not be downloaded: the response was empty or contained the failure marker.");
        }

        var fetchResult = new ChapterFetchResult()
        {
            ImageData = new List<ImageData>()
        };

        StringBuilder builder = new StringBuilder();
        using var doc = JsonDocument.Parse(responseContent);

        // The chapter body is the "s" array; each element carries a "text" fragment.
        JsonElement sArray = doc.RootElement.GetProperty("s");

        foreach (JsonElement item in sArray.EnumerateArray())
        {
            var text = item.GetProperty("text").GetString();

            // A JSON null fragment has nothing to contribute; cover-wrapper fragments are skipped entirely.
            if (text is null || text.Contains("cover-wrapper"))
            {
                continue;
            }

            var imageMatch = Regex.Match(text, @"<img.+?src=\""(.+?)\"".+?>");
            if (imageMatch.Success)
            {
                var url = imageMatch.Groups[1].Value;
                fetchResult.ImageData.Add(new ImageData()
                {
                    Url = url,
                    Data = await GetImageData(url)
                });
            }

            // Fragments styled with "opacity: 0" are left out of the text
            // (an image found in such a fragment has already been collected above).
            if (text.Contains("opacity: 0"))
            {
                continue;
            }

            builder.Append(WebUtility.HtmlDecode(text));
        }

        fetchResult.Text = builder.ToString();

        return fetchResult;
    }

    /// <summary>Returns the cover image URL found in the novel page, or an empty string when there is none.</summary>
    private static string FindCoverImageUrl(string novelData)
    {
        // The cover is looked up as a link target first, then as an image source.
        var coverMatch = Regex.Match(novelData, @"href=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""");
        if (!coverMatch.Success)
        {
            coverMatch = Regex.Match(novelData, @"src=""(//images\.novelpia\.com/imagebox/cover/.+?\.file)""");
        }

        return coverMatch.Groups[1].Value;
    }

    /// <summary>Reads the "in-badge" block: adds the NSFW system tag and derives the publication status.</summary>
    private static void ApplyBadges(NovelMetadata novel, string novelData)
    {
        var badgeSet = Regex.Match(novelData, @"(?s)<p\s+class=""in-badge"">(.*?)<\/p>");
        var badgeMatches = Regex.Matches(badgeSet.Groups[1].Value, @"<span[^>]*>(.*?)<\/span>");
        if (badgeMatches.Count == 0)
        {
            // No badges at all: the status is left untouched.
            return;
        }

        bool completed = false;
        foreach (Match badge in badgeMatches)
        {
            switch (badge.Groups[1].Value)
            {
                case AdultBadge:
                    novel.SystemTags.Add(SystemTags.Nsfw);
                    break;
                case CompletedBadge:
                    completed = true;
                    break;
            }
        }

        // Decided once after all badges were read, so the badge order cannot overwrite a "completed" badge.
        novel.RawStatus = completed ? NovelStatus.Completed : NovelStatus.InProgress;
    }

    /// <summary>Reads the "writer-tag" block and adds each distinct "#tag" to the novel's source tags.</summary>
    private static void ApplyTags(NovelMetadata novel, string novelData)
    {
        var tagSetMatch = Regex.Match(novelData, @"(?s)<p\s+class=""writer-tag"">(.*?)<\/p>");
        var tagMatches = Regex.Matches(tagSetMatch.Groups[1].Value, @"<span[^>]*>#(.*?)<\/span>");

        var seenTags = new HashSet<string>();
        foreach (Match tagMatch in tagMatches)
        {
            var tagText = tagMatch.Groups[1].Value;
            if (seenTags.Add(tagText))
            {
                novel.SourceTags.Add(tagText);
            }
        }
    }

    /// <summary>
    /// Walks the paged episode list until a page yields no chapter that has not been seen before,
    /// returning the chapters in the order they were encountered.
    /// </summary>
    /// <exception cref="HttpRequestException">A page request returned a non-success status code.</exception>
    private async Task<List<ChapterMetadata>> GetChapterList(uint novelId)
    {
        List<ChapterMetadata> chapters = new List<ChapterMetadata>();
        HashSet<uint> seenChapterIds = new HashSet<uint>();
        uint chapterOrder = 0;

        for (uint page = 0; ; page++)
        {
            // Fixed pause before every page request (presumably to go easy on the server).
            await Task.Delay(500);
            _logger.LogInformation("Next chapter batch");

            using var response = await _httpClient.PostAsync(EpisodeListEndpoint, new FormUrlEncodedContent(new Dictionary<string, string>
            {
                {"novel_no", novelId.ToString()},
                {"sort", "DOWN"},
                {"page", page.ToString()}
            }));

            // Fail loudly on HTTP errors so an error page is not mistaken for the end of the list.
            response.EnsureSuccessStatusCode();
            var responseContent = await response.Content.ReadAsStringAsync();

            bool addedAny = false;
            foreach (Match chapter in Regex.Matches(responseContent, ChapterEntryRegex))
            {
                string chapterId = chapter.Groups[1].Value;
                if (!seenChapterIds.Add(uint.Parse(chapterId)))
                {
                    // Already collected from an earlier page.
                    continue;
                }

                chapters.Add(new ChapterMetadata
                {
                    Revision = 0,
                    Order = chapterOrder,
                    Url = $"{SourceUrl}/viewer/{chapterId}",
                    Name = chapter.Groups[2].Value
                });
                chapterOrder++;
                addedAny = true;
            }

            // An empty page, or one that only repeats known chapters, marks the end of the list.
            if (!addedAny)
            {
                break;
            }
        }

        return chapters;
    }

    /// <summary>Downloads an image and returns its bytes.</summary>
    /// <param name="url">Image URL; anything not starting with "http" is prefixed with "https:".</param>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    private async Task<byte[]> GetImageData(string url)
    {
        if (!url.StartsWith("http", StringComparison.Ordinal))
        {
            url = "https:" + url;
        }

        using var image = await _httpClient.GetAsync(url);
        if (!image.IsSuccessStatusCode)
        {
            _logger.LogError("Attempting to fetch image with url {imgUrl} returned status code {code}.", url, image.StatusCode);
            throw new HttpRequestException($"Fetching image '{url}' failed with status code {(int)image.StatusCode}.");
        }

        return await image.Content.ReadAsByteArrayAsync();
    }
}