Initial commit
This commit is contained in:
@@ -0,0 +1,200 @@
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using FictionArchive.Common.Enums;
|
||||
using FictionArchive.Service.NovelService.Constants;
|
||||
using FictionArchive.Service.NovelService.Models.Enums;
|
||||
using FictionArchive.Service.NovelService.Models.SourceAdapters;
|
||||
|
||||
namespace FictionArchive.Service.NovelService.Services.SourceAdapters.Novelpia;
|
||||
|
||||
/// <summary>
/// <see cref="ISourceAdapter"/> implementation that scrapes novel metadata and
/// chapter text from novelpia.com using regular expressions over the returned
/// HTML and the site's <c>/proc/*</c> endpoints.
/// </summary>
/// <remarks>
/// The episode-list and chapter-download endpoints are relative paths, so the
/// injected <see cref="HttpClient"/> is presumably configured with a
/// <c>BaseAddress</c> pointing at the site (and any required cookies) — TODO confirm
/// against the DI registration.
/// </remarks>
public class NovelpiaAdapter : ISourceAdapter
{
    private readonly HttpClient _httpClient;
    private readonly ILogger _logger;

    // Dots are escaped so that e.g. "novelpiaXcom" is not accepted as the host.
    private const string NovelIdRegex = @"novelpia\.com\/novel\/(\d+)";
    private const string ChapterIdRegex = @"novelpia\.com\/viewer\/(\d+)";
    private const string SupportedNovelUrlRegex = @"https://novelpia\.com/novel/(\d+)";
    private const string EpisodeListEndpoint = "/proc/episode_list";
    private const string ChapterDownloadEndpoint = "/proc/viewer_data/";

    private const string SourceKey = "novelpia";
    private const string SourceName = "Novelpia";
    private const string SourceUrl = "https://novelpia.com";

    // Marker text ("identity verification") whose presence in a viewer response
    // is treated as a failed chapter download.
    private const string ChapterDownloadFailedMessage = "본인인증";

    // Badge texts recognised on the novel page.
    private const string AdultBadgeText = "19";
    private const string CompletedBadgeText = "완결";

    // Pause between successive episode-list requests.
    private const int EpisodePageDelayMilliseconds = 500;

    /// <summary>Creates the adapter.</summary>
    /// <param name="httpClient">Client used for all requests to the site.</param>
    /// <param name="logger">Logger for progress messages.</param>
    public NovelpiaAdapter(HttpClient httpClient, ILogger<NovelpiaAdapter> logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>Describes this source (name, key and base URL). A new instance is returned on every access.</summary>
    public SourceDescriptor SourceDescriptor => new SourceDescriptor
    {
        Name = SourceName,
        Key = SourceKey,
        Url = SourceUrl
    };

    /// <summary>
    /// Reports whether <paramref name="url"/> looks like a Novelpia novel page
    /// (<c>https://novelpia.com/novel/{id}</c>).
    /// </summary>
    /// <param name="url">Candidate URL; <c>null</c> yields <c>false</c>.</param>
    /// <returns>A completed task holding the match result.</returns>
    public Task<bool> CanProcessNovel(string url)
    {
        // No asynchronous work is needed, so return an already-completed task
        // instead of paying for an async state machine.
        bool canProcess = url is not null && Regex.IsMatch(url, SupportedNovelUrlRegex);
        return Task.FromResult(canProcess);
    }

    /// <summary>
    /// Downloads the novel page and its paged episode list and assembles a
    /// <see cref="NovelMetadata"/> from them.
    /// </summary>
    /// <param name="novelUrl">Absolute URL of the novel page; must contain <c>novelpia.com/novel/{id}</c>.</param>
    /// <returns>Metadata including name, author, description, tags, status and chapter list.</returns>
    /// <exception cref="FormatException">The novel id cannot be extracted from <paramref name="novelUrl"/>.</exception>
    public async Task<NovelMetadata> GetMetadata(string novelUrl)
    {
        // Page markup this method relies on:
        //   title:    <div class="ep-info-line epnew-novel-title">TITLE</div>
        //   author:   <a class="writer-name" href="/user/ID">NAME</a>
        //   chapters: fetched separately from the episode_list proc endpoint
        uint novelId = ParseIdFromUrl(novelUrl, NovelIdRegex, "novel");

        var novel = new NovelMetadata
        {
            Url = novelUrl,
            RawLanguage = Language.Kr,
            ExternalId = novelId.ToString(),
            SystemTags = new List<string>(),
            SourceTags = new List<string>(),
            Chapters = new List<ChapterMetadata>(),
            SourceDescriptor = SourceDescriptor
        };

        var novelData = await _httpClient.GetStringAsync(novelUrl);

        // Lazy capture: stop at the first closing </div> so that single-line
        // (minified) HTML does not drag trailing markup into the title.
        var novelNameMatch = Regex.Match(novelData, @"<div class=""ep-info-line epnew-novel-title"">(.+?)<\/div>");
        var authorMatch = Regex.Match(novelData, @"(?s)<a\s+class=""writer-name""\s+href=""([^""]+)"">\s*(.*?)\s*<\/a>");
        var descriptionMatch = Regex.Match(novelData, @"(?s)<div\s+class=""synopsis"">\s*(.*?)\s*<\/div>");

        // Unmatched groups yield empty strings, so missing markup degrades to
        // empty fields rather than throwing.
        novel.Name = novelNameMatch.Groups[1].Value;
        novel.Description = descriptionMatch.Groups[1].Value;
        novel.AuthorName = authorMatch.Groups[2].Value;
        // Group 1 is the href attribute (as written in the markup, e.g. "/user/ID");
        // group 2 is the display name.
        novel.AuthorUrl = authorMatch.Groups[1].Value;

        ApplyBadges(novel, novelData);
        ApplySourceTags(novel, novelData);

        novel.Chapters = await FetchChaptersAsync(novelId);

        return novel;
    }

    /// <summary>
    /// Downloads one chapter via the viewer-data endpoint and concatenates its
    /// HTML-decoded text fragments.
    /// </summary>
    /// <param name="chapterUrl">URL containing <c>novelpia.com/viewer/{id}</c>.</param>
    /// <returns>The chapter content with cover and hidden fragments removed.</returns>
    /// <exception cref="FormatException">The chapter id cannot be extracted from <paramref name="chapterUrl"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response is empty or contains the download-failed marker.
    /// </exception>
    public async Task<string> GetRawChapter(string chapterUrl)
    {
        uint chapterId = ParseIdFromUrl(chapterUrl, ChapterIdRegex, "chapter");
        var endpoint = ChapterDownloadEndpoint + chapterId;

        using var result = await _httpClient.PostAsync(endpoint, null);
        var responseContent = await result.Content.ReadAsStringAsync();

        if (string.IsNullOrEmpty(responseContent) || responseContent.Contains(ChapterDownloadFailedMessage))
        {
            throw new InvalidOperationException(
                $"Failed to download chapter {chapterId} from {SourceName}: the response was empty or contained the failure marker.");
        }

        var builder = new StringBuilder();
        using var doc = JsonDocument.Parse(responseContent);

        // The payload carries the chapter as an array "s" of objects with a "text" property.
        JsonElement fragments = doc.RootElement.GetProperty("s");

        foreach (JsonElement fragment in fragments.EnumerateArray())
        {
            // GetString() returns null for a JSON null; skip instead of dereferencing it.
            var text = fragment.GetProperty("text").GetString();
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            // Drop the cover image wrapper and fragments styled as fully transparent.
            if (text.Contains("cover-wrapper") || text.Contains("opacity: 0"))
            {
                continue;
            }

            builder.Append(WebUtility.HtmlDecode(text));
        }

        return builder.ToString();
    }

    /// <summary>Extracts the numeric id captured by <paramref name="pattern"/> from <paramref name="url"/>.</summary>
    /// <exception cref="FormatException">The URL is null or does not match the pattern.</exception>
    private static uint ParseIdFromUrl(string url, string pattern, string kind)
    {
        var match = url is null ? Match.Empty : Regex.Match(url, pattern);
        if (!match.Success)
        {
            // Same exception type uint.Parse("") would have raised, but with context.
            throw new FormatException($"Could not extract a {SourceName} {kind} id from URL '{url}'.");
        }

        return uint.Parse(match.Groups[1].Value);
    }

    /// <summary>
    /// Reads the <c>in-badge</c> spans and derives the NSFW system tag and the raw status.
    /// When no badges are found the status is left untouched.
    /// </summary>
    private static void ApplyBadges(NovelMetadata novel, string novelData)
    {
        var badgeSet = Regex.Match(novelData, @"(?s)<p\s+class=""in-badge"">(.*?)<\/p>");
        var badgeMatches = Regex.Matches(badgeSet.Groups[1].Value, @"<span[^>]*>(.*?)<\/span>");
        if (badgeMatches.Count == 0)
        {
            return;
        }

        bool isCompleted = false;
        foreach (Match badge in badgeMatches)
        {
            var innerText = badge.Groups[1].Value;
            if (innerText == AdultBadgeText)
            {
                novel.SystemTags.Add(SystemTags.Nsfw);
            }
            else if (innerText == CompletedBadgeText)
            {
                isCompleted = true;
            }
        }

        // Decide once after scanning every badge so that a later, unrelated badge
        // cannot overwrite a "completed" badge seen earlier.
        novel.RawStatus = isCompleted ? NovelStatus.Completed : NovelStatus.InProgress;
    }

    /// <summary>Adds each distinct <c>#tag</c> found in the <c>writer-tag</c> paragraph, in page order.</summary>
    private static void ApplySourceTags(NovelMetadata novel, string novelData)
    {
        var tagSetMatch = Regex.Match(novelData, @"(?s)<p\s+class=""writer-tag"">(.*?)<\/p>");
        var tagMatches = Regex.Matches(tagSetMatch.Groups[1].Value, @"<span[^>]*>#(.*?)<\/span>");

        var seenTags = new HashSet<string>();
        foreach (Match tagMatch in tagMatches)
        {
            var tagText = tagMatch.Groups[1].Value;
            if (seenTags.Add(tagText))
            {
                novel.SourceTags.Add(tagText);
            }
        }
    }

    /// <summary>
    /// Pages through the episode-list endpoint until a page is empty or starts
    /// with an already-seen chapter, numbering chapters in the order received.
    /// </summary>
    private async Task<List<ChapterMetadata>> FetchChaptersAsync(uint novelId)
    {
        var chapters = new List<ChapterMetadata>();
        var seenChapterIds = new HashSet<uint>();
        uint chapterOrder = 0;

        for (uint page = 0; ; page++)
        {
            await Task.Delay(EpisodePageDelayMilliseconds);
            _logger.LogInformation("Next chapter batch");

            using var response = await _httpClient.PostAsync(EpisodeListEndpoint, new FormUrlEncodedContent(new Dictionary<string, string>
            {
                {"novel_no", novelId.ToString()},
                {"sort", "DOWN"},
                {"page", page.ToString()}
            }));
            var responseContent = await response.Content.ReadAsStringAsync();
            var capturedChapters = Regex.Matches(responseContent, @"id=""bookmark_(\d+)""></i>(.+?)</b>");

            // An empty page (no chapters, end of list, or an error page) ends paging;
            // indexing [0] on it would otherwise throw.
            if (capturedChapters.Count == 0)
            {
                break;
            }

            // A page that starts with a chapter we already have is taken to mean
            // the server has wrapped around to an earlier page.
            if (seenChapterIds.Contains(uint.Parse(capturedChapters[0].Groups[1].Value)))
            {
                break;
            }

            foreach (Match chapter in capturedChapters)
            {
                string chapterId = chapter.Groups[1].Value;
                string chapterName = chapter.Groups[2].Value;

                chapters.Add(new ChapterMetadata
                {
                    Revision = 0,
                    Order = chapterOrder,
                    Url = $"https://novelpia.com/viewer/{chapterId}",
                    Name = chapterName
                });
                seenChapterIds.Add(uint.Parse(chapterId));
                chapterOrder++;
            }
        }

        return chapters;
    }
}
|
||||
Reference in New Issue
Block a user