Syosetu single-chapter novel support, closes #1
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2022-08-03 14:33:08 -04:00
parent 0903278f14
commit 13b7306ca2

View File

@@ -29,7 +29,7 @@ public class SyosetuScraper : AbstractScraper
protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td"; protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td";
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td"; protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td | //th[contains(text(),'最終更新日')]/following-sibling::td";
private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl) private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl)
{ {
@@ -41,10 +41,25 @@ public class SyosetuScraper : AbstractScraper
} }
protected override List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl) protected List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl, string novelName, DateTime novelPostedDate, DateTime novelUpdatedDate)
{ {
string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d"; string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d";
var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern); var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern);
// Single-chapter Syosetu novel: no chapter links matched, so the novel itself is returned as chapter 1
if (nodes == null)
{
return new List<Chapter>
{
new Chapter
{
ChapterNumber = 1,
Name = novelName,
Url = novelUrl,
DatePosted = novelPostedDate,
DateUpdated = novelUpdatedDate
}
};
}
return nodes.Select((node,i) => return nodes.Select((node,i) =>
{ {
var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern); var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern);
@@ -93,7 +108,7 @@ public class SyosetuScraper : AbstractScraper
protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl) protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl)
{ {
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern)[1].InnerText).ToUniversalTime(); return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern).Last().InnerText).ToUniversalTime();
} }
protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl) protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl)
@@ -148,24 +163,30 @@ public class SyosetuScraper : AbstractScraper
throw new Exception("Error parsing document"); throw new Exception("Error parsing document");
} }
var novelName = GetNovelTitle(baseDoc,
baseUrl,
novelUrl);
var lastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl);
var datePosted = GetPostedDate(novelInfoPage,
baseUrl,
novelUrl);
return new Novel return new Novel
{ {
Title = GetNovelTitle(baseDoc, Title = novelName,
baseUrl,
novelUrl),
Author = GetAuthor(baseDoc, Author = GetAuthor(baseDoc,
baseUrl, baseUrl,
novelUrl), novelUrl),
Chapters = GetChapters(baseDoc, Chapters = GetChapters(baseDoc,
baseUrl, baseUrl,
novelUrl), novelUrl,
LastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl), novelName,
datePosted,
lastUpdated),
LastUpdated = lastUpdated,
Tags = GetTags(novelInfoPage, Tags = GetTags(novelInfoPage,
baseUrl, baseUrl,
novelUrl), novelUrl),
DatePosted = GetPostedDate(novelInfoPage, DatePosted = datePosted,
baseUrl,
novelUrl),
Url = novelUrl Url = novelUrl
}; };
} }