Syosetu single-chapter novel support, closes #1
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2022-08-03 14:33:08 -04:00
parent 0903278f14
commit 13b7306ca2

View File

@@ -29,7 +29,7 @@ public class SyosetuScraper : AbstractScraper
// XPath: the <td> cells that are following siblings of a <th> whose text is exactly 掲載日 ("publication date").
protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td";
// NOTE(review): DateUpdatedPattern is declared twice below. This looks like the removed/added pair of a
// diff hunk rendered without +/- markers — confirm that only one declaration exists in the real file.
// Narrower form: <td> cells following any <th> whose text contains 掲載日.
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td";
// Wider form: XPath union (|) of the <td> cells following a <th> containing 掲載日 and those following
// a <th> containing 最終更新日 ("last updated date").
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td | //th[contains(text(),'最終更新日')]/following-sibling::td";
private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl)
{
@@ -41,10 +41,25 @@ public class SyosetuScraper : AbstractScraper
}
protected override List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl)
protected List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl, string novelName, DateTime novelPostedDate, DateTime novelUpdatedDate)
{
string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d";
var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern);
// Single-chapter Syosetu novel: nothing matched ChapterUrlPattern, so the novel itself is returned as chapter 1.
if (nodes == null)
{
return new List<Chapter>
{
new Chapter
{
ChapterNumber = 1,
Name = novelName,
Url = novelUrl,
DatePosted = novelPostedDate,
DateUpdated = novelUpdatedDate
}
};
}
return nodes.Select((node,i) =>
{
var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern);
@@ -93,7 +108,7 @@ public class SyosetuScraper : AbstractScraper
/// <summary>
/// Reads the novel's last-updated timestamp: selects nodes from <paramref name="document"/> with
/// <c>DateUpdatedPattern</c>, parses one node's inner text with <c>DateTime.Parse</c>, and converts
/// the result with <c>ToUniversalTime()</c>.
/// </summary>
/// <param name="document">Parsed HTML document whose root node is queried.</param>
/// <param name="baseUrl">Not read by this method.</param>
/// <param name="novelUrl">Not read by this method.</param>
/// <returns>The parsed date after <c>ToUniversalTime()</c>.</returns>
protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl)
{
// NOTE(review): two consecutive return statements follow. They look like the removed/added lines of a
// diff hunk rendered without +/- markers. As written, only the first executes and the second is
// unreachable — confirm which one the real file keeps.
// Variant 1: uses the node at index 1 (the second match), so it presumably needs at least two matches.
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern)[1].InnerText).ToUniversalTime();
// Variant 2: uses the last matched node, whatever its position.
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern).Last().InnerText).ToUniversalTime();
}
protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl)
@@ -148,24 +163,30 @@ public class SyosetuScraper : AbstractScraper
throw new Exception("Error parsing document");
}
var novelName = GetNovelTitle(baseDoc,
baseUrl,
novelUrl);
var lastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl);
var datePosted = GetPostedDate(novelInfoPage,
baseUrl,
novelUrl);
return new Novel
{
Title = GetNovelTitle(baseDoc,
baseUrl,
novelUrl),
Title = novelName,
Author = GetAuthor(baseDoc,
baseUrl,
novelUrl),
Chapters = GetChapters(baseDoc,
baseUrl,
novelUrl),
LastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl),
novelUrl,
novelName,
datePosted,
lastUpdated),
LastUpdated = lastUpdated,
Tags = GetTags(novelInfoPage,
baseUrl,
novelUrl),
DatePosted = GetPostedDate(novelInfoPage,
baseUrl,
novelUrl),
DatePosted = datePosted,
Url = novelUrl
};
}