Syosetu single-chapter novel support, closes #1
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -29,7 +29,7 @@ public class SyosetuScraper : AbstractScraper
|
||||
|
||||
protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td";
|
||||
|
||||
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td";
|
||||
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td | //th[contains(text(),'最終更新日')]/following-sibling::td";
|
||||
|
||||
private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl)
|
||||
{
|
||||
@@ -41,10 +41,25 @@ public class SyosetuScraper : AbstractScraper
|
||||
|
||||
}
|
||||
|
||||
protected override List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl)
|
||||
protected List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl, string novelName, DateTime novelPostedDate, DateTime novelUpdatedDate)
|
||||
{
|
||||
string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d";
|
||||
var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern);
|
||||
// single chapter syosetu novel
|
||||
if (nodes == null)
|
||||
{
|
||||
return new List<Chapter>
|
||||
{
|
||||
new Chapter
|
||||
{
|
||||
ChapterNumber = 1,
|
||||
Name = novelName,
|
||||
Url = novelUrl,
|
||||
DatePosted = novelPostedDate,
|
||||
DateUpdated = novelUpdatedDate
|
||||
}
|
||||
};
|
||||
}
|
||||
return nodes.Select((node,i) =>
|
||||
{
|
||||
var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern);
|
||||
@@ -93,7 +108,7 @@ public class SyosetuScraper : AbstractScraper
|
||||
|
||||
protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl)
|
||||
{
|
||||
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern)[1].InnerText).ToUniversalTime();
|
||||
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern).Last().InnerText).ToUniversalTime();
|
||||
}
|
||||
|
||||
protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl)
|
||||
@@ -148,24 +163,30 @@ public class SyosetuScraper : AbstractScraper
|
||||
throw new Exception("Error parsing document");
|
||||
}
|
||||
|
||||
var novelName = GetNovelTitle(baseDoc,
|
||||
baseUrl,
|
||||
novelUrl);
|
||||
var lastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl);
|
||||
var datePosted = GetPostedDate(novelInfoPage,
|
||||
baseUrl,
|
||||
novelUrl);
|
||||
return new Novel
|
||||
{
|
||||
Title = GetNovelTitle(baseDoc,
|
||||
baseUrl,
|
||||
novelUrl),
|
||||
Title = novelName,
|
||||
Author = GetAuthor(baseDoc,
|
||||
baseUrl,
|
||||
novelUrl),
|
||||
Chapters = GetChapters(baseDoc,
|
||||
baseUrl,
|
||||
novelUrl),
|
||||
LastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl),
|
||||
novelUrl,
|
||||
novelName,
|
||||
datePosted,
|
||||
lastUpdated),
|
||||
LastUpdated = lastUpdated,
|
||||
Tags = GetTags(novelInfoPage,
|
||||
baseUrl,
|
||||
novelUrl),
|
||||
DatePosted = GetPostedDate(novelInfoPage,
|
||||
baseUrl,
|
||||
novelUrl),
|
||||
DatePosted = datePosted,
|
||||
Url = novelUrl
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user