Syosetu single chapter novel support, closes #1
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -29,7 +29,7 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
|
|
||||||
protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td";
|
protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td";
|
||||||
|
|
||||||
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td";
|
protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td | //th[contains(text(),'最終更新日')]/following-sibling::td";
|
||||||
|
|
||||||
private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl)
|
private async Task<HtmlDocument> GetInfoPage(string baseUrl, string novelUrl)
|
||||||
{
|
{
|
||||||
@@ -41,10 +41,25 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected override List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl)
|
protected List<Chapter> GetChapters(HtmlDocument document, string baseUrl, string novelUrl, string novelName, DateTime novelPostedDate, DateTime novelUpdatedDate)
|
||||||
{
|
{
|
||||||
string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d";
|
string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d";
|
||||||
var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern);
|
var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern);
|
||||||
|
// single chapter syosetu novel
|
||||||
|
if (nodes == null)
|
||||||
|
{
|
||||||
|
return new List<Chapter>
|
||||||
|
{
|
||||||
|
new Chapter
|
||||||
|
{
|
||||||
|
ChapterNumber = 1,
|
||||||
|
Name = novelName,
|
||||||
|
Url = novelUrl,
|
||||||
|
DatePosted = novelPostedDate,
|
||||||
|
DateUpdated = novelUpdatedDate
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
return nodes.Select((node,i) =>
|
return nodes.Select((node,i) =>
|
||||||
{
|
{
|
||||||
var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern);
|
var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern);
|
||||||
@@ -93,7 +108,7 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
|
|
||||||
protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl)
|
protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl)
|
||||||
{
|
{
|
||||||
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern)[1].InnerText).ToUniversalTime();
|
return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern).Last().InnerText).ToUniversalTime();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl)
|
protected override List<Tag> GetTags(HtmlDocument document, string baseUrl, string novelUrl)
|
||||||
@@ -148,24 +163,30 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
throw new Exception("Error parsing document");
|
throw new Exception("Error parsing document");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var novelName = GetNovelTitle(baseDoc,
|
||||||
|
baseUrl,
|
||||||
|
novelUrl);
|
||||||
|
var lastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl);
|
||||||
|
var datePosted = GetPostedDate(novelInfoPage,
|
||||||
|
baseUrl,
|
||||||
|
novelUrl);
|
||||||
return new Novel
|
return new Novel
|
||||||
{
|
{
|
||||||
Title = GetNovelTitle(baseDoc,
|
Title = novelName,
|
||||||
baseUrl,
|
|
||||||
novelUrl),
|
|
||||||
Author = GetAuthor(baseDoc,
|
Author = GetAuthor(baseDoc,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
novelUrl),
|
novelUrl),
|
||||||
Chapters = GetChapters(baseDoc,
|
Chapters = GetChapters(baseDoc,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
novelUrl),
|
novelUrl,
|
||||||
LastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl),
|
novelName,
|
||||||
|
datePosted,
|
||||||
|
lastUpdated),
|
||||||
|
LastUpdated = lastUpdated,
|
||||||
Tags = GetTags(novelInfoPage,
|
Tags = GetTags(novelInfoPage,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
novelUrl),
|
novelUrl),
|
||||||
DatePosted = GetPostedDate(novelInfoPage,
|
DatePosted = datePosted,
|
||||||
baseUrl,
|
|
||||||
novelUrl),
|
|
||||||
Url = novelUrl
|
Url = novelUrl
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user