# Patch summary (preamble text; everything before the "diff --git" header is commentary).
#
# Target: WebNovelPortalAPI/Scrapers/SyosetuScraper.cs, class SyosetuScraper : AbstractScraper.
# Purpose: handle novels whose page yields no chapter-link nodes (the added code comment calls
# this a "single chapter syosetu novel") and widen how the last-updated date is located.
#
# Hunk 1 (@@ -29,7 +29,7 @@): DateUpdatedPattern becomes an XPath union ("|"). It still selects
#   the <td> following a <th> containing '掲載日', and now also the <td> following a <th>
#   containing '最終更新日'.
#
# Hunk 2 (@@ -41,10 +41,25 @@): GetChapters drops the `override` modifier and gains three
#   parameters (novelName, novelPostedDate, novelUpdatedDate). When
#   SelectNodes(ChapterUrlPattern) returns null it now returns a one-element list holding a
#   Chapter with ChapterNumber = 1, Name = novelName, Url = novelUrl and the two supplied dates;
#   otherwise it falls through to the existing nodes.Select(...) projection.
#
# Hunk 3 (@@ -93,7 +108,7 @@): GetLastUpdatedDate parses the InnerText of the LAST node selected
#   by DateUpdatedPattern instead of the node at index [1].
#
# Hunk 4 (@@ -148,24 +163,30 @@): in the method that builds the Novel (its header is not visible
#   in this hunk), the title, last-updated date and posted date are computed once into the
#   locals novelName / lastUpdated / datePosted. Those locals are passed to the new GetChapters
#   signature and reused for the Title, LastUpdated and DatePosted initializers.
#
# NOTE(review): GetChapters is no longer an `override` and its parameter list changed. Confirm
#   that AbstractScraper does not still require an implementation of the three-argument
#   GetChapters(document, baseUrl, novelUrl), and that no base-class caller relies on it.
# NOTE(review): SelectNodes(DateUpdatedPattern).Last() is dereferenced without a null check,
#   while the same patch guards SelectNodes(ChapterUrlPattern) against null. Confirm the info
#   page always contains at least one matching <th>.
# NOTE(review): DateTime.Parse is called without an explicit culture/format provider -- confirm
#   the host culture parses the site's date text as intended.
# NOTE(review): the patch text below appears to have lost its line breaks, and generic type
#   arguments look stripped (e.g. "List GetChapters", "new List {", "Task GetInfoPage").
#   Presumably an extraction artifact; verify against the original commit before applying.
#
diff --git a/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs b/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs index 97ad616..593bd28 100644 --- a/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs +++ b/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs @@ -29,7 +29,7 @@ public class SyosetuScraper : AbstractScraper protected override string? DatePostedPattern => @"//th[text()='掲載日']/following-sibling::td"; - protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td"; + protected override string? DateUpdatedPattern => @"//th[contains(text(),'掲載日')]/following-sibling::td | //th[contains(text(),'最終更新日')]/following-sibling::td"; private async Task GetInfoPage(string baseUrl, string novelUrl) { @@ -41,10 +41,25 @@ public class SyosetuScraper : AbstractScraper } - protected override List GetChapters(HtmlDocument document, string baseUrl, string novelUrl) + protected List GetChapters(HtmlDocument document, string baseUrl, string novelUrl, string novelName, DateTime novelPostedDate, DateTime novelUpdatedDate) { string dateUpdatedRegex = @"\d\d\d\d\/\d\d\/\d\d \d\d:\d\d"; var nodes = document.DocumentNode.SelectNodes(ChapterUrlPattern); + // single chapter syosetu novel + if (nodes == null) + { + return new List + { + new Chapter + { + ChapterNumber = 1, + Name = novelName, + Url = novelUrl, + DatePosted = novelPostedDate, + DateUpdated = novelUpdatedDate + } + }; + } return nodes.Select((node,i) => { var datePostedNode = node.ParentNode.SelectSingleNode(ChapterPostedPattern); @@ -93,7 +108,7 @@ public class SyosetuScraper : AbstractScraper protected override DateTime GetLastUpdatedDate(HtmlDocument document, string baseUrl, string novelUrl) { - return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern)[1].InnerText).ToUniversalTime(); + return DateTime.Parse(document.DocumentNode.SelectNodes(DateUpdatedPattern).Last().InnerText).ToUniversalTime(); } protected override List GetTags(HtmlDocument document, string baseUrl, string 
novelUrl) @@ -148,24 +163,30 @@ public class SyosetuScraper : AbstractScraper throw new Exception("Error parsing document"); } + var novelName = GetNovelTitle(baseDoc, + baseUrl, + novelUrl); + var lastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl); + var datePosted = GetPostedDate(novelInfoPage, + baseUrl, + novelUrl); return new Novel { - Title = GetNovelTitle(baseDoc, - baseUrl, - novelUrl), + Title = novelName, Author = GetAuthor(baseDoc, baseUrl, novelUrl), Chapters = GetChapters(baseDoc, baseUrl, - novelUrl), - LastUpdated = GetLastUpdatedDate(novelInfoPage, baseUrl, novelUrl), + novelUrl, + novelName, + datePosted, + lastUpdated), + LastUpdated = lastUpdated, Tags = GetTags(novelInfoPage, baseUrl, novelUrl), - DatePosted = GetPostedDate(novelInfoPage, - baseUrl, - novelUrl), + DatePosted = datePosted, Url = novelUrl }; }