Files
WebNovelPortal/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs
littlefoot 050ea7aa80
All checks were successful
continuous-integration/drone/push Build is passing
Added start of completion status scraping and fixed kakuyomu date posted lookup
2022-07-21 11:39:15 -04:00

47 lines
1.8 KiB
C#

using System.Reflection.Metadata;
using System.Text.RegularExpressions;
using Common.Models.DBDomain;
using HtmlAgilityPack;
namespace WebNovelPortalAPI.Scrapers;
/// <summary>
/// Scraper configuration for novels hosted on kakuyomu.jp. Supplies the URL regexes and
/// XPath selectors consumed by <see cref="AbstractScraper"/> and overrides chapter date
/// extraction and metadata tag generation.
/// </summary>
public class KakuyomuScraper : AbstractScraper
{
    /// <summary>Regex matching a Kakuyomu work URL (<c>/works/&lt;digits&gt;</c>, optional trailing slash).</summary>
    protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";

    /// <summary>Regex matching the scheme-and-host portion of a Kakuyomu URL.</summary>
    protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";

    /// <summary>XPath of the anchor inside the element with id <c>workTitle</c>.</summary>
    protected override string? WorkTitlePattern => @"//*[@id='workTitle']/a";

    /// <summary>XPath of the anchor inside the element with id <c>workAuthor-activityName</c>.</summary>
    protected override string? AuthorNamePattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>Same anchor as <see cref="AuthorNamePattern"/>; presumably the base class reads its link target.</summary>
    protected override string? AuthorLinkPattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>XPath selecting every table-of-contents episode anchor.</summary>
    protected override string? ChapterUrlPattern => @"//a[@class='widget-toc-episode-episodeTitle']";

    /// <summary>Relative XPath for the chapter name; presumably evaluated against a chapter anchor by the base class.</summary>
    protected override string? ChapterNamePattern => @"span";

    /// <summary>Relative XPath, evaluated against the chapter anchor, of the element carrying the posted timestamp.</summary>
    protected override string? ChapterPostedPattern => @"time";

    /// <summary>Defers to the base class; this scraper never reports a per-chapter updated time itself.</summary>
    protected override string? ChapterUpdatedPattern => base.ChapterUpdatedPattern;

    /// <summary>XPath selecting the keyword anchors of the work.</summary>
    protected override string? TagPattern => @"//span[@itemprop='keywords']/a";

    /// <summary>XPath of the <c>datePublished</c> time element inside the <c>work-information</c> section.</summary>
    protected override string? DatePostedPattern => @"//section[@id='work-information']//time[@itemprop='datePublished']";

    /// <summary>XPath of the <c>dateModified</c> time element.</summary>
    protected override string? DateUpdatedPattern => @"//time[@itemprop='dateModified']";

    /// <summary>
    /// Reads the posted timestamp of a chapter from the <c>datetime</c> attribute of the
    /// element matched by <see cref="ChapterPostedPattern"/> under <paramref name="linkNode"/>.
    /// </summary>
    /// <param name="linkNode">Chapter anchor node that is searched for the timestamp element.</param>
    /// <param name="baseNode">Unused by this scraper.</param>
    /// <param name="baseUrl">Unused by this scraper.</param>
    /// <param name="novelUrl">Unused by this scraper.</param>
    /// <returns>
    /// The posted time converted to UTC, or <c>null</c> when the element or attribute is
    /// missing or the value cannot be parsed. <c>Updated</c> is always <c>null</c>.
    /// </returns>
    protected override (DateTime? Posted, DateTime? Updated) GetDateTimeForChapter(HtmlNode linkNode, HtmlNode baseNode, string baseUrl,
        string novelUrl)
    {
        // SelectSingleNode yields null when nothing matches and the attribute indexer yields
        // null for an absent attribute, so guard both instead of dereferencing blindly.
        var rawPosted = linkNode.SelectSingleNode(ChapterPostedPattern)?.Attributes["datetime"]?.Value;
        if (string.IsNullOrWhiteSpace(rawPosted))
        {
            return (null, null);
        }

        // The attribute is machine-readable, so parse it culture-independently; the host's
        // current culture must not influence how the timestamp is interpreted.
        var parsed = DateTime.TryParse(
            rawPosted,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var posted);

        return parsed ? (posted.ToUniversalTime(), null) : (null, null);
    }

    /// <summary>
    /// Builds the metadata tags attached to every work from this site: the site tag derived
    /// from <paramref name="baseUrl"/> and the original-work tag.
    /// </summary>
    /// <param name="document">Unused by this scraper.</param>
    /// <param name="baseUrl">Base URL passed to <c>Tag.GetSiteTag</c>.</param>
    /// <param name="novelUrl">Unused by this scraper.</param>
    /// <returns>A list containing the two metadata tags.</returns>
    protected override IEnumerable<Tag> GetMetadataTags(HtmlDocument document, string baseUrl, string novelUrl)
    {
        return new List<Tag>
        {
            Tag.GetSiteTag(baseUrl),
            Tag.GetOriginalWorkTag()
        };
    }
}