Files
WebNovelPortal/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs
littlefoot 12a1f48fbd
All checks were successful
continuous-integration/drone/push Build is passing
Fix up times and remove extraneous api inject from NovelList.razor
2022-07-17 22:26:22 -04:00

37 lines
1.5 KiB
C#

using System.Reflection.Metadata;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace WebNovelPortalAPI.Scrapers;
/// <summary>
/// Scraper configuration for works hosted on kakuyomu.jp.
/// </summary>
/// <remarks>
/// The pattern properties below override members of <see cref="AbstractScraper"/>; how the base
/// class applies them is not visible in this file. NOTE(review): the URL patterns look like
/// regular expressions and the remaining patterns look like XPath expressions - confirm against
/// <see cref="AbstractScraper"/>.
/// </remarks>
public class KakuyomuScraper : AbstractScraper
{
    /// <summary>Pattern for a Kakuyomu work URL (<c>/works/&lt;digits&gt;</c>, optional trailing slash).</summary>
    protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";

    /// <summary>Pattern for the scheme-and-host portion of a Kakuyomu URL.</summary>
    protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";

    /// <summary>Selects the anchor inside the element with id <c>workTitle</c>.</summary>
    protected override string? WorkTitlePattern => @"//*[@id='workTitle']/a";

    /// <summary>Selects the anchor inside the element with id <c>workAuthor-activityName</c>.</summary>
    protected override string? AuthorNamePattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>Same anchor as <see cref="AuthorNamePattern"/>; presumably its <c>href</c> is the author link.</summary>
    protected override string? AuthorLinkPattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>Selects every anchor whose class is exactly <c>widget-toc-episode-episodeTitle</c>.</summary>
    protected override string? ChapterUrlPattern => @"//a[@class='widget-toc-episode-episodeTitle']";

    /// <summary>Relative pattern for the <c>span</c> child element (presumably of a chapter anchor).</summary>
    protected override string? ChapterNamePattern => @"span";

    /// <summary>
    /// Relative pattern for the <c>time</c> child element; evaluated against the chapter link node
    /// in <see cref="GetDateTimeForChapter"/>.
    /// </summary>
    protected override string? ChapterPostedPattern => @"time";

    /// <summary>Defers to the base implementation; this scraper defines no chapter-updated pattern of its own.</summary>
    protected override string? ChapterUpdatedPattern => base.ChapterUpdatedPattern;

    /// <summary>Selects anchors inside <c>span</c> elements marked <c>itemprop='keywords'</c>.</summary>
    protected override string? TagPattern => @"//span[@itemprop='keywords']/a";

    /// <summary>Selects the <c>time</c> element marked <c>itemprop='datePublished'</c>.</summary>
    protected override string? DatePostedPattern => @"//time[@itemprop='datePublished']";

    /// <summary>Selects the <c>time</c> element marked <c>itemprop='dateModified'</c>.</summary>
    protected override string? DateUpdatedPattern => @"//time[@itemprop='dateModified']";

    /// <summary>
    /// Reads the posted timestamp of a chapter from the <c>datetime</c> attribute of the node
    /// selected by <see cref="ChapterPostedPattern"/> under <paramref name="linkNode"/>.
    /// </summary>
    /// <param name="linkNode">Chapter link node that is searched for the timestamp element.</param>
    /// <param name="baseNode">Not used by this implementation.</param>
    /// <param name="baseUrl">Not used by this implementation.</param>
    /// <param name="novelUrl">Not used by this implementation.</param>
    /// <returns>
    /// The posted time converted to UTC and a <c>null</c> updated time. Both elements are
    /// <c>null</c> when the pattern is unset, the node or attribute is missing, or the attribute
    /// value cannot be parsed as a date.
    /// </returns>
    protected override (DateTime? Posted, DateTime? Updated) GetDateTimeForChapter(HtmlNode linkNode, HtmlNode baseNode, string baseUrl,
        string novelUrl)
    {
        var postedPattern = ChapterPostedPattern;
        if (string.IsNullOrEmpty(postedPattern))
        {
            return (null, null);
        }

        // SelectSingleNode yields null when nothing matches, and the attribute indexer yields
        // null when the attribute is absent, so both lookups are null-guarded.
        var rawPosted = linkNode.SelectSingleNode(postedPattern)?.Attributes["datetime"]?.Value;

        // The attribute is machine-readable markup, so parse it culture-independently and
        // without throwing on malformed input.
        var parsed = DateTime.TryParse(
            rawPosted,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var posted);

        if (!parsed)
        {
            return (null, null);
        }

        return (posted.ToUniversalTime(), null);
    }
}