37 lines
1.5 KiB
C#
37 lines
1.5 KiB
C#
using System.Reflection.Metadata;
|
|
using System.Text.RegularExpressions;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace WebNovelPortalAPI.Scrapers;
|
|
|
|
/// <summary>
/// Scraper configuration for work pages hosted on kakuyomu.jp.
/// </summary>
/// <remarks>
/// The class supplies URL regexes and XPath selectors through the overridable
/// members of <see cref="AbstractScraper"/>. How the base class consumes each
/// pattern is not visible in this file; the per-member notes below describe only
/// what the pattern itself selects.
/// </remarks>
public class KakuyomuScraper : AbstractScraper
{
    /// <summary>
    /// Regex for a work URL: http or https, host <c>kakuyomu.jp</c>, path
    /// <c>/works/</c> followed by digits and an optional trailing slash.
    /// </summary>
    protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";

    /// <summary>Regex for the site root (scheme plus host, no path).</summary>
    protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";

    /// <summary>XPath of the anchor inside the element with id <c>workTitle</c>.</summary>
    protected override string? WorkTitlePattern => @"//*[@id='workTitle']/a";

    /// <summary>XPath of the anchor inside the element with id <c>workAuthor-activityName</c>.</summary>
    protected override string? AuthorNamePattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>
    /// Same anchor as <see cref="AuthorNamePattern"/>; presumably the base class reads
    /// the link target from it rather than its text (confirm against AbstractScraper).
    /// </summary>
    protected override string? AuthorLinkPattern => @"//*[@id='workAuthor-activityName']/a";

    /// <summary>XPath of every anchor whose class is exactly <c>widget-toc-episode-episodeTitle</c>.</summary>
    protected override string? ChapterUrlPattern => @"//a[@class='widget-toc-episode-episodeTitle']";

    /// <summary>
    /// Relative XPath selecting a <c>span</c> child; presumably evaluated against a
    /// chapter link node (confirm against AbstractScraper).
    /// </summary>
    protected override string? ChapterNamePattern => @"span";

    /// <summary>
    /// Relative XPath selecting a <c>time</c> child. <see cref="GetDateTimeForChapter"/>
    /// evaluates it against the chapter link node.
    /// </summary>
    protected override string? ChapterPostedPattern => @"time";

    /// <summary>Defers to the base implementation; no Kakuyomu-specific selector is defined.</summary>
    protected override string? ChapterUpdatedPattern => base.ChapterUpdatedPattern;

    /// <summary>XPath of the anchors inside <c>span[itemprop='keywords']</c>.</summary>
    protected override string? TagPattern => @"//span[@itemprop='keywords']/a";

    /// <summary>XPath of the <c>time</c> element marked <c>itemprop='datePublished'</c>.</summary>
    protected override string? DatePostedPattern => @"//time[@itemprop='datePublished']";

    /// <summary>XPath of the <c>time</c> element marked <c>itemprop='dateModified'</c>.</summary>
    protected override string? DateUpdatedPattern => @"//time[@itemprop='dateModified']";

    /// <summary>
    /// Reads a chapter's posted timestamp from the <c>datetime</c> attribute of the
    /// node selected by <see cref="ChapterPostedPattern"/> under <paramref name="linkNode"/>.
    /// </summary>
    /// <param name="linkNode">Chapter link node the posted-date XPath is evaluated against.</param>
    /// <param name="baseNode">Not used by this implementation.</param>
    /// <param name="baseUrl">Not used by this implementation.</param>
    /// <param name="novelUrl">Not used by this implementation.</param>
    /// <returns>
    /// The posted time converted to UTC and a <c>null</c> updated time. Both elements
    /// are <c>null</c> when the pattern is unset, the node or its <c>datetime</c>
    /// attribute is missing, or the attribute value cannot be parsed as a date.
    /// </returns>
    protected override (DateTime? Posted, DateTime? Updated) GetDateTimeForChapter(HtmlNode linkNode, HtmlNode baseNode, string baseUrl,
        string novelUrl)
    {
        // The pattern is declared nullable, so guard before handing it to the XPath engine.
        var postedXPath = ChapterPostedPattern;
        if (string.IsNullOrEmpty(postedXPath))
        {
            return (null, null);
        }

        // A changed page layout can leave the node or the attribute absent;
        // treat that as "no date" instead of dereferencing null.
        var rawPosted = linkNode.SelectSingleNode(postedXPath)?.Attributes["datetime"]?.Value;
        if (string.IsNullOrWhiteSpace(rawPosted))
        {
            return (null, null);
        }

        // The attribute is machine-readable markup, so parse it culture-independently
        // and do not throw on malformed values.
        if (!DateTime.TryParse(
                rawPosted,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var posted))
        {
            return (null, null);
        }

        return (posted.ToUniversalTime(), null);
    }
}