From 050ea7aa804e99a0ca888a83b496a77b2f105d72 Mon Sep 17 00:00:00 2001 From: littlefoot Date: Thu, 21 Jul 2022 11:39:15 -0400 Subject: [PATCH] Added start of completion status scraping and fixed kakuyomu date posted lookup --- WebNovelPortalAPI/Scrapers/AbstractScraper.cs | 5 ++++- WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/WebNovelPortalAPI/Scrapers/AbstractScraper.cs b/WebNovelPortalAPI/Scrapers/AbstractScraper.cs index 579592b..5bdd30b 100644 --- a/WebNovelPortalAPI/Scrapers/AbstractScraper.cs +++ b/WebNovelPortalAPI/Scrapers/AbstractScraper.cs @@ -3,6 +3,7 @@ using System.Net.Http.Headers; using System.Text.RegularExpressions; using HtmlAgilityPack; using Common.Models.DBDomain; +using Common.Models.Enums; namespace WebNovelPortalAPI.Scrapers; @@ -38,6 +39,7 @@ public abstract class AbstractScraper : IScraper protected virtual string? TagPattern { get; } protected virtual string? DatePostedPattern { get; } protected virtual string? DateUpdatedPattern { get; } + protected virtual NovelStatus DefaultStatus => NovelStatus.Unknown; protected async Task GetPage(string url) { @@ -156,7 +158,8 @@ public abstract class AbstractScraper : IScraper LastUpdated = GetLastUpdatedDate(doc, baseUrl, novelUrl), Tags = GetTags(doc, baseUrl, novelUrl), Title = GetNovelTitle(doc, baseUrl, novelUrl), - Url = novelUrl + Url = novelUrl, + Status = DefaultStatus }; } diff --git a/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs b/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs index 91f9e49..24cf53a 100644 --- a/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs +++ b/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs @@ -25,7 +25,7 @@ public class KakuyomuScraper : AbstractScraper protected override string? TagPattern => @"//span[@itemprop='keywords']/a"; - protected override string? DatePostedPattern => @"//time[@itemprop='datePublished']"; + protected override string? DatePostedPattern => @"//section[@id='work-information']//time[@itemprop='datePublished']"; protected override string? DateUpdatedPattern => @"//time[@itemprop='dateModified']";