Added initial completion-status scraping and fixed the Kakuyomu date-posted lookup
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2022-07-21 11:39:15 -04:00
parent ceb8a0db8e
commit 050ea7aa80
2 changed files with 5 additions and 2 deletions

View File

@@ -3,6 +3,7 @@ using System.Net.Http.Headers;
 using System.Text.RegularExpressions;
 using HtmlAgilityPack;
 using Common.Models.DBDomain;
+using Common.Models.Enums;
 namespace WebNovelPortalAPI.Scrapers;
@@ -38,6 +39,7 @@ public abstract class AbstractScraper : IScraper
 protected virtual string? TagPattern { get; }
 protected virtual string? DatePostedPattern { get; }
 protected virtual string? DateUpdatedPattern { get; }
+protected virtual NovelStatus DefaultStatus => NovelStatus.Unknown;
 protected async Task<HtmlDocument> GetPage(string url)
 {
@@ -156,7 +158,8 @@ public abstract class AbstractScraper : IScraper
 LastUpdated = GetLastUpdatedDate(doc, baseUrl, novelUrl),
 Tags = GetTags(doc, baseUrl, novelUrl),
 Title = GetNovelTitle(doc, baseUrl, novelUrl),
-Url = novelUrl
+Url = novelUrl,
+Status = DefaultStatus
 };
 }

View File

@@ -25,7 +25,7 @@ public class KakuyomuScraper : AbstractScraper
 protected override string? TagPattern => @"//span[@itemprop='keywords']/a";
-protected override string? DatePostedPattern => @"//time[@itemprop='datePublished']";
+protected override string? DatePostedPattern => @"//section[@id='work-information']//time[@itemprop='datePublished']";
 protected override string? DateUpdatedPattern => @"//time[@itemprop='dateModified']";