Cleaning up tags and URL regexes, closes #6
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2022-08-03 12:44:04 -04:00
parent d4c4f521ec
commit 0903278f14
4 changed files with 8 additions and 8 deletions

View File

@@ -35,7 +35,7 @@ namespace Common.Models.DBDomain
public static Tag GetOriginalWorkTag() public static Tag GetOriginalWorkTag()
{ {
return new Tag {TagValue = "original_work"}; return new Tag {TagValue = "meta:original_work"};
} }
public static Tag GetNsfwTag() public static Tag GetNsfwTag()

View File

@@ -142,8 +142,8 @@ public abstract class AbstractScraper : IScraper
public virtual async Task<Novel> ScrapeNovel(string url) public virtual async Task<Novel> ScrapeNovel(string url)
{ {
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value; var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value; var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
var doc = await GetPage(novelUrl); var doc = await GetPage(novelUrl);
if (string.IsNullOrEmpty(doc.Text)) if (string.IsNullOrEmpty(doc.Text))
{ {

View File

@@ -7,7 +7,7 @@ namespace WebNovelPortalAPI.Scrapers;
public class KakuyomuScraper : AbstractScraper public class KakuyomuScraper : AbstractScraper
{ {
protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?"; protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+";
protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp"; protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";

View File

@@ -9,7 +9,7 @@ namespace WebNovelPortalAPI.Scrapers;
public class SyosetuScraper : AbstractScraper public class SyosetuScraper : AbstractScraper
{ {
protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/\w+\/?"; protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/n\w+";
protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com"; protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com";
@@ -35,7 +35,7 @@ public class SyosetuScraper : AbstractScraper
{ {
string novelInfoBase = $"/novelview/infotop/ncode/"; string novelInfoBase = $"/novelview/infotop/ncode/";
string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?"; string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?";
string novelCode = new Regex(novelRegex).Match(novelUrl).Groups[1].Value; string novelCode = new Regex(novelRegex, RegexOptions.IgnoreCase).Match(novelUrl).Groups[1].Value;
string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}"; string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}";
return await GetPage(novelInfoPage); return await GetPage(novelInfoPage);
@@ -134,8 +134,8 @@ public class SyosetuScraper : AbstractScraper
public override async Task<Novel> ScrapeNovel(string url) public override async Task<Novel> ScrapeNovel(string url)
{ {
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value; var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value; var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
HtmlDocument baseDoc; HtmlDocument baseDoc;
HtmlDocument novelInfoPage; HtmlDocument novelInfoPage;
try try