From 0903278f14f7a5ddf3541c554ef6b8b5ed32c660 Mon Sep 17 00:00:00 2001 From: littlefoot Date: Wed, 3 Aug 2022 12:44:04 -0400 Subject: [PATCH] Cleaning up tags and url regexes, closes #6 --- Common/Models/DBDomain/Tag.cs | 2 +- WebNovelPortalAPI/Scrapers/AbstractScraper.cs | 4 ++-- WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs | 2 +- WebNovelPortalAPI/Scrapers/SyosetuScraper.cs | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Common/Models/DBDomain/Tag.cs b/Common/Models/DBDomain/Tag.cs index a1e42d1..1624c75 100644 --- a/Common/Models/DBDomain/Tag.cs +++ b/Common/Models/DBDomain/Tag.cs @@ -35,7 +35,7 @@ namespace Common.Models.DBDomain public static Tag GetOriginalWorkTag() { - return new Tag {TagValue = "original_work"}; + return new Tag {TagValue = "meta:original_work"}; } public static Tag GetNsfwTag() diff --git a/WebNovelPortalAPI/Scrapers/AbstractScraper.cs b/WebNovelPortalAPI/Scrapers/AbstractScraper.cs index 5bdd30b..bec32ea 100644 --- a/WebNovelPortalAPI/Scrapers/AbstractScraper.cs +++ b/WebNovelPortalAPI/Scrapers/AbstractScraper.cs @@ -142,8 +142,8 @@ public abstract class AbstractScraper : IScraper public virtual async Task ScrapeNovel(string url) { - var baseUrl = new Regex(BaseUrlPattern).Match(url).Value; - var novelUrl = new Regex(UrlMatchPattern).Match(url).Value; + var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value; + var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value; var doc = await GetPage(novelUrl); if (string.IsNullOrEmpty(doc.Text)) { diff --git a/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs b/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs index 24cf53a..69c6223 100644 --- a/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs +++ b/WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs @@ -7,7 +7,7 @@ namespace WebNovelPortalAPI.Scrapers; public class KakuyomuScraper : AbstractScraper { - protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?"; + protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+"; protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp"; diff --git a/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs b/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs index 9e0b27e..97ad616 100644 --- a/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs +++ b/WebNovelPortalAPI/Scrapers/SyosetuScraper.cs @@ -9,7 +9,7 @@ namespace WebNovelPortalAPI.Scrapers; public class SyosetuScraper : AbstractScraper { - protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/\w+\/?"; + protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/n\w+"; protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com"; @@ -35,7 +35,7 @@ public class SyosetuScraper : AbstractScraper { string novelInfoBase = $"/novelview/infotop/ncode/"; string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?"; - string novelCode = new Regex(novelRegex).Match(novelUrl).Groups[1].Value; + string novelCode = new Regex(novelRegex, RegexOptions.IgnoreCase).Match(novelUrl).Groups[1].Value; string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}"; return await GetPage(novelInfoPage); @@ -134,8 +134,8 @@ public class SyosetuScraper : AbstractScraper public override async Task ScrapeNovel(string url) { - var baseUrl = new Regex(BaseUrlPattern).Match(url).Value; - var novelUrl = new Regex(UrlMatchPattern).Match(url).Value; + var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value; + var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value; HtmlDocument baseDoc; HtmlDocument novelInfoPage; try