Cleaning up tags and url regexes, closes #6
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -142,8 +142,8 @@ public abstract class AbstractScraper : IScraper
|
||||
public virtual async Task<Novel> ScrapeNovel(string url)
|
||||
{
|
||||
|
||||
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value;
|
||||
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value;
|
||||
var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||
var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||
var doc = await GetPage(novelUrl);
|
||||
if (string.IsNullOrEmpty(doc.Text))
|
||||
{
|
||||
|
||||
@@ -7,7 +7,7 @@ namespace WebNovelPortalAPI.Scrapers;
|
||||
|
||||
public class KakuyomuScraper : AbstractScraper
|
||||
{
|
||||
protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";
|
||||
protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+";
|
||||
|
||||
protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace WebNovelPortalAPI.Scrapers;
|
||||
public class SyosetuScraper : AbstractScraper
|
||||
{
|
||||
|
||||
protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/\w+\/?";
|
||||
protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/n\w+";
|
||||
|
||||
protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com";
|
||||
|
||||
@@ -35,7 +35,7 @@ public class SyosetuScraper : AbstractScraper
|
||||
{
|
||||
string novelInfoBase = $"/novelview/infotop/ncode/";
|
||||
string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?";
|
||||
string novelCode = new Regex(novelRegex).Match(novelUrl).Groups[1].Value;
|
||||
string novelCode = new Regex(novelRegex, RegexOptions.IgnoreCase).Match(novelUrl).Groups[1].Value;
|
||||
string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}";
|
||||
return await GetPage(novelInfoPage);
|
||||
|
||||
@@ -134,8 +134,8 @@ public class SyosetuScraper : AbstractScraper
|
||||
|
||||
public override async Task<Novel> ScrapeNovel(string url)
|
||||
{
|
||||
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value;
|
||||
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value;
|
||||
var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||
var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||
HtmlDocument baseDoc;
|
||||
HtmlDocument novelInfoPage;
|
||||
try
|
||||
|
||||
Reference in New Issue
Block a user