Cleaning up tags and url regexes, closes #6
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -35,7 +35,7 @@ namespace Common.Models.DBDomain
|
|||||||
|
|
||||||
public static Tag GetOriginalWorkTag()
|
public static Tag GetOriginalWorkTag()
|
||||||
{
|
{
|
||||||
return new Tag {TagValue = "original_work"};
|
return new Tag {TagValue = "meta:original_work"};
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Tag GetNsfwTag()
|
public static Tag GetNsfwTag()
|
||||||
|
|||||||
@@ -142,8 +142,8 @@ public abstract class AbstractScraper : IScraper
|
|||||||
public virtual async Task<Novel> ScrapeNovel(string url)
|
public virtual async Task<Novel> ScrapeNovel(string url)
|
||||||
{
|
{
|
||||||
|
|
||||||
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value;
|
var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||||
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value;
|
var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||||
var doc = await GetPage(novelUrl);
|
var doc = await GetPage(novelUrl);
|
||||||
if (string.IsNullOrEmpty(doc.Text))
|
if (string.IsNullOrEmpty(doc.Text))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ namespace WebNovelPortalAPI.Scrapers;
|
|||||||
|
|
||||||
public class KakuyomuScraper : AbstractScraper
|
public class KakuyomuScraper : AbstractScraper
|
||||||
{
|
{
|
||||||
protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";
|
protected override string UrlMatchPattern => @"https?:\/\/kakuyomu\.jp\/works\/\d+";
|
||||||
|
|
||||||
protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";
|
protected override string BaseUrlPattern => @"https?:\/\/kakuyomu\.jp";
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ namespace WebNovelPortalAPI.Scrapers;
|
|||||||
public class SyosetuScraper : AbstractScraper
|
public class SyosetuScraper : AbstractScraper
|
||||||
{
|
{
|
||||||
|
|
||||||
protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/\w+\/?";
|
protected override string UrlMatchPattern => @"https?:\/\/(\w+)\.syosetu\.com\/n\w+";
|
||||||
|
|
||||||
protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com";
|
protected override string BaseUrlPattern => @"https?:\/\/(\w+)\.syosetu\.com";
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
{
|
{
|
||||||
string novelInfoBase = $"/novelview/infotop/ncode/";
|
string novelInfoBase = $"/novelview/infotop/ncode/";
|
||||||
string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?";
|
string novelRegex = @"https?:\/\/\w+\.syosetu\.com\/(\w+)\/?";
|
||||||
string novelCode = new Regex(novelRegex).Match(novelUrl).Groups[1].Value;
|
string novelCode = new Regex(novelRegex, RegexOptions.IgnoreCase).Match(novelUrl).Groups[1].Value;
|
||||||
string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}";
|
string novelInfoPage = $"{baseUrl}{novelInfoBase}{novelCode}";
|
||||||
return await GetPage(novelInfoPage);
|
return await GetPage(novelInfoPage);
|
||||||
|
|
||||||
@@ -134,8 +134,8 @@ public class SyosetuScraper : AbstractScraper
|
|||||||
|
|
||||||
public override async Task<Novel> ScrapeNovel(string url)
|
public override async Task<Novel> ScrapeNovel(string url)
|
||||||
{
|
{
|
||||||
var baseUrl = new Regex(BaseUrlPattern).Match(url).Value;
|
var baseUrl = new Regex(BaseUrlPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||||
var novelUrl = new Regex(UrlMatchPattern).Match(url).Value;
|
var novelUrl = new Regex(UrlMatchPattern, RegexOptions.IgnoreCase).Match(url).Value;
|
||||||
HtmlDocument baseDoc;
|
HtmlDocument baseDoc;
|
||||||
HtmlDocument novelInfoPage;
|
HtmlDocument novelInfoPage;
|
||||||
try
|
try
|
||||||
|
|||||||
Reference in New Issue
Block a user