Initial EF Core migration and updates to make sure upserting novels (mostly) works; chapter handling still needs to be done.
This commit is contained in:
@@ -3,8 +3,13 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using DBConnection;
|
||||
using DBConnection.Models;
|
||||
using DBConnection.Repositories;
|
||||
using DBConnection.Repositories.Interfaces;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using WebNovelPortalAPI.DTO;
|
||||
using WebNovelPortalAPI.Scrapers;
|
||||
|
||||
namespace WebNovelPortalAPI.Controllers
{
    /// <summary>
    /// API endpoints for scraping web novels and persisting them through the
    /// novel repository.
    /// </summary>
    [ApiController]
    public class NovelController : ControllerBase
    {
        private readonly INovelRepository _novelRepository;
        private readonly IEnumerable<IScraper> _scrapers;

        // The old AppDbContext field/constructor were removed in the repository
        // migration; the controller now talks only to INovelRepository.
        public NovelController(IEnumerable<IScraper> scrapers, INovelRepository novelRepository)
        {
            _scrapers = scrapers;
            _novelRepository = novelRepository;
        }

        /// <summary>
        /// Finds the first registered scraper whose URL pattern matches the
        /// given novel URL.
        /// </summary>
        /// <param name="novelUrl">Absolute URL of the novel page.</param>
        /// <returns>The matching scraper, or null when none handles the URL.</returns>
        private IScraper? MatchScraper(string novelUrl)
        {
            return _scrapers.FirstOrDefault(i => i.MatchesUrl(novelUrl));
        }

        /// <summary>
        /// Scrapes the novel at the requested URL and upserts it into the database.
        /// </summary>
        /// <param name="request">Request carrying the novel URL to scrape.</param>
        /// <returns>
        /// 200 with the upsert result, 400 when no scraper matches the URL,
        /// 500 when scraping throws.
        /// </returns>
        [HttpPost]
        [Route("scrapeNovel")]
        public async Task<IActionResult> ScrapeNovel(ScrapeNovelRequest request)
        {
            var scraper = MatchScraper(request.NovelUrl);
            if (scraper == null)
            {
                return BadRequest("Invalid url, no valid scraper configured");
            }

            Novel novel;
            try
            {
                novel = scraper.ScrapeNovel(request.NovelUrl);
            }
            catch (Exception e)
            {
                // Return only the message: serializing the whole Exception
                // leaks stack traces and internal paths to the caller.
                return StatusCode(500, e.Message);
            }

            var novelUpload = await _novelRepository.Upsert(novel);
            return Ok(novelUpload);
        }
    }
}
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
|
||||
namespace WebNovelPortalAPI.Controllers;
|
||||
|
||||
[ApiController]
[Route("[controller]")]
public class WeatherForecastController : ControllerBase
{
    // Fixed pool of summary labels, ordered coldest to hottest.
    private static readonly string[] Summaries = new[]
    {
        "Freezing", "Bracing", "Chilly", "Cool", "Mild", "Warm", "Balmy", "Hot", "Sweltering", "Scorching"
    };

    private readonly ILogger<WeatherForecastController> _logger;

    public WeatherForecastController(ILogger<WeatherForecastController> logger)
        => _logger = logger;

    /// <summary>
    /// Returns five randomly generated forecasts, one per day for the next
    /// five days.
    /// </summary>
    [HttpGet(Name = "GetWeatherForecast")]
    public IEnumerable<WeatherForecast> Get()
    {
        var forecasts = new List<WeatherForecast>();
        for (var dayOffset = 1; dayOffset <= 5; dayOffset++)
        {
            forecasts.Add(new WeatherForecast
            {
                Date = DateTime.Now.AddDays(dayOffset),
                TemperatureC = Random.Shared.Next(-20, 55),
                Summary = Summaries[Random.Shared.Next(Summaries.Length)]
            });
        }

        return forecasts.ToArray();
    }
}
|
||||
6
WebNovelPortalAPI/DTO/ScrapeNovelRequest.cs
Normal file
6
WebNovelPortalAPI/DTO/ScrapeNovelRequest.cs
Normal file
@@ -0,0 +1,6 @@
|
||||
namespace WebNovelPortalAPI.DTO;
|
||||
|
||||
/// <summary>
/// Request body for the scrapeNovel endpoint.
/// </summary>
public class ScrapeNovelRequest
{
    /// <summary>Absolute URL of the novel to scrape; must match a configured scraper.</summary>
    // Default to empty so the non-nullable property is never null when the
    // request body omits it (avoids CS8618 and downstream NullReferenceException).
    public string NovelUrl { get; set; } = string.Empty;
}
|
||||
18
WebNovelPortalAPI/Extensions/ScraperExtensions.cs
Normal file
18
WebNovelPortalAPI/Extensions/ScraperExtensions.cs
Normal file
@@ -0,0 +1,18 @@
|
||||
using System.Reflection;
|
||||
using WebNovelPortalAPI.Scrapers;
|
||||
|
||||
namespace WebNovelPortalAPI.Extensions;
|
||||
|
||||
/// <summary>
/// Service-collection helpers for registering scraper implementations.
/// </summary>
public static class ScraperExtensions
{
    /// <summary>
    /// Registers every concrete <see cref="IScraper"/> implementation found in
    /// this assembly's Scrapers namespace as a scoped IScraper service.
    /// </summary>
    public static void AddScrapers(this IServiceCollection services)
    {
        // !t.IsAbstract: an abstract scraper base class would otherwise be
        // registered too and fail when the container tries to construct it.
        Type[] types = Assembly.GetExecutingAssembly().GetTypes()
            .Where(t => t.IsClass && !t.IsAbstract
                        && typeof(IScraper).IsAssignableFrom(t)
                        && (t.Namespace?.Contains(nameof(Scrapers)) ?? false))
            .ToArray();
        foreach (var t in types)
        {
            services.AddScoped(typeof(IScraper), t);
        }
    }
}
|
||||
@@ -1,12 +1,19 @@
|
||||
using DBConnection;
|
||||
using DBConnection.Extensions;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Newtonsoft.Json;
|
||||
using WebNovelPortalAPI.Extensions;
|
||||
using WebNovelPortalAPI.Scrapers;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
|
||||
// Add services to the container.
|
||||
builder.Services.AddDbServices(builder.Configuration);
|
||||
builder.Services.AddControllers();
|
||||
builder.Services.AddScrapers();
|
||||
builder.Services.AddControllers().AddNewtonsoftJson(opt =>
|
||||
{
|
||||
opt.SerializerSettings.ReferenceLoopHandling = ReferenceLoopHandling.Ignore;
|
||||
});
|
||||
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
|
||||
builder.Services.AddEndpointsApiExplorer();
|
||||
builder.Services.AddSwaggerGen();
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
namespace WebNovelPortalAPI.Scrapers;
|
||||
|
||||
// Empty placeholder base class for scrapers; currently unused — scrapers
// implement IScraper directly. NOTE(review): this file is deleted by this
// commit; remove any remaining references.
public class AbstractScraper
{

}
|
||||
@@ -4,6 +4,8 @@ namespace WebNovelPortalAPI.Scrapers;
|
||||
|
||||
/// <summary>
/// Contract for site-specific web novel scrapers.
/// </summary>
public interface IScraper
{
    /// <summary>Returns true when this scraper can handle the given URL.</summary>
    public bool MatchesUrl(string url);
    /// <summary>Scrapes novel metadata (author, tags, dates, chapter list) from the given URL.</summary>
    public Novel ScrapeNovel(string url);

    /// <summary>
    /// Scrapes the text content of a single chapter. Nullable return —
    /// presumably null when the content cannot be retrieved; confirm once
    /// implementations exist (KakuyomuScraper currently throws NotImplementedException).
    /// </summary>
    public string? ScrapeChapterContent(string chapterUrl);

}
|
||||
102
WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs
Normal file
102
WebNovelPortalAPI/Scrapers/KakuyomuScraper.cs
Normal file
@@ -0,0 +1,102 @@
|
||||
using System.Reflection.Metadata;
|
||||
using System.Text.RegularExpressions;
|
||||
using DBConnection.Models;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace WebNovelPortalAPI.Scrapers;
|
||||
|
||||
/// <summary>
/// Scraper for novels hosted on kakuyomu.jp work pages.
/// </summary>
public class KakuyomuScraper : IScraper
{
    private const string UrlPattern = @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";
    private const string BaseUrl = "https://kakuyomu.jp";

    // Cached: the pattern is constant, so build the Regex once instead of on
    // every MatchesUrl call.
    private static readonly Regex UrlRegex = new Regex(UrlPattern, RegexOptions.IgnoreCase);

    /// <summary>True when the URL is a kakuyomu.jp work page.</summary>
    public bool MatchesUrl(string url)
    {
        return UrlRegex.IsMatch(url);
    }

    /// <summary>Reads the novel title from the work page.</summary>
    private string GetNovelTitle(HtmlDocument document)
    {
        var xpath = @"//*[@id='workTitle']/a";
        var node = document.DocumentNode.SelectSingleNode(xpath)
                   ?? throw new Exception("Could not locate novel title node");
        return node.InnerText;
    }

    /// <summary>Reads the author display name and profile URL from the work page.</summary>
    private Author GetAuthor(HtmlDocument document)
    {
        // One anchor carries both pieces of data: the display name (inner text)
        // and the relative profile link (href). The original code selected the
        // same node twice via duplicated XPaths.
        var authorXPath = @"//*[@id='workAuthor-activityName']/a";
        var node = document.DocumentNode.SelectSingleNode(authorXPath)
                   ?? throw new Exception("Could not locate author node");
        return new Author
        {
            Name = node.InnerText,
            Url = BaseUrl + node.Attributes["href"].Value
        };
    }

    /// <summary>Builds the chapter list (1-based ChapterNumber) from the table of contents.</summary>
    private List<Chapter> GetChapters(HtmlDocument document)
    {
        var urlXPath = @"//a[@class='widget-toc-episode-episodeTitle']";
        var nameXPath = @"span";
        // SelectNodes returns null — not an empty collection — when nothing
        // matches, so guard against it instead of throwing NRE.
        var urlNodes = document.DocumentNode.SelectNodes(urlXPath);
        if (urlNodes == null)
        {
            return new List<Chapter>();
        }

        return urlNodes.Select((node, i) => new Chapter
        {
            ChapterNumber = i + 1,
            Url = $"{BaseUrl}{node.Attributes["href"].Value}",
            Name = node.SelectSingleNode(nameXPath).InnerText
        }).ToList();
    }

    /// <summary>Reads the work's keyword tags.</summary>
    private List<Tag> GetTags(HtmlDocument document)
    {
        var xpath = @"//span[@itemprop='keywords']/a";
        var nodes = document.DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            // A work without tags is valid; return empty rather than NRE.
            return new List<Tag>();
        }

        return nodes.Select(node => new Tag
        {
            TagValue = node.InnerText
        }).ToList();
    }

    /// <summary>Parses the publication date from the work page.</summary>
    private DateTime GetPostedDate(HtmlDocument document)
    {
        return ParseTimeNode(document, @"//time[@itemprop='datePublished']");
    }

    /// <summary>Parses the last-updated date from the work page.</summary>
    private DateTime GetLastUpdatedDate(HtmlDocument document)
    {
        return ParseTimeNode(document, @"//time[@itemprop='dateModified']");
    }

    // Shared helper for the two <time> elements. Prefers the machine-readable
    // datetime attribute (standard on <time>, culture-independent) and falls
    // back to the displayed text; the original parsed the displayed text with
    // the server's current culture, which is fragile for a Japanese site.
    // NOTE(review): confirm the datetime attribute is present on kakuyomu pages.
    private static DateTime ParseTimeNode(HtmlDocument document, string xpath)
    {
        var node = document.DocumentNode.SelectSingleNode(xpath)
                   ?? throw new Exception($"Could not locate time node for {xpath}");
        var value = node.Attributes["datetime"]?.Value ?? node.InnerText;
        return DateTime.Parse(value, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Loads the work page and scrapes the full novel: author, chapter list,
    /// dates, tags, and title.
    /// </summary>
    /// <exception cref="Exception">When the page cannot be loaded or parsed.</exception>
    public Novel ScrapeNovel(string url)
    {
        var web = new HtmlWeb();
        var doc = web.Load(url);
        if (doc == null)
        {
            throw new Exception("Error parsing document");
        }

        return new Novel
        {
            Author = GetAuthor(doc),
            Chapters = GetChapters(doc),
            DatePosted = GetPostedDate(doc),
            LastUpdated = GetLastUpdatedDate(doc),
            Tags = GetTags(doc),
            Title = GetNovelTitle(doc),
            Url = url
        };
    }

    /// <summary>Not implemented yet — chapter handling is still pending per the commit message.</summary>
    public string? ScrapeChapterContent(string chapterUrl)
    {
        throw new NotImplementedException();
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
namespace WebNovelPortalAPI.TranslationEngines;
|
||||
|
||||
/// <summary>
/// Contract for services that translate text — presumably scraped chapter
/// content — into another language. NOTE(review): target language selection
/// is not part of the interface yet; confirm intended design.
/// </summary>
public interface ITranslationEngine
{
    /// <summary>Translates the given text and returns the translated string.</summary>
    public string Translate(string text);
}
|
||||
@@ -1,12 +0,0 @@
|
||||
namespace WebNovelPortalAPI;
|
||||
|
||||
/// <summary>
/// Single-day weather forecast sample DTO.
/// </summary>
public class WeatherForecast
{
    /// <summary>Date the forecast applies to.</summary>
    public DateTime Date { get; set; }

    /// <summary>Temperature in degrees Celsius.</summary>
    public int TemperatureC { get; set; }

    /// <summary>Temperature in degrees Fahrenheit, derived from TemperatureC.</summary>
    public int TemperatureF
    {
        get { return 32 + (int) (TemperatureC / 0.5556); }
    }

    /// <summary>Optional human-readable summary label.</summary>
    public string? Summary { get; set; }
}
|
||||
@@ -7,6 +7,8 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.43" />
|
||||
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.7" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="6.0.7" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="6.0.7" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="6.0.7">
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
}
|
||||
},
|
||||
"ConnectionStrings": {
|
||||
"DefaultConnection": "null"
|
||||
"DefaultConnection": "Data Source=/home/m/Documents/WebNovelPortal/WebNovelPortalAPI/test_db"
|
||||
},
|
||||
"AllowedHosts": "*"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user