Initial EF Core migration and updates to make sure upserting novels (mostly) works. Still need to do chapter handling.

This commit is contained in:
2022-07-14 23:12:12 -04:00
parent 5402923e9f
commit 5337e7ccb8
25 changed files with 962 additions and 64 deletions

View File

@@ -3,8 +3,13 @@ using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using DBConnection;
using DBConnection.Models;
using DBConnection.Repositories;
using DBConnection.Repositories.Interfaces;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using WebNovelPortalAPI.DTO;
using WebNovelPortalAPI.Scrapers;
namespace WebNovelPortalAPI.Controllers
{
@@ -12,11 +17,42 @@ namespace WebNovelPortalAPI.Controllers
[ApiController]
public class NovelController : ControllerBase
{
private readonly AppDbContext _context;
private readonly INovelRepository _novelRepository;
private readonly IEnumerable<IScraper> _scrapers;
public NovelController(AppDbContext context)
public NovelController(IEnumerable<IScraper> scrapers, INovelRepository novelRepository)
{
_context = context;
_scrapers = scrapers;
_novelRepository = novelRepository;
}
/// <summary>
/// Finds the first registered scraper whose <c>MatchesUrl</c> accepts the given URL.
/// </summary>
/// <param name="novelUrl">URL of the novel page to find a scraper for.</param>
/// <returns>The first matching scraper, or <c>null</c> when none of them match.</returns>
private IScraper? MatchScraper(string novelUrl)
{
    foreach (var candidate in _scrapers)
    {
        if (candidate.MatchesUrl(novelUrl))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Scrapes the novel at the requested URL with the first matching scraper and upserts the
/// result through the novel repository.
/// </summary>
/// <param name="request">Request body carrying the URL of the novel to scrape.</param>
/// <returns>
/// 200 with the value returned by the repository upsert; 400 when the URL is missing or no
/// scraper matches it; 500 with the error message when the scraper throws.
/// </returns>
[HttpPost]
[Route("scrapeNovel")]
public async Task<IActionResult> ScrapeNovel(ScrapeNovelRequest request)
{
    // Reject a missing URL up front instead of letting it blow up inside a scraper's URL matching.
    if (string.IsNullOrWhiteSpace(request?.NovelUrl))
    {
        return BadRequest("Invalid url, a novel url is required");
    }

    var scraper = MatchScraper(request.NovelUrl);
    if (scraper is null)
    {
        return BadRequest("Invalid url, no valid scraper configured");
    }

    Novel novel;
    try
    {
        novel = scraper.ScrapeNovel(request.NovelUrl);
    }
    catch (Exception e)
    {
        // Only the message goes back to the client: serializing the whole exception
        // would expose stack traces and other internals in the response body.
        return StatusCode(StatusCodes.Status500InternalServerError, e.Message);
    }

    var novelUpload = await _novelRepository.Upsert(novel);
    return Ok(novelUpload);
}
}
}

View File

@@ -1,32 +0,0 @@
using Microsoft.AspNetCore.Mvc;
namespace WebNovelPortalAPI.Controllers;
/// <summary>
/// Sample controller that serves randomly generated weather forecasts.
/// </summary>
[ApiController]
[Route("[controller]")]
public class WeatherForecastController : ControllerBase
{
    // Pool of summary labels; one is picked at random for each forecast.
    private static readonly string[] Summaries =
    {
        "Freezing", "Bracing", "Chilly", "Cool", "Mild", "Warm", "Balmy", "Hot", "Sweltering", "Scorching"
    };

    private readonly ILogger<WeatherForecastController> _logger;

    public WeatherForecastController(ILogger<WeatherForecastController> logger) => _logger = logger;

    /// <summary>
    /// Builds five forecasts, one for each of the next five days, each with a random
    /// temperature and a random summary.
    /// </summary>
    /// <returns>An array of five forecasts.</returns>
    [HttpGet(Name = "GetWeatherForecast")]
    public IEnumerable<WeatherForecast> Get()
    {
        const int days = 5;
        var forecasts = new WeatherForecast[days];

        for (var dayOffset = 1; dayOffset <= days; dayOffset++)
        {
            forecasts[dayOffset - 1] = new WeatherForecast
            {
                Date = DateTime.Now.AddDays(dayOffset),
                TemperatureC = Random.Shared.Next(-20, 55),
                Summary = Summaries[Random.Shared.Next(Summaries.Length)]
            };
        }

        return forecasts;
    }
}

View File

@@ -0,0 +1,6 @@
namespace WebNovelPortalAPI.DTO;
/// <summary>
/// Request body for the novel scraping endpoint.
/// </summary>
public class ScrapeNovelRequest
{
    /// <summary>
    /// URL of the novel page to scrape. Defaults to an empty string so the non-nullable
    /// property is never observed as null on a freshly constructed request.
    /// </summary>
    public string NovelUrl { get; set; } = string.Empty;
}

View File

@@ -0,0 +1,18 @@
using System.Reflection;
using WebNovelPortalAPI.Scrapers;
namespace WebNovelPortalAPI.Extensions;
/// <summary>
/// Dependency-injection helpers for registering scraper implementations.
/// </summary>
public static class ScraperExtensions
{
    /// <summary>
    /// Registers, as scoped <see cref="IScraper"/> services, every concrete class in the
    /// executing assembly that implements <see cref="IScraper"/> and whose namespace
    /// contains "Scrapers".
    /// </summary>
    /// <param name="services">Service collection to add the registrations to.</param>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static void AddScrapers(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        var scraperTypes = Assembly.GetExecutingAssembly().GetTypes().Where(t =>
            t.IsClass
            // Abstract classes pass IsClass but cannot be instantiated by the container.
            && !t.IsAbstract
            && typeof(IScraper).IsAssignableFrom(t)
            && (t.Namespace?.Contains(nameof(Scrapers)) ?? false));

        foreach (var scraperType in scraperTypes)
        {
            services.AddScoped(typeof(IScraper), scraperType);
        }
    }
}

View File

@@ -1,12 +1,19 @@
using DBConnection;
using DBConnection.Extensions;
using Microsoft.EntityFrameworkCore;
using Newtonsoft.Json;
using WebNovelPortalAPI.Extensions;
using WebNovelPortalAPI.Scrapers;
var builder = WebApplication.CreateBuilder(args);
// Add services to the container.
builder.Services.AddDbServices(builder.Configuration);
builder.Services.AddControllers();
builder.Services.AddScrapers();
builder.Services.AddControllers().AddNewtonsoftJson(opt =>
{
opt.SerializerSettings.ReferenceLoopHandling = ReferenceLoopHandling.Ignore;
});
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();

View File

@@ -1,6 +0,0 @@
namespace WebNovelPortalAPI.Scrapers;
// Empty placeholder class: it declares no members and no base type.
public class AbstractScraper
{
}

View File

@@ -4,6 +4,8 @@ namespace WebNovelPortalAPI.Scrapers;
/// <summary>
/// Contract for a scraper that handles novel pages of a particular site.
/// </summary>
public interface IScraper
{
    /// <summary>Reports whether this scraper accepts the given URL.</summary>
    /// <param name="url">URL to test.</param>
    bool MatchesUrl(string url);

    /// <summary>Scrapes the page at the given URL into a <see cref="Novel"/>.</summary>
    /// <param name="url">URL of the novel page.</param>
    Novel ScrapeNovel(string url);

    /// <summary>Scrapes the content of a single chapter.</summary>
    /// <param name="chapterUrl">URL of the chapter page.</param>
    /// <returns>
    /// The chapter content, or null. NOTE(review): the condition under which null is
    /// returned is not defined by this interface; confirm with the implementations.
    /// </returns>
    string? ScrapeChapterContent(string chapterUrl);
}

View File

@@ -0,0 +1,102 @@
using System.Reflection.Metadata;
using System.Text.RegularExpressions;
using DBConnection.Models;
using HtmlAgilityPack;
namespace WebNovelPortalAPI.Scrapers;
/// <summary>
/// Scraper for novel ("works") pages hosted on kakuyomu.jp.
/// </summary>
public class KakuyomuScraper : IScraper
{
    private const string UrlPattern = @"https?:\/\/kakuyomu\.jp\/works\/\d+\/?";
    private const string BaseUrl = "https://kakuyomu.jp";

    // Built once and shared: the pattern is constant, so constructing a Regex per call is wasted work.
    private static readonly Regex UrlRegex = new Regex(UrlPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Reports whether the URL contains a kakuyomu.jp works address.</summary>
    /// <param name="url">URL to test; null or empty never matches.</param>
    public bool MatchesUrl(string url)
    {
        return !string.IsNullOrEmpty(url) && UrlRegex.IsMatch(url);
    }

    /// <summary>
    /// Selects a single node and fails with a descriptive error instead of a
    /// NullReferenceException when the page does not contain it.
    /// </summary>
    private static HtmlNode RequireNode(HtmlDocument document, string xpath, string description)
    {
        return document.DocumentNode.SelectSingleNode(xpath)
               ?? throw new InvalidOperationException($"Could not find the {description} in the document (xpath: {xpath})");
    }

    /// <summary>Reads a node's href attribute, failing descriptively when it is absent.</summary>
    private static string RequireHref(HtmlNode node, string description)
    {
        return node.Attributes["href"]?.Value
               ?? throw new InvalidOperationException($"The {description} has no href attribute");
    }

    /// <summary>Reads the novel title from the work title link.</summary>
    private static string GetNovelTitle(HtmlDocument document)
    {
        return RequireNode(document, @"//*[@id='workTitle']/a", "novel title").InnerText;
    }

    /// <summary>Reads the author's name and profile URL from the author link.</summary>
    private static Author GetAuthor(HtmlDocument document)
    {
        // Name and URL both live on the same anchor, so it is looked up once.
        var authorLink = RequireNode(document, @"//*[@id='workAuthor-activityName']/a", "author link");

        return new Author
        {
            Name = authorLink.InnerText,
            Url = $"{BaseUrl}{RequireHref(authorLink, "author link")}"
        };
    }

    /// <summary>
    /// Builds the chapter list from the table-of-contents links, numbering chapters from 1
    /// in document order. Returns an empty list when the page lists no chapters.
    /// </summary>
    private static List<Chapter> GetChapters(HtmlDocument document)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var chapterLinks = document.DocumentNode.SelectNodes(@"//a[@class='widget-toc-episode-episodeTitle']");
        if (chapterLinks is null)
        {
            return new List<Chapter>();
        }

        return chapterLinks.Select((node, i) => new Chapter
        {
            ChapterNumber = i + 1,
            Url = $"{BaseUrl}{RequireHref(node, "chapter link")}",
            // Fall back to the link's own text when the inner span is missing.
            Name = node.SelectSingleNode(@"span")?.InnerText ?? node.InnerText
        }).ToList();
    }

    /// <summary>
    /// Builds the tag list from the keyword links. Returns an empty list when the work has no tags.
    /// </summary>
    private static List<Tag> GetTags(HtmlDocument document)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var tagLinks = document.DocumentNode.SelectNodes(@"//span[@itemprop='keywords']/a");
        if (tagLinks is null)
        {
            return new List<Tag>();
        }

        return tagLinks.Select(node => new Tag
        {
            TagValue = node.InnerText
        }).ToList();
    }

    /// <summary>Parses the publication date from the datePublished time element.</summary>
    private static DateTime GetPostedDate(HtmlDocument document)
    {
        // NOTE(review): parsing uses the current culture; confirm the page's date text parses on the host's culture.
        return DateTime.Parse(RequireNode(document, @"//time[@itemprop='datePublished']", "published date").InnerText);
    }

    /// <summary>Parses the last-modified date from the dateModified time element.</summary>
    private static DateTime GetLastUpdatedDate(HtmlDocument document)
    {
        // NOTE(review): parsing uses the current culture; confirm the page's date text parses on the host's culture.
        return DateTime.Parse(RequireNode(document, @"//time[@itemprop='dateModified']", "modified date").InnerText);
    }

    /// <summary>
    /// Downloads the page at the given URL and extracts author, chapters, dates, tags and title.
    /// </summary>
    /// <param name="url">URL of the novel page.</param>
    /// <returns>A <see cref="Novel"/> populated from the page, with <c>Url</c> set to <paramref name="url"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The document could not be loaded, or a required element is missing from the page.
    /// </exception>
    public Novel ScrapeNovel(string url)
    {
        var web = new HtmlWeb();
        var doc = web.Load(url);
        if (doc == null)
        {
            throw new InvalidOperationException("Error parsing document");
        }

        return new Novel
        {
            Author = GetAuthor(doc),
            Chapters = GetChapters(doc),
            DatePosted = GetPostedDate(doc),
            LastUpdated = GetLastUpdatedDate(doc),
            Tags = GetTags(doc),
            Title = GetNovelTitle(doc),
            Url = url
        };
    }

    /// <summary>Not implemented yet; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public string? ScrapeChapterContent(string chapterUrl)
    {
        throw new NotImplementedException();
    }
}

View File

@@ -0,0 +1,6 @@
namespace WebNovelPortalAPI.TranslationEngines;
/// <summary>
/// Contract for a text translation engine.
/// </summary>
public interface ITranslationEngine
{
    /// <summary>
    /// Takes a string and returns a string; by its name, the translated form of the input.
    /// NOTE(review): source and target languages are not specified by this interface.
    /// </summary>
    /// <param name="text">Text to translate.</param>
    string Translate(string text);
}

View File

@@ -1,12 +0,0 @@
namespace WebNovelPortalAPI;
/// <summary>
/// A single forecast entry: a date, a temperature in Celsius, and an optional summary.
/// </summary>
public class WeatherForecast
{
    public DateTime Date { get; set; }

    public int TemperatureC { get; set; }

    /// <summary>
    /// Temperature in Fahrenheit, derived from <see cref="TemperatureC"/>; the scaled
    /// value is truncated toward zero before the 32-degree offset is added.
    /// </summary>
    public int TemperatureF
    {
        get
        {
            var scaled = (int) (TemperatureC / 0.5556);
            return 32 + scaled;
        }
    }

    public string? Summary { get; set; }
}

View File

@@ -7,6 +7,8 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.43" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.7" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="6.0.7" />
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="6.0.7" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="6.0.7">

View File

@@ -6,7 +6,7 @@
}
},
"ConnectionStrings": {
"DefaultConnection": "null"
"DefaultConnection": "Data Source=/home/m/Documents/WebNovelPortal/WebNovelPortalAPI/test_db"
},
"AllowedHosts": "*"
}