Files
FictionArchive/fictionarchive-web-astro/src/lib/utils/sanitizeChapter.ts
gamer147 4fb34bdef7
All checks were successful
CI / build-backend (pull_request) Successful in 1m21s
CI / build-frontend (pull_request) Successful in 39s
[FA-misc] Should fix paragraph blocking
2025-12-10 13:40:33 -05:00

105 lines
2.1 KiB
TypeScript

import DOMPurify from 'isomorphic-dompurify';
/**
 * Converts plain chapter text into HTML paragraphs.
 *
 * Blank lines (two or more line breaks, optionally separated by whitespace)
 * delimit paragraphs; a single line break inside a paragraph becomes `<br>`.
 * Windows (`\r\n`) and lone carriage-return (`\r`) line endings are treated
 * like `\n`, so no stray carriage returns leak into the generated markup.
 *
 * Content that already contains a block-level HTML tag, including
 * self-closing spellings such as `<hr/>`, is returned exactly as received.
 *
 * @param text - Raw chapter content, either plain text or HTML.
 * @returns The untouched input when block-level markup is present; otherwise
 *   the text wrapped in `<p>` elements joined by `\n`. Empty or
 *   whitespace-only input yields an empty string.
 */
function wrapInParagraphs(text: string): string {
  // A tag name must be followed by whitespace, `/` or `>` so that `<hr/>` is
  // recognised while longer names sharing a prefix are not mistaken for `<p>`.
  const hasBlockElements = /<(p|div|h[1-6]|ul|ol|blockquote|pre|table|hr)[\s/>]/i.test(text);
  if (hasBlockElements) {
    return text;
  }

  // Normalise line endings first so the paragraph and <br> rules below only
  // ever have to deal with `\n`.
  const normalized = text.replace(/\r\n?/g, '\n');

  return (
    normalized
      // A blank line (possibly containing whitespace) separates paragraphs.
      .split(/\n\s*\n/)
      .map((para) => para.trim())
      // Drop chunks that were nothing but whitespace.
      .filter((para) => para.length > 0)
      // Remaining single newlines are soft breaks inside the paragraph.
      .map((para) => `<p>${para.replace(/\n/g, '<br>')}</p>`)
      .join('\n')
  );
}
/**
 * Produces sanitized HTML for a chapter body.
 *
 * The input is first run through `wrapInParagraphs` (plain text is wrapped in
 * paragraph tags, intended to help browser translation features), and the
 * result is then cleaned by DOMPurify against an explicit allow-list of tags
 * and attributes. The allow-list is meant to be broader than the one used for
 * descriptions, so that formatted novel content (headings, lists, images,
 * tables, ruby annotations) is accepted. `data-*` attributes are not allowed.
 *
 * @param html - Raw chapter content, plain text or HTML.
 * @returns The string returned by DOMPurify for the configured allow-list.
 */
export function sanitizeChapterHtml(html: string): string {
  // Tag allow-list, grouped by purpose.
  const basicFormatting = ['b', 'i', 'em', 'strong', 'u', 's', 'strike', 'del', 'ins'];
  const structure = ['p', 'br', 'hr', 'div', 'span'];
  const headings = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
  const lists = ['ul', 'ol', 'li'];
  const quotes = ['blockquote', 'q', 'cite'];
  const preformatted = ['pre', 'code'];
  // Ruby markup is used for Asian language annotations.
  const ruby = ['ruby', 'rt', 'rp'];
  const images = ['img'];
  const tables = ['table', 'thead', 'tbody', 'tr', 'th', 'td'];

  const allowedTags = [
    ...basicFormatting,
    ...structure,
    ...headings,
    ...lists,
    ...quotes,
    ...preformatted,
    ...ruby,
    ...images,
    ...tables
  ];

  // Attribute allow-list, grouped the same way.
  const imageAttributes = ['src', 'alt', 'title', 'width', 'height'];
  const tableAttributes = ['colspan', 'rowspan'];
  // Generic styling hook (deliberately limited to class names).
  const stylingAttributes = ['class'];

  const allowedAttributes = [...imageAttributes, ...tableAttributes, ...stylingAttributes];

  // Paragraph wrapping happens before sanitizing so the generated markup is
  // subject to the same allow-list as everything else.
  const paragraphed = wrapInParagraphs(html);

  return DOMPurify.sanitize(paragraphed, {
    ALLOWED_TAGS: allowedTags,
    ALLOWED_ATTR: allowedAttributes,
    ALLOW_DATA_ATTR: false
  });
}