import DOMPurify from 'isomorphic-dompurify';
/**
 * Splits plain text into paragraphs based on newlines.
 * Double newlines create new paragraphs, single newlines become <br>.
 * If the content already contains HTML block elements, returns as-is.
 *
 * The text is NOT escaped here; callers are expected to sanitize the result
 * (see `sanitizeChapterHtml`).
 *
 * @param text - Raw chapter content, either plain text or HTML.
 * @returns The input unchanged when it contains a block-level tag; otherwise
 *   one `<p>…</p>` per non-empty paragraph, joined with `\n`. Empty or
 *   whitespace-only input yields an empty string.
 */
function wrapInParagraphs(text: string): string {
  // Check if content already has block-level HTML elements
  const hasBlockElements = /<(p|div|h[1-6]|ul|ol|blockquote|pre|table|hr)[>\s]/i.test(text);
  if (hasBlockElements) {
    return text;
  }

  return (
    text
      // Split on double newlines (paragraph breaks); `\s*` also absorbs the
      // `\r` of CRLF line endings and whitespace-only lines between paragraphs.
      .split(/\n\s*\n/)
      .map((para) => para.trim())
      // Drop paragraphs that were empty or whitespace-only.
      .filter((para) => para.length > 0)
      // Convert single newlines (LF or CRLF) to <br> within paragraphs.
      .map((para) => `<p>${para.replace(/\r?\n/g, '<br>')}</p>`)
      .join('\n')
  );
}

/**
 * Sanitizes chapter HTML content with extended allowed tags.
 * More permissive than the description sanitizer to support
 * formatted novel content including headings, lists, and images.
 * Also wraps plain text in paragraph tags for better browser translate support.
 *
 * @param html - Raw chapter content (plain text or HTML).
 * @returns The content after paragraph wrapping and DOMPurify sanitization
 *   restricted to the tag and attribute allow-lists below.
 */
export function sanitizeChapterHtml(html: string): string {
  // First wrap in paragraphs if needed, then sanitize
  const wrapped = wrapInParagraphs(html);

  return DOMPurify.sanitize(wrapped, {
    ALLOWED_TAGS: [
      // Basic formatting
      'b', 'i', 'em', 'strong', 'u', 's', 'strike', 'del', 'ins',
      // Structure
      'p', 'br', 'hr', 'div', 'span',
      // Headings
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      // Lists
      'ul', 'ol', 'li',
      // Quotes
      'blockquote', 'q', 'cite',
      // Preformatted
      'pre', 'code',
      // Ruby (for Asian language annotations)
      'ruby', 'rt', 'rp',
      // Images
      'img',
      // Tables
      'table', 'thead', 'tbody', 'tr', 'th', 'td'
    ],
    ALLOWED_ATTR: [
      // Image attributes
      'src', 'alt', 'title', 'width', 'height',
      // Table attributes
      'colspan', 'rowspan',
      // Generic styling (limited)
      'class'
    ],
    ALLOW_DATA_ATTR: false
  });
}