| | | 1 | | using System.Text.RegularExpressions; |
| | | 2 | | using Chronicis.Shared.Extensions; |
| | | 3 | | using Ganss.Xss; |
| | | 4 | | using Markdig; |
| | | 5 | | |
| | | 6 | | namespace Chronicis.Client.Services; |
| | | 7 | | |
| | | 8 | | /// <summary> |
| | | 9 | | /// Service for converting markdown to sanitized HTML |
| | | 10 | | /// </summary> |
| | | 11 | | public partial class MarkdownService : IMarkdownService |
| | | 12 | | { |
| | | 13 | | private readonly MarkdownPipeline _pipeline; |
| | | 14 | | private readonly HtmlSanitizer _sanitizer; |
| | | 15 | | private readonly ILogger<MarkdownService> _logger; |
| | | 16 | | |
| | | 17 | | [GeneratedRegex(@"<(p|h[1-6]|ul|ol|li|strong|em|a|pre|code|blockquote|div|span|br)[^>]*>", RegexOptions.Compiled | R |
| | | 18 | | private static partial Regex HtmlRegex(); |
| | | 19 | | |
/// <summary>
/// Creates the service, building the Markdig pipeline and the HTML sanitizer
/// that the conversion methods share.
/// </summary>
/// <param name="logger">Logger kept for reporting conversion failures.</param>
public MarkdownService(ILogger<MarkdownService> logger)
{
    _logger = logger;

    // Markdig pipeline; extensions are registered in the same order as before.
    var pipelineBuilder = new MarkdownPipelineBuilder()
        .UseAdvancedExtensions()   // Tables, task lists, etc.
        .UseEmojiAndSmiley()       // :smile: syntax
        .UsePipeTables()           // GitHub-style tables
        .UseGridTables()           // Complex tables
        .UseAutoLinks()            // Auto-detect URLs
        .UseGenericAttributes();   // Add CSS classes
    _pipeline = pipelineBuilder.Build();

    // HTML sanitizer that guards the rendered output against XSS.
    var sanitizer = new HtmlSanitizer();

    // Elements that rendered markdown is expected to contain.
    string[] markdownTags =
    {
        "h1", "h2", "h3", "h4", "h5", "h6",
        "table", "thead", "tbody", "tr", "th", "td",
        "img", "code", "pre", "blockquote", "del", "ins",
    };
    foreach (var tag in markdownTags)
    {
        sanitizer.AllowedTags.Add(tag);
    }

    // Attributes those elements are allowed to carry.
    string[] markdownAttributes = { "class", "src", "alt", "href", "title" };
    foreach (var attribute in markdownAttributes)
    {
        sanitizer.AllowedAttributes.Add(attribute);
    }

    // data-* attributes are kept (used for syntax highlighting).
    sanitizer.AllowDataAttributes = true;

    _sanitizer = sanitizer;
}
| | | 67 | | |
/// <summary>
/// Renders markdown to HTML and runs the result through the sanitizer.
/// </summary>
/// <param name="markdown">Markdown source text.</param>
/// <returns>
/// The sanitized HTML; an empty string when <paramref name="markdown"/> is null or
/// whitespace; or, if conversion throws, the HTML-encoded input wrapped in a paragraph.
/// </returns>
public string ToHtml(string markdown)
{
    if (string.IsNullOrWhiteSpace(markdown))
    {
        return string.Empty;
    }

    string output;
    try
    {
        // Render first, then sanitize the rendered markup to prevent XSS.
        var rendered = Markdown.ToHtml(markdown, _pipeline);
        output = _sanitizer.Sanitize(rendered);
    }
    catch (Exception failure)
    {
        // Record the failure and fall back to the escaped source text.
        _logger.LogErrorSanitized(failure, "Error converting markdown to HTML");
        output = $"<p>{System.Net.WebUtility.HtmlEncode(markdown)}</p>";
    }

    return output;
}
| | | 91 | | |
/// <summary>
/// Convert markdown to plain text (strip formatting).
/// </summary>
/// <param name="markdown">Markdown source text.</param>
/// <returns>
/// The plain-text rendering produced by Markdig; an empty string when
/// <paramref name="markdown"/> is null or whitespace; or the unmodified input
/// if the conversion throws.
/// </returns>
public string ToPlainText(string markdown)
{
    if (string.IsNullOrWhiteSpace(markdown))
    {
        return string.Empty;
    }

    try
    {
        return Markdown.ToPlainText(markdown, _pipeline);
    }
    catch (Exception ex)
    {
        // Keep the best-effort fallback, but log the failure the same way
        // ToHtml does instead of swallowing it silently.
        _logger.LogErrorSanitized(ex, "Error converting markdown to plain text");
        return markdown;
    }
}
| | | 110 | | |
/// <summary>
/// Get a preview of the markdown: its plain-text form, cut to at most
/// <paramref name="maxLength"/> UTF-16 code units and followed by "..." when truncated.
/// </summary>
/// <param name="markdown">Markdown source text.</param>
/// <param name="maxLength">Maximum number of characters kept before the ellipsis.</param>
/// <returns>
/// The full plain text when it fits within <paramref name="maxLength"/>;
/// otherwise the truncated text with "..." appended.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="maxLength"/> is negative.
/// </exception>
public string GetPreview(string markdown, int maxLength = 200)
{
    // A negative length previously failed inside AsSpan with the same exception
    // type; fail up front with a message that names the offending argument.
    if (maxLength < 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(maxLength), maxLength, "Preview length cannot be negative.");
    }

    var plainText = ToPlainText(markdown);

    if (plainText.Length <= maxLength)
    {
        return plainText;
    }

    // Never cut between the two halves of a surrogate pair (e.g. an emoji):
    // that would leave a lone high surrogate in front of the ellipsis.
    var cut = maxLength;
    if (cut > 0 && char.IsHighSurrogate(plainText[cut - 1]))
    {
        cut--;
    }

    return string.Concat(plainText.AsSpan(0, cut), "...");
}
| | | 123 | | |
/// <summary>
/// Reports whether the content looks like HTML rather than markdown, by testing it
/// against the tag pattern declared on <c>HtmlRegex</c> (p, h1-h6, ul, li, and similar).
/// </summary>
/// <param name="content">Text to inspect.</param>
/// <returns>
/// <c>true</c> when the content is non-blank and the pattern matches; otherwise <c>false</c>.
/// </returns>
public bool IsHtml(string content)
{
    // Null or blank content is never treated as HTML, so the regex is skipped for it.
    var hasText = !string.IsNullOrWhiteSpace(content);
    return hasText && HtmlRegex().IsMatch(content);
}
| | | 137 | | |
/// <summary>
/// Returns sanitized HTML for content that may be either HTML or markdown.
/// Content that <see cref="IsHtml"/> recognises as HTML is only sanitized;
/// anything else is converted through <see cref="ToHtml"/>.
/// </summary>
/// <param name="content">HTML or markdown text.</param>
/// <returns>
/// Sanitized HTML, or an empty string when <paramref name="content"/> is null or whitespace.
/// </returns>
public string EnsureHtml(string content)
{
    if (string.IsNullOrWhiteSpace(content))
    {
        return string.Empty;
    }

    // HTML input skips markdown rendering; markdown input gets the full pipeline.
    return IsHtml(content)
        ? _sanitizer.Sanitize(content)
        : ToHtml(content);
}
| | | 156 | | } |