| | | 1 | | using System.Text.RegularExpressions; |
| | | 2 | | |
| | | 3 | | namespace Chronicis.Api.Services; |
| | | 4 | | |
| | | 5 | | /// <summary> |
| | | 6 | | /// Parses wiki-style links from article content using regex pattern matching. |
| | | 7 | | /// Supports both legacy markdown format and modern HTML span format. |
| | | 8 | | /// </summary> |
| | | 9 | | public class LinkParser : ILinkParser |
| | | 10 | | { |
// Matches legacy wiki links written as [[guid]] or [[guid|display text]].
// Group 1 captures the GUID (8-4-4-4-12 hex digits separated by dashes);
// the optional group 2 captures the display text that follows the pipe.
private static readonly Regex LegacyLinkPattern = new Regex(
    @"\[\[([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})(?:\|([^\]]+))?\]\]",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);
| | | 17 | | |
| | | 18 | | // HTML span pattern for TipTap wiki-link nodes |
| | | 19 | | // Matches: <span ... data-target-id="guid" ...>Display Text</span> |
| | | 20 | | // Attribute quotes are written as "" because the pattern is a C# verbatim string |
| | 1 | 21 | | private static readonly Regex HtmlLinkPattern = new( |
| | 1 | 22 | | @"<span[^>]+data-target-id=""([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})""[^>] |
| | 1 | 23 | | RegexOptions.Compiled | RegexOptions.IgnoreCase |
| | 1 | 24 | | ); |
| | | 25 | | |
/// <summary>
/// Collects the distinct wiki links found in an article body.
/// HTML span links are collected first, then legacy <c>[[guid|text]]</c> links;
/// a target that has already been recorded is not added a second time.
/// </summary>
/// <param name="body">The article body to parse; may be null or empty.</param>
/// <returns>
/// The parsed links (target ID, display text, match position), or an empty
/// sequence when <paramref name="body"/> is null or empty.
/// </returns>
public IEnumerable<ParsedLink> ParseLinks(string? body)
{
    if (string.IsNullOrEmpty(body))
    {
        return Enumerable.Empty<ParsedLink>();
    }

    var links = new List<ParsedLink>();
    var processedGuids = new HashSet<Guid>(); // shared by both passes so each target is reported once

    // Cheap marker check before running the HTML regex. The comparison ignores
    // case because HtmlLinkPattern is built with RegexOptions.IgnoreCase; a
    // case-sensitive gate would skip input that the pattern itself accepts.
    if (body.Contains("data-target-id=", StringComparison.OrdinalIgnoreCase))
    {
        ParseHtmlLinks(body, links, processedGuids);
    }

    // Legacy [[...]] links are still parsed for backwards compatibility.
    if (body.Contains("[[", StringComparison.Ordinal))
    {
        ParseLegacyLinks(body, links, processedGuids);
    }

    return links;
}
| | | 57 | | |
/// <summary>
/// Runs <see cref="HtmlLinkPattern"/> over <paramref name="body"/> and appends one
/// <see cref="ParsedLink"/> for every target GUID that has not been recorded yet.
/// </summary>
/// <param name="body">Article body to scan.</param>
/// <param name="links">List that receives the newly found links.</param>
/// <param name="processedGuids">Target IDs already recorded; updated as links are added.</param>
private static void ParseHtmlLinks(string body, List<ParsedLink> links, HashSet<Guid> processedGuids)
{
    foreach (Match hit in HtmlLinkPattern.Matches(body))
    {
        // Group 1 holds the target GUID. Skip the match when it does not parse or
        // when the target was already recorded; the set is only touched after a
        // successful parse because of the short-circuit.
        if (!Guid.TryParse(hit.Groups[1].Value, out var targetId) || !processedGuids.Add(targetId))
        {
            continue;
        }

        // Group 2 supplies the display text when it took part in the match.
        Group textGroup = hit.Groups[2];
        string? label = null;
        if (textGroup.Success)
        {
            label = textGroup.Value.Trim();
        }

        // The match offset within the body is stored as the link position.
        links.Add(new ParsedLink(targetId, label, hit.Index));
    }
}
| | | 87 | | |
/// <summary>
/// Runs <see cref="LegacyLinkPattern"/> over <paramref name="body"/> and appends one
/// <see cref="ParsedLink"/> for every <c>[[guid]]</c> / <c>[[guid|text]]</c> target
/// that is not already present in <paramref name="processedGuids"/>.
/// </summary>
/// <param name="body">Article body to scan.</param>
/// <param name="links">List that receives the newly found links.</param>
/// <param name="processedGuids">Target IDs already recorded; updated as links are added.</param>
private static void ParseLegacyLinks(string body, List<ParsedLink> links, HashSet<Guid> processedGuids)
{
    MatchCollection found = LegacyLinkPattern.Matches(body);

    for (int i = 0; i < found.Count; i++)
    {
        Match hit = found[i];

        // A target counts as new only if group 1 parses as a GUID and the set
        // did not contain it yet (it may have been added by an earlier pass).
        bool isNewTarget = Guid.TryParse(hit.Groups[1].Value, out var targetId)
            && processedGuids.Add(targetId);
        if (!isNewTarget)
        {
            continue;
        }

        // Optional group 2 is the text after the pipe; absent means no display text.
        Group textGroup = hit.Groups[2];
        string? label = textGroup.Success ? textGroup.Value.Trim() : null;

        links.Add(new ParsedLink(targetId, label, hit.Index));
    }
}
| | | 116 | | } |