| | | 1 | | using System.Text.RegularExpressions; |
| | | 2 | | |
| | | 3 | | namespace Chronicis.Api.Services; |
| | | 4 | | |
| | | 5 | | /// <summary> |
| | | 6 | | /// Parses wiki-style links from article content using regex pattern matching. |
| | | 7 | | /// Supports both legacy markdown format and modern HTML span format. |
| | | 8 | | /// </summary> |
| | | 9 | | public sealed partial class LinkParser : ILinkParser |
| | | 10 | | { |
| | | 11 | | |
/// <summary>
/// Collects the wiki links found in an article body, reporting each target article at most once.
/// </summary>
/// <remarks>
/// Two notations are recognised: HTML spans carrying a <c>data-target-id</c> attribute
/// (TipTap output) and the legacy <c>[[guid|text]]</c> markdown form. The HTML pass runs
/// first and both passes share one set of seen IDs, so when a target occurs in both
/// notations only the HTML occurrence is returned. Results are therefore grouped by
/// notation rather than sorted by position.
/// </remarks>
/// <param name="body">Article body to scan; may be <c>null</c> or empty.</param>
/// <returns>
/// The parsed links (target ID, display text, match position), or an empty sequence
/// when <paramref name="body"/> is <c>null</c> or empty.
/// </returns>
public IEnumerable<ParsedLink> ParseLinks(string? body)
{
    // Nothing to scan.
    if (body is null || body.Length == 0)
    {
        return Enumerable.Empty<ParsedLink>();
    }

    // Cheap ordinal marker probes decide whether each regex pass is worth running.
    bool mayContainHtmlLinks = body.Contains("data-target-id=", StringComparison.Ordinal);
    bool mayContainLegacyLinks = body.Contains("[[", StringComparison.Ordinal);

    var seenTargets = new HashSet<Guid>(); // shared by both passes for de-duplication
    var found = new List<ParsedLink>();

    // HTML span notation (TipTap output) is handled first.
    if (mayContainHtmlLinks)
    {
        ParseHtmlLinks(body, found, seenTargets);
    }

    // Legacy markdown notation is kept for backwards compatibility.
    if (mayContainLegacyLinks)
    {
        ParseLegacyLinks(body, found, seenTargets);
    }

    return found;
}
| | | 43 | | |
/// <summary>
/// Appends one <see cref="ParsedLink"/> to <paramref name="links"/> for every
/// <c>HtmlLinkRegex</c> match whose target ID has not been recorded yet.
/// </summary>
/// <param name="body">Article body to scan.</param>
/// <param name="links">Result list that newly found links are appended to.</param>
/// <param name="processedGuids">
/// Target IDs already recorded; each newly accepted ID is added to this set.
/// </param>
private static void ParseHtmlLinks(string body, List<ParsedLink> links, HashSet<Guid> processedGuids)
{
    foreach (Match spanMatch in HtmlLinkRegex().Matches(body))
    {
        // Capture group 1 holds the target article ID.
        var targetId = Guid.Parse(spanMatch.Groups[1].Value);

        // HashSet.Add returns false for an ID that is already present,
        // so only the first occurrence of a target is recorded.
        if (processedGuids.Add(targetId))
        {
            // Capture group 2 is used as the display text, with surrounding whitespace removed.
            var label = spanMatch.Groups[2].Value.Trim();

            // The match's start index in the body is stored as the link position.
            links.Add(new ParsedLink(targetId, label, spanMatch.Index));
        }
    }
}
| | | 66 | | |
| | | 67 | | [GeneratedRegex(@"<span[^>]+data-target-id=""([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F |
| | | 68 | | private static partial Regex HtmlLinkRegex(); |
| | | 69 | | |
| | | 70 | | [GeneratedRegex(@"\[\[([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})(?:\|([^\]]+))?\] |
| | | 71 | | private static partial Regex LegacyLinkRegex(); |
| | | 72 | | |
/// <summary>
/// Appends one <see cref="ParsedLink"/> to <paramref name="links"/> for every
/// <c>LegacyLinkRegex</c> match (the <c>[[guid|text]]</c> notation) whose target ID
/// has not been recorded yet.
/// </summary>
/// <param name="body">Article body to scan.</param>
/// <param name="links">Result list that newly found links are appended to.</param>
/// <param name="processedGuids">
/// Target IDs already recorded, including any added by the HTML pass when it ran first;
/// each newly accepted ID is added to this set.
/// </param>
private static void ParseLegacyLinks(string body, List<ParsedLink> links, HashSet<Guid> processedGuids)
{
    foreach (Match legacyMatch in LegacyLinkRegex().Matches(body))
    {
        // Capture group 1 holds the target article ID.
        var targetId = Guid.Parse(legacyMatch.Groups[1].Value);

        // HashSet.Add returns false for an ID that is already present,
        // so a target already recorded is skipped here.
        if (processedGuids.Add(targetId))
        {
            // The "|text" part is optional; the display text stays null when it is absent.
            string? label = null;
            Group textGroup = legacyMatch.Groups[2];
            if (textGroup.Success)
            {
                label = textGroup.Value.Trim();
            }

            // The match's start index in the body is stored as the link position.
            links.Add(new ParsedLink(targetId, label, legacyMatch.Index));
        }
    }
}
| | | 96 | | } |