| | | 1 | | using System.Text.RegularExpressions; |
| | | 2 | | using Chronicis.Api.Data; |
| | | 3 | | using Chronicis.Shared.DTOs; |
| | | 4 | | using Microsoft.EntityFrameworkCore; |
| | | 5 | | |
| | | 6 | | namespace Chronicis.Api.Services; |
| | | 7 | | |
/// <summary>
/// Detects places in article content where wiki links to other articles could be inserted.
/// </summary>
public interface IAutoLinkService
{
    /// <summary>
    /// Scans an article body and reports the positions at which wiki links could be inserted.
    /// Only positions are returned; the client inserts the links itself via TipTap.
    /// </summary>
    /// <param name="articleId">The article being edited; it is excluded from the link candidates.</param>
    /// <param name="worldId">The world whose articles are searched for matches.</param>
    /// <param name="body">The article body content (HTML) to scan.</param>
    /// <param name="userId">The user ID used to scope the search.</param>
    /// <returns>A response containing the match positions and their details.</returns>
    Task<AutoLinkResponseDto> FindLinksAsync(Guid articleId, Guid worldId, string body, Guid userId);
}
| | | 24 | | |
/// <summary>
/// Implementation of the auto-link service.
/// Works on HTML content: it searches the body for the titles and aliases of the other
/// articles in the same world and returns match positions for client-side insertion.
/// </summary>
public class AutoLinkService : IAutoLinkService
{
    private readonly ChronicisDbContext _context;
    private readonly ILogger<AutoLinkService> _logger;

    // Matches an existing wiki-link span together with its content:
    //   <span data-type="wiki-link" ... >...</span>
    private static readonly Regex ExistingWikiLinkPattern = new(
        @"<span[^>]*data-type=""wiki-link""[^>]*>.*?</span>",
        RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches an existing external-link span together with its content.
    private static readonly Regex ExistingExternalLinkPattern = new(
        @"<span[^>]*data-type=""external-link""[^>]*>.*?</span>",
        RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches any HTML tag, so terms are never matched inside tag names or attributes.
    private static readonly Regex HtmlTagPattern = new(
        @"<[^>]+>",
        RegexOptions.Compiled);

    // Matches HTML character references (&amp; &nbsp; &#39; &#x27;). A term such as "amp"
    // must not be linked inside one, because that would corrupt the entity.
    private static readonly Regex HtmlEntityPattern = new(
        @"&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#[xX][0-9a-fA-F]+);",
        RegexOptions.Compiled);

    // Legacy markdown wiki link pattern (for backwards compatibility): [[guid]] or [[guid|text]]
    private static readonly Regex LegacyWikiLinkPattern = new(
        @"\[\[([a-fA-F0-9\-]{36})(?:\|([^\]]+))?\]\]",
        RegexOptions.Compiled);

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="context">Database context used to load the linkable articles.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    public AutoLinkService(ChronicisDbContext context, ILogger<AutoLinkService> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Finds every position in <paramref name="body"/> where the title or an alias of another
    /// article in the world occurs as a whole term, outside of HTML tags, HTML entities and
    /// existing links.
    /// </summary>
    /// <param name="articleId">The article being edited; it is never offered as a link target.</param>
    /// <param name="worldId">The world whose articles are candidates.</param>
    /// <param name="body">The article body (HTML). Null, empty or whitespace yields no matches.</param>
    /// <param name="userId">Only articles in worlds this user is a member of are considered.</param>
    /// <returns>
    /// The matches ordered by start position. Indices refer to positions in <paramref name="body"/>;
    /// <c>EndIndex</c> is exclusive.
    /// </returns>
    public async Task<AutoLinkResponseDto> FindLinksAsync(
        Guid articleId,
        Guid worldId,
        string body,
        Guid userId)
    {
        if (string.IsNullOrWhiteSpace(body))
        {
            return EmptyResponse();
        }

        // Get all articles in this world that could be linked to (with their aliases).
        // The join on WorldMembers restricts the result to worlds the user belongs to.
        var linkableArticles = await (
            from a in _context.Articles
            join wm in _context.WorldMembers on a.WorldId equals wm.WorldId
            where wm.UserId == userId
            where a.WorldId == worldId
            where a.Id != articleId
            where !string.IsNullOrEmpty(a.Title)
            select new
            {
                a.Id,
                a.Title,
                Aliases = a.Aliases.Select(al => al.AliasText).ToList()
            }
        ).ToListAsync();

        if (linkableArticles.Count == 0)
        {
            return EmptyResponse();
        }

        // Areas of the body in which nothing may be matched.
        var protectedRanges = GetProtectedRanges(body);

        // Every searchable term (title or alias) together with the article it points to.
        var searchTerms = new List<(string Term, Guid ArticleId, string ArticleTitle, bool IsAlias)>();

        foreach (var article in linkableArticles)
        {
            // The database filter only excludes empty titles; whitespace-only or
            // punctuation-only titles are rejected here, exactly like aliases.
            if (!string.IsNullOrWhiteSpace(article.Title) && HasLetterOrDigit(article.Title))
            {
                searchTerms.Add((article.Title.Trim(), article.Id, article.Title, false));
            }

            foreach (var alias in article.Aliases)
            {
                if (!string.IsNullOrWhiteSpace(alias) && HasLetterOrDigit(alias))
                {
                    searchTerms.Add((alias.Trim(), article.Id, article.Title, true));
                }
            }
        }

        // Longest terms first, so "Waterdeep" claims its range before "Water" is tried.
        var sortedTerms = searchTerms
            .OrderByDescending(t => t.Term.Length)
            .ToList();

        var allMatches = new List<AutoLinkMatchDto>();
        var usedRanges = new List<(int Start, int End)>(); // Ranges already claimed by a match

        foreach (var term in sortedTerms)
        {
            // Whole-term match. Lookarounds are used instead of \b because \b requires a word
            // character on one side, so terms that begin or end with punctuation ("Mr.", "C++")
            // could never match. For ordinary terms the two forms are equivalent.
            var pattern = $@"(?<!\w){Regex.Escape(term.Term)}(?!\w)";

            try
            {
                // CultureInvariant keeps case-insensitive matching independent of the
                // server's current culture.
                var regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

                foreach (Match match in regex.Matches(body))
                {
                    // Skip matches inside protected content (tags, entities, existing links)
                    // and matches that overlap a range a longer term already claimed.
                    if (IsInProtectedRange(match.Index, match.Length, protectedRanges) ||
                        IsInProtectedRange(match.Index, match.Length, usedRanges))
                    {
                        continue;
                    }

                    var endIndex = match.Index + match.Length;

                    allMatches.Add(new AutoLinkMatchDto
                    {
                        MatchedText = match.Value,
                        ArticleTitle = term.ArticleTitle,
                        ArticleId = term.ArticleId,
                        StartIndex = match.Index,
                        EndIndex = endIndex,
                        IsAliasMatch = term.IsAlias
                    });

                    // Mark this range as used so shorter terms don't match within it.
                    usedRanges.Add((match.Index, endIndex));
                }
            }
            catch (Exception ex) when (ex is ArgumentException or RegexMatchTimeoutException)
            {
                // Best effort: a single problematic term must not fail the whole scan.
                _logger.LogWarning(ex, "Failed to create regex for term: {Term}", term.Term);
            }
        }

        // Sort matches by position for consistent display in the confirmation dialog.
        allMatches = allMatches.OrderBy(m => m.StartIndex).ToList();

        _logger.LogDebug(
            "Auto-link found {Count} matches for article {ArticleId}",
            allMatches.Count,
            articleId);

        return new AutoLinkResponseDto
        {
            LinksFound = allMatches.Count,
            Matches = allMatches
        };
    }

    /// <summary>
    /// Builds the response returned when there is nothing to link.
    /// </summary>
    private static AutoLinkResponseDto EmptyResponse() => new()
    {
        LinksFound = 0,
        Matches = new List<AutoLinkMatchDto>()
    };

    /// <summary>
    /// Returns true when the term contains at least one letter or digit. Terms made only of
    /// punctuation or symbols are not searched for, since they would match far too broadly.
    /// </summary>
    private static bool HasLetterOrDigit(string term) => term.Any(char.IsLetterOrDigit);

    /// <summary>
    /// Gets the ranges in the content that must not be matched:
    /// - HTML tags
    /// - HTML character references (entities)
    /// - Existing wiki-link spans (including their content)
    /// - Existing external-link spans (including their content)
    /// - Legacy markdown wiki links
    /// </summary>
    /// <param name="body">The HTML content to analyse.</param>
    /// <returns>Half-open ranges (start inclusive, end exclusive); they may overlap each other.</returns>
    private static List<(int Start, int End)> GetProtectedRanges(string body)
    {
        var ranges = new List<(int Start, int End)>();

        Regex[] protectedPatterns =
        {
            HtmlTagPattern,
            HtmlEntityPattern,
            ExistingWikiLinkPattern,
            ExistingExternalLinkPattern,
            LegacyWikiLinkPattern
        };

        foreach (var protectedPattern in protectedPatterns)
        {
            foreach (Match match in protectedPattern.Matches(body))
            {
                ranges.Add((match.Index, match.Index + match.Length));
            }
        }

        return ranges;
    }

    /// <summary>
    /// Checks whether the span starting at <paramref name="index"/> with the given
    /// <paramref name="length"/> overlaps any of the supplied ranges.
    /// </summary>
    /// <param name="index">Start position of the candidate match.</param>
    /// <param name="length">Length of the candidate match.</param>
    /// <param name="ranges">Half-open ranges (start inclusive, end exclusive) to test against.</param>
    /// <returns>True if any part of the span lies inside one of the ranges.</returns>
    private static bool IsInProtectedRange(int index, int length, List<(int Start, int End)> ranges)
    {
        var end = index + length;

        foreach (var range in ranges)
        {
            // Two half-open intervals overlap when each one starts before the other ends.
            if (index < range.End && end > range.Start)
            {
                return true;
            }
        }

        return false;
    }
}