using System.Net;
using System.Text.RegularExpressions;
using Chronicis.Api.Data;
using Chronicis.Shared.DTOs;
using Chronicis.Shared.Models;
using Microsoft.EntityFrameworkCore;

namespace Chronicis.Api.Services.Articles;

/// <summary>
/// Service for managing external resource links embedded in article content.
/// </summary>
/// <remarks>
/// Links are recognised in the article HTML as <c>span</c> elements carrying
/// <c>data-type="external-link"</c> together with <c>data-source</c>, <c>data-id</c>
/// and <c>data-title</c> attributes. The persisted rows for an article are fully
/// replaced on every sync.
/// </remarks>
public sealed partial class ArticleExternalLinkService : IArticleExternalLinkService
{
    private readonly ChronicisDbContext _context;
    private readonly ILogger<ArticleExternalLinkService> _logger;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="context">Database context used to read and write external link rows.</param>
    /// <param name="logger">Logger used for trace and error output.</param>
    public ArticleExternalLinkService(
        ChronicisDbContext context,
        ILogger<ArticleExternalLinkService> logger)
    {
        _context = context;
        _logger = logger;
    }

    // NOTE: RegexOptions.Compiled is intentionally not passed to [GeneratedRegex];
    // the regex source generator ignores that flag.

    /// <summary>
    /// Regex to find span elements with data-type="external-link".
    /// Captures the full attribute block so individual data-* attributes can be extracted.
    /// </summary>
    [GeneratedRegex(
        @"<span\s([^>]*data-type=""external-link""[^>]*)>",
        RegexOptions.IgnoreCase)]
    private static partial Regex ExternalLinkSpanRegex();

    /// <summary>Extracts data-source value from an attribute string.</summary>
    [GeneratedRegex(@"data-source=""([^""]*)""", RegexOptions.IgnoreCase)]
    private static partial Regex DataSourceRegex();

    /// <summary>Extracts data-id value from an attribute string.</summary>
    [GeneratedRegex(@"data-id=""([^""]*)""", RegexOptions.IgnoreCase)]
    private static partial Regex DataIdRegex();

    /// <summary>Extracts data-title value from an attribute string.</summary>
    [GeneratedRegex(@"data-title=""([^""]*)""", RegexOptions.IgnoreCase)]
    private static partial Regex DataTitleRegex();

    /// <summary>
    /// Replaces the stored external links of an article with the links found in
    /// <paramref name="htmlContent"/>.
    /// </summary>
    /// <param name="articleId">Identifier of the article whose links are synchronised.</param>
    /// <param name="htmlContent">
    /// Article HTML to scan. <c>null</c> or whitespace yields no links, which removes
    /// every stored link for the article.
    /// </param>
    /// <remarks>
    /// Removals and insertions are tracked on the context and committed by a single
    /// <c>SaveChangesAsync</c> call. Any exception is logged and rethrown.
    /// </remarks>
    public async Task SyncExternalLinksAsync(Guid articleId, string? htmlContent)
    {
        try
        {
            // Extract external links from HTML
            var extractedLinks = ExtractExternalLinksFromHtml(htmlContent);

            _logger.LogTraceSanitized(
                "Extracted {Count} external links from article {ArticleId}",
                extractedLinks.Count,
                articleId);

            // Delete all existing external links for this article
            var existingLinks = await _context.ArticleExternalLinks
                .Where(ael => ael.ArticleId == articleId)
                .ToListAsync();

            if (existingLinks.Count > 0)
            {
                _context.ArticleExternalLinks.RemoveRange(existingLinks);
                _logger.LogTraceSanitized(
                    "Removed {Count} existing external links for article {ArticleId}",
                    existingLinks.Count,
                    articleId);
            }

            // Insert new external links
            if (extractedLinks.Count > 0)
            {
                var newLinks = extractedLinks.Select(link => new ArticleExternalLink
                {
                    Id = Guid.NewGuid(),
                    ArticleId = articleId,
                    Source = link.Source,
                    ExternalId = link.ExternalId,
                    DisplayTitle = link.DisplayTitle
                }).ToList();

                // Synchronous AddRange: ids are assigned above, so no async value
                // generator is involved and the async overload buys nothing.
                _context.ArticleExternalLinks.AddRange(newLinks);

                _logger.LogTraceSanitized(
                    "Added {Count} new external links for article {ArticleId}",
                    newLinks.Count,
                    articleId);
            }

            await _context.SaveChangesAsync();
        }
        catch (Exception ex)
        {
            _logger.LogErrorSanitized(
                ex,
                "Error syncing external links for article {ArticleId}",
                articleId);
            throw;
        }
    }

    /// <summary>
    /// Returns the stored external links of an article, ordered by source and then
    /// by display title.
    /// </summary>
    /// <param name="articleId">Identifier of the article to query.</param>
    /// <returns>The matching links as DTOs; an empty list when the article has none.</returns>
    /// <remarks>Any exception is logged and rethrown.</remarks>
    public async Task<List<ArticleExternalLinkDto>> GetExternalLinksForArticleAsync(Guid articleId)
    {
        try
        {
            var links = await _context.ArticleExternalLinks
                .Where(ael => ael.ArticleId == articleId)
                .OrderBy(ael => ael.Source)
                .ThenBy(ael => ael.DisplayTitle)
                .Select(ael => new ArticleExternalLinkDto
                {
                    Id = ael.Id,
                    ArticleId = ael.ArticleId,
                    Source = ael.Source,
                    ExternalId = ael.ExternalId,
                    DisplayTitle = ael.DisplayTitle
                })
                .ToListAsync();

            _logger.LogTraceSanitized(
                "Retrieved {Count} external links for article {ArticleId}",
                links.Count,
                articleId);

            return links;
        }
        catch (Exception ex)
        {
            _logger.LogErrorSanitized(
                ex,
                "Error retrieving external links for article {ArticleId}",
                articleId);
            throw;
        }
    }

    /// <summary>
    /// Extracts external link information from HTML content.
    /// Attribute order within the span is not significant.
    /// </summary>
    /// <param name="htmlContent">HTML to scan; may be <c>null</c> or empty.</param>
    /// <returns>
    /// One tuple per matching span whose source, id and title are all non-blank after
    /// HTML decoding, in document order. Spans missing any of the three are skipped.
    /// </returns>
    private static List<(string Source, string ExternalId, string DisplayTitle)> ExtractExternalLinksFromHtml(string? htmlContent)
    {
        var links = new List<(string Source, string ExternalId, string DisplayTitle)>();

        if (string.IsNullOrWhiteSpace(htmlContent))
        {
            return links;
        }

        foreach (Match spanMatch in ExternalLinkSpanRegex().Matches(htmlContent))
        {
            var attrs = spanMatch.Groups[1].Value;

            var source = ReadAttributeValue(DataSourceRegex(), attrs);
            var externalId = ReadAttributeValue(DataIdRegex(), attrs);
            var displayTitle = ReadAttributeValue(DataTitleRegex(), attrs);

            // A link is only usable when all three pieces are present.
            if (!string.IsNullOrWhiteSpace(source) &&
                !string.IsNullOrWhiteSpace(externalId) &&
                !string.IsNullOrWhiteSpace(displayTitle))
            {
                links.Add((source, externalId, displayTitle));
            }
        }

        return links;
    }

    /// <summary>
    /// Reads a single attribute value out of a span's attribute block.
    /// </summary>
    /// <param name="attributeRegex">Regex whose first capture group is the attribute value.</param>
    /// <param name="attributes">Raw attribute text captured from the span's opening tag.</param>
    /// <returns>The HTML-decoded value, or <see cref="string.Empty"/> when the attribute is absent.</returns>
    private static string ReadAttributeValue(Regex attributeRegex, string attributes)
    {
        var match = attributeRegex.Match(attributes);
        if (!match.Success)
        {
            return string.Empty;
        }

        // Attribute values in serialized HTML carry entities (e.g. "&amp;" for "&");
        // decode them so the stored value is plain text rather than markup.
        // NOTE(review): assumes consumers of Source/ExternalId/DisplayTitle expect
        // plain text - confirm no caller re-injects these values as raw HTML.
        return WebUtility.HtmlDecode(match.Groups[1].Value);
    }
}