using System.Text.RegularExpressions;
using Dpz.Core.Service.Mediator.Features.Article.Contracts;
using Dpz.Core.Service.Mediator.Features.Search;
using Dpz.Core.Service.Mediator.Features.Search.Queries;

namespace Dpz.Core.Service.Mediator.Features.Article.Queries;

/// <summary>
/// Handles article search requests: serves cached results when available, otherwise runs a
/// MongoDB full-text query (with a regex fallback) and computes title/content match positions
/// for every returned article.
/// </summary>
public class ArticleSearchEvent(
    IRepository<Public.Entity.Article> repository,
    IMediator mediator,
    IMapper mapper,
    IFusionCache fusionCache,
    ILogger<ArticleSearchEvent> logger
) : IRequestHandler<ArticleSearchRequest, List<ArticleResponseSearchResultResponse>>
{
    private const string CacheTag = "Mediator:ArticleSearch";

    /// <summary>Normalized keywords longer than this are rejected without searching.</summary>
    private const int MaxKeywordLength = 100;

    /// <summary>Maximum number of articles returned; also the threshold below which the regex fallback runs.</summary>
    private const int MaxResultCount = 10;

    /// <summary>Maximum number of documents requested from the full-text query.</summary>
    private const int TextSearchLimit = 30;

    /// <summary>Maximum number of documents requested from the regex fallback query.</summary>
    private const int RegexFallbackLimit = 80;

    /// <summary>Score given to the first full-text hit; each following hit scores one less.</summary>
    private const int TextSearchBaseScore = 2000;

    /// <summary>How long a computed result list stays in the cache.</summary>
    private static readonly TimeSpan CacheDuration = TimeSpan.FromHours(3);

    /// <summary>
    /// Runs the main search flow: analyze the keyword, try the cache, query the repository,
    /// compute title/content matches per article, then cache and return the result.
    /// </summary>
    /// <param name="request">The search request carrying the raw keyword.</param>
    /// <param name="cancellationToken">Token used to cancel cache access, queries and match computation.</param>
    /// <returns>
    /// The matching articles with their title and content match results. An empty list is returned
    /// when the normalized keyword is blank, is longer than <see cref="MaxKeywordLength"/> characters,
    /// or yields no query terms.
    /// </returns>
    public async ValueTask<List<ArticleResponseSearchResultResponse>> Handle(
        ArticleSearchRequest request,
        CancellationToken cancellationToken
    )
    {
        logger.LogInformation("收到文章搜索请求: {Keyword}", request.Keyword);

        var keywordAnalysis = SearchKeywordAnalyzer.Analyze(request.Keyword);
        if (string.IsNullOrWhiteSpace(keywordAnalysis.NormalizedKeyword))
        {
            logger.LogWarning("搜索关键词为空,返回空结果");
            return [];
        }

        if (keywordAnalysis.NormalizedKeyword.Length > MaxKeywordLength)
        {
            logger.LogWarning("搜索关键词过长,返回空结果");
            return [];
        }

        // The cache key is derived from the normalized keyword, not from the raw request text.
        var cacheKey = BuildCacheKey(keywordAnalysis.NormalizedKeyword);
        logger.LogInformation("尝试从缓存获取结果,缓存键: {CacheKey}", cacheKey);

        var cache = await fusionCache.TryGetAsync<List<ArticleResponseSearchResultResponse>>(
            cacheKey,
            token: cancellationToken
        );
        if (cache.HasValue)
        {
            logger.LogInformation(
                "从缓存中获取到搜索结果,关键词: {Keyword}, 结果数量: {Count}",
                keywordAnalysis.NormalizedKeyword,
                cache.Value.Count
            );
            return cache.Value;
        }

        if (keywordAnalysis.QueryTerms.Count == 0)
        {
            logger.LogWarning("未生成有效查询词,返回空结果");
            return [];
        }

        logger.LogDebug("缓存未命中,开始执行搜索");

        // One pattern is built from the highlight terms and reused for every title and body.
        var pattern = SearchKeywordAnalyzer.BuildRegexPattern(keywordAnalysis.HighlightTerms);

        var stopwatch = Stopwatch.StartNew();
        logger.LogInformation(
            "开始处理文章搜索请求,关键词: {Keyword}, 查询词: {QueryTerms}, 搜索模式: {Pattern}",
            keywordAnalysis.NormalizedKeyword,
            string.Join(',', keywordAnalysis.QueryTerms),
            pattern
        );
        var searchResult = await SearchAsync(keywordAnalysis, cancellationToken);

        var result = new List<ArticleResponseSearchResultResponse>();
        var articleCount = 0;

        foreach (var article in searchResult)
        {
            articleCount++;
            var articleStopwatch = Stopwatch.StartNew();
            logger.LogDebug(
                "处理第 {Count} 篇文章: {Title} (ID: {Id})",
                articleCount,
                article.Title,
                article.Id
            );

            var response = mapper.Map<ArticleResponseSearchResultResponse>(article);

            // Match positions are computed by dispatching a ContentSearchRequest per text.
            var titleSearchRequest = new ContentSearchRequest
            {
                Text = article.Title,
                Pattern = pattern,
            };
            response.TitleSearchResult = await mediator.Send(titleSearchRequest, cancellationToken);

            var contentSearchRequest = new ContentSearchRequest
            {
                Text = article.Markdown,
                Pattern = pattern,
            };
            response.ContentSearchResult = await mediator.Send(
                contentSearchRequest,
                cancellationToken
            );
            result.Add(response);

            articleStopwatch.Stop();
            logger.LogInformation(
                "文章 {Title} 处理完成,标题匹配数: {TitleMatches}, 内容匹配数: {ContentMatches}, 耗时: {ElapsedMs}ms",
                article.Title,
                response.TitleSearchResult.Count,
                response.ContentSearchResult.Count,
                articleStopwatch.ElapsedMilliseconds
            );
        }

        stopwatch.Stop();
        logger.LogInformation(
            "文章搜索请求处理完成,关键词: {Keyword}, 共处理 {Count} 篇文章,总耗时: {ElapsedMs}ms",
            request.Keyword,
            articleCount,
            stopwatch.ElapsedMilliseconds
        );

        logger.LogInformation(
            "将搜索结果存入缓存,缓存键: {CacheKey}, 结果数量: {Count}",
            cacheKey,
            result.Count
        );
        await fusionCache.SetAsync(
            cacheKey,
            result,
            factoryOptions => factoryOptions.SetDuration(CacheDuration),
            [CacheTag],
            token: cancellationToken
        );
        return result;
    }

    /// <summary>
    /// Builds the cache key for a keyword: the cache tag, a colon, then the trimmed,
    /// invariant-lowercased keyword.
    /// </summary>
    /// <param name="keyword">The normalized search keyword.</param>
    /// <returns>The cache key string.</returns>
    private static string BuildCacheKey(string keyword)
    {
        return $"{CacheTag}:{keyword.Trim().ToLowerInvariant()}";
    }

    /// <summary>
    /// Collects candidate articles from the full-text query and, when fewer than
    /// <see cref="MaxResultCount"/> distinct articles were found, from the regex fallback.
    /// Candidates are de-duplicated by id (keeping the higher score), ordered by score and then
    /// by creation time (both descending), and truncated to <see cref="MaxResultCount"/>.
    /// </summary>
    /// <param name="keywordAnalysis">The analyzed keyword providing the normalized keyword and query terms.</param>
    /// <param name="cancellationToken">Token used to cancel the underlying queries.</param>
    /// <returns>The top-ranked articles, at most <see cref="MaxResultCount"/>.</returns>
    /// <exception cref="OperationCanceledException">
    /// Propagated when the operation is cancelled; cancellation is not treated as a search failure.
    /// </exception>
    private async Task<List<Public.Entity.Article>> SearchAsync(
        SearchKeywordAnalysis keywordAnalysis,
        CancellationToken cancellationToken = default
    )
    {
        var merged = new Dictionary<string, (Public.Entity.Article Article, int Score)>();

        var textRank = 0;
        try
        {
            await foreach (
                var article in SearchByTextAsync(
                    keywordAnalysis.NormalizedKeyword,
                    cancellationToken
                )
            )
            {
                // Full-text hits arrive already sorted, so their position is turned into a score.
                var score = TextSearchBaseScore - textRank;
                Merge(article, score);
                textRank++;
            }
        }
        // Cancellation must reach the caller: without this filter it would be logged as a search
        // failure and the fallback query would be started with an already-cancelled token.
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Best effort: articles merged before the failure are kept and the fallback tops them up.
            logger.LogWarning(ex, "MongoDB 全文检索失败,改用正则兜底搜索");
        }

        if (merged.Count < MaxResultCount)
        {
            await foreach (
                var article in SearchByRegexFallbackAsync(
                    keywordAnalysis.QueryTerms,
                    cancellationToken
                )
            )
            {
                var score = CalculateRegexRelevance(article, keywordAnalysis.QueryTerms);
                if (score <= 0)
                {
                    continue;
                }
                Merge(article, score);
            }
        }

        return merged
            .Values.OrderByDescending(x => x.Score)
            .ThenByDescending(x => x.Article.CreateTime)
            .Take(MaxResultCount)
            .Select(x => x.Article)
            .ToList();

        // Adds the article, or replaces the stored entry only when the new score is strictly higher.
        void Merge(Public.Entity.Article article, int score)
        {
            var key = article.Id.ToString();
            if (!merged.TryGetValue(key, out var existing))
            {
                merged[key] = (article, score);
                return;
            }

            merged[key] = existing.Score >= score ? existing : (article, score);
        }
    }

    /// <summary>
    /// Starts a full-text query for the keyword, sorted by text score and limited to
    /// <see cref="TextSearchLimit"/> documents.
    /// </summary>
    /// <param name="keyword">The normalized keyword passed to the text filter.</param>
    /// <param name="cancellationToken">Token forwarded to the repository query.</param>
    /// <returns>An asynchronous stream of matching articles.</returns>
    private IAsyncEnumerable<Public.Entity.Article> SearchByTextAsync(
        string keyword,
        CancellationToken cancellationToken = default
    )
    {
        logger.LogDebug("执行MongoDB全文搜索,关键词: {Keyword}", keyword);
        var textFilter = Builders<Public.Entity.Article>.Filter.Text(keyword);
        var sort = Builders<Public.Entity.Article>.Sort.MetaTextScore("textScore");
        var options = new FindOptions<Public.Entity.Article>
        {
            Sort = sort,
            Limit = TextSearchLimit,
        };
        return repository.SearchForAsync(textFilter, options, cancellationToken);
    }

    /// <summary>
    /// Starts the fallback query: an article matches when any query term occurs
    /// (case-insensitively) in its title, introduction, markdown or tags. Results are sorted by
    /// creation time descending and limited to <see cref="RegexFallbackLimit"/> documents.
    /// </summary>
    /// <param name="queryTerms">The terms to look for; each one is escaped before being used as a regex.</param>
    /// <param name="cancellationToken">Token forwarded to the repository query.</param>
    /// <returns>An asynchronous stream of matching articles.</returns>
    private IAsyncEnumerable<Public.Entity.Article> SearchByRegexFallbackAsync(
        IReadOnlyList<string> queryTerms,
        CancellationToken cancellationToken = default
    )
    {
        logger.LogDebug(
            "执行中文友好的正则兜底搜索,查询词: {QueryTerms}",
            string.Join(',', queryTerms)
        );
        var fieldFilters = new List<FilterDefinition<Public.Entity.Article>>();

        foreach (var term in queryTerms)
        {
            // Escape the term so user input is matched literally rather than as a pattern.
            var escaped = Regex.Escape(term);
            var regex = new BsonRegularExpression(escaped, "i");
            fieldFilters.Add(
                Builders<Public.Entity.Article>.Filter.Or(
                    Builders<Public.Entity.Article>.Filter.Regex(x => x.Title, regex),
                    Builders<Public.Entity.Article>.Filter.Regex(x => x.Introduction, regex),
                    Builders<Public.Entity.Article>.Filter.Regex(x => x.Markdown, regex),
                    Builders<Public.Entity.Article>.Filter.Regex("Tags", regex)
                )
            );
        }

        // NOTE(review): with no terms the empty filter is used, which presumably matches every
        // article; Handle returns early when there are no query terms, so this is not expected
        // to be reached in practice.
        var filter =
            fieldFilters.Count == 0
                ? Builders<Public.Entity.Article>.Filter.Empty
                : Builders<Public.Entity.Article>.Filter.Or(fieldFilters);
        var options = new FindOptions<Public.Entity.Article>
        {
            Limit = RegexFallbackLimit,
            Sort = Builders<Public.Entity.Article>.Sort.Descending(x => x.CreateTime),
        };
        return repository.SearchForAsync(filter, options, cancellationToken);
    }

    /// <summary>
    /// Scores an article against the query terms. Per term: each title occurrence adds 8, each
    /// tag containing the term adds 6, each introduction occurrence adds 4, and markdown
    /// occurrences add 2 each, capped at 20.
    /// </summary>
    /// <param name="article">The article to score.</param>
    /// <param name="queryTerms">The terms to count.</param>
    /// <returns>
    /// The summed score, or 0 when there are three or more terms and the total is below 8.
    /// </returns>
    private static int CalculateRegexRelevance(
        Public.Entity.Article article,
        IReadOnlyList<string> queryTerms
    )
    {
        var score = 0;
        foreach (var term in queryTerms)
        {
            var titleHits = CountOccurrences(article.Title, term);
            var introHits = CountOccurrences(article.Introduction, term);
            var contentHits = CountOccurrences(article.Markdown, term);

            // The text fields above tolerate null; guard the tag collection the same way so an
            // article without tags scores 0 for tags instead of throwing.
            var tagHits =
                article.Tags?.Count(x => x.Contains(term, StringComparison.OrdinalIgnoreCase))
                ?? 0;

            score += titleHits * 8;
            score += tagHits * 6;
            score += introHits * 4;
            // Cap the body contribution so a long article cannot outweigh title and tag hits.
            score += Math.Min(contentHits * 2, 20);
        }

        // With many terms, a weak total is treated as no match at all.
        if (queryTerms.Count >= 3 && score < 8)
        {
            return 0;
        }

        return score;
    }

    /// <summary>
    /// Counts non-overlapping, case-insensitive (ordinal) occurrences of a term in a text.
    /// </summary>
    /// <param name="source">The text to scan; null or empty yields 0.</param>
    /// <param name="term">The term to look for; null, empty or whitespace-only yields 0.</param>
    /// <returns>The number of occurrences found.</returns>
    private static int CountOccurrences(string? source, string term)
    {
        if (string.IsNullOrEmpty(source) || string.IsNullOrWhiteSpace(term))
        {
            return 0;
        }

        var count = 0;
        var index = 0;
        while (index < source.Length)
        {
            index = source.IndexOf(term, index, StringComparison.OrdinalIgnoreCase);
            if (index < 0)
            {
                break;
            }

            count++;
            // Skip past the whole match so occurrences never overlap.
            index += term.Length;
        }

        return count;
    }
}
⚠ 以下内容为 AI 分析的结果,请根据实际情况进行判断。

这是一个基于 .NET 的文章搜索功能实现,采用了 CQRS(命令查询职责分离)模式,并通过中介者接口(IMediator / IRequestHandler)派发和处理请求。下面详细说明这段代码的功能和结构:

核心功能

这个 ArticleSearchEvent 类实现了一个智能的文章搜索系统,具备以下特性:

1. 多层搜索策略

  • MongoDB 全文检索:优先使用 MongoDB 的全文搜索功能,性能最佳
  • 正则表达式兜底:当全文检索结果不足或失败时,使用正则表达式进行兜底搜索

2. 缓存机制

  • 使用 IFusionCache 缓存搜索结果,有效期3小时
  • 避免重复搜索相同关键词,提升响应速度

3. 关键词分析与高亮

  • 通过 SearchKeywordAnalyzer 分析和规范化搜索关键词
  • 支持搜索结果的高亮显示

主要处理流程

请求接收 → 关键词分析 → 缓存检查 → 数据库搜索 → 结果处理 → 缓存存储 → 返回结果

详细步骤:

  1. 输入验证:检查关键词是否为空或过长(>100字符)
  2. 缓存查询:尝试从缓存获取已有结果
  3. 分层搜索
    • 首先执行 MongoDB 全文搜索(限制30条)
    • 如果结果不足10条,启用正则表达式兜底搜索(限制80条)
  4. 结果融合:合并两种搜索方式的结果,去重并按相关性排序
  5. 内容匹配:对每篇文章的标题和内容进行匹配高亮处理
  6. 缓存存储:将最终结果存入缓存

相关性评分算法

// 权重分配:
titleHits * 8     // 标题匹配权重最高
tagHits * 6       // 标签匹配次之
introHits * 4     // 简介匹配
contentHits * 2   // 内容匹配(每个查询词最多 20 分)

技术特点

  1. 异步编程:大量使用 async/await 和 IAsyncEnumerable
  2. 依赖注入:通过构造函数注入所需服务
  3. 详细日志:提供完整的搜索过程日志记录
  4. 性能监控:使用 Stopwatch 监控处理时间
  5. 异常处理:MongoDB 搜索失败时自动降级到正则搜索

返回结果

返回 List<ArticleResponseSearchResultResponse>,每个结果包含:

  • 文章基本信息(通过 AutoMapper 映射)
  • 标题搜索匹配结果
  • 内容搜索匹配结果

这个实现展现了现代 .NET 应用的最佳实践,包括缓存策略、分层搜索、性能优化和可观测性等方面。

评论加载中...