Procházet zdrojové kódy

feat: RAG环节调试Web工具 — 新增逐环节调试功能并清理旧测试脚本

新增 RAG 管线逐环节调试 Web 工具,支持独立测试每个环节并友好渲染结果。
同时清理 utils_test/RAG_Test/ 下的旧 test_*/debug_* 脚本。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
WangXuMing před 1 týdnem
rodič
revize
9088cb396c

+ 0 - 83
utils_test/RAG_Test/debug_messages.py

@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-详细调试传递给模型的消息内容
-"""
-
-import sys
-import os
-
-from foundation.ai.rag.retrieval.query_rewrite import prompt_loader
-
-
-def debug_messages():
-    """
-    调试实际传递给模型的消息
-    """
-    print("="*60)
-    print("调试传递给模型的消息内容")
-    print("="*60)
-
-    # 测试数据
-    review_content = "深度大于3m的基坑开挖、有地下水侵扰的基坑清底封底,每个工作班至少巡查两遍。"
-    print(f"原始输入内容: {review_content}")
-
-    try:
-        # 获取提示词模板
-        task_prompt = prompt_loader.get_prompt_template(
-            reviewer_type="query_extract",
-            review_content=review_content
-        )
-
-        print(f"\n[DEBUG] 提示词模板类型: {type(task_prompt)}")
-
-        # 检查模板的输入变量
-        print(f"[DEBUG] 模板输入变量: {task_prompt.input_variables}")
-
-        # 尝试不同的格式化方式
-        print("\n=== 方法1: format_messages() (不传参数) ===")
-        try:
-            messages1 = task_prompt.format_messages()
-            print(f"[DEBUG] 用户消息: {messages1[1].content}")
-        except Exception as e:
-            print(f"[ERROR] 方法1失败: {e}")
-
-        print("\n=== 方法2: format_messages(review_content=...) ===")
-        try:
-            messages2 = task_prompt.format_messages(review_content=review_content)
-            print(f"[DEBUG] 用户消息: {messages2[1].content}")
-            print(f"[OK] 方法2成功!")
-        except Exception as e:
-            print(f"[ERROR] 方法2失败: {e}")
-
-        print("\n=== 方法3: 手动构造消息 ===")
-        try:
-            # 获取提示词配置
-            cache_key = "query_extract_query_extract"
-            prompt_config = prompt_loader._cache[cache_key]
-
-            system_prompt = prompt_config['system_prompt']
-            user_template = prompt_config['user_prompt_template']
-
-            # 手动替换变量
-            user_prompt = user_template.replace("{{review_content}}", review_content)
-
-            print(f"[DEBUG] 系统提示词: {system_prompt[:100]}...")
-            print(f"[DEBUG] 用户提示词: {user_prompt}")
-            print(f"[OK] 手动替换成功!")
-        except Exception as e:
-            print(f"[ERROR] 方法3失败: {e}")
-
-    except Exception as e:
-        print(f"[ERROR] 调试失败: {str(e)}")
-        import traceback
-        traceback.print_exc()
-
-
-def main():
-    debug_messages()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 55
utils_test/RAG_Test/debug_query_extract.py

@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-调试QueryRewriteManager.query_extract方法
-"""
-
-import sys
-import os
-import time
-
-from foundation.ai.rag.retrieval.query_rewrite import QueryRewriteManager
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-def debug_query_extract():
-    """
-    调试query_extract方法
-    """
-    # 测试数据
-    review_content = """
-主要部件说明
-1、主梁总成
-主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t,主梁为主要承载受力构件,其上弦杆上方设有轨道供纵移桁车走行,实现预制梁的纵向移动;下弦设有反滚轮行走轨道,作为导梁纵移、前中支腿移动纵行轨道。导梁长18m,主要是为降低过孔挠度和承受中支腿移动荷载,起安全引导、辅助过孔作用。主梁、导梁为三角桁架构件单元,采用销轴连接,前、后端各设置横联构架。
-
-图4-1 主梁总成图
-注意事项:
-(1)更换上、下弦销轴时,应优先向设备供应方购买符合要求的备件。自行更换时,材料性能必须优于设计零件性能,并按规定进行热处理,否则可能造成人员、设备事故。
-(2)销轴不得弯曲受力,不得用销轴作为锤砸工具,不得任意放置及焊接
-
-"""
-    query_rewrite_manager = QueryRewriteManager()
-    start_time = time.time()
-    result = query_rewrite_manager.query_extract(review_content)
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-
-    print(f"[OK] 原始方法提取完成,耗时: {elapsed_time:.2f}秒")
-    print(f"[OK] 原始方法返回结果: {result}")
-
-
-
-def main():
-    """
-    主测试函数
-    """
-    print("开始调试 QueryRewriteManager.query_extract 方法")
-
-    debug_query_extract()
-
-    print("\n调试完成")
-
-
-if __name__ == "__main__":
-    main()

+ 493 - 0
utils_test/RAG_Test/rag_pipeline_web/app.js

@@ -530,3 +530,496 @@ function formatTimestamp(timestamp) {
     const date = new Date(timestamp * 1000);
     return date.toLocaleString('zh-CN');
 }
+
+// ==================== Step-by-step debugging ====================
+
+let currentDebugStep = 'query_extract';  // key of the step currently selected in the debug UI
+const stepMeta = {  // per-step display metadata: label, icon, description, and expected input kind ('text' | 'json')
+    query_extract:        { name: '查询提取', icon: '🔍', desc: '从输入内容中提取查询实体和关键词', inputType: 'text' },
+    entity_enhance:       { name: '实体增强检索', icon: '🎯', desc: '实体召回 + BFP召回,需要 query_pairs 输入(JSON)', inputType: 'json' },
+    multi_stage_recall:   { name: '多阶段召回', icon: '🔄', desc: '混合检索 + 重排序', inputType: 'text' },
+    hybrid_search:        { name: '混合检索', icon: '⚡', desc: 'Dense + Sparse 加权融合检索', inputType: 'text' },
+    parent_doc_enhance:   { name: '父文档增强', icon: '📚', desc: '使用父文档增强检索结果,需要 bfp_result_lists 输入(JSON)', inputType: 'json' },
+    extract_results:      { name: '结果提取', icon: '✂️', desc: '按查询对提取高分结果,需要 bfp_result_lists 输入(JSON)', inputType: 'json' }
+};
+
+function selectDebugStep(stepName) {
+    currentDebugStep = stepName;
+    const meta = stepMeta[stepName];
+
+    // 更新按钮状态
+    document.querySelectorAll('.step-btn').forEach(b => b.classList.remove('active'));
+    document.querySelector(`[data-step="${stepName}"]`).classList.add('active');
+
+    // 更新提示
+    document.getElementById('currentStepInfo').innerHTML =
+        `<span class="step-info-icon">${meta.icon}</span>` +
+        `<span class="step-info-text">当前环节: <strong>${meta.name}</strong> — ${meta.desc}</span>`;
+
+    // 更新输入框占位符
+    const input = document.getElementById('debugInput');
+    if (meta.inputType === 'json') {
+        input.placeholder = '粘贴 JSON 数据...\n\n示例: [{"entity": "...", "search_keywords": [...], "background": "..."}]';
+    } else {
+        input.placeholder = '输入测试文本...\n\n示例:主要部件说明\n1、主梁总成\n主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t...';
+    }
+
+    // 显示/隐藏相关参数
+    updateParamVisibility(stepName);
+}
+
+function updateParamVisibility(stepName) {
+    const visibleParams = {
+        query_extract: [],
+        entity_enhance: [],
+        multi_stage_recall: ['collectionName', 'topK', 'hybridTopK'],
+        hybrid_search: ['collectionName', 'topK', 'denseWeight'],
+        parent_doc_enhance: ['scoreThreshold', 'maxParents'],
+        extract_results: ['scoreThreshold']
+    };
+
+    const allParams = ['paramCollectionName', 'paramTopK', 'paramHybridTopK', 'paramScoreThreshold', 'paramDenseWeight', 'paramMaxParents'];
+    const visible = visibleParams[stepName] || [];
+
+    allParams.forEach(id => {
+        const el = document.getElementById(id);
+        if (!el) return;
+        const row = el.closest('.param-row');
+        if (!row) return;
+
+        const paramKey = {
+            'paramCollectionName': 'collectionName',
+            'paramTopK': 'topK',
+            'paramHybridTopK': 'hybridTopK',
+            'paramScoreThreshold': 'scoreThreshold',
+            'paramDenseWeight': 'denseWeight',
+            'paramMaxParents': 'maxParents'
+        }[id];
+
+        row.style.display = visible.includes(paramKey) ? '' : 'none';
+    });
+}
+
+function toggleParamsPanel() {
+    const body = document.getElementById('paramsBody');
+    const icon = document.getElementById('paramsToggleIcon');
+    if (body.style.display === 'none') {
+        body.style.display = 'block';
+        icon.textContent = '▲';
+    } else {
+        body.style.display = 'none';
+        icon.textContent = '▼';
+    }
+}
+
+function updateDebugServerStatus() {
+    const statusEl = document.getElementById('debugServerStatus');
+    if (!statusEl) return;
+    const dot = statusEl.querySelector('.status-dot');
+    const text = statusEl.querySelector('.status-text');
+    if (window.serverConnected) {
+        dot.className = 'status-dot ' + (window.milvusReady ? 'online' : 'warning');
+        text.textContent = window.milvusReady ? '服务已连接 (Milvus就绪)' : '服务已连接 (Milvus未就绪)';
+    } else {
+        dot.className = 'status-dot offline';
+        text.textContent = '服务未连接';
+    }
+}
+
+function getDebugParams() {
+    return {
+        collection_name: document.getElementById('paramCollectionName').value || 'rag_children_hybrid',
+        top_k: parseInt(document.getElementById('paramTopK').value) || 10,
+        hybrid_top_k: parseInt(document.getElementById('paramHybridTopK').value) || 50,
+        score_threshold: parseFloat(document.getElementById('paramScoreThreshold').value) || 0.5,
+        dense_weight: parseFloat(document.getElementById('paramDenseWeight').value) || 0.7,
+        sparse_weight: 1.0 - (parseFloat(document.getElementById('paramDenseWeight').value) || 0.7),
+        max_parents_per_pair: parseInt(document.getElementById('paramMaxParents').value) || 3
+    };
+}
+
+function runDebugStep() {
+    const content = document.getElementById('debugInput').value.trim();
+    if (!content) {
+        alert('请输入测试内容');
+        return;
+    }
+    if (!window.serverConnected) {
+        alert('服务未连接,请先启动 rag_pipeline_server.py');
+        return;
+    }
+
+    const overlay = document.getElementById('debugLoadingOverlay');
+    const loadingText = document.getElementById('debugLoadingText');
+    overlay.style.display = 'flex';
+    loadingText.textContent = `正在执行: ${stepMeta[currentDebugStep].name}...`;
+    document.getElementById('runDebugStepBtn').disabled = true;
+    document.getElementById('runDebugChainBtn').disabled = true;
+
+    fetch(`${API_BASE}/api/debug/step`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+            step: currentDebugStep,
+            content: content,
+            params: getDebugParams()
+        })
+    })
+    .then(r => r.json().then(data => ({ status: r.status, data })))
+    .then(({ status, data }) => {
+        overlay.style.display = 'none';
+        document.getElementById('runDebugStepBtn').disabled = false;
+        document.getElementById('runDebugChainBtn').disabled = false;
+        renderDebugResult(data, false);
+    })
+    .catch(err => {
+        overlay.style.display = 'none';
+        document.getElementById('runDebugStepBtn').disabled = false;
+        document.getElementById('runDebugChainBtn').disabled = false;
+        alert(`执行失败: ${err.message}`);
+    });
+}
+
+function runDebugChain() {
+    const content = document.getElementById('debugInput').value.trim();
+    if (!content) {
+        alert('请输入测试内容');
+        return;
+    }
+    if (!window.serverConnected) {
+        alert('服务未连接,请先启动 rag_pipeline_server.py');
+        return;
+    }
+
+    const overlay = document.getElementById('debugLoadingOverlay');
+    const loadingText = document.getElementById('debugLoadingText');
+    overlay.style.display = 'flex';
+    loadingText.textContent = '链式执行中: query_extract → entity_enhance → parent_doc_enhance → extract_results...';
+    document.getElementById('runDebugStepBtn').disabled = true;
+    document.getElementById('runDebugChainBtn').disabled = true;
+
+    fetch(`${API_BASE}/api/debug/chain`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+            content: content,
+            params: getDebugParams()
+        })
+    })
+    .then(r => r.json())
+    .then(data => {
+        overlay.style.display = 'none';
+        document.getElementById('runDebugStepBtn').disabled = false;
+        document.getElementById('runDebugChainBtn').disabled = false;
+        renderChainResult(data);
+    })
+    .catch(err => {
+        overlay.style.display = 'none';
+        document.getElementById('runDebugStepBtn').disabled = false;
+        document.getElementById('runDebugChainBtn').disabled = false;
+        alert(`链式执行失败: ${err.message}`);
+    });
+}
+
+function clearDebugResult() {
+    document.getElementById('debugResultSection').style.display = 'none';
+    document.getElementById('debugInput').value = '';
+    document.getElementById('chainFlow').style.display = 'none';
+}
+
+// ==================== 单环节结果渲染 ====================
+
+function renderDebugResult(data, isChainStep) {
+    const section = document.getElementById('debugResultSection');
+    const overviewCards = document.getElementById('debugOverviewCards');
+    const chainFlow = document.getElementById('chainFlow');
+    const outputArea = document.getElementById('debugOutputArea');
+
+    section.style.display = 'block';
+    if (!isChainStep) {
+        chainFlow.style.display = 'none';
+    }
+
+    const stepName = data.step || currentDebugStep;
+    const meta = stepMeta[stepName] || { name: stepName, icon: '📦' };
+
+    // 统计卡片
+    const execTime = data.execution_time || 0;
+    const hasError = data.status === 'error';
+    overviewCards.innerHTML = `
+        <div class="overview-card">
+            <div class="card-icon">${meta.icon}</div>
+            <div class="card-content">
+                <span class="card-label">执行环节</span>
+                <span class="card-value">${meta.name}</span>
+            </div>
+        </div>
+        <div class="overview-card">
+            <div class="card-icon">⏱️</div>
+            <div class="card-content">
+                <span class="card-label">执行时间</span>
+                <span class="card-value">${execTime} 秒</span>
+            </div>
+        </div>
+        <div class="overview-card">
+            <div class="card-icon">${hasError ? '❌' : '✅'}</div>
+            <div class="card-content">
+                <span class="card-label">状态</span>
+                <span class="card-value" style="color: ${hasError ? '#ff5555' : '#00ff88'}">${hasError ? '失败' : '成功'}</span>
+            </div>
+        </div>
+    `;
+
+    if (hasError) {
+        outputArea.innerHTML = `<div class="debug-error-box">
+            <div class="debug-error-icon">❌</div>
+            <div class="debug-error-msg">${escapeHtml(data.error || '未知错误')}</div>
+        </div>`;
+        return;
+    }
+
+    // 输入摘要
+    let summaryHtml = '<div class="debug-summary-box">';
+    if (data.input_summary) {
+        summaryHtml += '<h4>📥 输入摘要</h4><div class="debug-summary-grid">';
+        for (const [k, v] of Object.entries(data.input_summary)) {
+            summaryHtml += `<div class="debug-summary-item"><span class="debug-summary-key">${k}</span><span class="debug-summary-val">${v}</span></div>`;
+        }
+        summaryHtml += '</div>';
+    }
+    summaryHtml += '</div>';
+
+    // 输出内容
+    let outputHtml = '<div class="debug-output-box"><h4>📤 输出结果</h4>';
+    const output = data.output;
+
+    if (stepName === 'query_extract' && Array.isArray(output)) {
+        outputHtml += renderQueryExtractTable(output);
+    } else if ((stepName === 'entity_enhance' || stepName === 'multi_stage_recall' || stepName === 'hybrid_search') && Array.isArray(output)) {
+        outputHtml += renderSearchResultCards(output, stepName);
+    } else if (stepName === 'parent_doc_enhance' && typeof output === 'object') {
+        outputHtml += renderParentDocSummary(output);
+    } else if (stepName === 'extract_results' && Array.isArray(output)) {
+        outputHtml += renderExtractResultsTable(output);
+    } else {
+        outputHtml += `<pre class="json-viewer">${formatJson(output)}</pre>`;
+    }
+
+    outputHtml += '</div>';
+    outputArea.innerHTML = summaryHtml + outputHtml;
+}
+
+// ==================== 链式执行结果渲染 ====================
+
+function renderChainResult(data) {
+    const section = document.getElementById('debugResultSection');
+    const overviewCards = document.getElementById('debugOverviewCards');
+    const chainFlow = document.getElementById('chainFlow');
+    const outputArea = document.getElementById('debugOutputArea');
+
+    section.style.display = 'block';
+    chainFlow.style.display = 'block';
+
+    // 总体统计
+    const totalTime = data.execution_time || 0;
+    const hasError = data.status === 'error';
+    const steps = data.steps || {};
+    const stepCount = Object.keys(steps).length;
+
+    overviewCards.innerHTML = `
+        <div class="overview-card">
+            <div class="card-icon">🔗</div>
+            <div class="card-content">
+                <span class="card-label">链式执行</span>
+                <span class="card-value">${stepCount} 个环节</span>
+            </div>
+        </div>
+        <div class="overview-card">
+            <div class="card-icon">⏱️</div>
+            <div class="card-content">
+                <span class="card-label">总耗时</span>
+                <span class="card-value">${totalTime} 秒</span>
+            </div>
+        </div>
+        <div class="overview-card">
+            <div class="card-icon">${hasError ? '❌' : '✅'}</div>
+            <div class="card-content">
+                <span class="card-label">状态</span>
+                <span class="card-value" style="color: ${hasError ? '#ff5555' : '#00ff88'}">${hasError ? data.error || '失败' : '成功'}</span>
+            </div>
+        </div>
+    `;
+
+    // 链式流程图
+    const stepNames = ['query_extract', 'entity_enhance', 'parent_doc_enhance', 'extract_results'];
+    let flowHtml = '<div class="chain-flow-container">';
+    stepNames.forEach((sn, i) => {
+        const stepData = steps[sn];
+        const meta = stepMeta[sn] || { name: sn, icon: '📦' };
+        const statusClass = stepData ? (stepData.status === 'success' ? 'chain-step-success' : 'chain-step-error') : 'chain-step-pending';
+        const statusIcon = stepData ? (stepData.status === 'success' ? '✅' : '❌') : '⏳';
+        const timeStr = stepData ? `${stepData.execution_time}s` : '-';
+        const summary = stepData ? (stepData.summary || '') : '未执行';
+
+        flowHtml += `
+            <div class="chain-step ${statusClass}">
+                <div class="chain-step-header">
+                    <span class="chain-step-icon">${meta.icon}</span>
+                    <span class="chain-step-status">${statusIcon}</span>
+                </div>
+                <div class="chain-step-name">${meta.name}</div>
+                <div class="chain-step-time">${timeStr}</div>
+                <div class="chain-step-summary">${escapeHtml(summary)}</div>
+            </div>`;
+        if (i < stepNames.length - 1) {
+            flowHtml += '<div class="chain-arrow">→</div>';
+        }
+    });
+    flowHtml += '</div>';
+    chainFlow.innerHTML = flowHtml;
+
+    // 最后一步(extract_results)的详情
+    const lastStep = steps['extract_results'];
+    let outputHtml = '';
+    if (lastStep && lastStep.status === 'success' && lastStep.output) {
+        outputHtml = '<div class="debug-output-box"><h4>📤 最终输出 (extract_results)</h4>';
+        outputHtml += renderExtractResultsTable(lastStep.output);
+        outputHtml += '</div>';
+    }
+
+    // 各步骤折叠详情
+    outputHtml += '<div class="debug-output-box"><h4>📋 各环节详情</h4><div class="accordion">';
+    stepNames.forEach(sn => {
+        const stepData = steps[sn];
+        if (!stepData) return;
+        const meta = stepMeta[sn] || { name: sn, icon: '📦' };
+        const statusClass = stepData.status === 'success' ? 'status-success' : 'status-error';
+        outputHtml += `
+            <div class="accordion-item">
+                <div class="accordion-header" onclick="toggleAccordion(this)">
+                    <div class="accordion-title">
+                        <span>${meta.icon}</span>
+                        <span>${meta.name}</span>
+                        <span class="status-badge ${statusClass}">${stepData.status}</span>
+                    </div>
+                    <span class="accordion-icon">▼</span>
+                </div>
+                <div class="accordion-content">
+                    <div class="debug-step-detail">
+                        <div class="data-section"><h4>摘要</h4>
+                            <pre class="json-viewer">${stepData.summary || '无'}</pre>
+                        </div>`;
+        if (stepData.output) {
+            outputHtml += `<div class="data-section"><h4>输出数据</h4>
+                <pre class="json-viewer">${formatJson(stepData.output)}</pre></div>`;
+        }
+        if (stepData.error) {
+            outputHtml += `<div class="data-section"><h4>错误</h4>
+                <pre class="json-viewer" style="color:#ff5555">${escapeHtml(stepData.error)}</pre></div>`;
+        }
+        outputHtml += '</div></div></div>';
+    });
+    outputHtml += '</div></div>';
+    outputArea.innerHTML = outputHtml;
+}
+
+// ==================== 差异化渲染函数 ====================
+
+function renderQueryExtractTable(queryPairs) {
+    if (!queryPairs || queryPairs.length === 0) {
+        return '<div class="empty-state"><div class="empty-state-icon">📭</div><p>未提取到查询对</p></div>';
+    }
+    let html = '<div class="debug-table-wrap"><table class="debug-table"><thead><tr>' +
+        '<th>#</th><th>实体 (entity)</th><th>搜索关键词</th><th>背景 (background)</th><th>参数 (parameter)</th></tr></thead><tbody>';
+    queryPairs.forEach((qp, i) => {
+        const keywords = Array.isArray(qp.search_keywords) ? qp.search_keywords.join(', ') : (qp.search_keywords || '');
+        html += `<tr>
+            <td>${i + 1}</td>
+            <td><strong>${escapeHtml(qp.entity || '')}</strong></td>
+            <td>${escapeHtml(keywords)}</td>
+            <td>${escapeHtml(qp.background || '')}</td>
+            <td>${escapeHtml(qp.parameter || '')}</td>
+        </tr>`;
+    });
+    html += '</tbody></table></div>';
+    return html;
+}
+
+function renderSearchResultCards(results, stepName) {
+    if (!results || results.length === 0) {
+        return '<div class="empty-state"><div class="empty-state-icon">📭</div><p>未检索到结果</p></div>';
+    }
+    let html = `<div class="debug-result-count">共 ${results.length} 个结果</div><div class="debug-result-cards">`;
+    results.forEach((r, i) => {
+        const textContent = (r.text_content || r.text || '').substring(0, 300);
+        const fileName = r.file_name || r.metadata?.file_name || r.metadata?.document_id || 'N/A';
+        const score = r.rerank_score || r.hybrid_similarity || r.bfp_rerank_score || r.distance || 0;
+        const scoreType = r.rerank_score ? 'rerank' : (r.hybrid_similarity ? 'hybrid' : (r.bfp_rerank_score ? 'bfp' : 'score'));
+        html += `
+            <div class="debug-result-card">
+                <div class="drc-header">
+                    <span class="drc-index">#${i + 1}</span>
+                    <span class="drc-score">${scoreType}: ${typeof score === 'number' ? score.toFixed(4) : score}</span>
+                </div>
+                <div class="drc-file">📄 ${escapeHtml(String(fileName))}</div>
+                <div class="drc-content">${escapeHtml(textContent)}${textContent.length >= 300 ? '...' : ''}</div>
+            </div>`;
+    });
+    html += '</div>';
+    return html;
+}
+
+function renderParentDocSummary(output) {
+    let html = '<div class="debug-summary-grid">';
+    for (const [k, v] of Object.entries(output)) {
+        html += `<div class="debug-summary-item"><span class="debug-summary-key">${k}</span><span class="debug-summary-val">${v}</span></div>`;
+    }
+    html += '</div>';
+
+    if (output.parent_docs && output.parent_docs.length > 0) {
+        html += `<div class="debug-result-count">父文档列表 (${output.parent_docs.length})</div><div class="debug-result-cards">`;
+        output.parent_docs.forEach((p, i) => {
+            const textContent = (p.text_content || '').substring(0, 300);
+            const fileName = p.metadata?.file_name || p.metadata?.document_id || 'N/A';
+            html += `
+                <div class="debug-result-card parent-doc">
+                    <div class="drc-header">
+                        <span class="drc-index">📚 父文档 #${i + 1}</span>
+                    </div>
+                    <div class="drc-file">📄 ${escapeHtml(String(fileName))}</div>
+                    <div class="drc-content">${escapeHtml(textContent)}${textContent.length >= 300 ? '...' : ''}</div>
+                </div>`;
+        });
+        html += '</div>';
+    }
+    return html;
+}
+
+function renderExtractResultsTable(entityResults) {
+    if (!entityResults || entityResults.length === 0) {
+        return '<div class="empty-state"><div class="empty-state-icon">📭</div><p>没有通过阈值过滤的结果</p></div>';
+    }
+    let html = '<div class="debug-table-wrap"><table class="debug-table"><thead><tr>' +
+        '<th>#</th><th>实体</th><th>combined_query</th><th>分数</th><th>文件名</th><th>内容预览</th></tr></thead><tbody>';
+    entityResults.forEach((r, i) => {
+        const score = r.final_score || r.bfp_rerank_score || 0;
+        const content = (r.text_content || '').substring(0, 150);
+        html += `<tr>
+            <td>${i + 1}</td>
+            <td><strong>${escapeHtml(r.entity || '')}</strong></td>
+            <td>${escapeHtml((r.combined_query || '').substring(0, 80))}</td>
+            <td><span class="score-badge">${typeof score === 'number' ? score.toFixed(4) : score}</span></td>
+            <td>${escapeHtml((r.file_name || '').substring(0, 50))}</td>
+            <td class="preview-cell">${escapeHtml(content)}${content.length >= 150 ? '...' : ''}</td>
+        </tr>`;
+    });
+    html += '</tbody></table></div>';
+    return html;
+}
+
+// ==================== 工具函数 ====================
+
+function escapeHtml(str) {
+    if (!str) return '';
+    return String(str).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+}

+ 132 - 0
utils_test/RAG_Test/rag_pipeline_web/index.html

@@ -30,6 +30,9 @@
             <button class="function-tab" data-function="professional" onclick="switchFunction('professional')">
                 🎯 专业性审查测试
             </button>
+            <button class="function-tab" data-function="debug" onclick="switchFunction('debug')">
+                🔬 环节调试
+            </button>
         </section>
 
         <!-- 统一的测试输入区域 -->
@@ -177,6 +180,135 @@
                 </div>
             </div>
         </section>
+
+        <!-- 环节调试 Section -->
+        <section class="debug-section" id="debugSection" style="display: none;">
+            <h2>🔬 RAG环节调试</h2>
+            <div class="server-status" id="debugServerStatus">
+                <span class="status-dot offline"></span>
+                <span class="status-text">服务未连接</span>
+            </div>
+
+            <!-- 环节选择器 -->
+            <div class="step-selector">
+                <span class="step-selector-label">选择环节:</span>
+                <div class="step-buttons" id="stepButtons">
+                    <button class="step-btn active" data-step="query_extract" onclick="selectDebugStep('query_extract')">
+                        <span class="step-num">1</span>
+                        <span class="step-name">查询提取</span>
+                        <span class="step-desc">query_extract</span>
+                    </button>
+                    <span class="step-arrow">→</span>
+                    <button class="step-btn" data-step="entity_enhance" onclick="selectDebugStep('entity_enhance')">
+                        <span class="step-num">2</span>
+                        <span class="step-name">实体增强</span>
+                        <span class="step-desc">entity_enhance</span>
+                    </button>
+                    <span class="step-arrow">→</span>
+                    <button class="step-btn" data-step="multi_stage_recall" onclick="selectDebugStep('multi_stage_recall')">
+                        <span class="step-num">3</span>
+                        <span class="step-name">多阶段召回</span>
+                        <span class="step-desc">multi_stage_recall</span>
+                    </button>
+                </div>
+                <div class="step-buttons second-row" id="stepButtonsRow2">
+                    <button class="step-btn" data-step="hybrid_search" onclick="selectDebugStep('hybrid_search')">
+                        <span class="step-num">4</span>
+                        <span class="step-name">混合检索</span>
+                        <span class="step-desc">hybrid_search</span>
+                    </button>
+                    <span class="step-arrow">→</span>
+                    <button class="step-btn" data-step="parent_doc_enhance" onclick="selectDebugStep('parent_doc_enhance')">
+                        <span class="step-num">5</span>
+                        <span class="step-name">父文档增强</span>
+                        <span class="step-desc">parent_doc_enhance</span>
+                    </button>
+                    <span class="step-arrow">→</span>
+                    <button class="step-btn" data-step="extract_results" onclick="selectDebugStep('extract_results')">
+                        <span class="step-num">6</span>
+                        <span class="step-name">结果提取</span>
+                        <span class="step-desc">extract_results</span>
+                    </button>
+                </div>
+            </div>
+
+            <!-- 当前环节提示 -->
+            <div class="current-step-info" id="currentStepInfo">
+                <span class="step-info-icon">📌</span>
+                <span class="step-info-text">当前环节: <strong>查询提取</strong> — 从输入内容中提取查询实体和关键词</span>
+            </div>
+
+            <!-- 输入区域 -->
+            <div class="debug-input-area">
+                <div class="input-area">
+                    <textarea id="debugInput" placeholder="输入测试文本...&#10;&#10;示例:主要部件说明&#10;1、主梁总成&#10;主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t..."></textarea>
+                </div>
+
+                <!-- 参数面板 -->
+                <div class="params-panel">
+                    <div class="params-header" onclick="toggleParamsPanel()">
+                        <span>⚙️ 参数配置</span>
+                        <span class="params-toggle" id="paramsToggleIcon">▼</span>
+                    </div>
+                    <div class="params-body" id="paramsBody" style="display: none;">
+                        <div class="param-row">
+                            <label>collection_name</label>
+                            <input type="text" id="paramCollectionName" value="rag_children_hybrid" placeholder="集合名称">
+                        </div>
+                        <div class="param-row">
+                            <label>top_k</label>
+                            <input type="number" id="paramTopK" value="10" min="1" max="100">
+                        </div>
+                        <div class="param-row">
+                            <label>hybrid_top_k</label>
+                            <input type="number" id="paramHybridTopK" value="50" min="1" max="200">
+                        </div>
+                        <div class="param-row">
+                            <label>score_threshold</label>
+                            <input type="number" id="paramScoreThreshold" value="0.5" min="0" max="1" step="0.05">
+                        </div>
+                        <div class="param-row">
+                            <label>dense_weight</label>
+                            <input type="number" id="paramDenseWeight" value="0.7" min="0" max="1" step="0.1">
+                        </div>
+                        <div class="param-row">
+                            <label>max_parents_per_pair</label>
+                            <input type="number" id="paramMaxParents" value="3" min="1" max="10">
+                        </div>
+                    </div>
+                </div>
+            </div>
+
+            <!-- 操作按钮 -->
+            <div class="action-buttons">
+                <button class="btn btn-primary" id="runDebugStepBtn" onclick="runDebugStep()">
+                    <span class="btn-icon">▶</span> 执行当前环节
+                </button>
+                <button class="btn btn-success" id="runDebugChainBtn" onclick="runDebugChain()">
+                    <span class="btn-icon">🔗</span> 链式执行全部
+                </button>
+                <button class="btn btn-secondary" onclick="clearDebugResult()">清空结果</button>
+            </div>
+
+            <div class="loading-overlay" id="debugLoadingOverlay" style="display: none;">
+                <div class="loading-spinner"></div>
+                <p id="debugLoadingText">正在执行...</p>
+            </div>
+
+            <!-- 结果展示 -->
+            <section class="debug-result-section" id="debugResultSection" style="display: none;">
+                <h3>📊 执行结果</h3>
+
+                <!-- 统计卡片 -->
+                <div class="overview-cards" id="debugOverviewCards"></div>
+
+                <!-- 链式执行步骤图 -->
+                <div class="chain-flow" id="chainFlow" style="display: none;"></div>
+
+                <!-- 输出详情 -->
+                <div class="debug-output-area" id="debugOutputArea"></div>
+            </section>
+        </section>
     </div>
 
     <!-- 审查依据侧边栏 -->

+ 22 - 0
utils_test/RAG_Test/rag_pipeline_web/professional_review.js

@@ -27,8 +27,20 @@ function switchFunction(functionType) {
     const detailPanel = document.getElementById('detailPanel');
     const stagesDetail = document.getElementById('stagesDetail');
     const professionalResults = document.getElementById('professionalResults');
+    const debugSection = document.getElementById('debugSection');
+
+    // 隐藏所有section
+    if (pipelineOverview) pipelineOverview.style.display = 'none';
+    if (pipelineFlow) pipelineFlow.style.display = 'none';
+    if (detailPanel) detailPanel.style.display = 'none';
+    if (stagesDetail) stagesDetail.style.display = 'none';
+    if (professionalResults) professionalResults.style.display = 'none';
+    if (debugSection) debugSection.style.display = 'none';
 
     if (functionType === 'rag') {
+        // 恢复共享区域
+        document.querySelector('.test-input-section').style.display = '';
+        document.querySelector('.upload-section').style.display = '';
         // 切换到RAG模式
         sectionTitle.textContent = '🚀 RAG链路测试';
         testInput.placeholder = '输入测试文本,点击执行RAG检索...\n\n示例:主要部件说明\n1、主梁总成\n主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t...';
@@ -50,6 +62,9 @@ function switchFunction(functionType) {
             stagesDetail.style.display = 'block';
         }
     } else if (functionType === 'professional') {
+        // 恢复共享区域
+        document.querySelector('.test-input-section').style.display = '';
+        document.querySelector('.upload-section').style.display = '';
         // 切换到专业性审查模式
         sectionTitle.textContent = '🎯 专业性审查完整测试';
         testInput.placeholder = '输入待审查内容...\n\n示例:\n二)架桥机安装施工\n1、安装准备\n(1)图纸审核...';
@@ -70,6 +85,13 @@ function switchFunction(functionType) {
         if (window.professionalReviewData) {
             professionalResults.style.display = 'block';
         }
+    } else if (functionType === 'debug') {
+        // 隐藏共享的 test-input-section 和 upload-section
+        document.querySelector('.test-input-section').style.display = 'none';
+        document.querySelector('.upload-section').style.display = 'none';
+        // 显示debug专有的section
+        debugSection.style.display = 'block';
+        updateDebugServerStatus();
     }
 }
 

+ 276 - 0
utils_test/RAG_Test/rag_pipeline_web/rag_pipeline_server.py

@@ -665,6 +665,240 @@ def _finalize_pipeline_result(trace_id: str, final_result: dict) -> dict:
         return final_result
 
 
+def debug_step(step_name: str, content: str, params: dict = None) -> dict:
+    """
+    执行单个RAG环节调试
+
+    Args:
+        step_name: 环节名称
+        content: 输入内容(文本或JSON字符串)
+        params: 额外参数
+
+    Returns:
+        dict: {"status", "step", "input_summary", "output", "execution_time", "error"}
+    """
+    global milvus_manager
+    if params is None:
+        params = {}
+
+    start_time = time.time()
+
+    try:
+        if step_name == 'query_extract':
+            input_summary = {"content_length": len(content)}
+            output = query_rewrite_manager.query_extract(content)
+            return _debug_response(step_name, input_summary, output, start_time)
+
+        elif step_name == 'entity_enhance':
+            query_pairs = _parse_json_param(content, params, 'query_pairs')
+            if query_pairs is None:
+                return {"status": "error", "step": step_name, "error": "请提供 query_pairs(在 params 或 content 中传入 JSON)"}
+            input_summary = {"query_pairs_count": len(query_pairs)}
+            output = entity_enhance.entities_enhance_retrieval(query_pairs)
+            return _debug_response(step_name, input_summary, output, start_time)
+
+        elif step_name == 'multi_stage_recall':
+            collection_name = params.get('collection_name', 'rag_children_hybrid')
+            hybrid_top_k = params.get('hybrid_top_k', 50)
+            top_k = params.get('top_k', 10)
+            input_summary = {"content_length": len(content), "collection_name": collection_name,
+                             "hybrid_top_k": hybrid_top_k, "top_k": top_k}
+            output = retrieval_manager.multi_stage_recall(
+                collection_name=collection_name, query_text=content,
+                hybrid_top_k=hybrid_top_k, top_k=top_k)
+            return _debug_response(step_name, input_summary, _serialize_results(output), start_time)
+
+        elif step_name == 'hybrid_search':
+            collection_name = params.get('collection_name', 'rag_children_hybrid')
+            top_k = params.get('top_k', 10)
+            dense_weight = params.get('dense_weight', 0.7)
+            sparse_weight = params.get('sparse_weight', 0.3)
+            input_summary = {"content_length": len(content), "collection_name": collection_name,
+                             "top_k": top_k, "dense_weight": dense_weight, "sparse_weight": sparse_weight}
+            output = retrieval_manager.hybrid_search_recall(
+                collection_name=collection_name, query_text=content,
+                top_k=top_k, dense_weight=dense_weight, sparse_weight=sparse_weight)
+            return _debug_response(step_name, input_summary, _serialize_results(output), start_time)
+
+        elif step_name == 'parent_doc_enhance':
+            if milvus_manager is None:
+                init_milvus()
+            bfp_result_lists = _parse_json_param(content, params, 'bfp_result_lists')
+            if bfp_result_lists is None:
+                return {"status": "error", "step": step_name, "error": "请提供 bfp_result_lists(在 params 或 content 中传入 JSON)"}
+            score_threshold = params.get('score_threshold', 0.3)
+            max_parents = params.get('max_parents_per_pair', 3)
+            input_summary = {"bfp_lists_count": len(bfp_result_lists),
+                             "score_threshold": score_threshold, "max_parents_per_pair": max_parents}
+            output = enhance_with_parent_docs_grouped(
+                milvus_manager, bfp_result_lists,
+                score_threshold=score_threshold, max_parents_per_pair=max_parents)
+            serialized = {
+                "enhanced_count": output.get("enhanced_count", 0),
+                "enhanced_pairs": output.get("enhanced_pairs", 0),
+                "total_pairs": output.get("total_pairs", 0),
+                "parent_docs": _serialize_parent_docs(output.get("parent_docs", [])),
+                "enhanced_results_summary": f"{len(output.get('enhanced_results', []))} 个查询对的结果"
+            }
+            return _debug_response(step_name, input_summary, serialized, start_time)
+
+        elif step_name == 'extract_results':
+            bfp_result_lists = _parse_json_param(content, params, 'bfp_result_lists')
+            query_pairs = params.get('query_pairs', None)
+            score_threshold = params.get('score_threshold', 0.5)
+            if bfp_result_lists is None:
+                return {"status": "error", "step": step_name, "error": "请提供 bfp_result_lists(在 params 或 content 中传入 JSON)"}
+            input_summary = {"bfp_lists_count": len(bfp_result_lists),
+                             "has_query_pairs": query_pairs is not None,
+                             "score_threshold": score_threshold}
+            output = extract_query_pairs_results(bfp_result_lists, query_pairs, score_threshold=score_threshold)
+            return _debug_response(step_name, input_summary, _serialize_results(output), start_time)
+
+        else:
+            return {"status": "error", "step": step_name,
+                    "error": f"未知环节: {step_name},可选: query_extract, entity_enhance, multi_stage_recall, hybrid_search, parent_doc_enhance, extract_results"}
+
+    except Exception as e:
+        logger.error(f"[环节调试] {step_name} 失败: {e}", exc_info=True)
+        return {"status": "error", "step": step_name, "error": str(e),
+                "execution_time": round(time.time() - start_time, 3)}
+
+
+def debug_chain(content: str, params: dict = None) -> dict:
+    """
+    链式执行: query_extract → entity_enhance → parent_doc_enhance → extract_results
+    """
+    global milvus_manager
+    if params is None:
+        params = {}
+    if milvus_manager is None:
+        init_milvus()
+
+    chain_start = time.time()
+    steps = {}
+
+    # Step 1: query_extract
+    t0 = time.time()
+    try:
+        query_pairs = query_rewrite_manager.query_extract(content)
+        steps["query_extract"] = {"status": "success", "execution_time": round(time.time() - t0, 3),
+                                  "output": query_pairs, "summary": f"提取到 {len(query_pairs) if query_pairs else 0} 个查询对"}
+    except Exception as e:
+        steps["query_extract"] = {"status": "error", "execution_time": round(time.time() - t0, 3), "error": str(e)}
+        return {"status": "error", "steps": steps, "execution_time": round(time.time() - chain_start, 3),
+                "error": "query_extract 失败"}
+
+    if not query_pairs:
+        return {"status": "no_results", "steps": steps, "execution_time": round(time.time() - chain_start, 3),
+                "message": "query_extract 未提取到查询对"}
+
+    # Step 2: entity_enhance
+    t0 = time.time()
+    try:
+        bfp_result_lists = entity_enhance.entities_enhance_retrieval(query_pairs)
+        total_bfp = sum(len(r) for r in bfp_result_lists) if bfp_result_lists else 0
+        steps["entity_enhance"] = {"status": "success", "execution_time": round(time.time() - t0, 3),
+                                   "summary": f"召回 {total_bfp} 个BFP结果({len(bfp_result_lists) if bfp_result_lists else 0} 个查询对)"}
+    except Exception as e:
+        steps["entity_enhance"] = {"status": "error", "execution_time": round(time.time() - t0, 3), "error": str(e)}
+        return {"status": "error", "steps": steps, "execution_time": round(time.time() - chain_start, 3),
+                "error": "entity_enhance 失败"}
+
+    if not bfp_result_lists:
+        return {"status": "no_results", "steps": steps, "execution_time": round(time.time() - chain_start, 3),
+                "message": "entity_enhance 未召回结果"}
+
+    # Step 3: parent_doc_enhance
+    t0 = time.time()
+    try:
+        score_threshold = params.get('score_threshold', 0.3)
+        max_parents = params.get('max_parents_per_pair', 3)
+        enhancement_result = enhance_with_parent_docs_grouped(
+            milvus_manager, bfp_result_lists,
+            score_threshold=score_threshold, max_parents_per_pair=max_parents)
+        enhanced_results = enhancement_result.get('enhanced_results', bfp_result_lists)
+        steps["parent_doc_enhance"] = {"status": "success", "execution_time": round(time.time() - t0, 3),
+                                       "summary": f"增强 {enhancement_result.get('enhanced_pairs', 0)}/{enhancement_result.get('total_pairs', 0)} 个查询对, "
+                                                  f"使用 {len(enhancement_result.get('parent_docs', []))} 个父文档"}
+    except Exception as e:
+        steps["parent_doc_enhance"] = {"status": "error", "execution_time": round(time.time() - t0, 3), "error": str(e)}
+        enhanced_results = bfp_result_lists
+
+    # Step 4: extract_results
+    t0 = time.time()
+    try:
+        extract_threshold = params.get('score_threshold', 0.5)
+        entity_results = extract_query_pairs_results(enhanced_results, query_pairs, score_threshold=extract_threshold)
+        steps["extract_results"] = {"status": "success", "execution_time": round(time.time() - t0, 3),
+                                    "output": _serialize_results(entity_results),
+                                    "summary": f"提取 {len(entity_results) if entity_results else 0} 个高分结果(阈值>{extract_threshold})"}
+    except Exception as e:
+        steps["extract_results"] = {"status": "error", "execution_time": round(time.time() - t0, 3), "error": str(e)}
+
+    return {"status": "success", "steps": steps,
+            "execution_time": round(time.time() - chain_start, 3)}
+
+
+def _debug_response(step_name: str, input_summary: dict, output, start_time: float) -> dict:
+    return {"status": "success", "step": step_name, "input_summary": input_summary,
+            "output": output, "execution_time": round(time.time() - start_time, 3)}
+
+
+def _parse_json_param(content: str, params: dict, key: str):
+    """从 params[key] 或 content(作为JSON解析)中获取参数"""
+    if key in params and params[key] is not None:
+        return params[key]
+    if content:
+        try:
+            parsed = json.loads(content)
+            if isinstance(parsed, list):
+                return parsed
+            if isinstance(parsed, dict) and key in parsed:
+                return parsed[key]
+        except (json.JSONDecodeError, TypeError):
+            pass
+    return None
+
+
+def _serialize_results(results):
+    """序列化检索结果,处理不可JSON序列化的字段"""
+    if not results:
+        return []
+    out = []
+    for item in results:
+        if not isinstance(item, dict):
+            out.append(str(item))
+            continue
+        d = {}
+        for k, v in item.items():
+            if k == 'metadata' and isinstance(v, dict):
+                d[k] = {mk: str(mv) for mk, mv in v.items()}
+            elif isinstance(v, (str, int, float, bool, list, type(None))):
+                d[k] = v
+            else:
+                d[k] = str(v)
+        out.append(d)
+    return out
+
+
+def _serialize_parent_docs(parent_docs):
+    """序列化父文档列表"""
+    out = []
+    for p in parent_docs:
+        d = {}
+        for k, v in p.items():
+            if k == 'metadata' and isinstance(v, dict):
+                d[k] = {mk: str(mv) for mk, mv in v.items()}
+            elif k == 'text_content' and isinstance(v, str):
+                d[k] = v[:500] + '...' if len(v) > 500 else v
+            elif isinstance(v, (str, int, float, bool, list, type(None))):
+                d[k] = v
+            else:
+                d[k] = str(v)
+        out.append(d)
+    return out
+
+
 class RAGPipelineHandler(SimpleHTTPRequestHandler):
     """HTTP请求处理器"""
 
@@ -811,6 +1045,48 @@ class RAGPipelineHandler(SimpleHTTPRequestHandler):
                 logger.error(f"父子文档RAG处理失败: {e}", exc_info=True)
                 self.send_json_response({'error': str(e)}, 500)
 
+        elif parsed.path == '/api/debug/step':
+            content_length = int(self.headers['Content-Length'])
+            post_data = self.rfile.read(content_length)
+            try:
+                body = json.loads(post_data.decode('utf-8'))
+                step_name = body.get('step', '')
+                content = body.get('content', '')
+                params = body.get('params', {})
+                if not step_name:
+                    self.send_json_response({'error': '请提供step参数'}, 400)
+                    return
+                print(f"\n📝 收到环节调试请求, step: {step_name}, 内容长度: {len(content)}")
+                result = debug_step(step_name, content, params)
+                status_code = 200 if result.get('status') != 'error' else 400
+                print(f"✅ 环节调试完成, status: {result.get('status')}, 耗时: {result.get('execution_time', 0)}秒")
+                self.send_json_response(result, status_code)
+            except json.JSONDecodeError:
+                self.send_json_response({'error': 'JSON解析失败'}, 400)
+            except Exception as e:
+                logger.error(f"环节调试失败: {e}", exc_info=True)
+                self.send_json_response({'error': str(e)}, 500)
+
+        elif parsed.path == '/api/debug/chain':
+            content_length = int(self.headers['Content-Length'])
+            post_data = self.rfile.read(content_length)
+            try:
+                body = json.loads(post_data.decode('utf-8'))
+                content = body.get('content', '')
+                params = body.get('params', {})
+                if not content:
+                    self.send_json_response({'error': '请提供content参数'}, 400)
+                    return
+                print(f"\n📝 收到链式执行请求, 内容长度: {len(content)}")
+                result = debug_chain(content, params)
+                print(f"✅ 链式执行完成, status: {result.get('status')}, 总耗时: {result.get('execution_time', 0)}秒")
+                self.send_json_response(result)
+            except json.JSONDecodeError:
+                self.send_json_response({'error': 'JSON解析失败'}, 400)
+            except Exception as e:
+                logger.error(f"链式执行失败: {e}", exc_info=True)
+                self.send_json_response({'error': str(e)}, 500)
+
         else:
             self.send_json_response({'error': 'Not Found'}, 404)
 

+ 535 - 0
utils_test/RAG_Test/rag_pipeline_web/styles.css

@@ -1245,3 +1245,538 @@ body {
     font-size: 0.9rem;
     color: #555;
 }
+
+/* ==================== 环节调试样式 ==================== */
+
+.debug-section {
+    margin-bottom: 30px;
+}
+
+.debug-section h2 {
+    color: #00d4ff;
+    margin-bottom: 20px;
+    font-size: 1.3rem;
+}
+
+.debug-section .server-status {
+    position: absolute;
+    top: 0;
+    right: 25px;
+}
+
+/* 环节选择器 */
+.step-selector {
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.1);
+    border-radius: 12px;
+    padding: 20px;
+    margin-bottom: 20px;
+}
+
+.step-selector-label {
+    display: block;
+    color: #888;
+    font-size: 0.85rem;
+    margin-bottom: 12px;
+}
+
+.step-buttons {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    flex-wrap: wrap;
+}
+
+.step-buttons.second-row {
+    margin-top: 10px;
+}
+
+.step-btn {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    padding: 10px 16px;
+    background: rgba(255, 255, 255, 0.05);
+    border: 2px solid rgba(255, 255, 255, 0.12);
+    border-radius: 10px;
+    color: #aaa;
+    cursor: pointer;
+    transition: all 0.25s ease;
+    font-size: 0.9rem;
+}
+
+.step-btn:hover {
+    border-color: #00d4ff;
+    color: #00d4ff;
+    background: rgba(0, 212, 255, 0.08);
+}
+
+.step-btn.active {
+    border-color: #00ff88;
+    color: #00ff88;
+    background: rgba(0, 255, 136, 0.12);
+    box-shadow: 0 0 16px rgba(0, 255, 136, 0.15);
+}
+
+.step-num {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    width: 22px;
+    height: 22px;
+    border-radius: 50%;
+    background: rgba(0, 212, 255, 0.2);
+    color: #00d4ff;
+    font-size: 0.75rem;
+    font-weight: bold;
+    flex-shrink: 0;
+}
+
+.step-btn.active .step-num {
+    background: rgba(0, 255, 136, 0.25);
+    color: #00ff88;
+}
+
+.step-name {
+    font-weight: 600;
+    white-space: nowrap;
+}
+
+.step-desc {
+    font-size: 0.75rem;
+    color: #666;
+    font-family: 'Consolas', monospace;
+}
+
+.step-arrow {
+    color: #4a4a6a;
+    font-size: 1.2rem;
+    font-weight: bold;
+    user-select: none;
+}
+
+/* 当前环节提示 */
+.current-step-info {
+    background: rgba(0, 212, 255, 0.08);
+    border: 1px solid rgba(0, 212, 255, 0.2);
+    border-radius: 8px;
+    padding: 12px 16px;
+    margin-bottom: 20px;
+    display: flex;
+    align-items: center;
+    gap: 10px;
+}
+
+.step-info-icon {
+    font-size: 1.3rem;
+}
+
+.step-info-text {
+    color: #e0e0e0;
+    font-size: 0.95rem;
+}
+
+.step-info-text strong {
+    color: #00d4ff;
+}
+
+/* 调试输入区域 */
+.debug-input-area {
+    margin-bottom: 20px;
+}
+
+.debug-input-area .input-area textarea {
+    min-height: 150px;
+}
+
+/* 参数面板 */
+.params-panel {
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.1);
+    border-radius: 10px;
+    margin-top: 12px;
+    overflow: hidden;
+}
+
+.params-header {
+    padding: 12px 16px;
+    cursor: pointer;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    color: #aaa;
+    font-size: 0.9rem;
+    transition: background 0.2s;
+}
+
+.params-header:hover {
+    background: rgba(255, 255, 255, 0.03);
+}
+
+.params-toggle {
+    font-size: 0.8rem;
+    color: #666;
+}
+
+.params-body {
+    padding: 16px;
+    border-top: 1px solid rgba(255, 255, 255, 0.08);
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(240px, 1fr));
+    gap: 12px;
+}
+
+.param-row {
+    display: flex;
+    flex-direction: column;
+    gap: 5px;
+}
+
+.param-row label {
+    font-size: 0.8rem;
+    color: #888;
+    font-family: 'Consolas', monospace;
+}
+
+.param-row input {
+    background: rgba(0, 0, 0, 0.4);
+    border: 1px solid rgba(255, 255, 255, 0.12);
+    border-radius: 6px;
+    padding: 8px 12px;
+    color: #e0e0e0;
+    font-size: 0.9rem;
+    transition: border-color 0.2s;
+}
+
+.param-row input:focus {
+    outline: none;
+    border-color: #00d4ff;
+}
+
+/* 调试结果区域 */
+.debug-result-section {
+    margin-top: 30px;
+}
+
+.debug-result-section h3 {
+    color: #00d4ff;
+    margin-bottom: 20px;
+    font-size: 1.2rem;
+}
+
+/* 错误展示 */
+.debug-error-box {
+    background: rgba(255, 85, 85, 0.1);
+    border: 1px solid rgba(255, 85, 85, 0.3);
+    border-radius: 10px;
+    padding: 20px;
+    display: flex;
+    align-items: flex-start;
+    gap: 14px;
+}
+
+.debug-error-icon {
+    font-size: 1.5rem;
+    flex-shrink: 0;
+}
+
+.debug-error-msg {
+    color: #ff8888;
+    font-size: 0.95rem;
+    word-break: break-all;
+}
+
+/* 摘要网格 */
+.debug-summary-box {
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.1);
+    border-radius: 10px;
+    padding: 16px;
+    margin-bottom: 16px;
+}
+
+.debug-summary-box h4 {
+    color: #ffb86c;
+    margin-bottom: 12px;
+    font-size: 0.95rem;
+}
+
+.debug-summary-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
+    gap: 10px;
+}
+
+.debug-summary-item {
+    background: rgba(0, 0, 0, 0.2);
+    border-radius: 6px;
+    padding: 10px 14px;
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+}
+
+.debug-summary-key {
+    font-size: 0.75rem;
+    color: #888;
+    font-family: 'Consolas', monospace;
+}
+
+.debug-summary-val {
+    font-size: 0.95rem;
+    color: #e0e0e0;
+    font-weight: 500;
+    word-break: break-all;
+}
+
+/* 输出盒子 */
+.debug-output-box {
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.1);
+    border-radius: 10px;
+    padding: 16px;
+    margin-bottom: 16px;
+}
+
+.debug-output-box h4 {
+    color: #50fa7b;
+    margin-bottom: 12px;
+    font-size: 0.95rem;
+}
+
+/* 数据表格 */
+.debug-table-wrap {
+    overflow-x: auto;
+    border-radius: 8px;
+    border: 1px solid rgba(255, 255, 255, 0.08);
+}
+
+.debug-table {
+    width: 100%;
+    border-collapse: collapse;
+    font-size: 0.85rem;
+}
+
+.debug-table th {
+    background: rgba(0, 212, 255, 0.1);
+    color: #00d4ff;
+    padding: 10px 14px;
+    text-align: left;
+    font-weight: 600;
+    white-space: nowrap;
+    border-bottom: 2px solid rgba(0, 212, 255, 0.2);
+}
+
+.debug-table td {
+    padding: 10px 14px;
+    border-bottom: 1px solid rgba(255, 255, 255, 0.05);
+    color: #e0e0e0;
+    vertical-align: top;
+}
+
+.debug-table tbody tr:hover {
+    background: rgba(255, 255, 255, 0.03);
+}
+
+.preview-cell {
+    max-width: 300px;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+    color: #aaa;
+    font-size: 0.82rem;
+}
+
+.score-badge {
+    background: rgba(189, 147, 249, 0.2);
+    color: #bd93f9;
+    padding: 2px 8px;
+    border-radius: 8px;
+    font-family: 'Consolas', monospace;
+    font-size: 0.82rem;
+}
+
+/* 结果卡片 */
+.debug-result-count {
+    color: #888;
+    font-size: 0.85rem;
+    margin-bottom: 12px;
+}
+
+.debug-result-cards {
+    display: flex;
+    flex-direction: column;
+    gap: 12px;
+}
+
+.debug-result-card {
+    background: rgba(255, 255, 255, 0.04);
+    border: 1px solid rgba(255, 255, 255, 0.08);
+    border-left: 3px solid #00d4ff;
+    border-radius: 8px;
+    padding: 14px 16px;
+    transition: border-color 0.2s;
+}
+
+.debug-result-card:hover {
+    border-left-color: #00ff88;
+}
+
+.debug-result-card.parent-doc {
+    border-left-color: #ffb86c;
+}
+
+.drc-header {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    margin-bottom: 8px;
+}
+
+.drc-index {
+    font-weight: 600;
+    color: #00d4ff;
+    font-size: 0.9rem;
+}
+
+.drc-score {
+    font-family: 'Consolas', monospace;
+    font-size: 0.82rem;
+    color: #bd93f9;
+    background: rgba(189, 147, 249, 0.1);
+    padding: 2px 8px;
+    border-radius: 6px;
+}
+
+.drc-file {
+    color: #888;
+    font-size: 0.8rem;
+    margin-bottom: 8px;
+}
+
+.drc-content {
+    color: #ccc;
+    font-size: 0.85rem;
+    line-height: 1.6;
+    word-break: break-all;
+}
+
+/* 链式流程图 */
+.chain-flow {
+    margin-bottom: 20px;
+}
+
+.chain-flow-container {
+    display: flex;
+    align-items: flex-start;
+    gap: 10px;
+    padding: 20px;
+    background: rgba(0, 0, 0, 0.2);
+    border-radius: 12px;
+    overflow-x: auto;
+    flex-wrap: wrap;
+}
+
+.chain-step {
+    background: rgba(255, 255, 255, 0.05);
+    border: 2px solid #4a4a6a;
+    border-radius: 10px;
+    padding: 14px;
+    min-width: 150px;
+    text-align: center;
+    transition: all 0.3s;
+}
+
+.chain-step-success {
+    border-color: #00ff88;
+    background: rgba(0, 255, 136, 0.06);
+}
+
+.chain-step-error {
+    border-color: #ff5555;
+    background: rgba(255, 85, 85, 0.06);
+}
+
+.chain-step-pending {
+    border-color: #4a4a6a;
+    opacity: 0.5;
+}
+
+.chain-step-header {
+    display: flex;
+    justify-content: center;
+    gap: 8px;
+    margin-bottom: 6px;
+}
+
+.chain-step-icon {
+    font-size: 1.2rem;
+}
+
+.chain-step-status {
+    font-size: 0.9rem;
+}
+
+.chain-step-name {
+    font-weight: 600;
+    color: #e0e0e0;
+    font-size: 0.85rem;
+    margin-bottom: 4px;
+}
+
+.chain-step-time {
+    color: #ffb86c;
+    font-size: 0.75rem;
+    margin-bottom: 4px;
+}
+
+.chain-step-summary {
+    color: #888;
+    font-size: 0.72rem;
+    line-height: 1.4;
+}
+
+.chain-arrow {
+    color: #4a4a6a;
+    font-size: 1.5rem;
+    align-self: center;
+    flex-shrink: 0;
+}
+
+/* 环节详情 */
+.debug-step-detail {
+    display: flex;
+    flex-direction: column;
+    gap: 12px;
+}
+
+.debug-step-detail .data-section {
+    background: rgba(0, 0, 0, 0.2);
+    border-radius: 8px;
+    padding: 12px;
+}
+
+.debug-step-detail .data-section h4 {
+    color: #00d4ff;
+    margin-bottom: 8px;
+    font-size: 0.85rem;
+}
+
+@media (max-width: 768px) {
+    .step-buttons {
+        flex-direction: column;
+    }
+    .step-arrow {
+        transform: rotate(90deg);
+    }
+    .chain-flow-container {
+        flex-direction: column;
+    }
+    .chain-arrow {
+        transform: rotate(90deg);
+        align-self: center;
+    }
+    .debug-table-wrap {
+        font-size: 0.75rem;
+    }
+}

+ 0 - 52
utils_test/RAG_Test/test_entity_bfp_recall.py

@@ -1,52 +0,0 @@
-import json
-import asyncio
-from foundation.ai.rag.retrieval.retrieval import retrieval_manager
-from foundation.observability.monitoring.time_statistics import track_execution_time
-
-
-entity = "架桥机"
-search_keywords = ["提梁机", "架桥设备", "造桥机"]
-background = "JQ220t-40m架桥机安装及拆除"
-
-@track_execution_time
-def main():
-    print("="*60)
-    print("实体增强召回测试")
-    print("="*60)
-    print(f"主实体: {entity}")
-    print(f"辅助实体: {search_keywords}")
-    print(f"背景信息: {background}")
-    print("-"*60)
-
-    # 使用新参数调用 entity_recall
-    # recall_top_k=5: 每个实体召回5个结果
-    # max_results=20: 最终返回最多20个实体文本
-    entity_list = asyncio.run(retrieval_manager.entity_recall(
-        entity,
-        search_keywords,
-        recall_top_k=5,      # 每次单实体召回返回5个
-        max_results=20       # 最终最多返回20个
-    ))
-
-    print(f"\n✅ 实体召回完成, 共召回 {len(entity_list)} 个实体")
-    print(f"实体列表前5个: {entity_list[:5]}")
-
-    # 使用 top_k 参数调用 async_bfp_recall
-    # top_k=3: 二次重排后最多返回3个BFP文档
-    bfp_result = asyncio.run(retrieval_manager.async_bfp_recall(
-        entity_list,
-        background,
-        top_k=3
-    ))
-
-    print(f"\n✅ BFP召回完成, 共召回 {len(bfp_result)} 个文档")
-
-    # 保存结果
-    with open("temp/entity_bfp_recall/entity_bfp_recall.json", "w", encoding="utf-8") as f:
-        json.dump(bfp_result, f, ensure_ascii=False, indent=4)
-
-    print(f"\n✅ 结果已保存到: temp/entity_bfp_recall/entity_bfp_recall.json")
-    print("="*60)
-
-if __name__ == "__main__":
-    main()

+ 0 - 195
utils_test/RAG_Test/test_extract_modes.py

@@ -1,195 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-测试 extract_first_result 的两种模式
-"""
-
-import sys
-import os
-import json
-
-project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from core.construction_review.component.infrastructure.parent_tool import extract_first_result
-
-
-def load_enhanced_results():
-    """加载增强后的检索结果"""
-    result_path = os.path.join(project_root, "temp", "entity_bfp_recall", "enhance_with_parent_docs.json")
-
-    if not os.path.exists(result_path):
-        print(f"❌ 文件不存在: {result_path}")
-        print("请先运行 RAG 检索生成该文件")
-        return None
-
-    with open(result_path, 'r', encoding='utf-8') as f:
-        enhanced_results = json.load(f)
-
-    print(f"✅ 成功加载增强结果,共 {len(enhanced_results)} 个查询对")
-    for idx, results in enumerate(enhanced_results):
-        if results:
-            first_entity = results[0].get('source_entity', f'query_{idx}')
-            print(f"   - 查询对 {idx}: entity='{first_entity}', {len(results)} 个结果")
-
-    return enhanced_results
-
-
-def load_query_pairs():
-    """加载查询对"""
-    # 优先从 rag_pipeline_data.json 读取
-    pipeline_path = os.path.join(project_root, "temp", "entity_bfp_recall", "rag_pipeline_data.json")
-
-    if os.path.exists(pipeline_path):
-        with open(pipeline_path, 'r', encoding='utf-8') as f:
-            pipeline_data = json.load(f)
-
-        # 从 steps 中提取 query_pairs
-        query_extract_step = pipeline_data.get('steps', {}).get('1_query_extract', {})
-        query_pairs = query_extract_step.get('output', {}).get('query_pairs', [])
-
-        if query_pairs:
-            print(f"✅ 从 rag_pipeline_data.json 加载了 {len(query_pairs)} 个查询对")
-            for idx, qp in enumerate(query_pairs):
-                print(f"   - 查询对 {idx}: entity='{qp.get('entity', 'N/A')}'")
-            return query_pairs
-
-    # 降级:从 enhanced_results 中提取 entity 信息
-    print("⚠️  未找到 rag_pipeline_data.json,尝试从 enhanced_results 提取")
-    result_path = os.path.join(project_root, "temp", "entity_bfp_recall", "enhance_with_parent_docs.json")
-
-    if not os.path.exists(result_path):
-        return None
-
-    with open(result_path, 'r', encoding='utf-8') as f:
-        enhanced_results = json.load(f)
-
-    # 构造简化的 query_pairs
-    query_pairs = []
-    for idx, results in enumerate(enhanced_results):
-        if results:
-            # 优先使用 source_entity,回退到 query_N
-            entity = results[0].get('source_entity', f'query_{idx}')
-            query_pairs.append({
-                'entity': entity,
-                'search_keywords': [],
-                'background': ''
-            })
-
-    return query_pairs
-
-
-def test_mode_best_overall(enhanced_results, query_pairs):
-    """测试模式1: 全局最优"""
-    print("\n" + "="*80)
-    print("📊 测试模式1: best_overall (全局最优)")
-    print("="*80)
-
-    result = extract_first_result(enhanced_results, query_pairs, mode='best_overall')
-
-    print(f"\n✅ 返回结果:")
-    print(f"  - file_name: {result.get('file_name', 'N/A')}")
-    print(f"  - source_entity: {result.get('source_entity', 'N/A')}")
-    print(f"  - bfp_rerank_score: {result.get('bfp_rerank_score', 0.0):.6f}")
-    print(f"  - text_content 长度: {len(result.get('text_content', ''))}")
-    print(f"  - retrieval_status: {result.get('retrieval_status', 'N/A')}")
-
-    # 显示文本内容预览
-    text_preview = result.get('text_content', '')[:200]
-    print(f"\n  - 文本预览: {text_preview}...")
-
-    return result
-
-
-def test_mode_best_per_entity(enhanced_results, query_pairs):
-    """测试模式2: 分实体最优"""
-    print("\n" + "="*80)
-    print("📊 测试模式2: best_per_entity (分实体最优)")
-    print("="*80)
-
-    result = extract_first_result(enhanced_results, query_pairs, mode='best_per_entity')
-
-    print(f"\n✅ 返回结果:")
-    print(f"  - total_entities: {result.get('total_entities', 0)}")
-    print(f"  - retrieval_status: {result.get('retrieval_status', 'N/A')}")
-
-    entity_results = result.get('entity_results', {})
-    print(f"\n📋 各实体最优结果:")
-    for entity_name, entity_result in entity_results.items():
-        score = entity_result.get('bfp_rerank_score', 0.0)
-        file_name = entity_result.get('file_name', 'N/A')
-        text_len = len(entity_result.get('text_content', ''))
-
-        print(f"\n  🎯 实体: {entity_name}")
-        print(f"     - score: {score:.6f}")
-        print(f"     - file_name: {file_name}")
-        print(f"     - text_length: {text_len}")
-
-    return result
-
-
-def compare_with_current_result():
-    """对比当前 extract_first_result.json 的结果"""
-    print("\n" + "="*80)
-    print("📂 对比当前保存的结果")
-    print("="*80)
-
-    result_path = os.path.join(project_root, "temp", "entity_bfp_recall", "extract_first_result.json")
-
-    if not os.path.exists(result_path):
-        print("⚠️  当前没有保存的 extract_first_result.json")
-        return
-
-    with open(result_path, 'r', encoding='utf-8') as f:
-        current_result = json.load(f)
-
-    print(f"\n当前保存的结果:")
-    print(f"  - file_name: {current_result.get('file_name', 'N/A')}")
-    print(f"  - retrieval_status: {current_result.get('retrieval_status', 'N/A')}")
-    print(f"  - bfp_rerank_score: {current_result.get('bfp_rerank_score', 'N/A')}")
-    print(f"  - source_entity: {current_result.get('source_entity', 'N/A')}")
-
-
-if __name__ == "__main__":
-    print("\n" + "="*80)
-    print("🚀 开始测试 extract_first_result 的两种模式")
-    print("="*80)
-
-    # 加载数据
-    enhanced_results = load_enhanced_results()
-    if not enhanced_results:
-        sys.exit(1)
-
-    query_pairs = load_query_pairs()
-
-    # 测试模式1
-    result1 = test_mode_best_overall(enhanced_results, query_pairs)
-
-    # 测试模式2
-    result2 = test_mode_best_per_entity(enhanced_results, query_pairs)
-
-    # 对比当前结果
-    compare_with_current_result()
-
-    # 保存测试结果
-    test_output_path = os.path.join(project_root, "temp", "entity_bfp_recall", "test_extract_modes.json")
-    with open(test_output_path, 'w', encoding='utf-8') as f:
-        json.dump({
-            'best_overall': result1,
-            'best_per_entity': result2
-        }, f, ensure_ascii=False, indent=4)
-
-    print(f"\n✅ 测试完成,结果已保存到: {test_output_path}")
-    print("\n" + "="*80)
-    print("📝 建议使用哪种模式?")
-    print("="*80)
-    print("\n模式1 (best_overall):")
-    print("  - 适用场景: 只需要一个最相关的结果")
-    print("  - 优点: 返回全局最优的结果")
-    print("  - 缺点: 可能丢失其他实体的有用信息")
-    print("\n模式2 (best_per_entity):")
-    print("  - 适用场景: 需要保留所有查询对的最优结果")
-    print("  - 优点: 保留各实体的最优结果,信息更全面")
-    print("  - 缺点: 返回结构更复杂,需要后续处理")
-    print("\n💡 如果审查需要针对不同实体分别检查,建议使用 mode='best_per_entity'")
-    print("="*80 + "\n")

+ 0 - 400
utils_test/RAG_Test/test_hybrid_search_debug.py

@@ -1,400 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-混合检索问题诊断脚本
-用于排查 hybrid_search 返回0结果的问题
-"""
-
-import sys
-import os
-
-from pymilvus import connections, Collection, utility
-from foundation.ai.models.model_handler import model_handler
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-def check_milvus_connection():
-    """检查 Milvus 连接"""
-    print("\n" + "="*60)
-    print("1. 检查 Milvus 连接")
-    print("="*60)
-    try:
-        from foundation.infrastructure.config.config import config_handler
-        host = config_handler.get('milvus', 'MILVUS_HOST', 'localhost')
-        port = int(config_handler.get('milvus', 'MILVUS_PORT', '19530'))
-        
-        connections.connect(
-            alias="debug",
-            host=host,
-            port=port,
-            db_name="lq_db"
-        )
-        print(f"✅ Milvus 连接成功: {host}:{port}")
-        return True
-    except Exception as e:
-        print(f"❌ Milvus 连接失败: {e}")
-        return False
-
-
-def check_collection_exists(collection_name: str):
-    """检查 Collection 是否存在"""
-    print(f"\n2. 检查 Collection 是否存在: {collection_name}")
-    print("-"*60)
-    
-    exists = utility.has_collection(collection_name, using="debug")
-    if exists:
-        print(f"✅ Collection '{collection_name}' 存在")
-    else:
-        print(f"❌ Collection '{collection_name}' 不存在!")
-    return exists
-
-
-def check_collection_schema(collection_name: str):
-    """检查 Collection Schema 结构"""
-    print(f"\n3. 检查 Collection Schema 结构")
-    print("-"*60)
-    
-    try:
-        col = Collection(collection_name, using="debug")
-        schema = col.schema
-        
-        print(f"Collection: {collection_name}")
-        print(f"Description: {schema.description}")
-        print(f"\n字段列表:")
-        
-        has_dense = False
-        has_sparse = False
-        field_names = []
-        
-        for field in schema.fields:
-            field_names.append(field.name)
-            print(f"  - {field.name}: {field.dtype.name}", end="")
-            if hasattr(field, 'dim') and field.dim:
-                print(f" (dim={field.dim})", end="")
-            if field.is_primary:
-                print(" [PRIMARY]", end="")
-            print()
-            
-            # 检查关键字段
-            if field.name == "dense":
-                has_dense = True
-            if field.name == "sparse":
-                has_sparse = True
-        
-        print(f"\n混合搜索所需字段检查:")
-        print(f"  - dense 字段: {'✅ 存在' if has_dense else '❌ 不存在'}")
-        print(f"  - sparse 字段: {'✅ 存在' if has_sparse else '❌ 不存在'}")
-        
-        if not has_dense or not has_sparse:
-            print(f"\n⚠️  警告: Collection 缺少混合搜索所需的字段!")
-            print(f"   混合搜索需要 'dense' 和 'sparse' 两个字段")
-            print(f"   当前字段: {field_names}")
-        
-        return has_dense and has_sparse
-        
-    except Exception as e:
-        print(f"❌ 获取 Schema 失败: {e}")
-        return False
-
-
-def check_collection_data(collection_name: str):
-    """检查 Collection 数据量"""
-    print(f"\n4. 检查 Collection 数据量")
-    print("-"*60)
-    
-    try:
-        col = Collection(collection_name, using="debug")
-        col.load()
-        num_entities = col.num_entities
-        
-        print(f"数据量: {num_entities} 条")
-        
-        if num_entities == 0:
-            print("❌ Collection 为空,没有数据!")
-            return False
-        else:
-            print("✅ Collection 有数据")
-            return True
-            
-    except Exception as e:
-        print(f"❌ 获取数据量失败: {e}")
-        return False
-
-
-def check_collection_index(collection_name: str):
-    """检查 Collection 索引"""
-    print(f"\n5. 检查 Collection 索引")
-    print("-"*60)
-    
-    try:
-        col = Collection(collection_name, using="debug")
-        indexes = col.indexes
-        
-        if not indexes:
-            print("❌ 没有索引!")
-            return False
-        
-        for idx in indexes:
-            print(f"  - 字段: {idx.field_name}")
-            print(f"    索引参数: {idx.params}")
-        
-        print("✅ 索引存在")
-        return True
-        
-    except Exception as e:
-        print(f"❌ 获取索引失败: {e}")
-        return False
-
-
-def test_traditional_search(collection_name: str, query_text: str):
-    """测试传统向量搜索(不使用混合搜索)"""
-    print(f"\n6. 测试传统向量搜索")
-    print("-"*60)
-    
-    try:
-        col = Collection(collection_name, using="debug")
-        col.load()
-        
-        # 获取 embedding
-        emdmodel = model_handler.get_embedding_model()
-        query_vector = emdmodel.embed_query(query_text)
-        
-        print(f"查询文本: {query_text}")
-        print(f"向量维度: {len(query_vector)}")
-        
-        # 确定向量字段名
-        vector_field = None
-        for field in col.schema.fields:
-            if "FLOAT_VECTOR" in str(field.dtype):
-                vector_field = field.name
-                break
-        
-        if not vector_field:
-            print("❌ 未找到向量字段")
-            return False
-        
-        print(f"向量字段: {vector_field}")
-        
-        # 执行搜索
-        search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
-        results = col.search(
-            data=[query_vector],
-            anns_field=vector_field,
-            param=search_params,
-            limit=5,
-            output_fields=["text"]
-        )
-        
-        print(f"\n搜索结果: {len(results[0])} 条")
-        for i, hit in enumerate(results[0]):
-            print(f"  {i+1}. ID={hit.id}, 距离={hit.distance:.4f}")
-        
-        if len(results[0]) > 0:
-            print("✅ 传统向量搜索正常")
-            return True
-        else:
-            print("❌ 传统向量搜索也返回0结果")
-            return False
-            
-    except Exception as e:
-        print(f"❌ 传统搜索失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def test_langchain_hybrid_search(collection_name: str, query_text: str):
-    """测试 LangChain Milvus 混合搜索"""
-    print(f"\n7. 测试 LangChain Milvus 混合搜索")
-    print("-"*60)
-    
-    try:
-        from langchain_milvus import Milvus, BM25BuiltInFunction
-        from foundation.infrastructure.config.config import config_handler
-        
-        host = config_handler.get('milvus', 'MILVUS_HOST', 'localhost')
-        port = int(config_handler.get('milvus', 'MILVUS_PORT', '19530'))
-        
-        connection_args = {
-            "uri": f"http://{host}:{port}",
-            "db_name": "lq_db"
-        }
-        
-        emdmodel = model_handler.get_embedding_model()
-        
-        print(f"尝试连接 Collection: {collection_name}")
-        print(f"连接参数: {connection_args}")
-        
-        # 尝试创建 vectorstore
-        vectorstore = Milvus(
-            embedding_function=emdmodel,
-            collection_name=collection_name,
-            connection_args=connection_args,
-            consistency_level="Strong",
-            builtin_function=BM25BuiltInFunction(),
-            vector_field=["dense", "sparse"]
-        )
-        
-        print("✅ Vectorstore 创建成功")
-        
-        # 执行混合搜索
-        print(f"\n执行混合搜索,查询: {query_text}")
-        results = vectorstore.similarity_search_with_score(
-            query=query_text,
-            k=5,
-            ranker_type="weighted",
-            ranker_params={"weights": [0.7, 0.3]}
-        )
-        
-        print(f"搜索结果: {len(results)} 条")
-        for i, (doc, score) in enumerate(results):
-            content = doc.page_content[:50] if doc.page_content else "N/A"
-            print(f"  {i+1}. score={score:.4f}, content={content}...")
-        
-        if len(results) > 0:
-            print("✅ 混合搜索正常")
-            return True
-        else:
-            print("❌ 混合搜索返回0结果")
-            return False
-            
-    except Exception as e:
-        print(f"❌ 混合搜索失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def test_retrieval_manager(collection_name: str, query_text: str):
-    """测试 RetrievalManager 的混合搜索"""
-    print(f"\n8. 测试 RetrievalManager 混合搜索")
-    print("-"*60)
-    
-    try:
-        from foundation.ai.rag.retrieval.retrieval import retrieval_manager
-        
-        results = retrieval_manager.hybrid_search_recall(
-            collection_name=collection_name,
-            query_text=query_text,
-            top_k=5,
-            ranker_type="weighted",
-            dense_weight=0.7,
-            sparse_weight=0.3
-        )
-        
-        print(f"搜索结果: {len(results)} 条")
-        for i, result in enumerate(results):
-            content = result.get('text_content', '')[:50]
-            print(f"  {i+1}. {content}...")
-        
-        if len(results) > 0:
-            print("✅ RetrievalManager 混合搜索正常")
-            return True
-        else:
-            print("❌ RetrievalManager 混合搜索返回0结果")
-            return False
-            
-    except Exception as e:
-        print(f"❌ RetrievalManager 测试失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    """主诊断函数"""
-    print("\n" + "="*60)
-    print("混合检索问题诊断")
-    print("="*60)
-    
-    # 配置
-    collection_name = "first_bfp_collection_entity"
-    query_text = "高空作业"
-    
-    print(f"\n诊断目标:")
-    print(f"  - Collection: {collection_name}")
-    print(f"  - 查询文本: {query_text}")
-    
-    # 执行诊断
-    results = {}
-    
-    # 1. 检查连接
-    results['connection'] = check_milvus_connection()
-    if not results['connection']:
-        print("\n❌ Milvus 连接失败,无法继续诊断")
-        return
-    
-    # 2. 检查 Collection 存在
-    results['exists'] = check_collection_exists(collection_name)
-    if not results['exists']:
-        print(f"\n❌ Collection '{collection_name}' 不存在,无法继续诊断")
-        return
-    
-    # 3. 检查 Schema
-    results['schema'] = check_collection_schema(collection_name)
-    
-    # 4. 检查数据量
-    results['data'] = check_collection_data(collection_name)
-    
-    # 5. 检查索引
-    results['index'] = check_collection_index(collection_name)
-    
-    # 6. 测试传统搜索
-    results['traditional'] = test_traditional_search(collection_name, query_text)
-    
-    # 7. 测试 LangChain 混合搜索
-    results['langchain'] = test_langchain_hybrid_search(collection_name, query_text)
-    
-    # 8. 测试 RetrievalManager
-    results['retrieval'] = test_retrieval_manager(collection_name, query_text)
-    
-    # 总结
-    print("\n" + "="*60)
-    print("诊断总结")
-    print("="*60)
-    
-    for key, value in results.items():
-        status = "✅" if value else "❌"
-        print(f"  {status} {key}")
-    
-    # 给出建议
-    print("\n" + "="*60)
-    print("问题分析与建议")
-    print("="*60)
-    
-    if not results.get('schema'):
-        print("""
-⚠️  主要问题: Collection Schema 不支持混合搜索
-
-原因: Collection 缺少 'dense' 和 'sparse' 字段
-      混合搜索需要在创建 Collection 时使用 BM25BuiltInFunction
-
-解决方案:
-1. 使用 create_hybrid_collection 方法重新创建 Collection
-2. 或者修改代码,对不支持混合搜索的 Collection 使用传统向量搜索
-""")
-    
-    if results.get('traditional') and not results.get('langchain'):
-        print("""
-⚠️  问题: 传统搜索正常,但混合搜索失败
-
-可能原因:
-1. Collection 创建时未启用 BM25 功能
-2. LangChain Milvus 版本兼容性问题
-3. vector_field 配置与实际字段名不匹配
-
-建议:
-1. 检查 Collection 创建方式
-2. 确认 langchain-milvus 版本
-""")
-    
-    if not results.get('data'):
-        print("""
-⚠️  问题: Collection 为空
-
-解决方案: 先向 Collection 中导入数据
-""")
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 143
utils_test/RAG_Test/test_query_extract_integration.py

@@ -1,143 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-集成测试QueryRewriteManager.query_extract方法
-"""
-
-import sys
-import os
-import time
-
-from foundation.ai.rag.retrieval.query_rewrite import QueryRewriteManager
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-def test_query_extract_with_construction_content():
-    """
-    测试使用施工方案内容的query_extract
-    """
-    print("="*60)
-    print("测试QueryRewriteManager.query_extract方法")
-    print("="*60)
-
-    query_rewrite_manager = QueryRewriteManager()
-
-    # 测试数据: 基坑工程
-    review_content = """(1)应将架桥机随机文件和有关技术资料准备齐全,认真组织安装人员学习阅
-读本方案,并以此为依据结合实际工况及地貌拟定有关安装施工方案。
-(2)安装前,应对设备散件进行全面检查、清理,如发现有损伤、腐蚀或其它
-缺陷,应在安装前予以处理,合格后方可安装。
-(3)应对架桥机运梁轨道进行如下检查:
-1)架桥机运梁轨道基础应有足够的承压能力,应能满足架桥机运梁平车载重安
-全运行。轨道下部如有支垫是否具有强大的刚性和一定的密度,特殊端部的支垫必
-须采用刚性支垫,严禁使用腐蚀的枕木。
-2)轨道钢轨正面、侧面的不平度不应大于1/1500,全长范围内不应大于10毫
-米。
-3)轨道安装的允许偏差:
-①轨道实际中心线对轨道设计中心线的位置偏移允许偏差为3毫米。
-②轨距允许偏差为±5毫米。
-③轨道纵向不平度应小于 1/1500,且全行程不超过10毫米。
-④同一断面上两轨道的标高相对偏差不超过5毫米。
-4)轨道接头应符合下列要求:
-①接头左、右、上三面错位不应大于2毫米。
-②两平行轨道接口的位置应错开,其错开距离不应等于架桥机运梁平车前后车
-轮的轮距。
-第19页
-四川公路桥梁建设集团有限公司 镇广C4项目经理部JQ220t-40m架桥机安装及拆除专项施工方案
-③接头间隙应为1~2毫米,伸缩缝接头间隙应符合设计要求,其偏差不应大于
-±1毫米。
-④轨道的悬空部位是否得到了加强。
-5)施工期间的基本要求及安全规定
-①架桥机安装属于高空作业,施工前技术员及现场指挥员向参加施工的所有人
-员详细介绍安装工序、技术要求和指挥信号。
-②严禁在施工的架桥机下面逗留通过,与安装施工无关人员不准擅自进入施工
-现场。
-③现场施工人员必须按有关安全规定佩戴好安全用品(安全带、安全帽、绝缘
-鞋等)。
-④施工现场使用的氧气瓶、乙炔瓶必须保证立放并固定好,两瓶之间距离不得
-小于5m。
-6)架桥机组装
-①安装原则:按从下至上的顺序进行安装,且必须在下部结构安装固定牢固以
-后,才能进行上部结构的安装。同时,在安装过程中,在运输及吊装能力允许的情
-况下,尽量保持设备各部件的整体性,一方面能有效减少危险作业点,另一方面有
-利于提高设备现场安装工效。
-②安装场地:架桥拼装在桥台台背后路基上,拼装场地选址位于路基上,拼装
-场地面积宽15m,长 100m,架桥机主体最大宽度为9m(移动轨道 18m),路基宽度
-满足安装场地所需,路基及便道顶面承载力满足接卸拼装所需的承载要求。
-③辅助机械设备:根据施工现场实际情况,选择1台25T汽车吊作为主要安装
-设备,架桥机主梁拼装采用分节进行吊装,整机重量最大的组件为三节主梁同时起
-吊,总重18t,采用2 台50t汽车吊满足吊装要求。"""
-
-    print(f"输入内容: {review_content}")
-    print(f"内容长度: {len(review_content)}")
-
-    try:
-        start_time = time.time()
-        result = query_rewrite_manager.query_extract(review_content)
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-
-        print(f"\n[OK] 提取完成,耗时: {elapsed_time:.2f}秒")
-        print(f"返回结果类型: {type(result)}")
-        print(f"返回结果长度: {len(str(result))}")
-        print(f"返回结果: {result}")
-
-        if result:
-            print("[OK] 成功提取到查询信息")
-
-            # 尝试解析JSON响应
-            try:
-                import json
-                if isinstance(result, str):
-                    # 提取JSON部分
-                    if '```json' in result:
-                        start = result.find('```json') + 7
-                        end = result.find('```', start)
-                        if end != -1:
-                            json_str = result[start:end].strip()
-                        else:
-                            json_str = result[start:].strip()
-                    else:
-                        json_str = result.strip()
-
-                    parsed_result = json.loads(json_str)
-                    print(f"[OK] JSON解析成功,提取到 {len(parsed_result)} 个实体")
-
-                    for i, entity in enumerate(parsed_result):
-                        entity_name = entity.get('entity', 'N/A')
-                        keywords = entity.get('search_keywords', [])
-                        background = entity.get('background', 'N/A')
-                        parameter = entity.get('parameter', 'N/A')
-
-                        print(f"  实体{i+1}: {entity_name}")
-                        print(f"    关键词: {keywords}")
-                        print(f"    背景: {background}")
-                        print(f"    参数: {parameter}")
-                        print()
-            except Exception as e:
-                print(f"[WARN] JSON解析失败: {e}")
-        else:
-            print("[ERROR] 未返回有效结果")
-
-    except Exception as e:
-        print(f"[ERROR] 测试失败: {str(e)}")
-        logger.error(f"测试失败: {str(e)}")
-        import traceback
-        traceback.print_exc()
-
-
-def main():
-    """
-    主测试函数
-    """
-    print("开始集成测试 QueryRewriteManager.query_extract 方法")
-
-    # 执行核心测试
-    test_query_extract_with_construction_content()
-
-    print("\n集成测试完成")
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 87
utils_test/RAG_Test/test_rag.py

@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-测试多阶段召回功能
-"""
-
-import sys
-import os
-import time
-
-from foundation.ai.rag.retrieval.retrieval import retrieval_manager
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-def test_multi_stage_recall(collection_name,query):
-    """
-    测试多阶段召回
-    """
-
-    try:
-        start_time = time.time()
-        results = retrieval_manager.multi_stage_recall(
-            collection_name=collection_name,
-            query_text=query,
-            hybrid_top_k=10,
-            top_k=5,
-        )
-        logger.info(f"返回结果:{results}")
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-
-        print(f"[OK] 召回完成,耗时: {elapsed_time:.2f}秒")
-        print(f"[OK] 返回结果数量: {len(results)}")
-
-
-    except Exception as e:
-        print(f"[ERROR] 多阶段召回测试失败: {str(e)}")
-
-def test_hybrid_search_recall(collection_name,query):
-    """
-    测试混合召回
-    """
-    try:
-        start_time = time.time()
-        results = retrieval_manager.hybrid_search_recall(
-            collection_name=collection_name,
-            query_text=query,
-            top_k=1,
-            ranker_type="weighted",
-            dense_weight=0.7,
-            sparse_weight=0.3
-        )
-        logger.info(f"返回结果:{results}")
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-        print(f"[OK] 召回完成,耗时: {elapsed_time:.2f}秒")
-        print(f"[OK] 召回结果数量: {len(results)}")
-        return results
-    
-    except Exception as e:
-        print(f"[ERROR] 混合召回测试失败: {str(e)}")
-
-
-
-
-def main():
-    """
-    主测试函数
-    """
-    collection_name = "first_bfp_collection_test"
-    query = "起重小车轨道,起重量小于 320t的分段拼接桁架梁每段梁上小车轨道不允许有接缝(允许焊为一体),拼接 处高低差≤2mm、间隙≤4mm、侧向错位≤2mm,非焊接连接轨道端部加挡铁,其他梁轨道接头高低差≤1mm、间隙≤2mm、侧向错位≤1mm,正轨箱形梁及半偏轨箱形梁轨道接缝应放 在筋板上允差≤15mm,两端最短轨道长度≥1.5m且端部加挡"
-    
-    # 测试多路召回
-    logger.info("开始测试多路召回...")
-    test_multi_stage_recall(collection_name,query=query)
-
-    # # 测试混合召回
-    # logger.info("开始测试混合召回...")
-    # test_hybrid_search_recall(collection_name="first_bfp_collection_entity",query=query)
-
-
-
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 57
utils_test/RAG_Test/test_rag_enhanced_check.py

@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-测试AI审查引擎的RAG增强检查功能
-"""
-
-import sys
-import os
-import asyncio
-
-
-from core.base.task_models import TaskFileInfo
-from core.construction_review.component.ai_review_engine import AIReviewEngine
-from foundation.observability.logger.loggering import review_logger as logger
-
-# 构建测试用的 TaskFileInfo
-test_file_info = {
-    'file_id': 'test_file_001',
-    'user_id': 'test_user',
-    'callback_task_id': 'test_task_001',
-    'file_name': 'test_document.pdf',
-    'file_type': 'pdf',
-    'review_config': ['non_parameter_compliance_check', 'parameter_compliance_check'],
-    'project_plan_type': '桥梁施工方案',
-    'tendency_review_role': 'reviewer',
-    'launched_at': 0
-}
-
-# 创建 TaskFileInfo 实例
-task_file_info = TaskFileInfo(test_file_info)
-
-# 实例化AIReviewEngine (传入 task_file_info)
-review_engine = AIReviewEngine(task_file_info=task_file_info)
-
-# 记录开始时间
-import time
-start_time = time.time()
-query = """ 主要部件说明
-1、主梁总成
-主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t,主梁为主要承载受力构件,其上弦杆上方设有轨道供纵移桁车走行,实现预制梁的纵向移动;下弦设有反滚轮行走轨道,作为导梁纵移、前中支腿移动纵行轨道。导梁长18m,主要是为降低过孔挠度和承受中支腿移动荷载,起安全引导、辅助过孔作用。主梁、导梁为三角桁架构件单元,采用销轴连接,前、后端各设置横联构架。
-
-图4-1 主梁总成图
-注意事项:
-(1)更换上、下弦销轴时,应优先向设备供应方购买符合要求的备件。自行更换时,材料性能必须优于设计零件性能,并按规定进行热处理,否则可能造成人员、设备事故。
-(2)销轴不得弯曲受力,不得用销轴作为锤砸工具,不得任意放置及焊接"""
-unit_content= {
-    "content" : query,
-}
-result = review_engine.rag_enhanced_check(unit_content)
-
-
-print(result)
-
-end_time = time.time()
-elapsed_time = end_time - start_time
-print(f"\nRAG增强检查完成,耗时: {elapsed_time:.2f}秒")

+ 0 - 338
utils_test/RAG_Test/test_rag_monitor.py

@@ -1,338 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-RAG监控装饰器使用示例和测试脚本
-展示如何使用 rag_monitor 装饰器监控RAG链路
-"""
-
-import sys
-import os
-import time
-import json
-import asyncio
-from pathlib import Path
-
-project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from foundation.observability.monitoring.rag import rag_monitor
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-# ========== 示例1: 同步函数监控 ==========
-
-@rag_monitor.monitor_step(
-    step_name="example_sync_query_extract",
-    capture_input=True,
-    capture_output=True
-)
-def example_query_extract(content: str):
-    """示例:查询提取函数"""
-    logger.info(f"正在提取查询,内容长度: {len(content)}")
-    time.sleep(0.5)  # 模拟处理时间
-
-    # 模拟提取结果
-    return [
-        {"query": "安全生产条件", "entity": "安全"},
-        {"query": "施工管理制度", "entity": "施工"}
-    ]
-
-
-@rag_monitor.monitor_step(
-    step_name="example_sync_vector_search",
-    capture_input=True,
-    capture_output=True,
-    output_transform=lambda x: {  # 只保留关键信息
-        "results_count": len(x),
-        "has_results": bool(x)
-    }
-)
-def example_vector_search(query_pairs: list):
-    """示例:向量检索函数"""
-    logger.info(f"正在进行向量检索,查询对数量: {len(query_pairs)}")
-    time.sleep(1.0)  # 模拟检索时间
-
-    # 模拟检索结果
-    results = []
-    for pair in query_pairs:
-        results.append({
-            "query": pair["query"],
-            "doc_id": f"doc_{hash(pair['query']) % 100}",
-            "score": 0.85,
-            "content": f"这是关于{pair['query']}的内容..."
-        })
-    return results
-
-
-# ========== 示例2: 异步函数监控 ==========
-
-@rag_monitor.monitor_step(
-    step_name="example_async_rerank",
-    capture_input=True,
-    capture_output=True,
-    input_transform=lambda x: {  # 只记录输入统计信息
-        "results_count": len(x["args"][0]) if x["args"] else 0
-    }
-)
-async def example_async_rerank(results: list):
-    """示例:异步重排序函数"""
-    logger.info(f"正在进行重排序,结果数量: {len(results)}")
-    await asyncio.sleep(0.8)  # 模拟异步处理
-
-    # 模拟重排序
-    sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)
-    return sorted_results[:5]  # 只返回前5个
-
-
-@rag_monitor.monitor_step(
-    step_name="example_async_parent_enhance",
-    capture_input=True,
-    capture_output=True
-)
-async def example_async_parent_enhance(results: list):
-    """示例:异步父文档增强函数"""
-    logger.info(f"正在进行父文档增强,结果数量: {len(results)}")
-    await asyncio.sleep(1.2)  # 模拟异步处理
-
-    # 模拟父文档增强
-    enhanced = []
-    for res in results:
-        enhanced.append({
-            **res,
-            "parent_content": f"父文档内容: {res['content']}的完整上下文...",
-            "enhanced": True
-        })
-    return enhanced
-
-
-# ========== 示例3: 完整的RAG链路测试 ==========
-
-def test_sync_rag_pipeline():
-    """测试同步RAG链路"""
-    print("\n" + "="*60)
-    print("示例1: 同步RAG链路监控")
-    print("="*60)
-
-    # 开始追踪会话
-    trace_id = f"test_sync_{int(time.time() * 1000)}"
-    rag_monitor.start_trace(trace_id, metadata={
-        "test_type": "sync",
-        "description": "同步RAG链路测试"
-    })
-
-    try:
-        # Step 1: 查询提取
-        query_content = "请检查施工方案中的安全生产条件和施工管理制度是否符合规范要求。"
-        query_pairs = example_query_extract(query_content)
-        print(f"✅ 查询提取完成,提取到 {len(query_pairs)} 个查询对")
-
-        # Step 2: 向量检索
-        search_results = example_vector_search(query_pairs)
-        print(f"✅ 向量检索完成,找到 {len(search_results)} 个结果")
-
-        print(f"\n✅ 同步RAG链路测试完成")
-
-    finally:
-        # 结束追踪并保存
-        trace_data = rag_monitor.end_trace(trace_id)
-        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
-        print(f"⏱️  总耗时: {trace_data['total_duration']}秒")
-        print(f"📝 步骤数量: {len(trace_data['steps'])}")
-
-
-async def test_async_rag_pipeline():
-    """测试异步RAG链路"""
-    print("\n" + "="*60)
-    print("示例2: 异步RAG链路监控")
-    print("="*60)
-
-    # 开始追踪会话
-    trace_id = f"test_async_{int(time.time() * 1000)}"
-    rag_monitor.start_trace(trace_id, metadata={
-        "test_type": "async",
-        "description": "异步RAG链路测试"
-    })
-
-    try:
-        # 模拟一些初始数据
-        initial_results = [
-            {"query": "安全", "doc_id": "doc_1", "score": 0.82, "content": "安全内容..."},
-            {"query": "施工", "doc_id": "doc_2", "score": 0.91, "content": "施工内容..."},
-            {"query": "管理", "doc_id": "doc_3", "score": 0.75, "content": "管理内容..."}
-        ]
-
-        # Step 1: 异步重排序
-        reranked_results = await example_async_rerank(initial_results)
-        print(f"✅ 重排序完成,保留前 {len(reranked_results)} 个结果")
-
-        # Step 2: 异步父文档增强
-        enhanced_results = await example_async_parent_enhance(reranked_results)
-        print(f"✅ 父文档增强完成,增强了 {len(enhanced_results)} 个结果")
-
-        print(f"\n✅ 异步RAG链路测试完成")
-
-    finally:
-        # 结束追踪并保存
-        trace_data = rag_monitor.end_trace(trace_id)
-        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
-        print(f"⏱️  总耗时: {trace_data['total_duration']}秒")
-        print(f"📝 步骤数量: {len(trace_data['steps'])}")
-
-
-def test_mixed_rag_pipeline():
-    """测试混合(同步+异步)RAG链路"""
-    print("\n" + "="*60)
-    print("示例3: 混合RAG链路监控(同步+异步)")
-    print("="*60)
-
-    # 开始追踪会话
-    trace_id = f"test_mixed_{int(time.time() * 1000)}"
-    rag_monitor.start_trace(trace_id, metadata={
-        "test_type": "mixed",
-        "description": "混合RAG链路测试"
-    })
-
-    try:
-        # Step 1: 同步查询提取
-        query_content = "检查项目的环境保护措施和质量管理体系。"
-        query_pairs = example_query_extract(query_content)
-        print(f"✅ [同步] 查询提取完成")
-
-        # Step 2: 同步向量检索
-        search_results = example_vector_search(query_pairs)
-        print(f"✅ [同步] 向量检索完成")
-
-        # Step 3: 异步重排序
-        async def async_part():
-            reranked = await example_async_rerank(search_results)
-            print(f"✅ [异步] 重排序完成")
-
-            # Step 4: 异步父文档增强
-            enhanced = await example_async_parent_enhance(reranked)
-            print(f"✅ [异步] 父文档增强完成")
-            return enhanced
-
-        # 运行异步部分
-        final_results = asyncio.run(async_part())
-
-        print(f"\n✅ 混合RAG链路测试完成,最终得到 {len(final_results)} 个结果")
-
-    finally:
-        # 结束追踪并保存
-        trace_data = rag_monitor.end_trace(trace_id)
-        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
-        print(f"⏱️  总耗时: {trace_data['total_duration']}秒")
-        print(f"📝 步骤数量: {len(trace_data['steps'])}")
-
-
-# ========== 示例4: 自定义输入输出转换 ==========
-
-@rag_monitor.monitor_step(
-    step_name="example_sensitive_data",
-    capture_input=True,
-    capture_output=True,
-    input_transform=lambda x: {
-        # 过滤敏感信息,只保留统计数据
-        "user_id": "***",  # 隐藏用户ID
-        "data_length": len(str(x))
-    },
-    output_transform=lambda x: {
-        # 只保留关键指标
-        "success": x.get("success"),
-        "count": x.get("count")
-    }
-)
-def example_process_sensitive_data(user_id: str, data: dict):
-    """示例:处理敏感数据(自定义转换)"""
-    time.sleep(0.3)
-    return {
-        "success": True,
-        "user_id": user_id,
-        "count": len(data),
-        "details": data  # 这些详细信息不会被记录
-    }
-
-
-def test_custom_transform():
-    """测试自定义输入输出转换"""
-    print("\n" + "="*60)
-    print("示例4: 自定义输入输出转换(敏感数据保护)")
-    print("="*60)
-
-    trace_id = f"test_transform_{int(time.time() * 1000)}"
-    rag_monitor.start_trace(trace_id, metadata={
-        "test_type": "custom_transform"
-    })
-
-    try:
-        result = example_process_sensitive_data(
-            user_id="user_12345",
-            data={"key1": "value1", "key2": "value2"}
-        )
-        print(f"✅ 处理完成,成功: {result['success']}")
-        print(f"ℹ️  敏感信息已被过滤,只记录统计数据")
-
-    finally:
-        trace_data = rag_monitor.end_trace(trace_id)
-        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
-
-
-# ========== 查看监控结果 ==========
-
-def view_trace_result(trace_id: str):
-    """查看追踪结果"""
-    file_path = Path("temp/rag_monitoring") / f"{trace_id}.json"
-
-    if file_path.exists():
-        print(f"\n📄 追踪结果: {trace_id}")
-        print("="*60)
-
-        with open(file_path, 'r', encoding='utf-8') as f:
-            data = json.load(f)
-
-        print(f"⏱️  总耗时: {data.get('total_duration')}秒")
-        print(f"📝 步骤数量: {len(data.get('steps', {}))}")
-        print(f"\n步骤详情:")
-
-        for step_name, step_data in data.get('steps', {}).items():
-            print(f"\n  [{step_data.get('status', 'unknown').upper()}] {step_name}")
-            print(f"    函数: {step_data.get('function_name')}")
-            print(f"    耗时: {step_data.get('duration')}秒")
-
-            if step_data.get('status') == 'error':
-                print(f"    ❌ 错误: {step_data.get('error', {}).get('message')}")
-    else:
-        print(f"❌ 找不到追踪文件: {file_path}")
-
-
-# ========== 主函数 ==========
-
-def main():
-    """运行所有测试示例"""
-    print("\n" + "🚀 RAG监控装饰器测试 🚀".center(60, "="))
-
-    try:
-        # 示例1: 同步RAG链路
-        test_sync_rag_pipeline()
-
-        # 示例2: 异步RAG链路
-        asyncio.run(test_async_rag_pipeline())
-
-        # 示例3: 混合RAG链路
-        test_mixed_rag_pipeline()
-
-        # 示例4: 自定义转换
-        test_custom_transform()
-
-        print("\n" + "✅ 所有测试完成!".center(60, "="))
-        print(f"\n💡 提示: 查看监控数据文件在: temp/rag_monitoring/")
-        print(f"💡 提示: 每个trace_id对应一个JSON文件,包含完整的执行链路信息")
-
-    except Exception as e:
-        print(f"\n❌ 测试失败: {e}")
-        import traceback
-        traceback.print_exc()
-
-
-if __name__ == "__main__":
-    main()

+ 0 - 647
utils_test/RAG_Test/test_rag_pipeline.py

@@ -1,647 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-RAG链路独立测试工具
-用于快速调通和验证参数合规性检查的RAG检索+LLM审查功能
-
-核心功能:
-1. rag_enhanced_check() - 完整的RAG检索逻辑
-2. check_parameter_compliance() - 参数合规性检查(与原链路完全一致)
-
-使用方法:
-    python test_rag_pipeline.py
-"""
-
-import sys
-import os
-import json
-import time
-import asyncio
-from typing import Dict, Any
-
-from core.construction_review.component.infrastructure.milvus import MilvusConfig, MilvusManager
-from core.construction_review.component.infrastructure.parent_tool import (
-    enhance_with_parent_docs,
-    extract_first_result
-)
-from core.construction_review.component.reviewers.base_reviewer import BaseReviewer, ReviewResult
-from foundation.ai.rag.retrieval.entities_enhance import entity_enhance
-from foundation.ai.rag.retrieval.query_rewrite import query_rewrite_manager
-from foundation.ai.agent.generate.model_generate import generate_model_client
-from core.construction_review.component.reviewers.utils.prompt_loader import prompt_loader
-from foundation.observability.logger.loggering import review_logger as logger
-
-
-# ============================================================================
-# 简化的BaseReviewer类 - 用于调用LLM审查
-# ============================================================================
-
-class SimpleReviewer(BaseReviewer):
-    """
-    简化的审查器 - 继承BaseReviewer,用于调用LLM审查
-    """
-
-    def __init__(self):
-        """初始化简化的审查器"""
-        super().__init__()
-        self.model_client = generate_model_client
-        self.prompt_loader = prompt_loader
-
-
-# 全局审查器实例
-simple_reviewer = SimpleReviewer()
-
-
-# ============================================================================
-# 核心RAG链路函数
-# ============================================================================
-
-def rag_enhanced_check(milvus_manager, unit_content: dict) -> dict:
-    """
-    RAG增强检查 - 完整链路
-
-    流程:
-    1. 查询提取 (query_rewrite_manager.query_extract)
-    2. 实体增强检索 (entity_enhance.entities_enhance_retrieval)
-    3. 父文档增强 (enhance_with_parent_docs)
-    4. 提取第一个结果 (extract_first_result)
-
-    Args:
-        milvus_manager: MilvusManager实例
-        unit_content: 包含content字段的字典,格式: {"content": "待检索的文本内容"}
-
-    Returns:
-        dict: RAG检索结果,包含:
-            - vector_search: 向量检索结果列表
-            - retrieval_status: 检索状态
-            - file_name: 参考文件名
-            - text_content: 参考文本内容
-            - metadata: 元数据信息
-    """
-    # 创建数据流跟踪字典
-    pipeline_data = {
-        "stage": "rag_enhanced_check",
-        "timestamp": time.time(),
-        "steps": {}
-    }
-
-    query_content = unit_content['content']
-    logger.info(f"[RAG增强] 开始处理, 内容长度: {len(query_content)}")
-
-    # Step 1: 查询提取
-    logger.info("=" * 80)
-    logger.info("Step 1: 查询提取")
-    logger.info("=" * 80)
-
-    logger.info(f"开始查询提取, 输入内容长度: {len(query_content)}")
-    logger.info(f"输入内容预览: {query_content[:200]}...")
-
-    # 执行查询提取
-    query_pairs = query_rewrite_manager.query_extract(query_content)
-
-    logger.info(f"[RAG增强] 提取到 {len(query_pairs)} 个查询对")
-
-    # 打印查询对详情
-    for idx, query_pair in enumerate(query_pairs):
-        logger.info(f"  查询对 {idx+1}: {query_pair}")
-
-    # 保存Step 1的输入输出
-    pipeline_data["steps"]["1_query_extract"] = {
-        "input": {
-            "content_length": len(query_content),
-            "content_full": query_content,
-            "content_preview": query_content[:200]
-        },
-        "output": {
-            "query_pairs_count": len(query_pairs),
-            "query_pairs": [str(qp) for qp in query_pairs],  # 转为字符串列表便于查看
-            "extraction_timestamp": time.time()
-        }
-    }
-
-    # Step 2: 实体增强检索
-    logger.info("=" * 80)
-    logger.info("Step 2: 实体增强检索")
-    logger.info("=" * 80)
-
-    logger.info(f"开始实体增强检索, 输入查询对数量: {len(query_pairs)}")
-
-    # 保存输入
-    entity_enhance_input = {
-        "query_pairs": [str(qp) for qp in query_pairs],
-        "query_pairs_count": len(query_pairs)
-    }
-
-    # 详细记录每个查询对的处理过程
-    entity_enhance_process_details = []
-
-    # 手动展开实体增强检索的每个步骤,便于记录数据流
-    import asyncio
-
-    def run_async(coro):
-        """在合适的环境中运行异步函数"""
-        try:
-            loop = asyncio.get_running_loop()
-            import concurrent.futures
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, coro)
-                return future.result()
-        except RuntimeError:
-            return asyncio.run(coro)
-
-    # 导入retrieval_manager
-    from foundation.ai.rag.retrieval.retrieval import retrieval_manager
-
-    bfp_result_lists = []
-
-    # 遍历每个查询对进行处理
-    for idx, query_pair in enumerate(query_pairs):
-        logger.info(f"\n{'='*60}")
-        logger.info(f"处理查询对 {idx+1}/{len(query_pairs)}")
-        logger.info(f"{'='*60}")
-
-        # 提取查询对的各个字段
-        entity = query_pair['entity']
-        search_keywords = query_pair['search_keywords']
-        background = query_pair['background']
-
-        logger.info(f"  实体(entity): {entity}")
-        logger.info(f"  搜索关键词(search_keywords): {search_keywords}")
-        logger.info(f"  背景(background): {background}")
-
-        # 记录当前查询对的输入
-        current_query_detail = {
-            "index": idx + 1,
-            "input": {
-                "entity": entity,
-                "search_keywords": search_keywords,
-                "background": background
-            },
-            "steps": {}
-        }
-
-        # Step 2.1: 实体召回 (entity_recall)
-        logger.info(f"  Step 2.1: 实体召回 (recall_top_k=5, max_results=5)")
-
-        entity_list = run_async(retrieval_manager.entity_recall(
-            entity,
-            search_keywords,
-            recall_top_k=5,
-            max_results=5
-        ))
-
-        logger.info(f"  ✅ 实体召回完成, 召回实体数量: {len(entity_list) if entity_list else 0}")
-
-        # 记录实体召回结果
-        current_query_detail["steps"]["2_1_entity_recall"] = {
-            "input": {
-                "entity": entity,
-                "search_keywords": search_keywords,
-                "recall_top_k": 5,
-                "max_results": 5
-            },
-            "output": {
-                "entity_list": entity_list,
-                "entity_count": len(entity_list) if entity_list else 0
-            }
-        }
-
-        # Step 2.2: BFP召回 (async_bfp_recall)
-        logger.info(f"  Step 2.2: BFP召回 (top_k=3)")
-
-        bfp_result = run_async(retrieval_manager.async_bfp_recall(
-            entity_list,
-            background,
-            top_k=3
-        ))
-
-        logger.info(f"  ✅ BFP召回完成, BFP结果数量: {len(bfp_result) if bfp_result else 0}")
-        logger.info(f"  bfp_result: {bfp_result}")
-
-        # 记录BFP召回结果
-        current_query_detail["steps"]["2_2_bfp_recall"] = {
-            "input": {
-                "entity_list": entity_list,
-                "background": background,
-                "top_k": 3
-            },
-            "output": {
-                "bfp_result": bfp_result,
-                "bfp_result_count": len(bfp_result) if bfp_result else 0
-            }
-        }
-
-        bfp_result_lists.append(bfp_result)
-
-        entity_enhance_process_details.append(current_query_detail)
-
-        logger.info(f"✅ 查询对 {idx+1} 处理完成")
-
-    logger.info(f"\n{'='*80}")
-    logger.info(f"实体增强检索全部完成")
-    logger.info(f"总查询对数: {len(query_pairs)}")
-    logger.info(f"总BFP结果数: {len(bfp_result_lists)}")
-    logger.info(f"{'='*80}")
-
-    # 保存Step 2的详细输出
-    pipeline_data["steps"]["2_entity_enhance_retrieval"] = {
-        "input": entity_enhance_input,
-        "output": {
-            "results_count": len(bfp_result_lists),
-            "results": bfp_result_lists,
-            "process_details": entity_enhance_process_details  # 每个查询对的详细处理过程
-        },
-        "timestamp": time.time()
-    }
-
-    # Step 3: 检查检索结果
-    if not bfp_result_lists:
-        logger.warning("[RAG增强] 实体检索未返回结果")
-
-        # 保存最终数据流
-        os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-        with open(rf"temp\entity_bfp_recall\rag_pipeline_data.json", "w", encoding='utf-8') as f:
-            json.dump(pipeline_data, f, ensure_ascii=False, indent=4)
-
-        return {
-            'vector_search': [],
-            'retrieval_status': 'no_results',
-            'file_name': '',
-            'text_content': '',
-            'metadata': {}
-        }
-
-    logger.info(f"[RAG增强] 实体检索返回 {len(bfp_result_lists)} 个结果")
-
-    # Step 4: 父文档增强 (使用独立工具函数)
-    logger.info("=" * 80)
-    logger.info("Step 3: 父文档增强")
-    logger.info("=" * 80)
-
-    try:
-        enhancement_result = enhance_with_parent_docs(milvus_manager, bfp_result_lists)
-        enhanced_results = enhancement_result['enhanced_results']
-        enhanced_count = enhancement_result['enhanced_count']
-        parent_docs = enhancement_result['parent_docs']
-
-        # 保存Step 3输出
-        pipeline_data["steps"]["3_parent_doc_enhancement"] = {
-            "input": {
-                "bfp_results_count": len(bfp_result_lists)
-            },
-            "output": {
-                "enhanced_count": enhanced_count,
-                "parent_docs_count": len(parent_docs),
-                "parent_docs": parent_docs,
-                "enhanced_results": enhanced_results
-            }
-        }
-
-        # 保存增强后的结果
-        os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-        with open(rf"temp\entity_bfp_recall\enhance_with_parent_docs.json", "w", encoding='utf-8') as f:
-            json.dump(enhanced_results, f, ensure_ascii=False, indent=4)
-
-        logger.info(f"[RAG增强] 成功增强 {enhanced_count} 个结果")
-        logger.info(f"[RAG增强] 使用了 {len(parent_docs)} 个父文档")
-
-        # 打印父文档信息
-        for idx, parent_doc in enumerate(parent_docs):
-            logger.info(f"  父文档 {idx+1}: {parent_doc.get('file_name', 'unknown')}")
-
-    except Exception as e:
-        logger.error(f"[RAG增强] 父文档增强失败: {e}", exc_info=True)
-
-        # 保存错误信息
-        pipeline_data["steps"]["3_parent_doc_enhancement"] = {
-            "input": {
-                "bfp_results_count": len(bfp_result_lists)
-            },
-            "output": {
-                "error": str(e),
-                "error_type": type(e).__name__
-            }
-        }
-
-        # 失败时使用原始结果
-        enhanced_results = bfp_result_lists
-        parent_docs = []
-
-    # Step 5: 提取第一个结果返回 (使用增强后的结果)
-    logger.info("=" * 80)
-    logger.info("Step 4: 提取第一个结果")
-    logger.info("=" * 80)
-
-    final_result = extract_first_result(enhanced_results)
-
-    # 保存Step 4输出
-    pipeline_data["steps"]["4_extract_first_result"] = {
-        "input": {
-            "enhanced_results_count": len(enhanced_results)
-        },
-        "output": {
-            "final_result": final_result
-        }
-    }
-
-    # 保存最终结果用于调试
-    with open(rf"temp\entity_bfp_recall\extract_first_result.json", "w", encoding='utf-8') as f:
-        json.dump(final_result, f, ensure_ascii=False, indent=4)
-
-    # 保存完整数据流
-    pipeline_data["final_result"] = final_result
-    os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-    with open(rf"temp\entity_bfp_recall\rag_pipeline_data.json", "w", encoding='utf-8') as f:
-        json.dump(pipeline_data, f, ensure_ascii=False, indent=4)
-
-    logger.info(f"[RAG增强] 最终提取结果文件名: {final_result.get('file_name', '无')}")
-    logger.info(f"[RAG增强] 最终提取结果内容长度: {len(final_result.get('text_content', ''))}")
-    logger.info(f"[RAG增强] 完整数据流已保存到: temp/entity_bfp_recall/rag_pipeline_data.json")
-
-    return final_result
-
-
-# ============================================================================
-# 参数合规性检查函数 (与原链路完全一致)
-# ============================================================================
-
-async def check_parameter_compliance(trace_id_idx: str, review_content: str, review_references: str,
-                                    reference_source: str, review_location_label: str, state: str, stage_name: str) -> Dict[str, Any]:
-    """
-    参数合规性检查 - 实体概念/工程术语知识库
-    (与原链路完全一致的方法签名和实现)
-
-    Args:
-        trace_id_idx: 追踪ID索引
-        review_content: 审查内容
-        review_references: 审查参考信息
-        reference_source: 参考来源
-        review_location_label: 审查位置标签
-        state: 状态字典
-        stage_name: 阶段名称
-
-    Returns:
-        Dict[str, Any]: 参数合规性检查结果
-    """
-    # 从原链路导入Stage枚举
-    from core.construction_review.component.ai_review_engine import Stage
-
-    reviewer_type = Stage.TECHNICAL.value['reviewer_type']
-    prompt_name = Stage.TECHNICAL.value['parameter']
-    trace_id = prompt_name + trace_id_idx
-
-    # 直接调用原链路的review方法
-    return await simple_reviewer.review("parameter_compliance_check", trace_id, reviewer_type, prompt_name, review_content, review_references,
-                                       reference_source, review_location_label, state, stage_name, timeout=45)
-
-
-# ============================================================================
-# 主测试函数
-# ============================================================================
-
-async def main():
-    """
-    主测试函数 - 测试参数合规性检查的完整流程
-
-    流程:
-    1. 初始化Milvus Manager
-    2. 准备测试内容
-    3. 调用RAG获取参考信息
-    4. 调用参数合规性检查(与原链路一致)
-    5. 保存完整数据流
-    """
-    print("\n" + "=" * 80)
-    print("RAG链路独立测试工具 - 参数合规性检查".center(80))
-    print("=" * 80 + "\n")
-
-    # 初始化Milvus Manager
-    print("📌 初始化Milvus Manager...")
-    logger.info("初始化Milvus Manager...")
-    try:
-        milvus_manager = MilvusManager(MilvusConfig())
-        print("✅ Milvus Manager 初始化成功\n")
-    except Exception as e:
-        print(f"❌ Milvus Manager 初始化失败: {e}")
-        logger.error(f"Milvus Manager 初始化失败: {e}", exc_info=True)
-        return
-
-    # 测试内容
-    test_content = """主要部件说明
-1、主梁总成
-主梁总成由主梁和导梁构成。主梁单节长12m,共7节,每节重10.87t,主梁为主要承载受力构件,其上弦杆上方设有轨道供纵移桁车走行,实现预制梁的纵向移动;下弦设有反滚轮行走轨道,作为导梁纵移、前中支腿移动纵行轨道。导梁长18m,主要是为降低过孔挠度和承受中支腿移动荷载,起安全引导、辅助过孔作用。主梁、导梁为三角桁架构件单元,采用销轴连接,前、后端各设置横联构架。
-
-图4-1 主梁总成图
-注意事项:
-(1)更换上、下弦销轴时,应优先向设备供应方购买符合要求的备件。自行更换时,材料性能必须优于设计零件性能,并按规定进行热处理,否则可能造成人员、设备事故。
-(2)销轴不得弯曲受力,不得用销轴作为锤砸工具,不得任意放置及焊接"""
-
-    unit_content = {"content": test_content}
-
-    print(f"📝 测试内容长度: {len(test_content)} 字符")
-    print(f"📝 测试内容预览:\n{test_content[:200]}...\n")
-
-    # 创建数据流跟踪字典
-    pipeline_data = {
-        "stage": "parameter_compliance_check_full_pipeline",
-        "timestamp": time.time(),
-        "steps": {}
-    }
-
-    # Step 1: RAG增强检索
-    print("=" * 80)
-    print("【Step 1】RAG增强检索".center(80))
-    print("=" * 80)
-
-    logger.info("=" * 80)
-    logger.info("Step 1: RAG增强检索")
-    logger.info("=" * 80)
-
-    start_time = time.time()
-
-    rag_result = rag_enhanced_check(milvus_manager, unit_content)
-    review_references = rag_result.get('text_content', '')
-    reference_source = rag_result.get('file_name', '')
-
-    # 保存Step 1数据
-    pipeline_data["steps"]["1_rag_retrieval"] = {
-        "input": {
-            "unit_content": unit_content
-        },
-        "output": {
-            "rag_result": rag_result,
-            "review_references_length": len(review_references),
-            "reference_source": reference_source
-        },
-        "execution_time": time.time() - start_time
-    }
-
-    if not review_references:
-        logger.warning("RAG检索未返回参考信息,将继续使用空参考进行审查")
-        print("⚠️  RAG检索未返回参考信息\n")
-    else:
-        print(f"✅ RAG检索成功")
-        print(f"   参考来源: {reference_source}")
-        print(f"   参考内容长度: {len(review_references)} 字符\n")
-
-    # Step 2: 调用参数合规性检查 (使用原链路的方法)
-    print("=" * 80)
-    print("【Step 2】参数合规性检查 (LLM审查)".center(80))
-    print("=" * 80)
-
-    logger.info("=" * 80)
-    logger.info("Step 2: 参数合规性检查")
-    logger.info("=" * 80)
-
-    trace_id_idx = "_test_001"
-    review_location_label = "测试文档-第1章"
-    state = None
-    stage_name = "test_stage"
-
-    logger.info(f"开始调用参数合规性检查")
-    logger.info(f"  - trace_id_idx: {trace_id_idx}")
-    logger.info(f"  - review_content长度: {len(test_content)}")
-    logger.info(f"  - review_references长度: {len(review_references)}")
-    logger.info(f"  - reference_source: {reference_source}")
-
-    # 保存Step 2输入
-    pipeline_data["steps"]["2_parameter_compliance_check"] = {
-        "input": {
-            "trace_id_idx": trace_id_idx,
-            "review_content_length": len(test_content),
-            "review_content_preview": test_content[:200],
-            "review_references_length": len(review_references),
-            "review_references_preview": review_references[:200] if review_references else "",
-            "reference_source": reference_source,
-            "review_location_label": review_location_label,
-            "stage_name": stage_name
-        },
-        "output": {}
-    }
-
-    start_time = time.time()
-
-    try:
-        # 调用与原链路完全一致的方法
-        result = await check_parameter_compliance(
-            trace_id_idx=trace_id_idx,
-            review_content=test_content,
-            review_references=review_references,
-            reference_source=reference_source,
-            review_location_label=review_location_label,
-            state=state,
-            stage_name=stage_name
-        )
-
-        elapsed_time = time.time() - start_time
-
-        # 保存Step 2输出
-        pipeline_data["steps"]["2_parameter_compliance_check"]["output"] = {
-            "success": result.success,
-            "execution_time": result.execution_time,
-            "error_message": result.error_message,
-            "details": result.details
-        }
-
-        # 保存完整数据流
-        pipeline_data["final_result"] = {
-            "success": result.success,
-            "execution_time": result.execution_time,
-            "error_message": result.error_message,
-            "details": result.details
-        }
-
-        os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-        with open(rf"temp\entity_bfp_recall\parameter_compliance_full_pipeline.json", "w", encoding='utf-8') as f:
-            json.dump(pipeline_data, f, ensure_ascii=False, indent=4)
-
-        logger.info(f"✅ 参数合规性检查完成, 总耗时: {elapsed_time:.2f}秒")
-        logger.info(f"📁 完整数据流已保存到: temp/entity_bfp_recall/parameter_compliance_full_pipeline.json")
-
-    except Exception as e:
-        error_msg = f"参数合规性检查失败: {str(e)}"
-        logger.error(error_msg, exc_info=True)
-
-        # 保存错误信息
-        pipeline_data["steps"]["2_parameter_compliance_check"]["output"] = {
-            "error": error_msg,
-            "error_type": type(e).__name__,
-            "traceback": str(e)
-        }
-        pipeline_data["error"] = {
-            "error_message": error_msg,
-            "error_type": type(e).__name__
-        }
-
-        os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-        with open(rf"temp\entity_bfp_recall\parameter_compliance_full_pipeline.json", "w", encoding='utf-8') as f:
-            json.dump(pipeline_data, f, ensure_ascii=False, indent=4)
-
-        print(f"❌ 参数合规性检查失败: {error_msg}\n")
-        return
-
-    # 输出测试结果
-    print("\n" + "=" * 80)
-    print("测试结果".center(80))
-    print("=" * 80)
-
-    status_icon = "✅" if result.success else "❌"
-    print(f"\n{status_icon} 参数合规性检查")
-    print(f"   执行时间: {result.execution_time:.2f}秒")
-
-    if result.success:
-        print(f"   审查成功!")
-        print(f"   详细信息: {result.details.get('name', 'N/A')}")
-
-        # 如果有RAG参考信息,打印出来
-        if 'rag_reference_source' in result.details:
-            print(f"\n   📚 RAG参考信息:")
-            print(f"      参考来源: {result.details['rag_reference_source']}")
-            print(f"      参考内容长度: {len(result.details.get('rag_review_references', ''))} 字符")
-
-        # 打印审查响应(截取前500字符)
-        response = result.details.get('response', '')
-        if response:
-            print(f"\n   📋 审查响应 (前500字符):")
-            print(f"      {response[:500]}...")
-    else:
-        print(f"   错误信息: {result.error_message}")
-
-    # 输出文件位置
-    print("\n" + "=" * 80)
-    print("详细结果已保存到:".center(80))
-    print("  📁 temp/entity_bfp_recall/rag_pipeline_data.json - RAG检索完整数据流")
-    print("  📁 temp/entity_bfp_recall/enhance_with_parent_docs.json - 父文档增强结果")
-    print("  📁 temp/entity_bfp_recall/extract_first_result.json - 最终提取结果")
-    print("  📁 temp/entity_bfp_recall/parameter_compliance_full_pipeline.json - 参数检查完整数据流")
-    print("=" * 80 + "\n")
-
-    print("✅ 测试完成!")
-
-    # 保存测试结果摘要
-    os.makedirs(r"temp\entity_bfp_recall", exist_ok=True)
-    test_summary = {
-        "test_type": "parameter_compliance",
-        "check_display_name": "参数合规性检查",
-        "timestamp": time.time(),
-        "result": {
-            'success': result.success,
-            'execution_time': result.execution_time,
-            'error_message': result.error_message,
-            'details_summary': {
-                'name': result.details.get('name'),
-                'has_rag_reference': 'rag_reference_source' in result.details,
-                'response_length': len(result.details.get('response', '')),
-                'response_preview': result.details.get('response', '')[:200]
-            }
-        }
-    }
-    with open(rf"temp\entity_bfp_recall\test_summary.json", "w", encoding='utf-8') as f:
-        json.dump(test_summary, f, ensure_ascii=False, indent=4)
-
-    return result
-
-
-if __name__ == "__main__":
-    # 运行异步主函数
-    asyncio.run(main())