test_rag_monitor.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. RAG监控装饰器使用示例和测试脚本
  5. 展示如何使用 rag_monitor 装饰器监控RAG链路
  6. """
  7. import sys
  8. import os
  9. import time
  10. import json
  11. import asyncio
  12. from pathlib import Path
  13. # 添加项目根目录到路径
  14. project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  15. sys.path.insert(0, project_root)
  16. from foundation.observability.monitoring.rag import rag_monitor
  17. from foundation.observability.logger.loggering import review_logger as logger
  18. # ========== 示例1: 同步函数监控 ==========
  19. @rag_monitor.monitor_step(
  20. step_name="example_sync_query_extract",
  21. capture_input=True,
  22. capture_output=True
  23. )
  24. def example_query_extract(content: str):
  25. """示例:查询提取函数"""
  26. logger.info(f"正在提取查询,内容长度: {len(content)}")
  27. time.sleep(0.5) # 模拟处理时间
  28. # 模拟提取结果
  29. return [
  30. {"query": "安全生产条件", "entity": "安全"},
  31. {"query": "施工管理制度", "entity": "施工"}
  32. ]
  33. @rag_monitor.monitor_step(
  34. step_name="example_sync_vector_search",
  35. capture_input=True,
  36. capture_output=True,
  37. output_transform=lambda x: { # 只保留关键信息
  38. "results_count": len(x),
  39. "has_results": bool(x)
  40. }
  41. )
  42. def example_vector_search(query_pairs: list):
  43. """示例:向量检索函数"""
  44. logger.info(f"正在进行向量检索,查询对数量: {len(query_pairs)}")
  45. time.sleep(1.0) # 模拟检索时间
  46. # 模拟检索结果
  47. results = []
  48. for pair in query_pairs:
  49. results.append({
  50. "query": pair["query"],
  51. "doc_id": f"doc_{hash(pair['query']) % 100}",
  52. "score": 0.85,
  53. "content": f"这是关于{pair['query']}的内容..."
  54. })
  55. return results
  56. # ========== 示例2: 异步函数监控 ==========
  57. @rag_monitor.monitor_step(
  58. step_name="example_async_rerank",
  59. capture_input=True,
  60. capture_output=True,
  61. input_transform=lambda x: { # 只记录输入统计信息
  62. "results_count": len(x["args"][0]) if x["args"] else 0
  63. }
  64. )
  65. async def example_async_rerank(results: list):
  66. """示例:异步重排序函数"""
  67. logger.info(f"正在进行重排序,结果数量: {len(results)}")
  68. await asyncio.sleep(0.8) # 模拟异步处理
  69. # 模拟重排序
  70. sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)
  71. return sorted_results[:5] # 只返回前5个
  72. @rag_monitor.monitor_step(
  73. step_name="example_async_parent_enhance",
  74. capture_input=True,
  75. capture_output=True
  76. )
  77. async def example_async_parent_enhance(results: list):
  78. """示例:异步父文档增强函数"""
  79. logger.info(f"正在进行父文档增强,结果数量: {len(results)}")
  80. await asyncio.sleep(1.2) # 模拟异步处理
  81. # 模拟父文档增强
  82. enhanced = []
  83. for res in results:
  84. enhanced.append({
  85. **res,
  86. "parent_content": f"父文档内容: {res['content']}的完整上下文...",
  87. "enhanced": True
  88. })
  89. return enhanced
  90. # ========== 示例3: 完整的RAG链路测试 ==========
  91. def test_sync_rag_pipeline():
  92. """测试同步RAG链路"""
  93. print("\n" + "="*60)
  94. print("示例1: 同步RAG链路监控")
  95. print("="*60)
  96. # 开始追踪会话
  97. trace_id = f"test_sync_{int(time.time() * 1000)}"
  98. rag_monitor.start_trace(trace_id, metadata={
  99. "test_type": "sync",
  100. "description": "同步RAG链路测试"
  101. })
  102. try:
  103. # Step 1: 查询提取
  104. query_content = "请检查施工方案中的安全生产条件和施工管理制度是否符合规范要求。"
  105. query_pairs = example_query_extract(query_content)
  106. print(f"✅ 查询提取完成,提取到 {len(query_pairs)} 个查询对")
  107. # Step 2: 向量检索
  108. search_results = example_vector_search(query_pairs)
  109. print(f"✅ 向量检索完成,找到 {len(search_results)} 个结果")
  110. print(f"\n✅ 同步RAG链路测试完成")
  111. finally:
  112. # 结束追踪并保存
  113. trace_data = rag_monitor.end_trace(trace_id)
  114. print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
  115. print(f"⏱️ 总耗时: {trace_data['total_duration']}秒")
  116. print(f"📝 步骤数量: {len(trace_data['steps'])}")
  117. async def test_async_rag_pipeline():
  118. """测试异步RAG链路"""
  119. print("\n" + "="*60)
  120. print("示例2: 异步RAG链路监控")
  121. print("="*60)
  122. # 开始追踪会话
  123. trace_id = f"test_async_{int(time.time() * 1000)}"
  124. rag_monitor.start_trace(trace_id, metadata={
  125. "test_type": "async",
  126. "description": "异步RAG链路测试"
  127. })
  128. try:
  129. # 模拟一些初始数据
  130. initial_results = [
  131. {"query": "安全", "doc_id": "doc_1", "score": 0.82, "content": "安全内容..."},
  132. {"query": "施工", "doc_id": "doc_2", "score": 0.91, "content": "施工内容..."},
  133. {"query": "管理", "doc_id": "doc_3", "score": 0.75, "content": "管理内容..."}
  134. ]
  135. # Step 1: 异步重排序
  136. reranked_results = await example_async_rerank(initial_results)
  137. print(f"✅ 重排序完成,保留前 {len(reranked_results)} 个结果")
  138. # Step 2: 异步父文档增强
  139. enhanced_results = await example_async_parent_enhance(reranked_results)
  140. print(f"✅ 父文档增强完成,增强了 {len(enhanced_results)} 个结果")
  141. print(f"\n✅ 异步RAG链路测试完成")
  142. finally:
  143. # 结束追踪并保存
  144. trace_data = rag_monitor.end_trace(trace_id)
  145. print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
  146. print(f"⏱️ 总耗时: {trace_data['total_duration']}秒")
  147. print(f"📝 步骤数量: {len(trace_data['steps'])}")
  148. def test_mixed_rag_pipeline():
  149. """测试混合(同步+异步)RAG链路"""
  150. print("\n" + "="*60)
  151. print("示例3: 混合RAG链路监控(同步+异步)")
  152. print("="*60)
  153. # 开始追踪会话
  154. trace_id = f"test_mixed_{int(time.time() * 1000)}"
  155. rag_monitor.start_trace(trace_id, metadata={
  156. "test_type": "mixed",
  157. "description": "混合RAG链路测试"
  158. })
  159. try:
  160. # Step 1: 同步查询提取
  161. query_content = "检查项目的环境保护措施和质量管理体系。"
  162. query_pairs = example_query_extract(query_content)
  163. print(f"✅ [同步] 查询提取完成")
  164. # Step 2: 同步向量检索
  165. search_results = example_vector_search(query_pairs)
  166. print(f"✅ [同步] 向量检索完成")
  167. # Step 3: 异步重排序
  168. async def async_part():
  169. reranked = await example_async_rerank(search_results)
  170. print(f"✅ [异步] 重排序完成")
  171. # Step 4: 异步父文档增强
  172. enhanced = await example_async_parent_enhance(reranked)
  173. print(f"✅ [异步] 父文档增强完成")
  174. return enhanced
  175. # 运行异步部分
  176. final_results = asyncio.run(async_part())
  177. print(f"\n✅ 混合RAG链路测试完成,最终得到 {len(final_results)} 个结果")
  178. finally:
  179. # 结束追踪并保存
  180. trace_data = rag_monitor.end_trace(trace_id)
  181. print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
  182. print(f"⏱️ 总耗时: {trace_data['total_duration']}秒")
  183. print(f"📝 步骤数量: {len(trace_data['steps'])}")
  184. # ========== 示例4: 自定义输入输出转换 ==========
  185. @rag_monitor.monitor_step(
  186. step_name="example_sensitive_data",
  187. capture_input=True,
  188. capture_output=True,
  189. input_transform=lambda x: {
  190. # 过滤敏感信息,只保留统计数据
  191. "user_id": "***", # 隐藏用户ID
  192. "data_length": len(str(x))
  193. },
  194. output_transform=lambda x: {
  195. # 只保留关键指标
  196. "success": x.get("success"),
  197. "count": x.get("count")
  198. }
  199. )
  200. def example_process_sensitive_data(user_id: str, data: dict):
  201. """示例:处理敏感数据(自定义转换)"""
  202. time.sleep(0.3)
  203. return {
  204. "success": True,
  205. "user_id": user_id,
  206. "count": len(data),
  207. "details": data # 这些详细信息不会被记录
  208. }
  209. def test_custom_transform():
  210. """测试自定义输入输出转换"""
  211. print("\n" + "="*60)
  212. print("示例4: 自定义输入输出转换(敏感数据保护)")
  213. print("="*60)
  214. trace_id = f"test_transform_{int(time.time() * 1000)}"
  215. rag_monitor.start_trace(trace_id, metadata={
  216. "test_type": "custom_transform"
  217. })
  218. try:
  219. result = example_process_sensitive_data(
  220. user_id="user_12345",
  221. data={"key1": "value1", "key2": "value2"}
  222. )
  223. print(f"✅ 处理完成,成功: {result['success']}")
  224. print(f"ℹ️ 敏感信息已被过滤,只记录统计数据")
  225. finally:
  226. trace_data = rag_monitor.end_trace(trace_id)
  227. print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
  228. # ========== 查看监控结果 ==========
  229. def view_trace_result(trace_id: str):
  230. """查看追踪结果"""
  231. file_path = Path("temp/rag_monitoring") / f"{trace_id}.json"
  232. if file_path.exists():
  233. print(f"\n📄 追踪结果: {trace_id}")
  234. print("="*60)
  235. with open(file_path, 'r', encoding='utf-8') as f:
  236. data = json.load(f)
  237. print(f"⏱️ 总耗时: {data.get('total_duration')}秒")
  238. print(f"📝 步骤数量: {len(data.get('steps', {}))}")
  239. print(f"\n步骤详情:")
  240. for step_name, step_data in data.get('steps', {}).items():
  241. print(f"\n [{step_data.get('status', 'unknown').upper()}] {step_name}")
  242. print(f" 函数: {step_data.get('function_name')}")
  243. print(f" 耗时: {step_data.get('duration')}秒")
  244. if step_data.get('status') == 'error':
  245. print(f" ❌ 错误: {step_data.get('error', {}).get('message')}")
  246. else:
  247. print(f"❌ 找不到追踪文件: {file_path}")
  248. # ========== 主函数 ==========
  249. def main():
  250. """运行所有测试示例"""
  251. print("\n" + "🚀 RAG监控装饰器测试 🚀".center(60, "="))
  252. try:
  253. # 示例1: 同步RAG链路
  254. test_sync_rag_pipeline()
  255. # 示例2: 异步RAG链路
  256. asyncio.run(test_async_rag_pipeline())
  257. # 示例3: 混合RAG链路
  258. test_mixed_rag_pipeline()
  259. # 示例4: 自定义转换
  260. test_custom_transform()
  261. print("\n" + "✅ 所有测试完成!".center(60, "="))
  262. print(f"\n💡 提示: 查看监控数据文件在: temp/rag_monitoring/")
  263. print(f"💡 提示: 每个trace_id对应一个JSON文件,包含完整的执行链路信息")
  264. except Exception as e:
  265. print(f"\n❌ 测试失败: {e}")
  266. import traceback
  267. traceback.print_exc()
  268. if __name__ == "__main__":
  269. main()