entities_enhance.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import json
  2. import asyncio
  3. from foundation.observability.monitoring.time_statistics import track_execution_time
  4. from foundation.ai.rag.retrieval.retrieval import retrieval_manager
  5. from foundation.observability.logger.loggering import server_logger
  6. class EntitiesEnhance():
  7. def __init__(self):
  8. self.bfp_result_lists = []
  9. @track_execution_time
  10. def entities_enhance_retrieval(self, query_pairs):
  11. def run_async(coro):
  12. """在合适的环境中运行异步函数"""
  13. try:
  14. loop = asyncio.get_running_loop()
  15. import concurrent.futures
  16. with concurrent.futures.ThreadPoolExecutor() as executor:
  17. future = executor.submit(asyncio.run, coro)
  18. return future.result()
  19. except RuntimeError:
  20. return asyncio.run(coro)
  21. # 清空之前的结果
  22. self.bfp_result_lists = []
  23. for query_pair in query_pairs:
  24. entity = query_pair['entity']
  25. search_keywords = query_pair['search_keywords']
  26. background = query_pair['background']
  27. server_logger.info(f"正在处理实体:{entity},辅助搜索词:{search_keywords},背景:{background}")
  28. entity_list = run_async(retrieval_manager.entity_recall(
  29. entity,
  30. search_keywords,
  31. recall_top_k=5, # 主实体返回数量
  32. max_results=5 # 最终最多返回20个实体文本
  33. ))
  34. # BFP背景增强召回
  35. bfp_result = run_async(retrieval_manager.async_bfp_recall(entity_list, background, top_k=3))
  36. # 为每个结果添加实体信息
  37. for result in bfp_result:
  38. result['source_entity'] = entity
  39. self.bfp_result_lists.append(bfp_result)
  40. return self.bfp_result_lists
# Module-level instance created at import time. Results are stored on the
# instance (bfp_result_lists), so all users of this object share that state.
entity_enhance = EntitiesEnhance()