| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- import hashlib
- import math
- import requests
- import logging
- from typing import List
- from app.core.config import config_handler
- logger = logging.getLogger(__name__)
- # Embedding service settings read from the "admin_app" config section (the third argument is presumably the fallback value when the key is missing - confirm against config_handler.get)
- EMBEDDING_BASE_URL = config_handler.get("admin_app", "EMBEDDING_BASE_URL", "")  # when empty, text_to_vector_algo uses the local hash fallback instead of calling the API
- EMBEDDING_MODEL = config_handler.get("admin_app", "EMBEDDING_MODEL", "")  # sent as the "model" field of the embeddings request
- EMBEDDING_API_KEY = config_handler.get("admin_app", "EMBEDDING_API_KEY", "dummy")  # sent as the Bearer token in the Authorization header
def text_to_vector_algo(text: str, dim: int = 768) -> List[float]:
    """Return an embedding vector for ``text`` using the configured Embedding API.

    A single POST (no retry) is sent to ``<EMBEDDING_BASE_URL>/embeddings``
    with an OpenAI-style payload, and the ``embedding`` of the first item in
    the response's ``data`` list is returned.

    Args:
        text: Text to embed. Falsy text short-circuits to a zero vector
            without touching the network.
        dim: Length of the zero vector and of the hash fallback vector. It
            does NOT resize API results: a successful call returns whatever
            length the model produces, which may differ from ``dim``.

    Returns:
        * the model's embedding as a list of floats on success;
        * the vector from ``_dummy_hash_vector(text, dim)`` when
          ``EMBEDDING_BASE_URL`` is not configured;
        * ``[0.0] * dim`` when ``text`` is empty, the request fails, or the
          response does not contain a usable embedding.
    """
    if not text:
        return [0.0] * dim

    # Without a configured endpoint, fall back to the local hashing scheme so
    # callers still get a deterministic vector; warn because it is not a real
    # embedding.
    if not EMBEDDING_BASE_URL:
        logger.warning("未配置 EMBEDDING_BASE_URL,使用 Dummy Hash 向量")
        return _dummy_hash_vector(text, dim)

    try:
        # Strip trailing slashes so "http://host/v1/" does not become
        # "http://host/v1//embeddings".
        base_url = str(EMBEDDING_BASE_URL).rstrip("/")
        response = requests.post(
            f"{base_url}/embeddings",
            json={"input": text, "model": EMBEDDING_MODEL},
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {EMBEDDING_API_KEY}",
            },
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        # OpenAI-compatible shape: {"data": [{"embedding": [...]}, ...]}.
        # Walk it defensively so an unexpected shape is reported as a format
        # error instead of surfacing as a KeyError/TypeError.
        items = data.get("data") if isinstance(data, dict) else None
        first = items[0] if isinstance(items, list) and items else None
        embedding = first.get("embedding") if isinstance(first, dict) else None

        if not isinstance(embedding, list) or not embedding:
            logger.error("Embedding API 响应格式错误: %s", data)
            return [0.0] * dim

        # Coerce to float so the declared List[float] contract holds; a
        # non-numeric element raises and is handled by the except below.
        return [float(value) for value in embedding]

    except Exception as e:
        # Deliberate best-effort boundary: any network, HTTP, JSON or
        # conversion failure degrades to a zero vector instead of raising.
        logger.error("Embedding API 调用失败: %s", e)
        return [0.0] * dim
def _dummy_hash_vector(text: str, dim: int) -> List[float]:
    """Build a deterministic character-hash vector, used as a local fallback.

    Each character of ``text`` is hashed with MD5 and counted in the bucket
    ``hash % dim``; the resulting count vector is then scaled to unit
    Euclidean length.

    Args:
        text: Input string; every character is one token.
        dim: Number of buckets, i.e. the length of the returned vector.

    Returns:
        A list of ``dim`` floats with L2 norm 1.0, or all zeros when ``text``
        is empty. For ``dim <= 0`` an empty list is returned (the same value
        ``[0.0] * dim`` yields) instead of failing on the modulo/indexing.
    """
    from collections import Counter

    if dim <= 0:
        return []

    vector = [0.0] * dim
    # Hash each distinct character once and add its whole count; the bucket
    # totals are the same as adding 1.0 per occurrence.
    for char, count in Counter(text).items():
        digest = hashlib.md5(char.encode("utf-8")).hexdigest()
        vector[int(digest, 16) % dim] += float(count)

    magnitude = math.sqrt(sum(x * x for x in vector))
    if magnitude > 0:
        vector = [x / magnitude for x in vector]
    return vector
|