string_match.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. from typing import List, Tuple
  2. def levenshtein_distance(s1: str, s2: str) -> int:
  3. """计算两个字符串的编辑距离"""
  4. if len(s1) < len(s2):
  5. return levenshtein_distance(s2, s1)
  6. if len(s2) == 0:
  7. return len(s1)
  8. previous_row = range(len(s2) + 1)
  9. for i, c1 in enumerate(s1):
  10. current_row = [i + 1]
  11. for j, c2 in enumerate(s2):
  12. insertions = previous_row[j + 1] + 1
  13. deletions = current_row[j] + 1
  14. substitutions = previous_row[j] + (c1 != c2)
  15. current_row.append(min(insertions, deletions, substitutions))
  16. previous_row = current_row
  17. return previous_row[-1]
  18. def string_similarity(s1: str, s2: str) -> float:
  19. """计算两个字符串的相似度 (0-1)"""
  20. if not s1 or not s2:
  21. return 0.0
  22. distance = levenshtein_distance(s1, s2)
  23. max_len = max(len(s1), len(s2))
  24. if max_len == 0:
  25. return 1.0
  26. return 1.0 - (distance / max_len)
  27. def find_best_match(target: str, candidates: List[str]) -> Tuple[str, float]:
  28. """从候选列表中找到最佳匹配"""
  29. if not candidates:
  30. return "", 0.0
  31. best_match = ""
  32. best_score = 0.0
  33. for candidate in candidates:
  34. score = string_similarity(target, candidate)
  35. if score > best_score:
  36. best_score = score
  37. best_match = candidate
  38. return best_match, best_score