| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- from typing import List, Tuple
def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein edit distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The edit distance; 0 when the strings are equal.
    """
    # Put the shorter string on the column axis so each row stays short.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)

    if not shorter:
        # Turning a string into the empty string costs one edit per char.
        return len(longer)

    prev = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        row = [row_no]
        for col, ch_short in enumerate(shorter):
            cost_from_above = prev[col + 1] + 1
            cost_from_left = row[col] + 1
            # bool adds as 0/1: no cost when the characters already match.
            cost_diagonal = prev[col] + (ch_long != ch_short)
            row.append(min(cost_from_above, cost_from_left, cost_diagonal))
        prev = row

    return prev[-1]
def string_similarity(s1: str, s2: str) -> float:
    """Return a similarity score in the range [0, 1] for two strings.

    The score is ``1 - distance / max_len``, where ``distance`` is the
    Levenshtein distance and ``max_len`` is the length of the longer string.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        1.0 for identical strings (including two empty strings), 0.0 when
        exactly one string is empty, otherwise the normalised score.
    """
    # Identical strings are a perfect match. This also covers the
    # both-empty case, which previously fell into the "either is empty"
    # guard and returned 0.0, leaving the intended 1.0 branch unreachable.
    if s1 == s2:
        return 1.0

    # Exactly one side is empty: nothing in common.
    if not s1 or not s2:
        return 0.0

    distance = levenshtein_distance(s1, s2)
    # Both strings are non-empty here, so max_len is at least 1.
    max_len = max(len(s1), len(s2))
    return 1.0 - (distance / max_len)
def find_best_match(target: str, candidates: List[str]) -> Tuple[str, float]:
    """Pick the candidate most similar to ``target``.

    Each candidate is scored with ``string_similarity``. A candidate must
    score strictly above the current best (starting at 0.0) to be chosen,
    so on ties the earliest candidate wins.

    Args:
        target: String to match against.
        candidates: Strings to choose from.

    Returns:
        A ``(best_candidate, score)`` tuple, or ``("", 0.0)`` when
        ``candidates`` is empty or no candidate scores above 0.0.
    """
    winner, top_score = "", 0.0

    # An empty candidate list skips the loop and yields the defaults.
    for option in candidates:
        similarity = string_similarity(target, option)
        if similarity > top_score:
            winner, top_score = option, similarity

    return winner, top_score
|