// string_match.go
  1. package utils
import (
	"math"
	"sort"
)
  5. // LevenshteinDistance 计算两个字符串的编辑距离
  6. func LevenshteinDistance(s1, s2 string) int {
  7. r1, r2 := []rune(s1), []rune(s2)
  8. rows := len(r1) + 1
  9. cols := len(r2) + 1
  10. d := make([][]int, rows)
  11. for i := range d {
  12. d[i] = make([]int, cols)
  13. }
  14. for i := 1; i < rows; i++ {
  15. d[i][0] = i
  16. }
  17. for j := 1; j < cols; j++ {
  18. d[0][j] = j
  19. }
  20. for i := 1; i < rows; i++ {
  21. for j := 1; j < cols; j++ {
  22. cost := 0
  23. if r1[i-1] != r2[j-1] {
  24. cost = 1
  25. }
  26. d[i][j] = min(
  27. min(d[i-1][j]+1, d[i][j-1]+1),
  28. d[i-1][j-1]+cost,
  29. )
  30. }
  31. }
  32. return d[rows-1][cols-1]
  33. }
  34. // StringSimilarity 计算字符串相似度 (0-1之间,1表示完全相同)
  35. func StringSimilarity(s1, s2 string) float64 {
  36. if s1 == s2 {
  37. return 1.0
  38. }
  39. maxLen := math.Max(float64(len([]rune(s1))), float64(len([]rune(s2))))
  40. if maxLen == 0 {
  41. return 1.0
  42. }
  43. distance := LevenshteinDistance(s1, s2)
  44. return 1.0 - float64(distance)/maxLen
  45. }
  46. // FindBestMatch 在多个候选项中找到与目标字符串最相似的匹配
  47. func FindBestMatch(target string, candidates []string) (string, float64) {
  48. if len(candidates) == 0 {
  49. return "", 0.0
  50. }
  51. bestMatch := candidates[0]
  52. bestScore := StringSimilarity(target, candidates[0])
  53. for i := 1; i < len(candidates); i++ {
  54. score := StringSimilarity(target, candidates[i])
  55. if score > bestScore {
  56. bestScore = score
  57. bestMatch = candidates[i]
  58. }
  59. }
  60. return bestMatch, bestScore
  61. }
  62. // MatchResult 匹配结果结构体
  63. type MatchResult struct {
  64. Text string `json:"text"`
  65. Score float64 `json:"score"`
  66. }
  67. // FindBestMatches 返回所有候选项的相似度分数,按分数降序排列
  68. func FindBestMatches(target string, candidates []string) []MatchResult {
  69. results := make([]MatchResult, len(candidates))
  70. for i, candidate := range candidates {
  71. score := StringSimilarity(target, candidate)
  72. results[i] = MatchResult{
  73. Text: candidate,
  74. Score: score,
  75. }
  76. }
  77. // 按分数降序排序
  78. for i := 0; i < len(results)-1; i++ {
  79. for j := i + 1; j < len(results); j++ {
  80. if results[i].Score < results[j].Score {
  81. results[i], results[j] = results[j], results[i]
  82. }
  83. }
  84. }
  85. return results
  86. }
  87. // min 辅助函数
  88. func min(a, b int) int {
  89. if a < b {
  90. return a
  91. }
  92. return b
  93. }