/metaphone.go

http://github.com/sanjayc77/metaphone · Go · 173 lines · 133 code · 32 blank · 8 comment · 8 complexity · 180aff9f35a3c22550c2b3bccd084e47 MD5 · raw file

  1. // Copyright (c) 2012 Sanjay Chouksey
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package metaphone
  5. import (
  6. "fmt"
  7. "regexp"
  8. "bytes"
  9. "strings"
  10. )
  11. /*
  12. func dedup(token) {
  13. return token.replace(/([^c])\1/g, '${1}');
  14. }
  15. */
  16. func dedup(token string) string {
  17. if len(token) < 2 {
  18. return token;
  19. }
  20. buffer := bytes.NewBufferString("")
  21. fmt.Fprint(buffer, string(token[0]))
  22. for i := 1; i < len(token); i++ {
  23. if token[i] == "c"[0] || token[i] != token[i - 1] {
  24. fmt.Fprint(buffer, string(token[i]))
  25. }
  26. }
  27. return string(buffer.Bytes())
  28. }
  29. func dropInitialLetters(token string) string {
  30. if regexp.MustCompile("^(kn|gn|pn|ae|wr)").MatchString(token) {
  31. return token[1:]
  32. }
  33. return token;
  34. }
  35. func dropBafterMAtEnd(token string) string {
  36. return regexp.MustCompile("mb$").ReplaceAllLiteralString(token, "m")
  37. }
  38. func cTransform(token string) string {
  39. token = strings.TrimSpace(regexp.MustCompile("([^s]|^)(c)(h)").ReplaceAllString(token, "${1}x${3}"))
  40. token = regexp.MustCompile("cia").ReplaceAllLiteralString(token, "xia")
  41. token = regexp.MustCompile("c(i|e|y)").ReplaceAllString(token, "s${1}")
  42. token = regexp.MustCompile("c").ReplaceAllLiteralString(token, "k")
  43. return token;
  44. }
  45. func dTransform(token string) string {
  46. token = regexp.MustCompile("d(ge|gy|gi)").ReplaceAllString(token, "j${1}")
  47. token = regexp.MustCompile("d").ReplaceAllLiteralString(token, "t")
  48. return token;
  49. }
  50. func dropG(token string) string {
  51. token = regexp.MustCompile("gh(^$|[^aeiou])").ReplaceAllString(token, "h${1}")
  52. token = regexp.MustCompile("g(n|ned)$").ReplaceAllString(token, "${1}")
  53. return token;
  54. }
  55. func transformG(token string) string {
  56. token = regexp.MustCompile("([^g]|^)(g)(i|e|y)").ReplaceAllString(token, "${1}j${3}")
  57. token = regexp.MustCompile("gg").ReplaceAllLiteralString(token, "g")
  58. token = regexp.MustCompile("g").ReplaceAllLiteralString(token, "k")
  59. return token;
  60. }
  61. func dropH(token string) string {
  62. return regexp.MustCompile("([aeiou])h([^aeiou])").ReplaceAllString(token, "${1}${2}")
  63. }
  64. func transformCK(token string) string {
  65. return regexp.MustCompile("ck").ReplaceAllString(token, "k")
  66. }
  67. func transformPH(token string) string {
  68. return regexp.MustCompile("ph").ReplaceAllString(token, "f")
  69. }
  70. func transformQ(token string) string {
  71. return regexp.MustCompile("q").ReplaceAllString(token, "k")
  72. }
  73. func transformS(token string) string {
  74. return regexp.MustCompile("s(h|io|ia)").ReplaceAllString(token, "x${1}")
  75. }
  76. func transformT(token string) string {
  77. token = regexp.MustCompile("t(ia|io)").ReplaceAllString(token, "x${1}")
  78. token = regexp.MustCompile("th").ReplaceAllLiteralString(token, "0")
  79. return token;
  80. }
  81. func dropT(token string) string {
  82. return regexp.MustCompile("tch").ReplaceAllString(token, "ch")
  83. }
  84. func transformV(token string) string {
  85. return regexp.MustCompile("v").ReplaceAllString(token, "f")
  86. }
  87. func transformWH(token string) string {
  88. return regexp.MustCompile("^wh").ReplaceAllLiteralString(token, "w")
  89. }
  90. func dropW(token string) string {
  91. return regexp.MustCompile("w([^aeiou]|$)").ReplaceAllString(token, "${1}")
  92. }
  93. func transformX(token string) string {
  94. token = regexp.MustCompile("^x").ReplaceAllLiteralString(token, "s")
  95. token = regexp.MustCompile("x").ReplaceAllString(token, "ks")
  96. return token;
  97. }
  98. func dropY(token string) string {
  99. return regexp.MustCompile("y([^aeiou]|$)").ReplaceAllString(token, "${1}")
  100. }
  101. func transformZ(token string) string {
  102. return regexp.MustCompile("z").ReplaceAllLiteralString(token, "s")
  103. }
  104. func dropVowels(token string) string {
  105. buffer := bytes.NewBufferString("")
  106. fmt.Fprint(buffer, string(token[0]))
  107. fmt.Fprint(buffer, regexp.MustCompile("[aeiou]").ReplaceAllLiteralString(token[1:], ""))
  108. return string(buffer.Bytes())
  109. }
  110. func ProcessWithMaxLength(token string, maxLength int) string {
  111. token = strings.ToLower(token)
  112. token = dedup(token)
  113. token = dropInitialLetters(token)
  114. token = dropBafterMAtEnd(token)
  115. token = transformCK(token)
  116. token = cTransform(token)
  117. token = dTransform(token)
  118. token = dropG(token)
  119. token = transformG(token)
  120. token = dropH(token)
  121. token = transformPH(token)
  122. token = transformQ(token)
  123. token = transformS(token)
  124. token = transformX(token)
  125. token = transformT(token)
  126. token = dropT(token)
  127. token = transformV(token)
  128. token = transformWH(token)
  129. token = dropW(token)
  130. token = dropY(token)
  131. token = transformZ(token)
  132. token = dropVowels(token)
  133. token = strings.ToUpper(token)
  134. if len(token) >= maxLength {
  135. token = token[0:maxLength]
  136. }
  137. return token
  138. }
  139. func Process(token string) string {
  140. return ProcessWithMaxLength(token, 32)
  141. }