/shared/helpers/address/address.go

https://gitlab.com/berew/abot · Go · 165 lines · 141 code · 11 blank · 13 comment · 23 complexity · 852bc8600b3c5ec0e96c80f62a73aa07 MD5 · raw file

  1. // Package address provides address extraction and validation tools.
  2. package address
  3. import (
  4. "errors"
  5. "regexp"
  6. "strings"
  7. "github.com/itsabot/abot/shared/datatypes"
  8. "github.com/itsabot/abot/core/log"
  9. )
  10. // ErrInvalidAddress is returned when an address is expected but none found or
  11. // when an address is found but missing core components such as the state *and*
  12. // zip code.
  13. var ErrInvalidAddress = errors.New("invalid address")
  14. var regexAddress = regexp.MustCompile(
  15. `\d+\s+[a-zA-Z#-'\s\.\,\n\d]*(\d{5}-\d{4}|\d{5})?`)
  16. // regexStreet is useful to search within a regexAddress substring match
  17. var regexStreet = regexp.MustCompile(`^\d+\s+[\w#-'\s\.\n]*$`)
  18. // regexApartment is useful to search within a regexAddress substring match
  19. // after the city has been removed.
  20. var regexApartment = regexp.MustCompile(`(,\s*)?[#\s\.\w]*[\w\s]+$`)
  21. // regexCity is useful to search within a regexAddress substring match after the
  22. // state has been removed.
  23. var regexCity = regexp.MustCompile(`(,\s*)?([a-zA-Z]{2}|\s\w+\s*\w*)$`)
  24. // regexState is useful to search within a regexAddress substring match after
  25. // the zip code has been removed
  26. var regexState = regexp.MustCompile(`(,\s*)?([a-zA-Z]{2}|\s\w+\s*\w*)(,\s*)?$`)
  27. // regexZip is useful to search within a regexAddress substring match
  28. var regexZip = regexp.MustCompile(`(\d{5}-\d{4}|\d{5})$`)
  29. var states = map[string]string{
  30. "alabama": "AL",
  31. "alaska": "AK",
  32. "arizona": "AZ",
  33. "arkansas": "AR",
  34. "california": "CA",
  35. "colorado": "CO",
  36. "connecticut": "CT",
  37. "delaware": "DE",
  38. "florida": "FL",
  39. "georgia": "GA",
  40. "hawaii": "HI",
  41. "idaho": "ID",
  42. "illinois": "IL",
  43. "indiana": "IN",
  44. "iowa": "IA",
  45. "kansas": "KS",
  46. "kentucky": "KY",
  47. "lousiana": "LA",
  48. "maine": "ME",
  49. "maryland": "MD",
  50. "massachusetts": "MA",
  51. "michigan": "MI",
  52. "minnesota": "MN",
  53. "mississippi": "MS",
  54. "missouri": "MO",
  55. "montana": "MT",
  56. "nebraska": "NE",
  57. "nevada": "NV",
  58. "new hampshire": "NH",
  59. "new jersey": "NJ",
  60. "new mexico": "NM",
  61. "new york": "NY",
  62. "north carolina": "NC",
  63. "n carolina": "NC",
  64. "north dakota": "ND",
  65. "n dakota": "ND",
  66. "ohio": "OH",
  67. "oklahoma": "OK",
  68. "oregon": "OR",
  69. "pennsylvania": "PA",
  70. "rhode island": "RI",
  71. "s carolina": "SC",
  72. "south carolina": "SC",
  73. "s dakota": "SD",
  74. "south dakota": "SD",
  75. "tennessee": "TN",
  76. "texas": "TX",
  77. "utah": "UT",
  78. "vermont": "VT",
  79. "virginia": "VA",
  80. "washington": "WA",
  81. "w virginia": "WV",
  82. "west virginia": "WV",
  83. "wisconsin": "WI",
  84. "wyoming": "WY",
  85. }
  86. // Parse a string to return a fully-validated U.S. address.
  87. func Parse(s string) (*dt.Address, error) {
  88. s = regexAddress.FindString(s)
  89. if len(s) == 0 {
  90. log.Debug("missing address")
  91. return nil, ErrInvalidAddress
  92. }
  93. log.Debug("address", s)
  94. tmp := regexZip.FindStringIndex(s)
  95. var zip string
  96. if tmp != nil {
  97. zip = s[tmp[0]:tmp[1]]
  98. s = s[:tmp[0]]
  99. } else {
  100. log.Debug("no zip found")
  101. }
  102. tmp2 := regexState.FindStringIndex(s)
  103. if tmp2 == nil && tmp == nil {
  104. log.Debug("no state found AND no zip found")
  105. return &dt.Address{}, ErrInvalidAddress
  106. }
  107. var city, state string
  108. if tmp2 != nil {
  109. state = s[tmp2[0]:tmp2[1]]
  110. s = s[:tmp2[0]]
  111. state = strings.Trim(state, ", \n")
  112. if len(state) > 2 {
  113. state = strings.ToLower(state)
  114. state = states[state]
  115. }
  116. tmp = regexCity.FindStringIndex(s)
  117. if tmp == nil {
  118. log.Debug("no city found")
  119. return &dt.Address{}, ErrInvalidAddress
  120. }
  121. city = s[tmp[0]:tmp[1]]
  122. s = s[:tmp[0]]
  123. } else {
  124. log.Debug("no state found")
  125. }
  126. tmp = regexApartment.FindStringIndex(s)
  127. var apartment string
  128. if tmp != nil {
  129. apartment = s[tmp[0]:tmp[1]]
  130. s2 := s[:tmp[0]]
  131. if len(s2) == 0 {
  132. apartment = ""
  133. } else {
  134. s = s2
  135. }
  136. } else {
  137. log.Debug("no apartment found")
  138. }
  139. tmp = regexStreet.FindStringIndex(s)
  140. if tmp == nil {
  141. log.Debug(s)
  142. log.Debug("no street found")
  143. return &dt.Address{}, ErrInvalidAddress
  144. }
  145. street := s[tmp[0]:tmp[1]]
  146. return &dt.Address{
  147. Line1: strings.Trim(street, " \n,"),
  148. Line2: strings.Trim(apartment, " \n,"),
  149. City: strings.Trim(city, " \n,"),
  150. State: strings.Trim(state, " \n,"),
  151. Zip: strings.Trim(zip, " \n,"),
  152. Country: "USA",
  153. }, nil
  154. }