PageRenderTime 30ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/keepalived-vip/vendor/github.com/PuerkitoBio/purell/purell.go

https://gitlab.com/unofficial-mirrors/kubernetes-contrib
Go | 375 lines | 296 code | 43 blank | 36 comment | 75 complexity | e1f7cb12ae59e9cee264c600fc7f8576 MD5 | raw file
  1. /*
  2. Package purell offers URL normalization as described on the wikipedia page:
  3. http://en.wikipedia.org/wiki/URL_normalization
  4. */
  5. package purell
  6. import (
  7. "bytes"
  8. "fmt"
  9. "net/url"
  10. "regexp"
  11. "sort"
  12. "strconv"
  13. "strings"
  14. "github.com/PuerkitoBio/urlesc"
  15. "golang.org/x/net/idna"
  16. "golang.org/x/text/secure/precis"
  17. "golang.org/x/text/unicode/norm"
  18. )
// NormalizationFlags is a set of flags that determines how a URL will
// be normalized. Individual flags are combined with the bitwise OR operator.
type NormalizationFlags uint
// Individual normalization flags followed by predefined combinations.
// Every single flag is declared with "1 << iota" and therefore occupies its
// own bit; the declaration order must not change, or the bit values shift.
const (
	// Safe normalizations
	FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
	FlagLowercaseHost // http://HOST -> http://host
	FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
	FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
	FlagEncodeNecessaryEscapes // http://host/!"#$ -> http://host/%21%22#$
	FlagRemoveDefaultPort // http://host:80 -> http://host
	FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path

	// Usually safe normalizations
	FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
	FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
	FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c

	// Unsafe normalizations
	FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
	FlagRemoveFragment // http://host/path#fragment -> http://host/path
	FlagForceHTTP // https://host -> http://host
	FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
	FlagRemoveWWW // http://www.host/ -> http://host/
	FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
	FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3

	// Normalizations not in the Wikipedia article, required to cover test cases
	// submitted by jehiah
	FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
	FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
	FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
	FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
	FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path

	// Convenience set of safe normalizations
	FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagEncodeNecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator

	// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
	// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".

	// Convenience set of usually safe normalizations (includes FlagsSafe)
	FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
	FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments

	// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
	FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
	FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery

	// Convenience set of all available flags
	FlagsAllGreedy = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
	FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
)
// Default ports for the http and https schemes, written with the leading
// colon so they can be compared directly against the port suffix of a host.
const (
	defaultHttpPort = ":80"
	defaultHttpsPort = ":443"
)
// Regular expressions used by the normalizations

// rxPort matches a colon followed by digits at the end of the string,
// optionally followed by a single trailing slash.
var rxPort = regexp.MustCompile(`(:\d+)/?$`)

// rxDirIndex matches a final "default.<ext>" or "index.<ext>" path segment
// (extension of 1 to 4 word characters); group 1 is the preceding slash, if any.
var rxDirIndex = regexp.MustCompile(`(^|/)((?:default|index)\.\w{1,4})$`)

// rxDupSlashes matches any run of two or more consecutive slashes.
var rxDupSlashes = regexp.MustCompile(`/{2,}`)

// rxDWORDHost matches a host made only of decimal digits (group 1), followed by
// optional trailing dots and an optional ":port" part (group 2).
var rxDWORDHost = regexp.MustCompile(`^(\d+)((?:\.+)?(?:\:\d*)?)$`)

// rxOctalHost matches four dot-separated digit groups that each start with 0
// (groups 1-4), followed by optional trailing dots and ":port" (group 5).
var rxOctalHost = regexp.MustCompile(`^(0\d*)\.(0\d*)\.(0\d*)\.(0\d*)((?:\.+)?(?:\:\d*)?)$`)

// rxHexHost matches "0x" followed by hexadecimal digits (group 1), followed by
// optional trailing dots and ":port" (group 2).
var rxHexHost = regexp.MustCompile(`^0x([0-9A-Fa-f]+)((?:\.+)?(?:\:\d*)?)$`)

// rxHostDots splits a host into its name (group 1) and an optional ":port"
// suffix made of digits (group 2).
var rxHostDots = regexp.MustCompile(`^(.+?)(:\d+)?$`)

// rxEmptyPort matches one or more colons at the end of the string.
var rxEmptyPort = regexp.MustCompile(`:+$`)
// flagsOrder lists the flags that have an implementation function, in the
// order in which those functions must be applied. A slice is used because
// maps have an undefined traversal order.
//
// FlagDecodeUnnecessaryEscapes has no action, since it is done automatically
// by parsing the string as a URL. Same for FlagUppercaseEscapes and
// FlagRemoveEmptyQuerySeparator. FlagEncodeNecessaryEscapes has no entry here either.
var flagsOrder = []NormalizationFlags{
	FlagLowercaseScheme,
	FlagLowercaseHost,
	FlagRemoveDefaultPort,
	FlagRemoveDirectoryIndex,
	FlagRemoveDotSegments,
	FlagRemoveFragment,
	FlagForceHTTP, // Must be after remove default port (because https=443/http=80)
	FlagRemoveDuplicateSlashes,
	FlagRemoveWWW,
	FlagAddWWW,
	FlagSortQuery,
	FlagDecodeDWORDHost,
	FlagDecodeOctalHost,
	FlagDecodeHexHost,
	FlagRemoveUnnecessaryHostDots,
	FlagRemoveEmptyPortSeparator,
	FlagRemoveTrailingSlash, // These two (add/remove trailing slash) must be last
	FlagAddTrailingSlash,
}
// flags maps each flag that has an implementation to the function that
// applies it to a URL in place. Order is unimportant here; the order of
// application is given by the flagsOrder slice.
var flags = map[NormalizationFlags]func(*url.URL){
	FlagLowercaseScheme: lowercaseScheme,
	FlagLowercaseHost: lowercaseHost,
	FlagRemoveDefaultPort: removeDefaultPort,
	FlagRemoveDirectoryIndex: removeDirectoryIndex,
	FlagRemoveDotSegments: removeDotSegments,
	FlagRemoveFragment: removeFragment,
	FlagForceHTTP: forceHTTP,
	FlagRemoveDuplicateSlashes: removeDuplicateSlashes,
	FlagRemoveWWW: removeWWW,
	FlagAddWWW: addWWW,
	FlagSortQuery: sortQuery,
	FlagDecodeDWORDHost: decodeDWORDHost,
	FlagDecodeOctalHost: decodeOctalHost,
	FlagDecodeHexHost: decodeHexHost,
	FlagRemoveUnnecessaryHostDots: removeUnncessaryHostDots,
	FlagRemoveEmptyPortSeparator: removeEmptyPortSeparator,
	FlagRemoveTrailingSlash: removeTrailingSlash,
	FlagAddTrailingSlash: addTrailingSlash,
}
  122. // MustNormalizeURLString returns the normalized string, and panics if an error occurs.
  123. // It takes an URL string as input, as well as the normalization flags.
  124. func MustNormalizeURLString(u string, f NormalizationFlags) string {
  125. result, e := NormalizeURLString(u, f)
  126. if e != nil {
  127. panic(e)
  128. }
  129. return result
  130. }
  131. // NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
  132. // It takes an URL string as input, as well as the normalization flags.
  133. func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
  134. if parsed, e := url.Parse(u); e != nil {
  135. return "", e
  136. } else {
  137. options := make([]precis.Option, 1, 3)
  138. options[0] = precis.IgnoreCase
  139. if f&FlagLowercaseHost == FlagLowercaseHost {
  140. options = append(options, precis.FoldCase())
  141. }
  142. options = append(options, precis.Norm(norm.NFC))
  143. profile := precis.NewFreeform(options...)
  144. if parsed.Host, e = idna.ToASCII(profile.NewTransformer().String(parsed.Host)); e != nil {
  145. return "", e
  146. }
  147. return NormalizeURL(parsed, f), nil
  148. }
  149. panic("Unreachable code.")
  150. }
  151. // NormalizeURL returns the normalized string.
  152. // It takes a parsed URL object as input, as well as the normalization flags.
  153. func NormalizeURL(u *url.URL, f NormalizationFlags) string {
  154. for _, k := range flagsOrder {
  155. if f&k == k {
  156. flags[k](u)
  157. }
  158. }
  159. return urlesc.Escape(u)
  160. }
  161. func lowercaseScheme(u *url.URL) {
  162. if len(u.Scheme) > 0 {
  163. u.Scheme = strings.ToLower(u.Scheme)
  164. }
  165. }
  166. func lowercaseHost(u *url.URL) {
  167. if len(u.Host) > 0 {
  168. u.Host = strings.ToLower(u.Host)
  169. }
  170. }
  171. func removeDefaultPort(u *url.URL) {
  172. if len(u.Host) > 0 {
  173. scheme := strings.ToLower(u.Scheme)
  174. u.Host = rxPort.ReplaceAllStringFunc(u.Host, func(val string) string {
  175. if (scheme == "http" && val == defaultHttpPort) || (scheme == "https" && val == defaultHttpsPort) {
  176. return ""
  177. }
  178. return val
  179. })
  180. }
  181. }
  182. func removeTrailingSlash(u *url.URL) {
  183. if l := len(u.Path); l > 0 {
  184. if strings.HasSuffix(u.Path, "/") {
  185. u.Path = u.Path[:l-1]
  186. }
  187. } else if l = len(u.Host); l > 0 {
  188. if strings.HasSuffix(u.Host, "/") {
  189. u.Host = u.Host[:l-1]
  190. }
  191. }
  192. }
  193. func addTrailingSlash(u *url.URL) {
  194. if l := len(u.Path); l > 0 {
  195. if !strings.HasSuffix(u.Path, "/") {
  196. u.Path += "/"
  197. }
  198. } else if l = len(u.Host); l > 0 {
  199. if !strings.HasSuffix(u.Host, "/") {
  200. u.Host += "/"
  201. }
  202. }
  203. }
  204. func removeDotSegments(u *url.URL) {
  205. if len(u.Path) > 0 {
  206. var dotFree []string
  207. var lastIsDot bool
  208. sections := strings.Split(u.Path, "/")
  209. for _, s := range sections {
  210. if s == ".." {
  211. if len(dotFree) > 0 {
  212. dotFree = dotFree[:len(dotFree)-1]
  213. }
  214. } else if s != "." {
  215. dotFree = append(dotFree, s)
  216. }
  217. lastIsDot = (s == "." || s == "..")
  218. }
  219. // Special case if host does not end with / and new path does not begin with /
  220. u.Path = strings.Join(dotFree, "/")
  221. if u.Host != "" && !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
  222. u.Path = "/" + u.Path
  223. }
  224. // Special case if the last segment was a dot, make sure the path ends with a slash
  225. if lastIsDot && !strings.HasSuffix(u.Path, "/") {
  226. u.Path += "/"
  227. }
  228. }
  229. }
  230. func removeDirectoryIndex(u *url.URL) {
  231. if len(u.Path) > 0 {
  232. u.Path = rxDirIndex.ReplaceAllString(u.Path, "$1")
  233. }
  234. }
// removeFragment clears the fragment of u in place.
func removeFragment(u *url.URL) {
	u.Fragment = ""
}
  238. func forceHTTP(u *url.URL) {
  239. if strings.ToLower(u.Scheme) == "https" {
  240. u.Scheme = "http"
  241. }
  242. }
  243. func removeDuplicateSlashes(u *url.URL) {
  244. if len(u.Path) > 0 {
  245. u.Path = rxDupSlashes.ReplaceAllString(u.Path, "/")
  246. }
  247. }
  248. func removeWWW(u *url.URL) {
  249. if len(u.Host) > 0 && strings.HasPrefix(strings.ToLower(u.Host), "www.") {
  250. u.Host = u.Host[4:]
  251. }
  252. }
  253. func addWWW(u *url.URL) {
  254. if len(u.Host) > 0 && !strings.HasPrefix(strings.ToLower(u.Host), "www.") {
  255. u.Host = "www." + u.Host
  256. }
  257. }
  258. func sortQuery(u *url.URL) {
  259. q := u.Query()
  260. if len(q) > 0 {
  261. arKeys := make([]string, len(q))
  262. i := 0
  263. for k, _ := range q {
  264. arKeys[i] = k
  265. i++
  266. }
  267. sort.Strings(arKeys)
  268. buf := new(bytes.Buffer)
  269. for _, k := range arKeys {
  270. sort.Strings(q[k])
  271. for _, v := range q[k] {
  272. if buf.Len() > 0 {
  273. buf.WriteRune('&')
  274. }
  275. buf.WriteString(fmt.Sprintf("%s=%s", k, urlesc.QueryEscape(v)))
  276. }
  277. }
  278. // Rebuild the raw query string
  279. u.RawQuery = buf.String()
  280. }
  281. }
  282. func decodeDWORDHost(u *url.URL) {
  283. if len(u.Host) > 0 {
  284. if matches := rxDWORDHost.FindStringSubmatch(u.Host); len(matches) > 2 {
  285. var parts [4]int64
  286. dword, _ := strconv.ParseInt(matches[1], 10, 0)
  287. for i, shift := range []uint{24, 16, 8, 0} {
  288. parts[i] = dword >> shift & 0xFF
  289. }
  290. u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[2])
  291. }
  292. }
  293. }
  294. func decodeOctalHost(u *url.URL) {
  295. if len(u.Host) > 0 {
  296. if matches := rxOctalHost.FindStringSubmatch(u.Host); len(matches) > 5 {
  297. var parts [4]int64
  298. for i := 1; i <= 4; i++ {
  299. parts[i-1], _ = strconv.ParseInt(matches[i], 8, 0)
  300. }
  301. u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[5])
  302. }
  303. }
  304. }
  305. func decodeHexHost(u *url.URL) {
  306. if len(u.Host) > 0 {
  307. if matches := rxHexHost.FindStringSubmatch(u.Host); len(matches) > 2 {
  308. // Conversion is safe because of regex validation
  309. parsed, _ := strconv.ParseInt(matches[1], 16, 0)
  310. // Set host as DWORD (base 10) encoded host
  311. u.Host = fmt.Sprintf("%d%s", parsed, matches[2])
  312. // The rest is the same as decoding a DWORD host
  313. decodeDWORDHost(u)
  314. }
  315. }
  316. }
  317. func removeUnncessaryHostDots(u *url.URL) {
  318. if len(u.Host) > 0 {
  319. if matches := rxHostDots.FindStringSubmatch(u.Host); len(matches) > 1 {
  320. // Trim the leading and trailing dots
  321. u.Host = strings.Trim(matches[1], ".")
  322. if len(matches) > 2 {
  323. u.Host += matches[2]
  324. }
  325. }
  326. }
  327. }
  328. func removeEmptyPortSeparator(u *url.URL) {
  329. if len(u.Host) > 0 {
  330. u.Host = rxEmptyPort.ReplaceAllString(u.Host, "")
  331. }
  332. }