/lib/uri/rfc3986_parser.rb

https://github.com/abwinkler999/ruby · Ruby · 105 lines · 93 code · 10 blank · 2 comment · 12 complexity · e76c91f5a30b4ac207beb3521cd6b0bb MD5 · raw file

  module URI
    # Parser that splits and builds URIs according to the RFC 3986 grammar.
    #
    # The two top-level regexps below are direct transcriptions of the RFC 3986
    # ABNF into named groups; #split reads the named groups back out of the
    # match to build the 9-element component array used by URI::Generic.
    class RFC3986_Parser # :nodoc:
      # Absolute URI ("scheme:hier-part[?query][#fragment]") as defined in
      # RFC 3986.
      #
      # The pattern deviates from the RFC in one place: +reg-name+ requires at
      # least one character and the whole +host+ group is optional, so a matched
      # host is never the empty string (it is +nil+ instead).
      #
      # The "[ h16 ] ::" IPv6 alternative is written <tt>\h{,4}::</tt> (the same
      # form the HOST pattern in #regexp uses). <tt>\h{1,4}?</tt> would be a
      # reluctant 1..4 repeat, not an optional group, and would reject addresses
      # such as <tt>[::1:2:3:4:5:6]</tt>.
      RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{,4}::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/

      # Relative reference ("relative-part[?query][#fragment]") as defined in
      # RFC 3986.
      #
      # The IPv6address / IPvFuture alternation is wrapped in its own
      # non-capturing group so that both alternatives sit between the literal
      # "[" and "]". Without that group the alternation binds as
      # "[IPv6address" OR "IPvFuture]", and bracketed hosts such as
      # "//[::1]/path" are rejected.
      RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{,4}::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/

      # Splits +uri+ into its components.
      #
      # +uri+ must respond to +to_str+. Returns a 9-element Array:
      #
      #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
      #
      # +registry+ is always +nil+. For a URI whose hier-part is a rootless
      # path (e.g. "mailto:a@b"), everything after the scheme up to the fragment
      # is returned as +opaque+ (with "?query" appended when a query is present)
      # and +path+ / +query+ are +nil+.
      #
      # Raises InvalidURIError when +uri+ contains non-ASCII characters or
      # matches neither the URI nor the relative-reference grammar.
      def split(uri) #:nodoc:
        uri = uri.to_str
        unless uri.ascii_only?
          raise InvalidURIError, "URI must be ascii only #{uri.dump}"
        end

        if (m = RFC3986_URI.match(uri))
          if m["path-rootless"] # opaque
            # MatchData#[] returns a fresh String, so appending is safe.
            opaque = m["path-rootless"]
            opaque << '?' << m["query"] if m["query"]
            [m["scheme"], nil, nil, nil, nil, nil, opaque, nil, m["fragment"]]
          else # normal
            path = m["path-abempty"] || m["path-absolute"] || m["path-empty"]
            [m["scheme"], m["userinfo"], m["host"], m["port"], nil,
             path, nil, m["query"], m["fragment"]]
          end
        elsif (m = RFC3986_relative_ref.match(uri))
          path = m["path-abempty"] || m["path-absolute"] ||
                 m["path-noscheme"] || m["path-empty"]
          [nil, m["userinfo"], m["host"], m["port"], nil,
           path, nil, m["query"], m["fragment"]]
        else
          raise InvalidURIError, "bad URI(is not URI?): #{uri}"
        end
      end

      # Parses +uri+ and returns a URI object.
      #
      # The class registered in URI.scheme_list under the upper-cased scheme is
      # instantiated; when there is no scheme, or no class is registered for it,
      # a Generic is returned. Errors raised by #split propagate.
      def parse(uri) # :nodoc:
        scheme, userinfo, host, port,
          registry, path, opaque, query, fragment = split(uri)

        # Single lookup: scheme_list is consulted once per parse.
        klass = (scheme && URI.scheme_list[scheme.upcase]) || Generic
        klass.new(scheme, userinfo, host, port,
                  registry, path, opaque, query,
                  fragment, self)
      end

      # Joins +uris+ left to right: the first element is converted to a URI
      # object (see #convert_to_uri) and every following element is folded in
      # with +merge+.
      def join(*uris) # :nodoc:
        uris[0] = convert_to_uri(uris[0])
        uris.inject :merge
      end

      @@to_s = Kernel.instance_method(:to_s)

      # Returns the plain Kernel#to_s representation of the parser.
      def inspect
        @@to_s.bind(self).call
      end

      # Returns a Hash mapping component names (Symbols) to the anchored Regexp
      # that validates that component on its own. +:OPAQUE+ maps to +nil+.
      def regexp
        {
          SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/,
          USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/,
          HOST: /\A(?:(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{,4}::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))\z/,
          ABS_PATH: /\A\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/,
          REL_PATH: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/,
          QUERY: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/,
          FRAGMENT: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*\z/,
          OPAQUE: nil,
          # Digits optionally surrounded by tab / LF / FF / CR / space.
          PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/,
        }
      end

      private

      # Returns +uri+ unchanged when it is already a URI::Generic, parses it
      # when it is implicitly convertible to String, and raises ArgumentError
      # otherwise.
      def convert_to_uri(uri)
        if uri.is_a?(URI::Generic)
          uri
        elsif (uri = String.try_convert(uri))
          parse(uri)
        else
          raise ArgumentError,
            "bad argument (expected URI object or URI string)"
        end
      end
    end # class Parser
  end # module URI