PageRenderTime 68ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/addressable/uri.rb

https://github.com/bolshakov/addressable
Ruby | 2337 lines | 1662 code | 121 blank | 554 comment | 213 complexity | dd74655e5bda72abe8dc95a7d76a10b4 MD5 | raw file
Possible License(s): Apache-2.0
  1. # encoding:utf-8
  2. #--
  3. # Copyright (C) 2006-2013 Bob Aman
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #++
  17. require "addressable/version"
  18. require "addressable/idna"
  19. ##
  20. # Addressable is a library for processing links and URIs.
  21. module Addressable
  22. ##
  23. # This is an implementation of a URI parser based on
  24. # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
  25. # <a href="http://www.ietf.org/rfc/rfc3987.txt">RFC 3987</a>.
  26. class URI
  ##
  # Error raised when something other than a URI is supplied.
  class InvalidURIError < StandardError; end
  ##
  # Character classes specified in
  # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  #
  # Each constant is a String meant to be placed between the square brackets
  # of a regular expression character class. Every punctuation character is
  # preceded by a backslash.
  module CharacterClasses
    # Prefixes each character with a backslash and concatenates the result.
    escape = lambda { |chars| chars.map { |char| "\\" + char }.join }

    ALPHA = "a-zA-Z"
    DIGIT = "0-9"
    GEN_DELIMS = escape.call([':', '/', '?', '#', '[', ']', '@'])
    SUB_DELIMS = escape.call(
      ['!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=']
    )
    RESERVED = GEN_DELIMS + SUB_DELIMS
    UNRESERVED = ALPHA + DIGIT + escape.call(['-', '.', '_', '~'])
    PCHAR = UNRESERVED + SUB_DELIMS + escape.call([':', '@'])
    SCHEME = ALPHA + DIGIT + escape.call(['-', '+', '.'])
    # AUTHORITY is the very same String object as PCHAR, not a copy.
    AUTHORITY = PCHAR
    PATH = PCHAR + escape.call(['/'])
    QUERY = PCHAR + escape.call(['/', '?'])
    FRAGMENT = PCHAR + escape.call(['/', '?'])
  end
  # Literal strings referenced by name elsewhere in this class.
  SLASH = "/"
  EMPTY_STR = ""

  # Splits a URI reference into its parts. Capture groups 2, 4, 5, 7 and 9
  # hold the scheme, authority, path, query and fragment respectively.
  URIREGEX = /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/

  # Port numbers keyed by scheme name.
  PORT_MAPPING = Hash[[
    ["http", 80],
    ["https", 443],
    ["ftp", 21],
    ["tftp", 69],
    ["sftp", 22],
    ["ssh", 22],
    ["svn+ssh", 22],
    ["telnet", 23],
    ["nntp", 119],
    ["gopher", 70],
    ["wais", 210],
    ["ldap", 389],
    ["prospero", 1525]
  ]]
  ##
  # Parses a value into a URI object.
  #
  # @param [String, Addressable::URI, #to_str] uri
  #   The URI string to parse. An <code>Addressable::URI</code> is not
  #   parsed again; a copy of it is returned instead.
  #
  # @return [Addressable::URI, nil]
  #   The parsed URI, or <code>nil</code> when <code>uri</code> is
  #   <code>nil</code> or <code>false</code>.
  #
  # @raise [TypeError] If the value cannot be converted to a String.
  def self.parse(uri)
    return nil unless uri
    return uri.dup if uri.kind_of?(self)

    # Standard library URI objects are detected through their class name so
    # that the ::URI constant is never referenced (it may not be loaded).
    uri = uri.to_s if uri.class.name =~ /^URI\b/

    unless uri.is_a?(String)
      begin
        uri = uri.to_str
      rescue TypeError, NoMethodError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end

    # URIREGEX is the example pattern from RFC 3986; only the first match
    # is used.
    captures = uri.scan(URIREGEX)[0]
    scheme, authority, path, query, fragment =
      captures.values_at(1, 3, 4, 6, 8)

    user = password = host = port = nil
    unless authority.nil?
      # URIREGEX leaves the authority in one piece, so split it here.
      userinfo_pattern = /^([^\[\]]*)@/
      port_pattern = /:([^:@\[\]]*?)$/

      userinfo = authority[userinfo_pattern, 1]
      unless userinfo.nil?
        trimmed = userinfo.strip
        user = trimmed[/^([^:]*):?/, 1]
        password = trimmed[/:(.*)$/, 1]
      end
      host = authority.gsub(userinfo_pattern, EMPTY_STR).
        gsub(port_pattern, EMPTY_STR)
      port = authority[port_pattern, 1]
    end
    # A trailing ":" with nothing after it means no port at all.
    port = nil if port == EMPTY_STR

    return new(
      :scheme => scheme,
      :user => user,
      :password => password,
      :host => host,
      :port => port,
      :path => path,
      :query => query,
      :fragment => fragment
    )
  end
  ##
  # Converts an input to a URI. The input does not have to be a valid URI;
  # heuristics are used to guess which URI was intended. This is not
  # standards-compliant, merely user-friendly.
  #
  # @param [String, Addressable::URI, #to_str] uri
  #   The URI string to parse. An <code>Addressable::URI</code> is not
  #   parsed again; a copy of it is returned instead.
  # @param [Hash] hints
  #   A <code>Hash</code> of hints to the heuristic parser.
  #   Defaults to <code>{:scheme => "http"}</code>.
  #
  # @return [Addressable::URI, nil]
  #   The parsed URI, or <code>nil</code> when <code>uri</code> is
  #   <code>nil</code> or <code>false</code>.
  #
  # @raise [TypeError] If the value does not respond to <code>to_str</code>.
  def self.heuristic_parse(uri, hints={})
    return nil unless uri
    return uri.dup if uri.kind_of?(self)
    unless uri.respond_to?(:to_str)
      raise TypeError, "Can't convert #{uri.class} into String."
    end

    # Work on a copy: the clean-up below edits the string in place.
    uri = uri.to_str.dup
    hints = { :scheme => "http" }.merge(hints)

    # Slash-count repairs for well-known prefixes; the first pattern that
    # matches wins.
    slash_fixes = [
      [/^http:\/+/, "http://"],
      [/^https:\/+/, "https://"],
      [/^feed:\/+http:\/+/, "feed:http://"],
      [/^feed:\/+/, "feed://"],
      [/^file:\/+/, "file:///"]
    ]
    pattern, replacement = slash_fixes.find do |(candidate, _)|
      uri =~ candidate
    end
    if pattern
      uri.gsub!(pattern, replacement)
    elsif uri =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
      # Looks like a dotted-quad address: put the hinted scheme in front.
      uri.gsub!(/^/, hints[:scheme] + "://")
    end

    parsed = self.parse(uri)
    # A "scheme" containing a dot is taken to be a host name that was
    # followed by a colon, so parse again with the hinted scheme in front.
    if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
      parsed = self.parse(hints[:scheme] + "://" + uri)
    end

    # When the first path segment contains a dot, move it into the host.
    new_host = nil
    new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1] if parsed.path.include?(".")
    if new_host
      parsed.defer_validation do
        host_prefix = Regexp.new("^" + Regexp.escape(new_host))
        new_path = parsed.path.gsub(host_prefix, EMPTY_STR)
        parsed.host = new_host
        parsed.path = new_path
        parsed.scheme = hints[:scheme] unless parsed.scheme
      end
    end
    return parsed
  end
  ##
  # Converts a path to a file scheme URI. If the path supplied is
  # relative, it will be returned as a relative URI. If the path supplied
  # is actually a non-file URI, it will parse the URI as if it had been
  # parsed with <code>Addressable::URI.parse</code>. Windows drive-letter
  # prefixes (<code>c:</code> or <code>c|</code>) and backslash separators
  # are rewritten to the forward-slash form.
  #
  # @param [String, Addressable::URI, #to_str] path
  #   Typically a <code>String</code> path to a file or directory, but
  #   will return a sensible return value if an absolute URI is supplied
  #   instead.
  #
  # @return [Addressable::URI, nil]
  #   The parsed file scheme URI or the original URI if some other URI
  #   scheme was provided. <code>nil</code> when <code>path</code> is
  #   <code>nil</code> or <code>false</code>.
  #
  # @raise [TypeError] If the value does not respond to <code>to_str</code>.
  #
  # @example
  #   base = Addressable::URI.convert_path("/absolute/path/")
  #   uri = Addressable::URI.convert_path("relative/path")
  #   (base + uri).to_s
  #   #=> "file:///absolute/path/relative/path"
  #
  #   Addressable::URI.convert_path(
  #     "c:\\windows\\My Documents 100%20\\foo.txt"
  #   ).to_s
  #   #=> "file:///c:/windows/My%20Documents%20100%20/foo.txt"
  #
  #   Addressable::URI.convert_path("http://example.com/").to_s
  #   #=> "http://example.com/"
  def self.convert_path(path)
    # If we were given nil, return nil.
    return nil unless path
    # A URI object is returned as-is (the same object, not a copy).
    return path if path.kind_of?(self)
    if !path.respond_to?(:to_str)
      raise TypeError, "Can't convert #{path.class} into String."
    end
    # Otherwise, convert to a String. strip returns a new String, so the
    # in-place edit below never touches the caller's object.
    path = path.to_str.strip

    path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
    # Give drive-letter paths a leading slash so that the drive letter is
    # not mistaken for a URI scheme by the parser.
    path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
    uri = self.parse(path)

    if uri.scheme == nil
      # Adjust windows-style uris
      uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
        "/#{$1.downcase}:/"
      end
      uri.path.gsub!(/\\/, SLASH)
      # File.directory? is false for paths that do not exist, so no
      # separate existence check is needed. (File.exists? is not used
      # because it is no longer available in Ruby 3.2 and later.)
      if File.directory?(uri.path)
        # Directories always end in exactly one trailing slash.
        uri.path.gsub!(/\/$/, EMPTY_STR)
        uri.path = uri.path + '/'
      end

      # If the path is absolute, set the scheme and host.
      if uri.path =~ /^\//
        uri.scheme = "file"
        uri.host = EMPTY_STR
      end
      uri.normalize!
    end

    return uri
  end
  ##
  # Joins several URIs together, from left to right.
  #
  # @param [String, Addressable::URI, #to_str] *uris
  #   The URIs to join.
  #
  # @return [Addressable::URI] The joined URI.
  #
  # @raise [TypeError] If any value does not respond to <code>to_str</code>.
  #
  # @example
  #   base = "http://example.com/"
  #   uri = Addressable::URI.parse("relative/path")
  #   Addressable::URI.join(base, uri)
  #   #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
  def self.join(*uris)
    parsed_uris = uris.map do |candidate|
      unless candidate.respond_to?(:to_str)
        raise TypeError, "Can't convert #{candidate.class} into String."
      end
      candidate.kind_of?(self) ? candidate : self.parse(candidate.to_str)
    end

    # The first URI is copied so that join! never modifies the caller's
    # object.
    base, *others = parsed_uris
    joined = base.dup
    others.each { |other| joined.join!(other) }
    return joined
  end
  ##
  # Percent encodes a URI component.
  #
  # @param [String, Symbol, Numeric, TrueClass, FalseClass, #to_str] component
  #   The URI component to encode. Symbols, numbers and booleans are
  #   converted with <code>to_s</code>; anything else that is not a
  #   <code>String</code> is converted with <code>to_str</code>.
  #
  # @param [String, Regexp] character_class
  #   The characters which are not percent encoded. If a <code>String</code>
  #   is passed, the <code>String</code> must be formatted as a regular
  #   expression character class. (Do not include the surrounding square
  #   brackets.) For example, <code>"b-zB-Z0-9"</code> would cause
  #   everything but the letters 'b' through 'z' and the numbers '0' through
  #   '9' to be percent encoded. If a <code>Regexp</code> is passed, the
  #   value <code>/[^b-zB-Z0-9]/</code> would have the same effect. A set of
  #   useful <code>String</code> values may be found in the
  #   <code>Addressable::URI::CharacterClasses</code> module. The default
  #   value is the reserved plus unreserved character classes specified in
  #   <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  #
  # @param [String] upcase_encoded
  #   A string of characters that may already be percent encoded, and whose
  #   encodings should be upcased. This allows normalization of percent
  #   encodings for characters not included in the
  #   <code>character_class</code>.
  #
  # @return [String, nil]
  #   The encoded component, or <code>nil</code> when
  #   <code>component</code> is <code>nil</code>.
  #
  # @raise [TypeError]
  #   If the component cannot be converted to a String, or if
  #   <code>character_class</code> is neither a String nor a Regexp.
  #
  # @example
  #   Addressable::URI.encode_component("simple/example", "b-zB-Z0-9")
  #   => "simple%2Fex%61mple"
  #   Addressable::URI.encode_component("simple/example", /[^b-zB-Z0-9]/)
  #   => "simple%2Fex%61mple"
  #   Addressable::URI.encode_component(
  #     "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
  #   )
  #   => "simple%2Fexample"
  def self.encode_component(component, character_class=
      CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
      upcase_encoded='')
    return nil if component.nil?

    unless component.is_a?(String)
      begin
        component =
          case component
          when Symbol, Numeric, TrueClass, FalseClass then component.to_s
          else component.to_str
          end
      rescue TypeError, NoMethodError
        raise TypeError, "Can't convert #{component.class} into String."
      end
    end

    unless [String, Regexp].include?(character_class.class)
      raise TypeError,
        "Expected String or Regexp, got #{character_class.inspect}"
    end
    # A String lists the characters to keep; turn it into a pattern that
    # matches everything else.
    if character_class.kind_of?(String)
      character_class = /[^#{character_class}]/
    end

    if component.respond_to?(:force_encoding)
      # Regexps cannot be run over invalid UTF sequences, so operate on a
      # binary copy instead.
      component = component.dup
      component.force_encoding(Encoding::ASCII_8BIT)
    end

    # Non-destructive gsub is used because the component may be frozen.
    encoded = component.gsub(character_class) do |sequence|
      sequence.unpack('C*').map { |byte| "%%%02X" % byte }.join
    end

    if upcase_encoded.length > 0
      hex_alternatives = upcase_encoded.chars.map do |char|
        char.unpack('C*').map { |byte| '%02x' % byte }.join
      end.join('|')
      encoded = encoded.gsub(/%(#{hex_alternatives})/i) do |escape|
        escape.upcase
      end
    end
    return encoded
  end
  class << self
    # Alternate name for encode_component, following the escape/unescape
    # naming used by the other aliases in this class. Aliasing
    # encode_component to its own name would define nothing new.
    alias_method :escape_component, :encode_component
  end
  ##
  # Unencodes any percent encoded characters within a URI component.
  # This method may be used for unencoding either components or full URIs,
  # however, it is recommended to use the <code>unencode_component</code>
  # alias when unencoding components.
  #
  # @param [String, Addressable::URI, #to_str] uri
  #   The URI or component to unencode.
  #
  # @param [Class] return_type
  #   The type of object to return.
  #   This value may only be set to <code>String</code> or
  #   <code>Addressable::URI</code>. All other values are invalid. Defaults
  #   to <code>String</code>.
  #
  # @param [String] leave_encoded
  #   A string of characters to leave encoded. If a percent encoded character
  #   in this list is encountered then it will remain percent encoded.
  #
  # @return [String, Addressable::URI, nil]
  #   The unencoded component or URI, as selected by
  #   <code>return_type</code>; <code>nil</code> when <code>uri</code> is
  #   <code>nil</code>.
  #
  # @raise [TypeError]
  #   If <code>uri</code> cannot be converted to a String or
  #   <code>return_type</code> is not one of the two permitted classes.
  def self.unencode(uri, return_type=String, leave_encoded='')
    return nil if uri.nil?

    unless uri.is_a?(String)
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), got #{return_type.inspect}"
    end

    decoded = uri.gsub(/%[0-9a-f]{2}/i) do |escape|
      # escape is "%XX"; the two hex digits give the byte value.
      char = escape[1..3].to_i(16).chr
      leave_encoded.include?(char) ? escape : char
    end
    decoded.force_encoding("utf-8") if decoded.respond_to?(:force_encoding)

    return decoded if return_type == String
    return ::Addressable::URI.parse(decoded) if return_type == ::Addressable::URI
  end
  class << self
    # Alternate names for unencode.
    alias unescape unencode
    alias unencode_component unencode
    alias unescape_component unencode
  end
  ##
  # Normalizes the encoding of a URI component.
  #
  # @param [String, #to_str] component The URI component to encode.
  #
  # @param [String, Regexp] character_class
  #   The characters which are not percent encoded. If a <code>String</code>
  #   is passed, the <code>String</code> must be formatted as a regular
  #   expression character class. (Do not include the surrounding square
  #   brackets.) For example, <code>"b-zB-Z0-9"</code> would cause
  #   everything but the letters 'b' through 'z' and the numbers '0'
  #   through '9' to be percent encoded. If a <code>Regexp</code> is passed,
  #   the value <code>/[^b-zB-Z0-9]/</code> would have the same effect. A
  #   set of useful <code>String</code> values may be found in the
  #   <code>Addressable::URI::CharacterClasses</code> module. The default
  #   value is the reserved plus unreserved character classes specified in
  #   <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  #   The value passed in is never modified.
  #
  # @param [String] leave_encoded
  #   When <code>character_class</code> is a <code>String</code> then
  #   <code>leave_encoded</code> is a string of characters that should remain
  #   percent encoded while normalizing the component; if they appear percent
  #   encoded in the original component, then they will be upcased ("%2f"
  #   normalized to "%2F") but otherwise left alone.
  #
  # @return [String, nil]
  #   The normalized component, or <code>nil</code> when
  #   <code>component</code> is <code>nil</code>.
  #
  # @raise [TypeError]
  #   If the component cannot be converted to a String, or if
  #   <code>character_class</code> is neither a String nor a Regexp.
  #
  # @example
  #   Addressable::URI.normalize_component("simpl%65/%65xampl%65", "b-zB-Z")
  #   => "simple%2Fex%61mple"
  #   Addressable::URI.normalize_component(
  #     "simpl%65/%65xampl%65", /[^b-zB-Z]/
  #   )
  #   => "simple%2Fex%61mple"
  #   Addressable::URI.normalize_component(
  #     "simpl%65/%65xampl%65",
  #     Addressable::URI::CharacterClasses::UNRESERVED
  #   )
  #   => "simple%2Fexample"
  #   Addressable::URI.normalize_component(
  #     "one%20two%2fthree%26four",
  #     "0-9a-zA-Z &/",
  #     "/"
  #   )
  #   => "one two%2Fthree&four"
  def self.normalize_component(component, character_class=
      CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
      leave_encoded='')
    return nil if component.nil?

    begin
      component = component.to_str
    rescue NoMethodError, TypeError
      raise TypeError, "Can't convert #{component.class} into String."
    end if !component.is_a? String

    if ![String, Regexp].include?(character_class.class)
      raise TypeError,
        "Expected String or Regexp, got #{character_class.inspect}"
    end
    if character_class.kind_of?(String)
      leave_re = nil
      if leave_encoded.length > 0
        # Build a new String instead of appending in place: the caller's
        # String may be a shared constant (for example one of the
        # CharacterClasses values) or may be frozen, and appending to it
        # would permanently add '%' to that object.
        character_class = "#{character_class}%"
        # '%' itself is now allowed, so additionally match any '%' that
        # does not start one of the sequences that must stay encoded.
        kept_sequences = leave_encoded.chars.map do |char|
          seq = char.unpack('C*').map { |c| '%02x' % c }.join
          [seq.upcase, seq.downcase]
        end.flatten.join('|')
        leave_re = "|%(?!#{kept_sequences})"
      end
      character_class = /[^#{character_class}]#{leave_re}/
    end

    if component.respond_to?(:force_encoding)
      # We can't perform regexps on invalid UTF sequences, but
      # here we need to, so switch to ASCII.
      component = component.dup
      component.force_encoding(Encoding::ASCII_8BIT)
    end

    unencoded = self.unencode_component(component, String, leave_encoded)
    begin
      encoded = self.encode_component(
        Addressable::IDNA.unicode_normalize_kc(unencoded),
        character_class,
        leave_encoded
      )
    rescue ArgumentError
      # Fall back to encoding the un-normalized text with the default
      # character class.
      encoded = self.encode_component(unencoded)
    end
    return encoded
  end
  ##
  # Percent encodes any special characters in the URI.
  #
  # @param [String, Addressable::URI, #to_str] uri
  #   The URI to encode.
  #
  # @param [Class] return_type
  #   The type of object to return.
  #   This value may only be set to <code>String</code> or
  #   <code>Addressable::URI</code>. All other values are invalid. Defaults
  #   to <code>String</code>.
  #
  # @return [String, Addressable::URI, nil]
  #   The encoded URI, as selected by <code>return_type</code>;
  #   <code>nil</code> when <code>uri</code> is <code>nil</code>.
  #
  # @raise [TypeError]
  #   If <code>uri</code> cannot be converted to a String or
  #   <code>return_type</code> is not one of the two permitted classes.
  def self.encode(uri, return_type=String)
    return nil if uri.nil?

    unless uri.is_a?(String)
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), got #{return_type.inspect}"
    end

    parsed = uri.kind_of?(self) ? uri : self.parse(uri)
    classes = Addressable::URI::CharacterClasses
    # Each component is encoded with the character class of the same name.
    encoded_uri = Addressable::URI.new(
      :scheme => self.encode_component(parsed.scheme, classes::SCHEME),
      :authority =>
        self.encode_component(parsed.authority, classes::AUTHORITY),
      :path => self.encode_component(parsed.path, classes::PATH),
      :query => self.encode_component(parsed.query, classes::QUERY),
      :fragment => self.encode_component(parsed.fragment, classes::FRAGMENT)
    )

    return encoded_uri.to_s if return_type == String
    return encoded_uri if return_type == ::Addressable::URI
  end
  class << self
    # Alternate name for encode.
    alias escape encode
  end
  ##
  # Normalizes the encoding of a URI. Characters within a hostname are
  # not percent encoded to allow for internationalized domain names.
  #
  # @param [String, Addressable::URI, #to_str] uri
  #   The URI to encode.
  #
  # @param [Class] return_type
  #   The type of object to return.
  #   This value may only be set to <code>String</code> or
  #   <code>Addressable::URI</code>. All other values are invalid. Defaults
  #   to <code>String</code>.
  #
  # @return [String, Addressable::URI]
  #   The encoded URI, as selected by <code>return_type</code>.
  #
  # @raise [TypeError]
  #   If <code>uri</code> cannot be converted to a String (this includes
  #   <code>nil</code>) or <code>return_type</code> is not one of the two
  #   permitted classes.
  def self.normalized_encode(uri, return_type=String)
    unless uri.is_a?(String)
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), got #{return_type.inspect}"
    end

    parsed = uri.kind_of?(self) ? uri : self.parse(uri)

    # Step 1: strip any existing percent encoding from every component.
    components = {
      :scheme => self.unencode_component(parsed.scheme),
      :user => self.unencode_component(parsed.user),
      :password => self.unencode_component(parsed.password),
      :host => self.unencode_component(parsed.host),
      :port => (parsed.port.nil? ? nil : parsed.port.to_s),
      :path => self.unencode_component(parsed.path),
      :query => self.unencode_component(parsed.query),
      :fragment => self.unencode_component(parsed.fragment)
    }

    # Step 2: apply unicode normalization to every component that is set.
    components.keys.each do |name|
      value = components[name]
      next if value.nil?
      components[name] =
        begin
          Addressable::IDNA.unicode_normalize_kc(value.to_str)
        rescue ArgumentError
          # Likely a malformed UTF-8 character, skip unicode normalization
          value.to_str
        end
    end

    # Step 3: re-encode. Host and port are passed through unencoded.
    classes = Addressable::URI::CharacterClasses
    encoded_uri = Addressable::URI.new(
      :scheme => self.encode_component(components[:scheme], classes::SCHEME),
      :user => self.encode_component(components[:user], classes::UNRESERVED),
      :password =>
        self.encode_component(components[:password], classes::UNRESERVED),
      :host => components[:host],
      :port => components[:port],
      :path => self.encode_component(components[:path], classes::PATH),
      :query => self.encode_component(components[:query], classes::QUERY),
      :fragment =>
        self.encode_component(components[:fragment], classes::FRAGMENT)
    )

    return encoded_uri.to_s if return_type == String
    return encoded_uri if return_type == ::Addressable::URI
  end
  ##
  # Encodes a set of key/value pairs according to the rules for the
  # <code>application/x-www-form-urlencoded</code> MIME type.
  #
  # @param [#to_hash, #to_ary] form_values
  #   The form values to encode. A value that is an <code>Array</code>
  #   produces one pair per element, all under the same key.
  #
  # @param [TrueClass, FalseClass] sort
  #   Sort the key/value pairs prior to encoding.
  #   Defaults to <code>false</code>.
  #
  # @return [String]
  #   The encoded value.
  #
  # @raise [TypeError]
  #   If <code>form_values</code> responds to neither <code>to_hash</code>
  #   nor <code>to_ary</code>.
  def self.form_encode(form_values, sort=false)
    raw_pairs =
      if form_values.respond_to?(:to_hash)
        form_values.to_hash.to_a
      elsif form_values.respond_to?(:to_ary)
        form_values.to_ary
      else
        raise TypeError, "Can't convert #{form_values.class} into Array."
      end

    # Stringify everything and expand Array values into repeated keys.
    pairs = []
    raw_pairs.each do |key, value|
      items = value.kind_of?(Array) ? value : [value]
      items.each { |item| pairs << [key.to_s, item.to_s] }
    end

    # Sorting is useful for OAuth and for optimizing caching systems.
    pairs = pairs.sort if sort

    # Line breaks become CRLF pairs, and spaces are written as "+".
    escape = lambda do |text|
      self.encode_component(
        text.gsub(/(\r\n|\n|\r)/, "\r\n"),
        CharacterClasses::UNRESERVED
      ).gsub("%20", "+")
    end
    encoded_pairs = pairs.map do |(key, value)|
      "#{escape.call(key)}=#{escape.call(value)}"
    end
    return encoded_pairs.join("&")
  end
  ##
  # Decodes a <code>String</code> according to the rules for the
  # <code>application/x-www-form-urlencoded</code> MIME type.
  #
  # @param [String, #to_str] encoded_value
  #   The form values to decode.
  #
  # @return [Array]
  #   The decoded values, as <code>[key, value]</code> pairs. A pair
  #   without an "=" has a <code>nil</code> value.
  #   This is not a <code>Hash</code> because of the possibility for
  #   duplicate keys.
  #
  # @raise [TypeError]
  #   If <code>encoded_value</code> does not respond to <code>to_str</code>.
  def self.form_unencode(encoded_value)
    unless encoded_value.respond_to?(:to_str)
      raise TypeError, "Can't convert #{encoded_value.class} into String."
    end

    # "+" stands for a space, and every line break becomes a single "\n".
    decode = lambda do |text|
      self.unencode_component(text.gsub("+", "%20")).
        gsub(/(\r\n|\n|\r)/, "\n")
    end

    decoded_pairs = encoded_value.to_str.split("&").map do |pair|
      key, value = pair.split("=", 2)
      [
        key ? decode.call(key) : nil,
        value ? decode.call(value) : nil
      ]
    end
    return decoded_pairs
  end
  ##
  # Creates a new uri object from component parts.
  #
  # @option [String, #to_str] scheme The scheme component.
  # @option [String, #to_str] user The user component.
  # @option [String, #to_str] password The password component.
  # @option [String, #to_str] userinfo
  #   The userinfo component. If this is supplied, the user and password
  #   components must be omitted.
  # @option [String, #to_str] host The host component.
  # @option [String, #to_str] port The port component.
  # @option [String, #to_str] authority
  #   The authority component. If this is supplied, the user, password,
  #   userinfo, host, and port components must be omitted.
  # @option [String, #to_str] path The path component.
  # @option [String, #to_str] query The query component.
  # @option query_values
  #   Passed to <code>query_values=</code> when given.
  # @option [String, #to_str] fragment The fragment component.
  #
  # @raise [ArgumentError]
  #   If a composite component is given together with one of its parts.
  #
  # @return [Addressable::URI] The constructed URI object.
  def initialize(options={})
    # Composite components may not be combined with their own parts.
    if options.has_key?(:authority)
      authority_parts = [:userinfo, :user, :password, :host, :port]
      if authority_parts.any? { |part| options.has_key?(part) }
        raise ArgumentError,
          "Cannot specify both an authority and any of the components " +
          "within the authority."
      end
    end
    if options.has_key?(:userinfo)
      if [:user, :password].any? { |part| options.has_key?(part) }
        raise ArgumentError,
          "Cannot specify both a userinfo and either the user or password."
      end
    end

    # The setters are invoked in this fixed order, and only for options
    # that were given a truthy value. Validation is deferred until all of
    # them have run.
    assignment_order = [
      :scheme, :user, :password, :userinfo, :host, :port, :authority,
      :path, :query, :query_values, :fragment
    ]
    self.defer_validation do
      assignment_order.each do |name|
        self.send("#{name}=", options[name]) if options[name]
      end
    end
  end
  ##
  # Freezes the URI after calling every normalized reader and
  # <code>hash</code> once, so that instance variables those methods
  # assign on first use are set while the object is still writable.
  #
  # @return [Addressable::URI] The frozen URI object.
  def freeze
    warm_up = [
      :normalized_scheme, :normalized_user, :normalized_password,
      :normalized_userinfo, :normalized_host, :normalized_port,
      :normalized_authority, :normalized_site, :normalized_path,
      :normalized_query, :normalized_fragment, :hash
    ]
    # Return values are discarded; the calls are made for their effect on
    # instance state only.
    warm_up.each { |reader| self.send(reader) }
    super
  end
  ##
  # The scheme component for this URI.
  #
  # @return [String, nil]
  #   The scheme component, or <code>nil</code> if it was never assigned.
  def scheme
    return nil unless instance_variable_defined?(:@scheme)
    @scheme
  end
  ##
  # The scheme component for this URI, normalized.
  #
  # The result is stripped, downcased and run through
  # <code>normalize_component</code>; "ssh+svn" (in any letter case, with
  # optional surrounding whitespace) becomes "svn+ssh". The value is
  # memoized in <code>@normalized_scheme</code>.
  #
  # @return [String, nil]
  #   The scheme component, normalized, or <code>nil</code> if there is no
  #   scheme.
  def normalized_scheme
    raw_scheme = self.scheme
    return raw_scheme unless raw_scheme

    @normalized_scheme ||=
      if raw_scheme =~ /^\s*ssh\+svn\s*$/i
        "svn+ssh"
      else
        Addressable::URI.normalize_component(
          raw_scheme.strip.downcase,
          Addressable::URI::CharacterClasses::SCHEME
        )
      end
  end
  ##
  # Sets the scheme component for this URI.
  #
  # @param [String, #to_str] new_scheme The new scheme component.
  #
  # @raise [TypeError]
  #   If the value is truthy but does not respond to <code>to_str</code>.
  # @raise [InvalidURIError]
  #   If the value contains no ASCII letter at all.
  def scheme=(new_scheme)
    if new_scheme
      unless new_scheme.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_scheme.class} into String."
      end
      new_scheme = new_scheme.to_str
    end
    # NOTE(review): this pattern is not anchored, so it only requires a
    # letter somewhere in the value.
    if new_scheme && new_scheme !~ /[a-z][a-z0-9\.\+\-]*/i
      raise InvalidURIError, "Invalid scheme format."
    end

    @scheme = new_scheme.to_s.strip.empty? ? nil : new_scheme

    # Reset dependent values
    @normalized_scheme = @uri_string = @hash = nil

    # Ensure we haven't created an invalid URI
    validate()
  end
  ##
  # The user component for this URI.
  #
  # @return [String, nil]
  #   The user component, or <code>nil</code> if it was never assigned.
  def user
    return nil unless instance_variable_defined?(:@user)
    @user
  end
  ##
  # The user component for this URI, normalized.
  #
  # When the normalized scheme matches <code>/https?/</code> and both the
  # user and the password are blank, the result is <code>nil</code>.
  # Otherwise the stripped user is run through
  # <code>normalize_component</code> with the unreserved character class.
  # A non-nil result is memoized in <code>@normalized_user</code>.
  #
  # @return [String, nil] The user component, normalized.
  def normalized_user
    raw_user = self.user
    return raw_user unless raw_user

    @normalized_user ||= begin
      blank_http_credentials =
        normalized_scheme =~ /https?/ &&
        raw_user.strip.empty? &&
        (!self.password || self.password.strip.empty?)
      if blank_http_credentials
        nil
      else
        Addressable::URI.normalize_component(
          raw_user.strip,
          Addressable::URI::CharacterClasses::UNRESERVED
        )
      end
    end
  end
  834. ##
  835. # Sets the user component for this URI.
  836. #
  837. # @param [String, #to_str] new_user The new user component.
  def user=(new_user)
    # Assigns the user component.
    #
    # @param [String, #to_str] new_user The new user component.
    # @raise [TypeError] if the value does not respond to #to_str.
    if new_user && !new_user.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_user.class} into String."
    end
    @user = new_user ? new_user.to_str : nil
    # You can't have a nil user with a non-nil password
    @user = EMPTY_STR if @user.nil? && !password.nil?
    # Reset dependent values. The authority and site caches embed the user,
    # so they have to be dropped together with the userinfo caches.
    @userinfo = nil
    @normalized_userinfo = nil
    @authority = nil
    @normalized_authority = nil
    @normalized_user = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  857. ##
  858. # The password component for this URI.
  859. #
  860. # @return [String] The password component.
  def password
    # Guarded read: answers nil when @password has never been assigned.
    if instance_variable_defined?(:@password)
      @password
    else
      nil
    end
  end
  864. ##
  865. # The password component for this URI, normalized.
  866. #
  867. # @return [String] The password component, normalized.
  def normalized_password
    # No password, no normalized password; return the falsy value untouched.
    raw_password = self.password
    return raw_password unless raw_password
    @normalized_password ||= begin
      trimmed = raw_password.strip
      # For http-like schemes a blank password with a missing or blank user
      # carries no information and is dropped.
      droppable = self.normalized_scheme =~ /https?/ && trimmed.empty? &&
        (!self.user || self.user.strip.empty?)
      unless droppable
        Addressable::URI.normalize_component(
          trimmed,
          Addressable::URI::CharacterClasses::UNRESERVED
        )
      end
    end
  end
  881. ##
  882. # Sets the password component for this URI.
  883. #
  884. # @param [String, #to_str] new_password The new password component.
  def password=(new_password)
    # Assigns the password component.
    #
    # @param [String, #to_str] new_password The new password component.
    # @raise [TypeError] if the value does not respond to #to_str.
    if new_password && !new_password.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_password.class} into String."
    end
    @password = new_password ? new_password.to_str : nil
    # Make sure @user exists as an instance variable before inspecting it.
    @user ||= nil
    # You can't have a nil user with a non-nil password
    @user = EMPTY_STR if !@password.nil? && @user.nil?
    # Reset dependent values. The authority and site caches embed the
    # password, so they have to be dropped together with the userinfo caches.
    @userinfo = nil
    @normalized_userinfo = nil
    @authority = nil
    @normalized_authority = nil
    @normalized_password = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  906. ##
  907. # The userinfo component for this URI.
  908. # Combines the user and password components.
  909. #
  910. # @return [String] The userinfo component.
  def userinfo
    login = self.user
    secret = self.password
    # Nothing to combine when both parts are absent.
    present = login || secret
    return present unless present
    # Memoized. A password without a user produces nil.
    @userinfo ||=
      if login
        secret ? "#{login}:#{secret}" : "#{login}"
      end
  end
  922. ##
  923. # The userinfo component for this URI, normalized.
  924. #
  925. # @return [String] The userinfo component, normalized.
  def normalized_userinfo
    # Nothing to normalize without a userinfo component.
    raw_userinfo = self.userinfo
    return raw_userinfo unless raw_userinfo
    @normalized_userinfo ||= begin
      login = self.normalized_user
      secret = self.normalized_password
      # A normalized password without a normalized user produces nil.
      if login
        secret ? "#{login}:#{secret}" : "#{login}"
      end
    end
  end
  939. ##
  940. # Sets the userinfo component for this URI.
  941. #
  942. # @param [String, #to_str] new_userinfo The new userinfo component.
  def userinfo=(new_userinfo)
    # Assigns user and password from a combined "user:password" string.
    #
    # @param [String, #to_str] new_userinfo The new userinfo component.
    # @raise [TypeError] if the value does not respond to #to_str.
    if new_userinfo && !new_userinfo.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_userinfo.class} into String."
    end
    # Split on the FIRST colon only: everything before it is the user and
    # everything after it is the password. A value without a colon is a
    # user with no password; an empty value yields neither.
    new_user, new_password =
      if new_userinfo
        new_userinfo.to_str.strip.split(":", 2)
      else
        [nil, nil]
      end
    # Password assigned first to ensure validity in case of nil
    self.password = new_password
    self.user = new_user
    # Reset dependent values
    @authority = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  965. ##
  966. # The host component for this URI.
  967. #
  968. # @return [String] The host component.
  def host
    # Guarded read: answers nil when @host has never been assigned.
    if instance_variable_defined?(:@host)
      @host
    else
      nil
    end
  end
  972. ##
  973. # The host component for this URI, normalized.
  974. #
  975. # @return [String] The host component, normalized.
  def normalized_host
    # Nothing to normalize without a host.
    raw_host = self.host
    return raw_host unless raw_host
    @normalized_host ||= begin
      trimmed = raw_host.strip
      if trimmed.empty?
        EMPTY_STR
      else
        # Lower-case, percent-decode, then convert to the ASCII (IDNA) form.
        ascii = ::Addressable::IDNA.to_ascii(
          URI.unencode_component(trimmed.downcase)
        )
        # Trailing dots are unnecessary
        ascii[-1..-1] == "." ? ascii[0...-1] : ascii
      end
    end
  end
  992. ##
  993. # Sets the host component for this URI.
  994. #
  995. # @param [String, #to_str] new_host The new host component.
  def host=(new_host)
    # Assigns the host component after validating its characters.
    #
    # @param [String, #to_str] new_host The new host component.
    # @raise [TypeError] if the value does not respond to #to_str.
    # @raise [InvalidURIError] if the host contains a forbidden character.
    if new_host && !new_host.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_host.class} into String."
    end
    candidate = new_host ? new_host.to_str : nil
    # Validate BEFORE storing, so a rejected value never ends up in @host.
    unless candidate.nil?
      unreserved = CharacterClasses::UNRESERVED
      sub_delims = CharacterClasses::SUB_DELIMS
      # Content of a bracketed literal ("[...]"), or nil for ordinary hosts.
      bracketed = candidate[/^\[(.*)\]$/, 1]
      if candidate =~ /[<>{}\/\?\#\@]/ ||
          (!bracketed.nil? && bracketed !~
            Regexp.new("^[#{unreserved}#{sub_delims}:]*$"))
        raise InvalidURIError, "Invalid character in host: '#{candidate}'"
      end
    end
    @host = candidate
    # Reset dependent values. The normalized authority and both site caches
    # embed the host, so they must be dropped as well.
    @authority = nil
    @normalized_authority = nil
    @normalized_host = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1016. ##
  1017. # This method is the same as URI::Generic#host except
  1018. # brackets for IPv6 (and 'IPvFuture') addresses are removed.
  1019. #
  1020. # @see Addressable::URI#host
  1021. #
  1022. # @return [String] The hostname for this URI.
  def hostname
    current = self.host
    # A host wrapped in square brackets is returned without them.
    bracketed = /\A\[(.*)\]\z/.match(current)
    bracketed ? bracketed[1] : current
  end
  1027. ##
  1028. # This method is the same as URI::Generic#host= except
  1029. # the argument can be a bare IPv6 address (or 'IPvFuture').
  1030. #
  1031. # @see Addressable::URI#host=
  1032. #
  1033. # @param [String, #to_str] new_hostname The new hostname for this URI.
  def hostname=(new_hostname)
    unless !new_hostname || new_hostname.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_hostname.class} into String."
    end
    candidate = new_hostname ? new_hostname.to_str : nil
    # A value containing a colon that is not already bracketed gets wrapped
    # in square brackets before being handed to host=.
    needs_brackets = /:/ =~ candidate && /\A\[.*\]\z/ !~ candidate
    candidate = "[#{candidate}]" if needs_brackets
    self.host = candidate
  end
  1042. ##
  1043. # The authority component for this URI.
  1044. # Combines the user, password, host, and port components.
  1045. #
  1046. # @return [String] The authority component.
  def authority
    # There is no authority without a host.
    current_host = self.host
    return current_host unless current_host
    @authority ||= begin
      credentials = self.userinfo
      explicit_port = self.port
      # Assemble "[userinfo@]host[:port]".
      assembled = ""
      assembled << "#{credentials}@" unless credentials.nil?
      assembled << current_host
      assembled << ":#{explicit_port}" unless explicit_port.nil?
      assembled
    end
  end
  1060. ##
  1061. # The authority component for this URI, normalized.
  1062. #
  1063. # @return [String] The authority component, normalized.
  def normalized_authority
    # Nothing to normalize without an authority.
    raw_authority = self.authority
    return raw_authority unless raw_authority
    @normalized_authority ||= begin
      credentials = self.normalized_userinfo
      explicit_port = self.normalized_port
      # Assemble "[userinfo@]host[:port]" from the normalized parts.
      assembled = ""
      assembled << "#{credentials}@" unless credentials.nil?
      assembled << self.normalized_host
      assembled << ":#{explicit_port}" unless explicit_port.nil?
      assembled
    end
  end
  1077. ##
  1078. # Sets the authority component for this URI.
  1079. #
  1080. # @param [String, #to_str] new_authority The new authority component.
  def authority=(new_authority)
    # Splits an authority string into user, password, host and port and
    # assigns each through its own setter. nil clears all four.
    #
    # @param [String, #to_str] new_authority The new authority component.
    # @raise [TypeError] if the value does not respond to #to_str.
    #
    # The parts start out nil, so a nil authority (or an authority without
    # userinfo) clears them without any `defined?` checks on locals.
    new_user = new_password = new_host = new_port = nil
    if new_authority
      if !new_authority.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_authority.class} into String."
      end
      new_authority = new_authority.to_str
      # Everything before the last "@", provided it contains no brackets.
      new_userinfo = new_authority[/^([^\[\]]*)@/, 1]
      if new_userinfo
        new_user = new_userinfo.strip[/^([^:]*):?/, 1]
        new_password = new_userinfo.strip[/:(.*)$/, 1]
      end
      # The host is what remains once the userinfo and the port are removed.
      new_host = new_authority.gsub(
        /^([^\[\]]*)@/, EMPTY_STR
      ).gsub(
        /:([^:@\[\]]*?)$/, EMPTY_STR
      )
      new_port = new_authority[/:([^:@\[\]]*?)$/, 1]
    end
    # Password assigned first to ensure validity in case of nil
    self.password = new_password
    self.user = new_user
    self.host = new_host
    self.port = new_port
    # Reset dependent values, including the memoized normalized authority
    # and site strings, which embed every part assigned above.
    @userinfo = nil
    @normalized_userinfo = nil
    @normalized_authority = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1113. ##
  1114. # The origin for this URI, serialized to ASCII, as per
  1115. # RFC 6454, section 6.2.
  1116. #
  1117. # @return [String] The serialized origin.
  def origin
    # Without both a scheme and an authority the origin is the literal
    # string "null".
    return "null" unless self.scheme && self.authority
    serialized = "#{self.normalized_scheme}://#{self.normalized_host}"
    # The port is appended only when normalized_port reports one.
    if self.normalized_port
      serialized + ":#{self.normalized_port}"
    else
      serialized
    end
  end
  1132. # Returns an array of known ip-based schemes. These schemes typically
  1133. # use a similar URI form:
  1134. # <code>//<user>:<password>@<host>:<port>/<url-path></code>
  def self.ip_based_schemes
    # The known IP-based schemes are exactly the keys of the port mapping.
    known_ports = self.port_mapping
    known_ports.keys
  end
  1138. # Returns a hash of common IP-based schemes and their default port
  1139. # numbers. Adding new schemes to this hash, as necessary, will allow
  1140. # for better URI normalization.
  def self.port_mapping
    # Hands out the PORT_MAPPING constant itself, not a copy.
    return PORT_MAPPING
  end
  1144. ##
  1145. # The port component for this URI.
  1146. # This is the port number actually given in the URI. This does not
  1147. # infer port numbers from default values.
  1148. #
  1149. # @return [Integer] The port component.
  def port
    # Guarded read: answers nil when @port has never been assigned.
    if instance_variable_defined?(:@port)
      @port
    else
      nil
    end
  end
  1153. ##
  1154. # The port component for this URI, normalized.
  1155. #
  1156. # @return [Integer] The port component, normalized.
  def normalized_port
    # A port equal to the mapped default for the normalized scheme is
    # reported as nil; any other port is returned unchanged.
    scheme_default = URI.port_mapping[self.normalized_scheme]
    scheme_default == self.port ? nil : self.port
  end
  1164. ##
  1165. # Sets the port component for this URI.
  1166. #
  1167. # @param [String, Integer, #to_s] new_port The new port component.
  def port=(new_port)
    # Assigns the port component. String-like values are percent-decoded
    # first; nil and 0 both clear the port.
    #
    # @param [String, Integer, #to_s] new_port The new port component.
    # @raise [InvalidURIError] if the value is not made of digits only.
    if new_port != nil && new_port.respond_to?(:to_str)
      new_port = Addressable::URI.unencode_component(new_port.to_str)
    end
    if new_port != nil && !(new_port.to_s =~ /^\d+$/)
      raise InvalidURIError,
        "Invalid port number: #{new_port.inspect}"
    end
    @port = new_port.to_s.to_i
    # nil.to_s.to_i is 0 as well, so this also covers a nil argument.
    @port = nil if @port == 0
    # Reset dependent values. The normalized authority and both site caches
    # embed the port, so they must be dropped as well.
    @authority = nil
    @normalized_authority = nil
    @normalized_port = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1186. ##
  1187. # The inferred port component for this URI.
  1188. # This method will normalize to the default port for the URI's scheme if
  1189. # the port isn't explicitly specified in the URI.
  1190. #
  1191. # @return [Integer] The inferred port component.
  def inferred_port
    # An absent port converts to 0 and falls back to the scheme's default.
    explicit = self.port.to_i
    explicit == 0 ? self.default_port : explicit
  end
  1199. ##
  1200. # The default port for this URI's scheme.
  1201. # This method will always return the default port for the URI's scheme
  1202. # regardless of the presence of an explicit port in the URI.
  1203. #
  1204. # @return [Integer] The default port.
  def default_port
    # No scheme means no default port.
    current_scheme = self.scheme
    return nil unless current_scheme
    # The mapping is looked up with the stripped, lower-cased scheme.
    URI.port_mapping[current_scheme.strip.downcase]
  end
  1208. ##
  1209. # The combination of components that represent a site.
  1210. # Combines the scheme, user, password, host, and port components.
  1211. # Primarily useful for HTTP and HTTPS.
  1212. #
  1213. # For example, <code>"http://example.com/path?query"</code> would have a
  1214. # <code>site</code> value of <code>"http://example.com"</code>.
  1215. #
  1216. # @return [String] The components that identify a site.
  def site
    # A site needs at least a scheme or an authority.
    anchor = self.scheme || self.authority
    return anchor unless anchor
    @site ||= begin
      assembled = ""
      assembled << "#{self.scheme}:" unless self.scheme.nil?
      assembled << "//#{self.authority}" unless self.authority.nil?
      assembled
    end
  end
  1225. ##
  1226. # The normalized combination of components that represent a site.
  1227. # Combines the scheme, user, password, host, and port components.
  1228. # Primarily useful for HTTP and HTTPS.
  1229. #
  1230. # For example, <code>"http://example.com/path?query"</code> would have a
  1231. # <code>site</code> value of <code>"http://example.com"</code>.
  1232. #
  1233. # @return [String] The normalized components that identify a site.
  def normalized_site
    # Nothing to normalize without a site.
    raw_site = self.site
    return raw_site unless raw_site
    @normalized_site ||= begin
      assembled = ""
      unless self.normalized_scheme.nil?
        assembled << "#{self.normalized_scheme}:"
      end
      unless self.normalized_authority.nil?
        assembled << "//#{self.normalized_authority}"
      end
      assembled
    end
  end
  1246. ##
  1247. # Sets the site value for this URI.
  1248. #
  1249. # @param [String, #to_str] new_site The new site value.
  def site=(new_site)
    # Both parts stay nil when no site is given, which clears them below.
    parsed_scheme = nil
    parsed_authority = nil
    if new_site
      unless new_site.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_site.class} into String."
      end
      site_text = new_site.to_str
      # These two regular expressions derived from the primary parsing
      # expression
      parsed_scheme = site_text[/^(?:([^:\/?#]+):)?(?:\/\/(?:[^\/?#]*))?$/, 1]
      parsed_authority = site_text[
        /^(?:(?:[^:\/?#]+):)?(?:\/\/([^\/?#]*))?$/, 1
      ]
    end
    self.scheme = parsed_scheme
    self.authority = parsed_authority
  end
  1267. ##
  1268. # The path component for this URI.
  1269. #
  1270. # @return [String] The path component.
  def path
    # Unlike the other components, an unassigned path reads as the empty
    # string rather than nil.
    if instance_variable_defined?(:@path)
      @path
    else
      EMPTY_STR
    end
  end
  # Matches a path that does not start with a slash and whose first segment
  # contains a colon. normalized_path tests relative (scheme-less) paths
  # against it before rewriting the first colon.
  1274. NORMPATH = /^(?!\/)[^\/:]*:.*$/
  1275. ##
  1276. # The path component for this URI, normalized.
  1277. #
  1278. # @return [String] The path component, normalized.
  def normalized_path
    @normalized_path ||= begin
      raw_path = self.path.to_s
      # Relative paths with colons in the first segment are ambiguous.
      # NOTE(review): "%2F" is the percent-encoding of "/", not of ":" --
      # confirm this substitution is intentional.
      if self.scheme.nil? && raw_path =~ NORMPATH
        raw_path = raw_path.sub(":", "%2F")
      end
      # A limit of -1 makes String#split keep trailing empty segments.
      segments = raw_path.strip.split(SLASH, -1)
      encoded_segments = segments.map do |segment|
        Addressable::URI.normalize_component(
          segment,
          Addressable::URI::CharacterClasses::PCHAR
        )
      end
      collapsed = URI.normalize_path(encoded_segments.join(SLASH))
      # These schemes get "/" in place of an empty path.
      if collapsed.empty? &&
          ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
        SLASH
      else
        collapsed
      end
    end
  end
  1302. ##
  1303. # Sets the path component for this URI.
  1304. #
  1305. # @param [String, #to_str] new_path The new path component.
  def path=(new_path)
    unless !new_path || new_path.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_path.class} into String."
    end
    candidate = (new_path || EMPTY_STR).to_str
    # With a host present, a non-empty path has to start with a slash.
    needs_root = !candidate.empty? && candidate[0..0] != SLASH && host != nil
    @path = needs_root ? "/#{candidate}" : candidate
    # Reset dependent values
    @normalized_path = nil
    @uri_string = nil
    @hash = nil
  end
  1319. ##
  1320. # The basename, if any, of the file in the path component.
  1321. #
  1322. # @return [String] The path's basename.
  def basename
    # Path cannot be nil
    leaf = File.basename(self.path)
    # Drop a trailing ";..." suffix from the last segment.
    leaf.gsub(/;[^\/]*$/, EMPTY_STR)
  end
  1327. ##
  1328. # The extname, if any, of the file in the path component.
  1329. # Empty string if there is no extension.
  1330. #
  1331. # @return [String] The path's extname.
  def extname
    # The extension is taken from the basename; nil only when there is no
    # path at all.
    self.path ? File.extname(self.basename) : nil
  end
  1336. ##
  1337. # The query component for this URI.
  1338. #
  1339. # @return [String] The query component.
  def query
    # Guarded read: answers nil when @query has never been assigned.
    if instance_variable_defined?(:@query)
      @query
    else
      nil
    end
  end
  1343. ##
  1344. # The query component for this URI, normalized.
  1345. #
  1346. # @return [String] The query component, normalized.
  def normalized_query
    # Nothing to normalize without a query.
    raw_query = self.query
    return raw_query unless raw_query
    @normalized_query ||= begin
      # Make sure possible key-value pair delimiters are escaped: "&" and
      # ";" are removed from the class of characters left as they are.
      pair_class = Addressable::URI::CharacterClasses::QUERY.
        sub("\\&", "").sub("\\;", "")
      # A limit of -1 keeps trailing empty pairs.
      normalized_pairs = raw_query.split("&", -1).map do |pair|
        Addressable::URI.normalize_component(pair, pair_class, '+')
      end
      normalized_pairs.join("&")
    end
  end
  1362. ##
  1363. # Sets the query component for this URI.
  1364. #
  1365. # @param [String, #to_str] new_query The new query component.
  def query=(new_query)
    unless !new_query || new_query.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_query.class} into String."
    end
    # A falsy argument clears the query.
    @query = (new_query.to_str if new_query)
    # Reset dependent values
    @normalized_query = nil
    @uri_string = nil
    @hash = nil
  end
  1376. ##
  1377. # Converts the query component to a Hash value.
  1378. #
  1379. # @param [Class] return_type The return type desired. Value must be either
  1380. # `Hash` or `Array`.
  1381. #
  1382. # @return [Hash, Array] The query string parsed as a Hash or Array object.
  1383. #
  1384. # @example
  1385. # Addressable::URI.parse("?one=1&two=2&three=3").query_values
  1386. # #=> {"one" => "1", "two" => "2", "three" => "3"}
  1387. # Addressable::URI.parse("?one=two&one=three").query_values(Array)
  1388. # #=> [["one", "two"], ["one", "three"]]
  1389. # Addressable::URI.parse("?one=two&one=three").query_values(Hash)
  1390. # #=> {"one" => "three"}
  def query_values(return_type=Hash)
    unless return_type == Hash || return_type == Array
      raise ArgumentError, "Invalid return type. Must be Hash or Array."
    end
    return nil if self.query.nil?
    # Empty pairs ("a&&b") are skipped; every other pair is split on its
    # first "=" only, so a pair without "=" becomes a one-element array.
    raw_pairs = self.query.split("&").reject { |pair| pair.empty? }
    split_pairs = raw_pairs.map { |pair| pair.split("=", 2) }
    collected = Array == return_type ? [] : {}
    split_pairs.each do |pair|
      # Pairs are modified in place so that the Array form keeps the exact
      # pair structure.
      pair[0] = URI.unencode_component(pair[0])
      if pair[1].respond_to?(:to_str)
        # In values, "+" stands for a space and is translated before
        # decoding.
        pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
      end
      if return_type == Hash
        # Later occurrences of a key overwrite earlier ones.
        collected[pair[0]] = pair[1]
      else
        collected << pair
      end
    end
    collected
  end
  1420. ##
  1421. # Sets the query component for this URI from a Hash object.
  1422. # An empty Hash or Array will result in an empty query string.
  1423. #
  1424. # @param [Hash, #to_hash, Array] new_query_values The new query values.
  1425. #
  1426. # @example
  1427. # uri.query_values = {:a => "a", :b => ["c", "d", "e"]}
  1428. # uri.query
  1429. # # => "a=a&b=c&b=d&b=e"
  1430. # uri.query_values = [['a', 'a'], ['b', 'c'], ['b', 'd'], ['b', 'e']]
  1431. # uri.query
  1432. # # => "a=a&b=c&b=d&b=e"
  1433. # uri.query_values = [['a', 'a'], ['b', ['c', 'd', 'e']]]
  1434. # uri.query
  1435. # # => "a=a&b=c&b=d&b=e"
  1436. # uri.query_values = [['flag'], ['key', 'value']]
  1437. # uri.query
  1438. # # => "flag&key=value"
  def query_values=(new_query_values)
    if new_query_values.nil?
      self.query = nil
      return nil
    end
    # Normalize the input to the form
    # [['key1', 'value1'], ['key2', 'value2']].
    pairs =
      if new_query_values.is_a?(Array)
        # Arrays are taken as given, so their order is preserved.
        new_query_values
      else
        unless new_query_values.respond_to?(:to_hash)
          raise TypeError,
            "Can't convert #{new_query_values.class} into Hash."
        end
        stringified = new_query_values.to_hash.map do |key, value|
          [key.kind_of?(Symbol) ? key.to_s : key, value]
        end
        # Useful default for OAuth and caching.
        # Only to be used for non-Array inputs. Arrays should preserve order.
        stringified.sort
      end
    fragments = pairs.flat_map do |key, value|
      encoded_key = URI.encode_component(
        key, CharacterClasses::UNRESERVED
      )
      if value == nil
        # A key without a value is emitted bare, with no "=".
        ["#{encoded_key}"]
      elsif value.kind_of?(Array)
        # An Array value repeats the key once per element.
        value.map do |sub_value|
          encoded_value = URI.encode_component(
            sub_value, CharacterClasses::UNRESERVED
          )
          "#{encoded_key}=#{encoded_value}"
        end
      else
        encoded_value = URI.encode_component(
          value, CharacterClasses::UNRESERVED
        )
        ["#{encoded_key}=#{encoded_value}"]
      end
    end
    self.query = fragments.join("&")
  end
  1482. ##
  1483. # The HTTP request URI for this URI. This is the path and the
  1484. # query string.
  1485. #
  1486. # @return [String] The request URI required for an HTTP request.
  def request_uri
    # Absolute URIs whose scheme is not http/https have no request URI.
    return nil if self.absolute? && self.scheme !~ /^https?$/
    # An empty path is sent as "/".
    resource = self.path.empty? ? SLASH : self.path
    query_suffix = self.query ? "?#{self.query}" : EMPTY_STR
    resource + query_suffix
  end
  1494. ##
  1495. # Sets the HTTP request URI for this URI.
  1496. #
  1497. # @param [String, #to_str] new_request_uri The new HTTP request URI.
  def request_uri=(new_request_uri)
    # Splits an HTTP request URI into path and query and assigns both.
    #
    # @param [String, #to_str] new_request_uri The new HTTP request URI.
    # @raise [TypeError] if the value does not respond to #to_str.
    # @raise [InvalidURIError]
    #   if this URI is absolute and its scheme is not http or https.
    if !new_request_uri.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_request_uri.class} into String."
    end
    if self.absolute? && self.scheme !~ /^https?$/
      raise InvalidURIError,
        "Cannot set an HTTP request URI for a non-HTTP URI."
    end
    new_request_uri = new_request_uri.to_str
    # The "?" is optional: a request URI without a query still has a path.
    # Requiring the "?" would discard the path of "/path" and replace it
    # with "/".
    path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1].to_s
    query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
    # An empty path is stored as "/".
    path_component = SLASH if path_component.empty?
    self.path = path_component
    self.query = query_component
    # Reset dependent values
    @uri_string = nil
    @hash = nil
  end
  1517. ##
  1518. # The fragment component for this URI.
  1519. #
  1520. # @return [String] The fragment component.
  def fragment
    # Guarded read: answers nil when @fragment has never been assigned.
    if instance_variable_defined?(:@fragment)
      @fragment
    else
      nil
    end
  end
  1524. ##
  1525. # The fragment component for this URI, normalized.
  1526. #
  1527. # @return [String] The fragment component, normalized.
  def normalized_fragment
    # Nothing to normalize without a fragment.
    raw_fragment = self.fragment
    return raw_fragment unless raw_fragment
    # Memoized; fragment= clears @normalized_fragment again.
    @normalized_fragment ||= Addressable::URI.normalize_component(
      raw_fragment.strip,
      Addressable::URI::CharacterClasses::FRAGMENT
    )
  end
  1536. ##
  1537. # Sets the fragment component for this URI.
  1538. #
  1539. # @param [String, #to_str] new_fragment The new fragment component.
  def fragment=(new_fragment)
    unless !new_fragment || new_fragment.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_fragment.class} into String."
    end
    # A falsy argument clears the fragment.
    @fragment = (new_fragment.to_str if new_fragment)
    # Reset dependent values
    @normalized_fragment = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1552. ##
  1553. # Determines if the scheme indicates an IP-based protocol.
  1554. #
  1555. # @return [TrueClass, FalseClass]
  1556. # <code>true</code> if the scheme indicates an IP-based protocol.
  1557. # <code>false</code> otherwise.
  def ip_based?
    # A URI without a scheme is never IP-based.
    current_scheme = self.scheme
    return false unless current_scheme
    # The list is checked with the stripped, lower-cased scheme.
    URI.ip_based_schemes.include?(current_scheme.strip.downcase)
  end
  1565. ##
  1566. # Determines if the URI is relative.
  1567. #
  1568. # @return [TrueClass, FalseClass]
  1569. # <code>true</code> if the URI is relative. <code>false</code>
  1570. # otherwise.
  def relative?
    # Relative means exactly: no scheme component.
    current_scheme = self.scheme
    current_scheme.nil?
  end
  1574. ##
  1575. # Determines if the URI is absolute.
  1576. #
  1577. # @return [TrueClass, FalseClass]
  1578. # <code>true</code> if the URI is absolute. <code>false</code>
  1579. # otherwise.
  def absolute?
    # Absolute is defined as the negation of relative?.
    relative? ? false : true
  end
  1583. ##
  1584. # Joins two URIs together.
  1585. #
  1586. # @param [String, Addressable::URI, #to_str] uri The URI to join with.
  1587. #
  1588. # @return [Addressable::URI] The joined URI.
  def join(uri)
    unless uri.respond_to?(:to_str)
      raise TypeError, "Can't convert #{uri.class} into String."
    end
    # Anything that is not already a URI is converted to a String and parsed.
    uri = URI.parse(uri.to_str) unless uri.kind_of?(URI)
    # Joining with an empty reference gives back a copy of this URI.
    return self.dup if uri.to_s.empty?

    # Section 5.2.2 of RFC 3986
    if uri.scheme != nil
      # The reference has its own scheme: everything comes from it.
      joined_scheme = uri.scheme
      authority_source = uri
      joined_path = URI.normalize_path(uri.path)
      joined_query = uri.query
    elsif uri.authority != nil
      # The reference has an authority: only the scheme is inherited.
      joined_scheme = self.scheme
      authority_source = uri
      joined_path = URI.normalize_path(uri.path)
      joined_query = uri.query
    else
      # Scheme and authority are both inherited from this URI.
      joined_scheme = self.scheme
      authority_source = self
      if uri.path == nil || uri.path.empty?
        # No path in the reference: keep this path, and this query too
        # unless the reference brings its own.
        joined_path = self.path
        joined_query = uri.query != nil ? uri.query : self.query
      else
        if uri.path[0..0] == SLASH
          joined_path = URI.normalize_path(uri.path)
        else
          base_path = self.path.dup
          base_path = EMPTY_STR if base_path == nil
          base_path = URI.normalize_path(base_path)
          # Section 5.2.3 of RFC 3986
          #
          # Removes the right-most path segment from the base path.
          if base_path =~ /\//
            base_path.gsub!(/\/[^\/]+$/, SLASH)
          else
            base_path = EMPTY_STR
          end
          # If the base path is empty and an authority segment has been
          # defined, use a base path of SLASH
          base_path = SLASH if base_path.empty? && self.authority != nil
          joined_path = URI.normalize_path(base_path + uri.path)
        end
        joined_query = uri.query
      end
    end
    # The fragment always comes from the reference.
    self.class.new(
      :scheme => joined_scheme,
      :user => authority_source.user,
      :password => authority_source.password,
      :host => authority_source.host,
      :port => authority_source.port,
      :path => joined_path,
      :query => joined_query,
      :fragment => uri.fragment
    )
  end
  1676. alias_method :+, :join
  1677. ##
  1678. # Destructive form of <code>join</code>.
  1679. #
  1680. # @param [String, Addressable::URI, #to_str] uri The URI to join with.
  1681. #
  1682. # @return [Addressable::URI] The joined URI.
  1683. #
  1684. # @see Addressable::URI#join
  def join!(uri)
    # Compute the joined URI first, then replace this URI with it.
    joined = self.join(uri)
    replace_self(joined)
  end
  1688. ##
  1689. # Merges a URI with a <code>Hash</code> of components.
  1690. # This method has different behavior from <code>join</code>. Any
  1691. # components present in the <code>hash</code> parameter will override the
  1692. # original components. The path component is not treated specially.
  1693. #
  1694. # @param [Hash, Addressable::URI, #to_hash] hash The components to merge with.
  1695. #
  1696. # @return [Addressable::URI] The merged URI.
  1697. #
  1698. # @see Hash#merge
  def merge(hash)
    unless hash.respond_to?(:to_hash)
      raise TypeError, "Can't convert #{hash.class} into Hash."
    end
    hash = hash.to_hash
    has_authority = hash.has_key?(:authority)
    has_userinfo = hash.has_key?(:userinfo)
    # A composite component may not be combined with any of its own parts.
    if has_authority &&
        (hash.keys & [:userinfo, :user, :password, :host, :port]).any?
      raise ArgumentError,
        "Cannot specify both an authority and any of the components " +
        "within the authority."
    end
    if has_userinfo && (hash.keys & [:user, :password]).any?
      raise ArgumentError,
        "Cannot specify both a userinfo and either the user or password."
    end
    # Takes the override from the hash when the key is present (even when
    # its value is nil); otherwise falls back to this URI's own component.
    pick = lambda do |component|
      hash.has_key?(component) ? hash[component] : self.send(component)
    end
    merged = self.class.new
    merged.defer_validation do
      merged.scheme = pick.call(:scheme)
      # Composite components are assigned only when explicitly given; their
      # parts are copied individually otherwise.
      merged.authority = hash[:authority] if has_authority
      merged.userinfo = hash[:userinfo] if has_userinfo
      unless has_userinfo || has_authority
        merged.user = pick.call(:user)
        merged.password = pick.call(:password)
      end
      unless has_authority
        merged.host = pick.call(:host)
        merged.port = pick.call(:port)
      end
      merged.path = pick.call(:path)
      merged.query = pick.call(:query)
      merged.fragment = pick.call(:fragment)
    end
    return merged
  end
  1752. ##
  1753. # Destructive form of <code>merge</code>.
  1754. #
  1755. # @param [Hash, Addressable::URI, #to_hash] uri The components to merge with.
  1756. #
  1757. # @return [Addressable::URI] The merged URI.
  1758. #
  1759. # @see Addressable::URI#merge
  def merge!(uri)
    # Compute the merged URI first, then replace this URI with it.
    merged = self.merge(uri)
    replace_self(merged)
  end
  1763. ##
  1764. # Returns the shortest normalized relative form of this URI that uses the
  1765. # supplied URI as a base for resolution. Returns an absolute URI if
  1766. # necessary. This is effectively the opposite of <code>route_to</code>.
  1767. #
  1768. # @param [String, Addressable::URI, #to_str] uri The URI to route from.
  1769. #
  1770. # @return [Addressable::URI]
  1771. # The normalized relative URI that is equivalent to the original URI.
  def route_from(uri)
    # Both sides are compared in normalized form; the supplied URI acts as
    # the base that the result is relative to.
    base = URI.parse(uri).normalize
    target = self.normalize
    if target.relative?
      raise ArgumentError, "Expected absolute URI, got: #{self.to_s}"
    end
    if base.relative?
      raise ArgumentError, "Expected absolute URI, got: #{base.to_s}"
    end
    # Identical URIs reduce to a fragment-only reference.
    return Addressable::URI.parse("##{target.fragment}") if target == base

    parts = target.to_hash
    if target.scheme == base.scheme
      parts[:scheme] = nil
      if target.authority == base.authority
        # Same authority: drop every authority component.
        [:user, :password, :host, :port].each { |key| parts[key] = nil }
        if target.path == base.path
          parts[:path] = nil
          parts[:query] = nil if target.query == base.query
        elsif base.path != SLASH && parts[:path]
          target_segments = split_path(parts[:path])
          base_segments = split_path(base.path)
          target_dir = target_segments.shift
          base_dir = base_segments.shift
          # Strip the common leading directories from both paths.
          until target_segments.empty? || base_segments.empty? ||
              !(target_dir == base_dir)
            target_dir = target_segments.shift
            base_dir = base_segments.shift
          end
          # One '..' for every segment still left in the base path, then the
          # first differing segment and the rest of the target path.
          climb = Array.new(base_segments.size, '..')
          parts[:path] = (climb + [target_dir] + target_segments).join(SLASH)
        end
      end
    end
    # Avoid network-path references.
    parts[:scheme] = target.scheme unless parts[:host] == nil
    Addressable::URI.new(
      :scheme => parts[:scheme],
      :user => parts[:user],
      :password => parts[:password],
      :host => parts[:host],
      :port => parts[:port],
      :path => parts[:path],
      :query => parts[:query],
      :fragment => parts[:fragment]
    )
  end
  1827. ##
  1828. # Returns the shortest normalized relative form of the supplied URI that
  1829. # uses this URI as a base for resolution. Returns an absolute URI if
  1830. # necessary. This is effectively the opposite of <code>route_from</code>.
  1831. #
  1832. # @param [String, Addressable::URI, #to_str] uri The URI to route to.
  1833. #
  1834. # @return [Addressable::URI]
  1835. # The normalized relative URI that is equivalent to the supplied URI.
  def route_to(uri)
    # Routing *to* a URI is routing *from* this one, seen from the other
    # side: parse the destination and use the receiver as its base.
    destination = URI.parse(uri)
    destination.route_from(self)
  end
  1839. ##
  1840. # Returns a normalized URI object.
  1841. #
  1842. # NOTE: This method does not attempt to fully conform to specifications.
  1843. # It exists largely to correct other people's failures to read the
  1844. # specifications, and also to deal with caching issues since several
  1845. # different URIs may represent the same resource and should not be
  1846. # cached multiple times.
  1847. #
  1848. # @return [Addressable::URI] The normalized URI.
  def normalize
    # Special case for the frequently misused feed URI scheme: when the
    # feed: prefix wraps an http URI, normalize the wrapped URI instead.
    if normalized_scheme == "feed" && self.to_s =~ /^feed:\/*http:\/*/
      wrapped = self.to_s[/^feed:\/*(http:\/*.*)/, 1]
      return URI.parse(wrapped).normalize
    end
    # Otherwise assemble a new URI from the normalized components.
    self.class.new(
      :scheme => normalized_scheme,
      :authority => normalized_authority,
      :path => normalized_path,
      :query => normalized_query,
      :fragment => normalized_fragment
    )
  end
  1867. ##
  1868. # Destructively normalizes this URI object.
  1869. #
  1870. # @return [Addressable::URI] The normalized URI.
  1871. #
  1872. # @see Addressable::URI#normalize
  def normalize!
    # Compute the normalized copy, then take over its state in place.
    normalized = normalize
    replace_self(normalized)
  end
  1876. ##
  1877. # Creates a URI suitable for display to users. If semantic attacks are
  1878. # likely, the application should try to detect these and warn the user.
  1879. # See <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
  1880. # section 7.6 for more information.
  1881. #
  1882. # @return [Addressable::URI] A URI suitable for display purposes.
  def display_uri
    # Start from the normalized form, then swap the host for the Unicode
    # rendering produced by the IDNA helper.
    displayed = self.normalize
    unicode_host = ::Addressable::IDNA.to_unicode(displayed.host)
    displayed.host = unicode_host
    displayed
  end
  1888. ##
  1889. # Returns <code>true</code> if the URI objects are equal. This method
  1890. # normalizes both URIs before doing the comparison, and allows comparison
  1891. # against <code>Strings</code>.
  1892. #
  1893. # @param [Object] uri The URI to compare.
  1894. #
  1895. # @return [TrueClass, FalseClass]
  1896. # <code>true</code> if the URIs are equivalent, <code>false</code>
  1897. # otherwise.
  def ===(uri)
    # Anything that can normalize itself is compared directly; everything
    # else is parsed first, and input that cannot be parsed never matches.
    other_string =
      if uri.respond_to?(:normalize)
        uri.normalize.to_s
      else
        begin
          ::Addressable::URI.parse(uri).normalize.to_s
        rescue InvalidURIError, TypeError
          return false
        end
      end
    self.normalize.to_s == other_string
  end
  1910. ##
  1911. # Returns <code>true</code> if the URI objects are equal. This method
  1912. # normalizes both URIs before doing the comparison.
  1913. #
  1914. # @param [Object] uri The URI to compare.
  1915. #
  1916. # @return [TrueClass, FalseClass]
  1917. # <code>true</code> if the URIs are equivalent, <code>false</code>
  1918. # otherwise.
  def ==(uri)
    # Only URI instances can be equal; both sides are compared in their
    # normalized string form.
    uri.kind_of?(URI) && self.normalize.to_s == uri.normalize.to_s
  end
  1923. ##
  1924. # Returns <code>true</code> if the URI objects are equal. This method
  1925. # does NOT normalize either URI before doing the comparison.
  1926. #
  1927. # @param [Object] uri The URI to compare.
  1928. #
  1929. # @return [TrueClass, FalseClass]
  1930. # <code>true</code> if the URIs are equivalent, <code>false</code>
  1931. # otherwise.
  def eql?(uri)
    # Strict comparison: the argument must be a URI and the string forms
    # must match exactly, with no normalization applied to either side.
    uri.kind_of?(URI) && self.to_s == uri.to_s
  end
  1936. ##
  1937. # A hash value derived from the URI's string form. The URI is not
  1938. # normalized first, which keeps this consistent with <code>eql?</code>.
  1939. #
  1940. # @return [Integer] A hash of the URI.
  def hash
    # Memoized: the negated hash of the (un-normalized) string form.
    @hash ||= (to_s.hash * -1)
  end
  1944. ##
  1945. # Clones the URI object.
  1946. #
  1947. # @return [Addressable::URI] The cloned URI.
  def dup
    # Duplicate each present component so the clone does not share those
    # objects with the receiver; nil stays nil and the port is passed as-is.
    copy = lambda { |value| value ? value.dup : nil }
    self.class.new(
      :scheme => copy.call(self.scheme),
      :user => copy.call(self.user),
      :password => copy.call(self.password),
      :host => copy.call(self.host),
      :port => self.port,
      :path => copy.call(self.path),
      :query => copy.call(self.query),
      :fragment => copy.call(self.fragment)
    )
  end
  1961. ##
  1962. # Omits components from a URI.
  1963. #
  1964. # @param [Symbol] *components The components to be omitted.
  1965. #
  1966. # @return [Addressable::URI] The URI with components omitted.
  1967. #
  1968. # @example
  1969. # uri = Addressable::URI.parse("http://example.com/path?query")
  1970. # #=> #<Addressable::URI:0xcc5e7a URI:http://example.com/path?query>
  1971. # uri.omit(:scheme, :authority)
  1972. # #=> #<Addressable::URI:0xcc4d86 URI:/path?query>
  def omit(*components)
    # Reject anything that is not a known component name before touching
    # the URI.
    known = [
      :scheme, :user, :password, :userinfo, :host, :port, :authority,
      :path, :query, :fragment
    ]
    unknown = components - known
    unless unknown.empty?
      raise ArgumentError, "Invalid component names: #{unknown.inspect}."
    end
    trimmed = self.dup
    # Validation is deferred so intermediate states (for example a port
    # without a host) do not raise while components are being cleared.
    trimmed.defer_validation do
      components.each { |name| trimmed.send(:"#{name}=", nil) }
      trimmed.user = trimmed.normalized_user
    end
    trimmed
  end
  1991. ##
  1992. # Destructive form of omit.
  1993. #
  1994. # @param [Symbol] *components The components to be omitted.
  1995. #
  1996. # @return [Addressable::URI] The URI with components omitted.
  1997. #
  1998. # @see Addressable::URI#omit
  def omit!(*components)
    # Compute the trimmed copy, then take over its state in place.
    trimmed = omit(*components)
    replace_self(trimmed)
  end
  2002. ##
  2003. # Determines if the URI is an empty string.
  2004. #
  2005. # @return [TrueClass, FalseClass]
  2006. # Returns <code>true</code> if empty, <code>false</code> otherwise.
  def empty?
    # A URI is empty exactly when its string form is the empty string.
    to_s.empty?
  end
  2010. ##
  2011. # Converts the URI to a <code>String</code>.
  2012. #
  2013. # @return [String] The URI's <code>String</code> representation.
  2014. def to_s
  2015. if self.scheme == nil && self.path != nil && !self.path.empty? &&
  2016. self.path =~ NORMPATH
  2017. raise InvalidURIError,
  2018. "Cannot assemble URI string with ambiguous path: '#{self.path}'"
  2019. end
  2020. @uri_string ||= (begin
  2021. uri_string = ""
  2022. uri_string << "#{self.scheme}:" if self.scheme != nil
  2023. uri_string << "//#{self.authority}" if self.authority != nil
  2024. uri_string << self.path.to_s
  2025. uri_string << "?#{self.query}" if self.query != nil
  2026. uri_string << "##{self.fragment}" if self.fragment != nil
  2027. if uri_string.respond_to?(:force_encoding)
  2028. uri_string.force_encoding(Encoding::UTF_8)
  2029. end
  2030. uri_string
  2031. end)
  2032. end
  2033. ##
  2034. # URIs are glorified <code>Strings</code>; aliasing to_str allows implicit conversion.
  2035. alias_method :to_str, :to_s
  2036. ##
  2037. # Returns a Hash of the URI components.
  2038. #
  2039. # @return [Hash] The URI as a <code>Hash</code> of components.
  def to_hash
    # Snapshot of the eight basic components; the composite userinfo and
    # authority values are not included.
    components = {}
    components[:scheme] = self.scheme
    components[:user] = self.user
    components[:password] = self.password
    components[:host] = self.host
    components[:port] = self.port
    components[:path] = self.path
    components[:query] = self.query
    components[:fragment] = self.fragment
    components
  end
  2052. ##
  2053. # Returns a <code>String</code> representation of the URI object's state.
  2054. #
  2055. # @return [String] The URI object's state, as a <code>String</code>.
  def inspect
    # Class name, object id in hex, then the URI's string form.
    "#<%s:%#0x URI:%s>" % [URI.to_s, self.object_id, self.to_s]
  end
  2059. ##
  2060. # This method allows you to make several changes to a URI simultaneously,
  2061. # which separately would cause validation errors, but in conjunction,
  2062. # are valid. The URI will be revalidated as soon as the entire block has
  2063. # been executed.
  2064. #
  2065. # @param [Proc] block
  2066. # A set of operations to perform on a given URI.
  def defer_validation(&block)
    # Runs the block with validation switched off, then validates once.
    #
    # @param [Proc] block A set of operations to perform on this URI.
    #
    # @return [NilClass] Always <code>nil</code>.
    #
    # @raise [LocalJumpError] If no block is supplied.
    raise LocalJumpError, "No block given." unless block
    @validation_deferred = true
    begin
      block.call()
    ensure
      # Always clear the flag, even when the block raises. Otherwise the
      # URI would stay in deferred mode and every later call to validate
      # would silently return without checking anything.
      @validation_deferred = false
    end
    # Only reached when the block completed normally.
    validate
    return nil
  end
  # Instance methods defined below this point are private. Note that a bare
  # `private` affects neither constants nor `def self.` methods.
  private

  # Path segments with special meaning during dot-segment removal. Frozen
  # so the shared constants cannot be mutated in place.
  SELF_REF = '.'.freeze
  PARENT = '..'.freeze

  # "/./" anywhere in the path, or "/." at the end of the path.
  RULE_2A = /\/\.\/|\/\.$/
  # "/<segment>/../" anywhere, or "/<segment>/.." at the end; the segment is
  # captured in group 1 or group 2 respectively.
  RULE_2B_2C = /\/([^\/]*)\/\.\.\/|\/([^\/]*)\/\.\.$/
  # A leading "." or ".." with an optional trailing slash.
  RULE_2D = /^\.\.?\/?/
  # Leading "/./" or "/../", or a path made up only of "/." and "/.." segments.
  RULE_PREFIXED_PARENT = /^\/\.\.?\/|^(\/\.\.?)+\/?$/
  2082. ##
  2083. # Resolves paths to their simplest form.
  2084. #
  2085. # @param [String] path The path to normalize.
  2086. #
  2087. # @return [String] The normalized path.
  def self.normalize_path(path)
    # Dot-segment removal, per Section 5.2.4 of RFC 3986. Works on a copy,
    # so the argument is never modified.
    return nil if path.nil?
    result = path.dup
    loop do
      # At most one rewrite rule is applied per pass; `changed` stays nil
      # only when no rule matched, which ends the loop.
      changed = result.gsub!(RULE_2A, SLASH)
      found = result.match(RULE_2B_2C)
      if found
        parent, current = found[1], found[2]
        if (parent != SELF_REF && parent != PARENT) ||
            (current != SELF_REF && current != PARENT)
          # Collapse "/<segment>/.." for the specific segment just matched.
          collapse = Regexp.new(
            "/#{Regexp.escape(parent.to_s)}/\\.\\./|" +
            "(/#{Regexp.escape(current.to_s)}/\\.\\.$)"
          )
          changed ||= result.gsub!(collapse, SLASH)
        end
      end
      changed ||= result.gsub!(RULE_2D, EMPTY_STR)
      # Non-standard, removes prefixed dotted segments from path.
      changed ||= result.gsub!(RULE_PREFIXED_PARENT, SLASH)
      break if changed.nil?
    end
    result
  end
  2112. ##
  2113. # Ensures that the URI is valid.
  def validate
    # Skipped entirely while a defer_validation block is running.
    return if @validation_deferred

    host_missing = self.host.nil? || self.host.empty?
    path_missing = self.path.nil? || self.path.empty?

    # An ip-based absolute URI needs a host or a path.
    if !self.scheme.nil? && self.ip_based? && host_missing && path_missing
      raise InvalidURIError,
        "Absolute URI missing hierarchical segment: '#{self.to_s}'"
    end

    # Port, user and password are meaningless without a host.
    if self.host.nil? &&
        !(self.port.nil? && self.user.nil? && self.password.nil?)
      raise InvalidURIError, "Hostname not supplied: '#{self.to_s}'"
    end

    # With an authority present, a non-empty path must start with a slash.
    if !path_missing && self.path[0..0] != SLASH && !self.authority.nil?
      raise InvalidURIError,
        "Cannot have a relative path with an authority set: '#{self.to_s}'"
    end
    nil
  end
  2136. ##
  2137. # Replaces the internal state of self with the specified URI's state.
  2138. # Used in destructive operations to avoid massive code repetition.
  2139. #
  2140. # @param [Addressable::URI] uri The URI to replace <code>self</code> with.
  2141. #
  2142. # @return [Addressable::URI] <code>self</code>.
  def replace_self(uri)
    # Clear every instance variable first, so cached or derived values
    # cannot outlive the state they were computed from.
    instance_variables.each { |name| instance_variable_set(name, nil) }
    # Then copy the eight basic components across from the given URI.
    @scheme, @user, @password, @host =
      uri.scheme, uri.user, uri.password, uri.host
    @port, @path, @query, @fragment =
      uri.port, uri.path, uri.query, uri.fragment
    self
  end
  2158. ##
  2159. # Splits path string with "/"(slash).
  2160. # It is considered that there is empty string after last slash when
  2161. # path ends with slash.
  2162. #
  2163. # @param [String] path The path to split.
  2164. #
  2165. # @return [Array<String>] An array of parts of path.
  def split_path(path)
    # String#split drops trailing empty fields, so a path ending in a slash
    # gets one empty segment appended to stand for what follows that slash.
    segments = path.split(SLASH)
    path.end_with?(SLASH) ? segments.push(EMPTY_STR) : segments
  end
  2171. end
  2172. end