PageRenderTime 52ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/addressable/uri.rb

https://github.com/bolshakov/addressable
Ruby | 2337 lines | 1662 code | 121 blank | 554 comment | 213 complexity | dd74655e5bda72abe8dc95a7d76a10b4 MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. # encoding:utf-8
  2. #--
  3. # Copyright (C) 2006-2013 Bob Aman
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #++
  17. require "addressable/version"
  18. require "addressable/idna"
  19. ##
  20. # Addressable is a library for processing links and URIs.
  21. module Addressable
  22. ##
  23. # This is an implementation of a URI parser based on
  24. # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
  25. # <a href="http://www.ietf.org/rfc/rfc3987.txt">RFC 3987</a>.
  26. class URI
  27. ##
  28. # Raised if something other than a uri is supplied.
  # Error type raised by this file when a URI component is rejected
  # (for example by the scheme setter's format check).
  class InvalidURIError < StandardError; end
  31. ##
  32. # Container for the character classes specified in
  33. # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  module CharacterClasses
    # Each constant is the *inside* of a regular expression character class
    # (no surrounding brackets); punctuation is backslash-escaped so the
    # strings can be interpolated into a Regexp as-is.
    ALPHA = "a-zA-Z"
    DIGIT = "0-9"
    GEN_DELIMS = "\\:\\/\\?\\#\\[\\]\\@"
    SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\="
    # Composite classes, assembled from the pieces above.
    RESERVED = "#{GEN_DELIMS}#{SUB_DELIMS}"
    UNRESERVED = "#{ALPHA}#{DIGIT}\\-\\.\\_\\~"
    PCHAR = "#{UNRESERVED}#{SUB_DELIMS}\\:\\@"
    SCHEME = "#{ALPHA}#{DIGIT}\\-\\+\\."
    # AUTHORITY is the very same String object as PCHAR, not a copy.
    AUTHORITY = PCHAR
    PATH = "#{PCHAR}\\/"
    QUERY = "#{PCHAR}\\/\\?"
    FRAGMENT = "#{PCHAR}\\/\\?"
  end
  # Small string constants referenced by the parsing helpers in this class.
  SLASH = '/'
  EMPTY_STR = ''

  # Splits a URI reference into its parts. Capture groups used by the parser:
  # 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
  URIREGEX = /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/

  # Port number for each known scheme name (presumably the scheme's default
  # port; the code that consumes this table is outside this excerpt).
  PORT_MAPPING = {
    "http" => 80, "https" => 443,
    "ftp" => 21, "tftp" => 69, "sftp" => 22,
    "ssh" => 22, "svn+ssh" => 22,
    "telnet" => 23,
    "nntp" => 119,
    "gopher" => 70,
    "wais" => 210,
    "ldap" => 389,
    "prospero" => 1525
  }
  66. ##
  67. # Returns a URI object based on the parsed string.
  68. #
  69. # @param [String, Addressable::URI, #to_str] uri
  70. # The URI string to parse.
  71. # No parsing is performed if the object is already an
  72. # <code>Addressable::URI</code>.
  73. #
  74. # @return [Addressable::URI] The parsed URI.
  def self.parse(uri)
    # nil (or false) in, nil out.
    return nil unless uri
    # An instance of this class is not re-parsed; the caller gets a copy.
    return uri.dup if uri.kind_of?(self)
    # Standard-library URI objects are recognised by class *name* so that the
    # ::URI constant is never referenced (it may not be loaded).
    uri = uri.to_s if uri.class.name =~ /^URI\b/
    # Everything else has to be implicitly convertible to a String.
    unless uri.is_a? String
      begin
        uri = uri.to_str
      rescue TypeError, NoMethodError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    # Only the first match of the splitting expression is used.
    captures = uri.scan(URIREGEX)[0]
    scheme, authority, path, query, fragment =
      captures.values_at(1, 3, 4, 6, 8)
    user = password = host = port = nil
    unless authority.nil?
      # The splitting expression leaves the authority in one piece, so
      # userinfo, host and port are carved out here.
      userinfo_pattern = /^([^\[\]]*)@/
      port_pattern = /:([^:@\[\]]*?)$/
      userinfo = authority[userinfo_pattern, 1]
      unless userinfo.nil?
        trimmed = userinfo.strip
        user = trimmed[/^([^:]*):?/, 1]
        password = trimmed[/:(.*)$/, 1]
      end
      host = authority.gsub(userinfo_pattern, EMPTY_STR).
        gsub(port_pattern, EMPTY_STR)
      port = authority[port_pattern, 1]
    end
    # A trailing ":" with nothing after it counts as "no port".
    port = nil if port == EMPTY_STR
    new(
      :scheme => scheme,
      :user => user,
      :password => password,
      :host => host,
      :port => port,
      :path => path,
      :query => query,
      :fragment => fragment
    )
  end
  133. ##
  134. # Converts an input to a URI. The input does not have to be a valid
  135. # URI — the method will use heuristics to guess what URI was intended.
  136. # This is not standards-compliant, merely user-friendly.
  137. #
  138. # @param [String, Addressable::URI, #to_str] uri
  139. # The URI string to parse.
  140. # No parsing is performed if the object is already an
  141. # <code>Addressable::URI</code>.
  142. # @param [Hash] hints
  143. # A <code>Hash</code> of hints to the heuristic parser.
  144. # Defaults to <code>{:scheme => "http"}</code>.
  145. #
  146. # @return [Addressable::URI] The parsed URI.
  def self.heuristic_parse(uri, hints={})
    # nil (or false) in, nil out.
    return nil unless uri
    # An instance of this class is returned as a copy, without any guessing.
    return uri.dup if uri.kind_of?(self)
    unless uri.respond_to?(:to_str)
      raise TypeError, "Can't convert #{uri.class} into String."
    end
    # Work on a private copy: the clean-up below edits the string in place.
    uri = uri.to_str.dup
    hints = { :scheme => "http" }.merge(hints)
    # Ordered table of prefix repairs; the first pattern that matches wins,
    # so "feed:/http:/" must be listed before the plain "feed:/" rule.
    prefix_fixups = [
      [/^http:\/+/, "http://"],
      [/^https:\/+/, "https://"],
      [/^feed:\/+http:\/+/, "feed:http://"],
      [/^feed:\/+/, "feed://"],
      [/^file:\/+/, "file:///"]
    ]
    pattern, replacement = prefix_fixups.find do |(candidate, _)|
      uri =~ candidate
    end
    if pattern
      uri.gsub!(pattern, replacement)
    elsif uri =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
      # Input starting with a dotted quad gets the hinted scheme prepended.
      uri.gsub!(/^/, hints[:scheme] + "://")
    end
    parsed = self.parse(uri)
    # A "scheme" containing a dot (e.g. "example.com:8080") is really a
    # host, so parse again with the hinted scheme in front.
    if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
      parsed = self.parse(hints[:scheme] + "://" + uri)
    end
    # A leading path segment containing a dot is promoted to the host.
    promoted_host =
      parsed.path.include?(".") && parsed.path[/^([^\/]+\.[^\/]*)/, 1]
    if promoted_host
      parsed.defer_validation do
        remaining_path = parsed.path.gsub(
          Regexp.new("^" + Regexp.escape(promoted_host)), EMPTY_STR)
        parsed.host = promoted_host
        parsed.path = remaining_path
        parsed.scheme = hints[:scheme] unless parsed.scheme
      end
    end
    parsed
  end
  192. ##
  193. # Converts a path to a file scheme URI. If the path supplied is
  194. # relative, it will be returned as a relative URI. If the path supplied
  195. # is actually a non-file URI, it will parse the URI as if it had been
  196. # parsed with <code>Addressable::URI.parse</code>. Handles all of the
  197. # various Microsoft-specific formats for specifying paths.
  198. #
  199. # @param [String, Addressable::URI, #to_str] path
  200. # Typically a <code>String</code> path to a file or directory, but
  201. # will return a sensible return value if an absolute URI is supplied
  202. # instead.
  203. #
  204. # @return [Addressable::URI]
  205. # The parsed file scheme URI or the original URI if some other URI
  206. # scheme was provided.
  207. #
  208. # @example
  209. # base = Addressable::URI.convert_path("/absolute/path/")
  210. # uri = Addressable::URI.convert_path("relative/path")
  211. # (base + uri).to_s
  212. # #=> "file:///absolute/path/relative/path"
  213. #
  214. # Addressable::URI.convert_path(
  215. # "c:\\windows\\My Documents 100%20\\foo.txt"
  216. # ).to_s
  217. # #=> "file:///c:/windows/My%20Documents%20100%20/foo.txt"
  218. #
  219. # Addressable::URI.convert_path("http://example.com/").to_s
  220. # #=> "http://example.com/"
  def self.convert_path(path)
    # nil (or false) in, nil out.
    return nil unless path
    # An instance of this class is handed back as-is (same object, no copy).
    return path if path.kind_of?(self)
    if !path.respond_to?(:to_str)
      raise TypeError, "Can't convert #{path.class} into String."
    end
    # strip returns a new String, so the in-place edit below never touches
    # the caller's object.
    path = path.to_str.strip
    # Drop a leading "file:", "file:/" or "file://" marker.
    path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
    # Prefix drive-letter paths ("c:..." / "c|...") with a slash so the
    # drive letter is not parsed as a URI scheme.
    path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
    uri = self.parse(path)
    if uri.scheme == nil
      # Adjust windows-style uris: lower-case the drive letter, turn "c|"
      # into "c:" and backslashes into slashes.
      uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
        "/#{$1.downcase}:/"
      end
      uri.path.gsub!(/\\/, SLASH)
      # File.directory? is false for nonexistent paths, so it replaces the
      # former File.exists? + File.stat pair; File.exists? no longer exists
      # in Ruby 3.2+ and raised NoMethodError there.
      if File.directory?(uri.path)
        # Directories on the local file system get exactly one trailing "/".
        uri.path.gsub!(/\/$/, EMPTY_STR)
        uri.path = uri.path + '/'
      end
      # If the path is absolute, set the scheme and host.
      if uri.path =~ /^\//
        uri.scheme = "file"
        uri.host = EMPTY_STR
      end
      uri.normalize!
    end
    return uri
  end
  254. ##
  255. # Joins several URIs together.
  256. #
  257. # @param [String, Addressable::URI, #to_str] *uris
  258. # The URIs to join.
  259. #
  260. # @return [Addressable::URI] The joined URI.
  261. #
  262. # @example
  263. # base = "http://example.com/"
  264. # uri = Addressable::URI.parse("relative/path")
  265. # Addressable::URI.join(base, uri)
  266. # #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
  def self.join(*uris)
    # Normalise every argument to a URI object first; a single argument that
    # is not convertible to a String aborts the whole call.
    parsed = uris.map do |candidate|
      unless candidate.respond_to?(:to_str)
        raise TypeError, "Can't convert #{candidate.class} into String."
      end
      candidate.kind_of?(self) ? candidate : self.parse(candidate.to_str)
    end
    # Fold the remaining URIs into a copy of the first one, so none of the
    # caller's objects are modified by join!.
    joined = parsed.first.dup
    parsed.drop(1).each { |other| joined.join!(other) }
    joined
  end
  280. ##
  281. # Percent encodes a URI component.
  282. #
  283. # @param [String, #to_str] component The URI component to encode.
  284. #
  285. # @param [String, Regexp] character_class
  286. # The characters which are not percent encoded. If a <code>String</code>
  287. # is passed, the <code>String</code> must be formatted as a regular
  288. # expression character class. (Do not include the surrounding square
  289. # brackets.) For example, <code>"b-zB-Z0-9"</code> would cause
  290. # everything but the letters 'b' through 'z' and the numbers '0' through
  291. # '9' to be percent encoded. If a <code>Regexp</code> is passed, the
  292. # value <code>/[^b-zB-Z0-9]/</code> would have the same effect. A set of
  293. # useful <code>String</code> values may be found in the
  294. # <code>Addressable::URI::CharacterClasses</code> module. The default
  295. # value is the reserved plus unreserved character classes specified in
  296. # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  297. #
  298. # @param [String] upcase_encoded
  299. # A string of characters that may already be percent encoded, and whose
  300. # encodings should be upcased. This allows normalization of percent
  301. # encodings for characters not included in the
  302. # <code>character_class</code>.
  303. #
  304. # @return [String] The encoded component.
  305. #
  306. # @example
  307. # Addressable::URI.encode_component("simple/example", "b-zB-Z0-9")
  308. # => "simple%2Fex%61mple"
  309. # Addressable::URI.encode_component("simple/example", /[^b-zB-Z0-9]/)
  310. # => "simple%2Fex%61mple"
  311. # Addressable::URI.encode_component(
  312. # "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
  313. # )
  314. # => "simple%2Fexample"
  def self.encode_component(component, character_class=
      CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
      upcase_encoded='')
    # component:       value to encode; nil is returned unchanged.
    # character_class: String (body of a character class) or Regexp naming
    #                  what must NOT be encoded / what must be encoded.
    # upcase_encoded:  String of characters whose existing percent
    #                  encodings are upcased ("%2f" -> "%2F").
    # Raises TypeError for unconvertible components or a bad character_class.
    return nil if component.nil?
    unless component.is_a? String
      begin
        # Symbols, numbers and booleans are stringified with to_s; anything
        # else has to offer an implicit String conversion.
        scalar = [Symbol, Numeric, TrueClass, FalseClass].any? do |type|
          component.kind_of?(type)
        end
        component = scalar ? component.to_s : component.to_str
      rescue TypeError, NoMethodError
        raise TypeError, "Can't convert #{component.class} into String."
      end
    end
    unless [String, Regexp].include?(character_class.class)
      raise TypeError,
        "Expected String or Regexp, got #{character_class.inspect}"
    end
    if character_class.kind_of?(String)
      # A String lists the characters to keep; negate it to get the
      # characters that must be encoded.
      character_class = /[^#{character_class}]/
    end
    if component.respond_to?(:force_encoding)
      # We can't perform regexps on invalid UTF sequences, but
      # here we need to, so switch to ASCII (on a copy).
      component = component.dup
      component.force_encoding(Encoding::ASCII_8BIT)
    end
    # Avoiding gsub! because there are edge cases with frozen strings
    component = component.gsub(character_class) do |sequence|
      sequence.unpack('C*').map { |byte| "%" + ("%02x" % byte).upcase }.join
    end
    if upcase_encoded.length > 0
      hex_codes = upcase_encoded.chars.map do |char|
        char.unpack('C*').map { |byte| '%02x' % byte }.join
      end
      component = component.gsub(/%(#{hex_codes.join('|')})/i) do |encoded|
        encoded.upcase
      end
    end
    return component
  end
  class << self
    # Counterpart of unescape_component. The original alias pointed
    # encode_component at itself, which defined nothing.
    alias_method :escape_component, :encode_component
  end
  358. ##
  359. # Unencodes any percent encoded characters within a URI component.
  360. # This method may be used for unencoding either components or full URIs,
  361. # however, it is recommended to use the <code>unencode_component</code>
  362. # alias when unencoding components.
  363. #
  364. # @param [String, Addressable::URI, #to_str] uri
  365. # The URI or component to unencode.
  366. #
  367. # @param [Class] return_type
  368. # The type of object to return.
  369. # This value may only be set to <code>String</code> or
  370. # <code>Addressable::URI</code>. All other values are invalid. Defaults
  371. # to <code>String</code>.
  372. #
  373. # @param [String] leave_encoded
  374. # A string of characters to leave encoded. If a percent encoded character
  375. # in this list is encountered then it will remain percent encoded.
  376. #
  377. # @return [String, Addressable::URI]
  378. # The unencoded component or URI.
  379. # The return type is determined by the <code>return_type</code>
  380. # parameter.
  def self.unencode(uri, return_type=String, leave_encoded='')
    # nil is passed straight through.
    return nil if uri.nil?
    unless uri.is_a? String
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), " +
        "got #{return_type.inspect}"
    end
    # Replace each "%XX" triplet with the byte it denotes, unless that
    # character appears in leave_encoded, in which case the triplet is kept
    # exactly as written.
    decoded = uri.gsub(/%[0-9a-f]{2}/i) do |triplet|
      char = triplet[1, 2].hex.chr
      leave_encoded.include?(char) ? triplet : char
    end
    # The decoded bytes are relabelled (not transcoded) as UTF-8.
    decoded.force_encoding("utf-8") if decoded.respond_to?(:force_encoding)
    return decoded if return_type == String
    return ::Addressable::URI.parse(decoded) if return_type == ::Addressable::URI
  end
  class << self
    # Alternative names for unencode.
    alias_method :unescape, :unencode
    alias_method :unencode_component, :unencode
    alias_method :unescape_component, :unencode
  end
  409. ##
  410. # Normalizes the encoding of a URI component.
  411. #
  412. # @param [String, #to_str] component The URI component to encode.
  413. #
  414. # @param [String, Regexp] character_class
  415. # The characters which are not percent encoded. If a <code>String</code>
  416. # is passed, the <code>String</code> must be formatted as a regular
  417. # expression character class. (Do not include the surrounding square
  418. # brackets.) For example, <code>"b-zB-Z0-9"</code> would cause
  419. # everything but the letters 'b' through 'z' and the numbers '0'
  420. # through '9' to be percent encoded. If a <code>Regexp</code> is passed,
  421. # the value <code>/[^b-zB-Z0-9]/</code> would have the same effect. A
  422. # set of useful <code>String</code> values may be found in the
  423. # <code>Addressable::URI::CharacterClasses</code> module. The default
  424. # value is the reserved plus unreserved character classes specified in
  425. # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
  426. #
  427. # @param [String] leave_encoded
  428. # When <code>character_class</code> is a <code>String</code> then
  429. # <code>leave_encoded</code> is a string of characters that should remain
  430. # percent encoded while normalizing the component; if they appear percent
  431. # encoded in the original component, then they will be upcased ("%2f"
  432. # normalized to "%2F") but otherwise left alone.
  433. #
  434. # @return [String] The normalized component.
  435. #
  436. # @example
  437. # Addressable::URI.normalize_component("simpl%65/%65xampl%65", "b-zB-Z")
  438. # => "simple%2Fex%61mple"
  439. # Addressable::URI.normalize_component(
  440. # "simpl%65/%65xampl%65", /[^b-zB-Z]/
  441. # )
  442. # => "simple%2Fex%61mple"
  443. # Addressable::URI.normalize_component(
  444. # "simpl%65/%65xampl%65",
  445. # Addressable::URI::CharacterClasses::UNRESERVED
  446. # )
  447. # => "simple%2Fexample"
  448. # Addressable::URI.normalize_component(
  449. # "one%20two%2fthree%26four",
  450. # "0-9a-zA-Z &/",
  451. # "/"
  452. # )
  453. # => "one two%2Fthree&four"
  def self.normalize_component(component, character_class=
      CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
      leave_encoded='')
    # component:       value to normalize; nil is returned unchanged.
    # character_class: String (body of a character class) or Regexp, with
    #                  the same meaning as in encode_component. It is never
    #                  modified.
    # leave_encoded:   String of characters whose percent encodings must
    #                  survive; only consulted when character_class is a
    #                  String.
    # Raises TypeError for unconvertible components or a bad character_class.
    return nil if component.nil?
    begin
      component = component.to_str
    rescue NoMethodError, TypeError
      raise TypeError, "Can't convert #{component.class} into String."
    end if !component.is_a? String
    if ![String, Regexp].include?(character_class.class)
      raise TypeError,
        "Expected String or Regexp, got #{character_class.inspect}"
    end
    if character_class.kind_of?(String)
      leave_re = nil
      if leave_encoded.length > 0
        # "%" must be an allowed character so that the protected triplets
        # are not re-encoded. Build a NEW string: appending in place would
        # alter the caller's object (e.g. a CharacterClasses constant) for
        # every later call, and would raise on a frozen string.
        unless character_class.include?('%')
          character_class = "#{character_class}%"
        end
        protected_codes = leave_encoded.chars.map do |char|
          seq = char.unpack('C*').map { |c| '%02x' % c }.join
          [seq.upcase, seq.downcase]
        end.flatten.join('|')
        # Still encode any "%" that does not start a protected triplet.
        leave_re = "|%(?!#{protected_codes})"
      end
      character_class = /[^#{character_class}]#{leave_re}/
    end
    if component.respond_to?(:force_encoding)
      # We can't perform regexps on invalid UTF sequences, but
      # here we need to, so switch to ASCII.
      component = component.dup
      component.force_encoding(Encoding::ASCII_8BIT)
    end
    unencoded = self.unencode_component(component, String, leave_encoded)
    begin
      encoded = self.encode_component(
        Addressable::IDNA.unicode_normalize_kc(unencoded),
        character_class,
        leave_encoded
      )
    rescue ArgumentError
      # Fallback path: re-encodes with encode_component's default character
      # class, ignoring character_class and leave_encoded.
      encoded = self.encode_component(unencoded)
    end
    return encoded
  end
  495. ##
  496. # Percent encodes any special characters in the URI.
  497. #
  498. # @param [String, Addressable::URI, #to_str] uri
  499. # The URI to encode.
  500. #
  501. # @param [Class] return_type
  502. # The type of object to return.
  503. # This value may only be set to <code>String</code> or
  504. # <code>Addressable::URI</code>. All other values are invalid. Defaults
  505. # to <code>String</code>.
  506. #
  507. # @return [String, Addressable::URI]
  508. # The encoded URI.
  509. # The return type is determined by the <code>return_type</code>
  510. # parameter.
  def self.encode(uri, return_type=String)
    # nil is passed straight through.
    return nil if uri.nil?
    unless uri.is_a? String
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), " +
        "got #{return_type.inspect}"
    end
    uri_object = uri.kind_of?(self) ? uri : self.parse(uri)
    # Each component is encoded against the character class of the same
    # name; the authority is encoded as one piece.
    classes = Addressable::URI::CharacterClasses
    encoded_uri = Addressable::URI.new(
      :scheme => self.encode_component(uri_object.scheme, classes::SCHEME),
      :authority => self.encode_component(
        uri_object.authority, classes::AUTHORITY),
      :path => self.encode_component(uri_object.path, classes::PATH),
      :query => self.encode_component(uri_object.query, classes::QUERY),
      :fragment => self.encode_component(
        uri_object.fragment, classes::FRAGMENT)
    )
    return encoded_uri.to_s if return_type == String
    return encoded_uri if return_type == ::Addressable::URI
  end
  class << self
    # Alternative name for encode.
    alias_method :escape, :encode
  end
  545. ##
  546. # Normalizes the encoding of a URI. Characters within a hostname are
  547. # not percent encoded to allow for internationalized domain names.
  548. #
  549. # @param [String, Addressable::URI, #to_str] uri
  550. # The URI to encode.
  551. #
  552. # @param [Class] return_type
  553. # The type of object to return.
  554. # This value may only be set to <code>String</code> or
  555. # <code>Addressable::URI</code>. All other values are invalid. Defaults
  556. # to <code>String</code>.
  557. #
  558. # @return [String, Addressable::URI]
  559. # The encoded URI.
  560. # The return type is determined by the <code>return_type</code>
  561. # parameter.
  def self.normalized_encode(uri, return_type=String)
    # Unlike encode, there is no nil shortcut here: nil fails the String
    # conversion below and raises TypeError.
    unless uri.is_a? String
      begin
        uri = uri.to_str
      rescue NoMethodError, TypeError
        raise TypeError, "Can't convert #{uri.class} into String."
      end
    end
    unless [String, ::Addressable::URI].include?(return_type)
      raise TypeError,
        "Expected Class (String or Addressable::URI), " +
        "got #{return_type.inspect}"
    end
    uri_object = uri.kind_of?(self) ? uri : self.parse(uri)
    # Step 1: undo any existing percent encoding, component by component.
    components = {
      :scheme => self.unencode_component(uri_object.scheme),
      :user => self.unencode_component(uri_object.user),
      :password => self.unencode_component(uri_object.password),
      :host => self.unencode_component(uri_object.host),
      :port => (uri_object.port.nil? ? nil : uri_object.port.to_s),
      :path => self.unencode_component(uri_object.path),
      :query => self.unencode_component(uri_object.query),
      :fragment => self.unencode_component(uri_object.fragment)
    }
    # Step 2: unicode-normalize every component that is present.
    components.each do |key, value|
      next if value == nil
      components[key] =
        begin
          Addressable::IDNA.unicode_normalize_kc(value.to_str)
        rescue ArgumentError
          # Likely a malformed UTF-8 character, skip unicode normalization
          value.to_str
        end
    end
    # Step 3: encode again. Host and port are passed through unencoded.
    classes = Addressable::URI::CharacterClasses
    encoded_uri = Addressable::URI.new(
      :scheme => self.encode_component(
        components[:scheme], classes::SCHEME),
      :user => self.encode_component(
        components[:user], classes::UNRESERVED),
      :password => self.encode_component(
        components[:password], classes::UNRESERVED),
      :host => components[:host],
      :port => components[:port],
      :path => self.encode_component(components[:path], classes::PATH),
      :query => self.encode_component(components[:query], classes::QUERY),
      :fragment => self.encode_component(
        components[:fragment], classes::FRAGMENT)
    )
    return encoded_uri.to_s if return_type == String
    return encoded_uri if return_type == ::Addressable::URI
  end
  617. ##
  618. # Encodes a set of key/value pairs according to the rules for the
  619. # <code>application/x-www-form-urlencoded</code> MIME type.
  620. #
  621. # @param [#to_hash, #to_ary] form_values
  622. # The form values to encode.
  623. #
  624. # @param [TrueClass, FalseClass] sort
  625. # Sort the key/value pairs prior to encoding.
  626. # Defaults to <code>false</code>.
  627. #
  628. # @return [String]
  629. # The encoded value.
  def self.form_encode(form_values, sort=false)
    # Accept anything hash-like or array-like; reduce it to key/value pairs.
    pairs =
      if form_values.respond_to?(:to_hash)
        form_values.to_hash.to_a
      elsif form_values.respond_to?(:to_ary)
        form_values.to_ary
      else
        raise TypeError, "Can't convert #{form_values.class} into Array."
      end
    # Stringify keys and values; an Array value yields one pair per element.
    flattened = []
    pairs.each do |(key, value)|
      if value.kind_of?(Array)
        value.each { |element| flattened << [key.to_s, element.to_s] }
      else
        flattened << [key.to_s, value.to_s]
      end
    end
    # Useful for OAuth and optimizing caching systems
    flattened = flattened.sort if sort
    # Line breaks become CRLF pairs, everything outside the unreserved set
    # is percent encoded, and encoded spaces are finally written as "+".
    encode_part = lambda do |text|
      self.encode_component(
        text.gsub(/(\r\n|\n|\r)/, "\r\n"),
        CharacterClasses::UNRESERVED
      ).gsub("%20", "+")
    end
    encoded_pairs = flattened.map do |(key, value)|
      "#{encode_part.call(key)}=#{encode_part.call(value)}"
    end
    encoded_pairs.join("&")
  end
  669. ##
  670. # Decodes a <code>String</code> according to the rules for the
  671. # <code>application/x-www-form-urlencoded</code> MIME type.
  672. #
  673. # @param [String, #to_str] encoded_value
  674. # The form values to decode.
  675. #
  676. # @return [Array]
  677. # The decoded values.
  678. # This is not a <code>Hash</code> because of the possibility for
  679. # duplicate keys.
  def self.form_unencode(encoded_value)
    unless encoded_value.respond_to?(:to_str)
      raise TypeError, "Can't convert #{encoded_value.class} into String."
    end
    # "+" stands for a space; after decoding, every line-break flavour is
    # collapsed to "\n". A missing key or value stays nil.
    decode_part = lambda do |part|
      if part
        self.unencode_component(
          part.gsub("+", "%20")).gsub(/(\r\n|\n|\r)/, "\n")
      else
        nil
      end
    end
    encoded_value.to_str.split("&").map do |pair|
      # Split on the first "=" only, so values may themselves contain "=".
      key, value = pair.split("=", 2)
      [decode_part.call(key), decode_part.call(value)]
    end
  end
  697. ##
  698. # Creates a new uri object from component parts.
  699. #
  700. # @option [String, #to_str] scheme The scheme component.
  701. # @option [String, #to_str] user The user component.
  702. # @option [String, #to_str] password The password component.
  703. # @option [String, #to_str] userinfo
  704. # The userinfo component. If this is supplied, the user and password
  705. # components must be omitted.
  706. # @option [String, #to_str] host The host component.
  707. # @option [String, #to_str] port The port component.
  708. # @option [String, #to_str] authority
  709. # The authority component. If this is supplied, the user, password,
  710. # userinfo, host, and port components must be omitted.
  711. # @option [String, #to_str] path The path component.
  712. # @option [String, #to_str] query The query component.
  713. # @option [String, #to_str] fragment The fragment component.
  714. #
  715. # @return [Addressable::URI] The constructed URI object.
  def initialize(options={})
    # Composite components may not be combined with their own parts.
    if options.has_key?(:authority) &&
        (options.keys & [:userinfo, :user, :password, :host, :port]).any?
      raise ArgumentError,
        "Cannot specify both an authority and any of the components " +
        "within the authority."
    end
    if options.has_key?(:userinfo) &&
        (options.keys & [:user, :password]).any?
      raise ArgumentError,
        "Cannot specify both a userinfo and either the user or password."
    end
    # The setters run inside defer_validation and are applied in this fixed
    # order; options that are absent, nil or false are skipped.
    assignment_order = [
      :scheme, :user, :password, :userinfo, :host, :port,
      :authority, :path, :query, :query_values, :fragment
    ]
    self.defer_validation do
      assignment_order.each do |name|
        send("#{name}=", options[name]) if options[name]
      end
    end
  end
  746. ##
  747. # Freeze URI, initializing instance variables.
  748. #
  749. # @return [Addressable::URI] The frozen URI object.
  def freeze
    # Call every memoizing reader once before freezing, so the values they
    # store in instance variables are in place while assignment is still
    # possible.
    warm_up = [
      :normalized_scheme, :normalized_user, :normalized_password,
      :normalized_userinfo, :normalized_host, :normalized_port,
      :normalized_authority, :normalized_site, :normalized_path,
      :normalized_query, :normalized_fragment, :hash
    ]
    warm_up.each { |reader| send(reader) }
    super
  end
  765. ##
  766. # The scheme component for this URI.
  767. #
  768. # @return [String] The scheme component.
  def scheme
    # Checking definedness first means a never-assigned @scheme is not read.
    return nil unless instance_variable_defined?(:@scheme)
    @scheme
  end
  772. ##
  773. # The scheme component for this URI, normalized.
  774. #
  775. # @return [String] The scheme component, normalized.
  def normalized_scheme
    # No scheme, nothing to normalize (and nothing is cached).
    return self.scheme unless self.scheme
    # Computed once, then served from @normalized_scheme.
    @normalized_scheme ||=
      if self.scheme =~ /^\s*ssh\+svn\s*$/i
        # "ssh+svn" is rewritten to the spelling "svn+ssh".
        "svn+ssh"
      else
        Addressable::URI.normalize_component(
          self.scheme.strip.downcase,
          Addressable::URI::CharacterClasses::SCHEME
        )
      end
  end
  788. ##
  789. # Sets the scheme component for this URI.
  790. #
  791. # @param [String, #to_str] new_scheme The new scheme component.
  def scheme=(new_scheme)
    if new_scheme
      unless new_scheme.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_scheme.class} into String."
      end
      new_scheme = new_scheme.to_str
    end
    # NOTE(review): this pattern is unanchored, so it only requires an ASCII
    # letter somewhere in the string ("1http" passes, "123" and "" do not).
    # Confirm whether stricter validation is wanted before anchoring it.
    if new_scheme && new_scheme !~ /[a-z][a-z0-9\.\+\-]*/i
      raise InvalidURIError, "Invalid scheme format."
    end
    # Blank values are stored as nil.
    @scheme = new_scheme.to_s.strip.empty? ? nil : new_scheme
    # Reset dependant values
    @normalized_scheme = @uri_string = @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  810. ##
  811. # The user component for this URI.
  812. #
  813. # @return [String] The user component.
  def user
    # Checking definedness first means a never-assigned @user is not read.
    return nil unless instance_variable_defined?(:@user)
    @user
  end
  817. ##
  818. # The user component for this URI, normalized.
  819. #
  820. # @return [String] The user component, normalized.
  def normalized_user
    # No user, nothing to normalize.
    return self.user unless self.user
    @normalized_user ||= begin
      # For schemes matching /https?/, blank credentials (blank user and a
      # missing or blank password) normalize to no user at all. That nil is
      # not cached, so the check is repeated on each call.
      password_missing = !(self.password && !self.password.strip.empty?)
      if normalized_scheme =~ /https?/ && self.user.strip.empty? &&
          password_missing
        nil
      else
        Addressable::URI.normalize_component(
          self.user.strip,
          Addressable::URI::CharacterClasses::UNRESERVED
        )
      end
    end
  end
  834. ##
  835. # Sets the user component for this URI.
  836. #
  837. # @param [String, #to_str] new_user The new user component.
  def user=(new_user)
    # Assigns the user component.
    #
    # Raises TypeError when the value cannot be converted with #to_str.
    if new_user && !new_user.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_user.class} into String."
    end
    @user = new_user ? new_user.to_str : nil
    # You can't have a nil user with a non-nil password
    @user = EMPTY_STR if @user.nil? && !password.nil?
    # Reset dependent values. #normalized_authority, #site and
    # #normalized_site memoize values built from the userinfo, so they are
    # cleared too; otherwise they would keep returning the old user.
    @userinfo = nil
    @normalized_userinfo = nil
    @authority = nil
    @normalized_authority = nil
    @normalized_user = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  857. ##
  858. # The password component for this URI.
  859. #
  860. # @return [String] The password component.
  def password
    # An unassigned @password reads as nil.
    if instance_variable_defined?(:@password)
      @password
    else
      nil
    end
  end
  864. ##
  865. # The password component for this URI, normalized.
  866. #
  867. # @return [String] The password component, normalized.
  def normalized_password
    raw_password = self.password
    # No password component: nothing to normalize.
    return raw_password unless raw_password
    # The result is cached in @normalized_password.
    @normalized_password ||= begin
      trimmed_password = raw_password.strip
      # For schemes matching /https?/, a blank password together with a
      # missing or blank user normalizes to no password at all.
      if self.normalized_scheme =~ /https?/ && trimmed_password.empty? &&
          (!self.user || self.user.strip.empty?)
        nil
      else
        Addressable::URI.normalize_component(
          trimmed_password,
          Addressable::URI::CharacterClasses::UNRESERVED
        )
      end
    end
  end
  881. ##
  882. # Sets the password component for this URI.
  883. #
  884. # @param [String, #to_str] new_password The new password component.
  def password=(new_password)
    # Assigns the password component.
    #
    # Raises TypeError when the value cannot be converted with #to_str.
    if new_password && !new_password.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_password.class} into String."
    end
    @password = new_password ? new_password.to_str : nil
    # Make sure @user is defined before it is read below.
    @user ||= nil
    # You can't have a nil user with a non-nil password
    @user = EMPTY_STR if @user.nil? && !@password.nil?
    # Reset dependent values. #normalized_authority, #site and
    # #normalized_site memoize values built from the userinfo, so they are
    # cleared too; otherwise they would keep returning the old password.
    @userinfo = nil
    @normalized_userinfo = nil
    @authority = nil
    @normalized_authority = nil
    @normalized_password = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  906. ##
  907. # The userinfo component for this URI.
  908. # Combines the user and password components.
  909. #
  910. # @return [String] The userinfo component.
  def userinfo
    login = self.user
    secret = self.password
    present = login || secret
    # Neither user nor password: there is no userinfo.
    return present unless present
    # The result is cached in @userinfo. A password without a user yields nil.
    @userinfo ||=
      if login
        secret ? "#{login}:#{secret}" : "#{login}"
      end
  end
  922. ##
  923. # The userinfo component for this URI, normalized.
  924. #
  925. # @return [String] The userinfo component, normalized.
  def normalized_userinfo
    plain_userinfo = self.userinfo
    # No userinfo: nothing to normalize.
    return plain_userinfo unless plain_userinfo
    # The result is cached in @normalized_userinfo.
    @normalized_userinfo ||= begin
      login = self.normalized_user
      secret = self.normalized_password
      # Without a normalized user the result is nil, with or without a
      # normalized password.
      if login
        secret ? "#{login}:#{secret}" : "#{login}"
      end
    end
  end
  939. ##
  940. # Sets the userinfo component for this URI.
  941. #
  942. # @param [String, #to_str] new_userinfo The new userinfo component.
  def userinfo=(new_userinfo)
    # Assigns the user and password components from a "user:password" string.
    #
    # Raises TypeError when the value cannot be converted with #to_str.
    if new_userinfo && !new_userinfo.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_userinfo.class} into String."
    end
    new_user = nil
    new_password = nil
    if new_userinfo
      # Split on the FIRST colon only, matching how #authority= splits the
      # userinfo. The previous pair of regular expressions dropped the user
      # entirely when there was no colon ("user") and produced overlapping
      # halves when there was more than one ("a:b:c" gave user "a:b" and
      # password "b:c").
      candidate, separator, remainder =
        new_userinfo.to_str.strip.partition(":")
      new_password = remainder unless separator.empty?
      # A blank userinfo carries neither a user nor a password.
      new_user = candidate unless candidate.empty? && separator.empty?
    end
    # Password assigned first to ensure validity in case of nil
    self.password = new_password
    self.user = new_user
    # Reset dependent values, including the memoized #normalized_authority,
    # #site and #normalized_site, which are built from the userinfo.
    @authority = nil
    @normalized_authority = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  965. ##
  966. # The host component for this URI.
  967. #
  968. # @return [String] The host component.
  def host
    # An unassigned @host reads as nil.
    return nil unless instance_variable_defined?(:@host)
    @host
  end
  972. ##
  973. # The host component for this URI, normalized.
  974. #
  975. # @return [String] The host component, normalized.
  def normalized_host
    raw_host = self.host
    # No host component: nothing to normalize.
    return raw_host unless raw_host
    # The result is cached in @normalized_host.
    @normalized_host ||= begin
      trimmed_host = raw_host.strip
      if trimmed_host.empty?
        EMPTY_STR
      else
        # Lower-case, percent-decode, then convert through IDNA.to_ascii.
        ascii_host = ::Addressable::IDNA.to_ascii(
          URI.unencode_component(trimmed_host.downcase)
        )
        # Trailing dots are unnecessary
        ascii_host.end_with?(".") ? ascii_host[0...-1] : ascii_host
      end
    end
  end
  992. ##
  993. # Sets the host component for this URI.
  994. #
  995. # @param [String, #to_str] new_host The new host component.
  def host=(new_host)
    # Assigns the host component.
    #
    # Raises TypeError when the value cannot be converted with #to_str and
    # InvalidURIError when the host contains a forbidden character.
    if new_host && !new_host.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_host.class} into String."
    end
    @host = new_host ? new_host.to_str : nil
    unless @host.nil?
      unreserved = CharacterClasses::UNRESERVED
      sub_delims = CharacterClasses::SUB_DELIMS
      # Contents of a bracketed host ("[...]"), or nil when not bracketed.
      bracketed = @host[/^\[(.*)\]$/, 1]
      if @host =~ /[<>{}\/\?\#\@]/ ||
          (bracketed && bracketed !~
            Regexp.new("^[#{unreserved}#{sub_delims}:]*$"))
        raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
      end
    end
    # Reset dependent values. #normalized_authority, #site and
    # #normalized_site memoize values built from the host, so they are
    # cleared too; otherwise they would keep returning the old host.
    @authority = nil
    @normalized_authority = nil
    @normalized_host = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  ##
  # This method is the same as URI::Generic#host, except that the
  # brackets around IPv6 (and 'IPvFuture') addresses are removed.
  #
  # @see Addressable::URI#host
  #
  # @return [String] The hostname for this URI.
  def hostname
    current_host = self.host
    # A host wrapped in "[...]" is returned without the brackets.
    bracketed = /\A\[(.*)\]\z/.match(current_host)
    bracketed ? bracketed[1] : current_host
  end
  ##
  # This method is the same as URI::Generic#host=, except that
  # the argument can be a bare IPv6 address (or 'IPvFuture').
  #
  # @see Addressable::URI#host=
  #
  # @param [String, #to_str] new_hostname The new hostname for this URI.
  def hostname=(new_hostname)
    if new_hostname && !new_hostname.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_hostname.class} into String."
    end
    candidate = new_hostname ? new_hostname.to_str : nil
    already_bracketed = /\A\[.*\]\z/ =~ candidate
    contains_colon = /:/ =~ candidate
    # A value containing a colon that is not already bracketed gets wrapped
    # in "[...]" before being handed to #host=.
    candidate = "[#{candidate}]" if !already_bracketed && contains_colon
    self.host = candidate
  end
  1042. ##
  1043. # The authority component for this URI.
  1044. # Combines the user, password, host, and port components.
  1045. #
  1046. # @return [String] The authority component.
  def authority
    current_host = self.host
    # Without a host there is no authority.
    return current_host unless current_host
    # The result is cached in @authority.
    @authority ||= begin
      credentials = self.userinfo
      explicit_port = self.port
      prefix = credentials.nil? ? "" : "#{credentials}@"
      suffix = explicit_port.nil? ? "" : ":#{explicit_port}"
      "#{prefix}#{current_host}#{suffix}"
    end
  end
  1060. ##
  1061. # The authority component for this URI, normalized.
  1062. #
  1063. # @return [String] The authority component, normalized.
  def normalized_authority
    plain_authority = self.authority
    # Without an authority there is nothing to normalize.
    return plain_authority unless plain_authority
    # The result is cached in @normalized_authority.
    @normalized_authority ||= begin
      credentials = self.normalized_userinfo
      host_part = self.normalized_host
      port_part = self.normalized_port
      prefix = credentials.nil? ? "" : "#{credentials}@"
      suffix = port_part.nil? ? "" : ":#{port_part}"
      "#{prefix}#{host_part}#{suffix}"
    end
  end
  1077. ##
  1078. # Sets the authority component for this URI.
  1079. #
  1080. # @param [String, #to_str] new_authority The new authority component.
  def authority=(new_authority)
    # Splits an authority string into user, password, host and port and
    # assigns each through its own setter.
    #
    # Raises TypeError when the value cannot be converted with #to_str.
    #
    # The locals default to nil explicitly. The previous `defined?(local)`
    # checks were always true, since a local that appears in an assignment
    # anywhere in the method is known to the parser and reads as nil.
    new_user = new_password = new_host = new_port = nil
    if new_authority
      if !new_authority.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_authority.class} into String."
      end
      new_authority = new_authority.to_str
      userinfo_pattern = /^([^\[\]]*)@/
      port_pattern = /:([^:@\[\]]*?)$/
      new_userinfo = new_authority[userinfo_pattern, 1]
      if new_userinfo
        # The user is everything before the first colon; the password is
        # everything after it.
        new_user = new_userinfo.strip[/^([^:]*):?/, 1]
        new_password = new_userinfo.strip[/:(.*)$/, 1]
      end
      # The host is what remains once the userinfo and port are removed.
      new_host = new_authority.gsub(
        userinfo_pattern, EMPTY_STR
      ).gsub(
        port_pattern, EMPTY_STR
      )
      new_port = new_authority[port_pattern, 1]
    end
    # Password assigned first to ensure validity in case of nil
    self.password = new_password
    self.user = new_user
    self.host = new_host
    self.port = new_port
    # Reset dependent values, including the memoized #normalized_authority,
    # #site and #normalized_site, which are built from the authority.
    @userinfo = nil
    @normalized_userinfo = nil
    @normalized_authority = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1113. ##
  1114. # The origin for this URI, serialized to ASCII, as per
  1115. # RFC 6454, section 6.2.
  1116. #
  1117. # @return [String] The serialized origin.
  def origin
    # Without both a scheme and an authority the origin serializes as "null".
    return "null" unless self.scheme && self.authority
    serialized = "#{self.normalized_scheme}://#{self.normalized_host}"
    port_number = self.normalized_port
    # The port is appended only when #normalized_port returns one.
    port_number ? "#{serialized}:#{port_number}" : serialized
  end
  1132. # Returns an array of known ip-based schemes. These schemes typically
  1133. # use a similar URI form:
  1134. # <code>//<user>:<password>@<host>:<port>/<url-path></code>
  def self.ip_based_schemes
    # The known schemes are the keys of the port mapping.
    port_mapping.keys
  end
  1138. # Returns a hash of common IP-based schemes and their default port
  1139. # numbers. Adding new schemes to this hash, as necessary, will allow
  1140. # for better URI normalization.
  def self.port_mapping
    # Hands back the PORT_MAPPING constant itself, not a copy.
    return PORT_MAPPING
  end
  1144. ##
  1145. # The port component for this URI.
  1146. # This is the port number actually given in the URI. This does not
  1147. # infer port numbers from default values.
  1148. #
  1149. # @return [Integer] The port component.
  def port
    # An unassigned @port reads as nil.
    instance_variable_defined?(:@port) ? @port : nil
  end
  1153. ##
  1154. # The port component for this URI, normalized.
  1155. #
  1156. # @return [Integer] The port component, normalized.
  def normalized_port
    scheme_default = URI.port_mapping[self.normalized_scheme]
    explicit_port = self.port
    # A port equal to the scheme's mapped default is dropped.
    scheme_default == explicit_port ? nil : explicit_port
  end
  1164. ##
  1165. # Sets the port component for this URI.
  1166. #
  1167. # @param [String, Integer, #to_s] new_port The new port component.
  def port=(new_port)
    # Assigns the port component.
    #
    # String-like values are percent-decoded first. A value of zero is
    # stored as nil. Raises InvalidURIError when the value is not made up
    # solely of decimal digits.
    unless new_port.nil?
      if new_port.respond_to?(:to_str)
        new_port = Addressable::URI.unencode_component(new_port.to_str)
      end
      # \A and \z anchor the whole string. With ^ and $ a multi-line value
      # such as "abc\n80" passed validation and was then stored as nil.
      if new_port.to_s !~ /\A\d+\z/
        raise InvalidURIError,
          "Invalid port number: #{new_port.inspect}"
      end
    end
    @port = new_port.to_s.to_i
    @port = nil if @port == 0
    # Reset dependent values. #normalized_authority, #site and
    # #normalized_site memoize values built from the port, so they are
    # cleared too; otherwise they would keep returning the old port.
    @authority = nil
    @normalized_authority = nil
    @normalized_port = nil
    @site = nil
    @normalized_site = nil
    @uri_string = nil
    @hash = nil
    # Ensure we haven't created an invalid URI
    validate()
  end
  1186. ##
  1187. # The inferred port component for this URI.
  1188. # This method will normalize to the default port for the URI's scheme if
  1189. # the port isn't explicitly specified in the URI.
  1190. #
  1191. # @return [Integer] The inferred port component.
  def inferred_port
    numeric_port = self.port.to_i
    # A missing (or zero) port falls back to the scheme's default port.
    numeric_port == 0 ? self.default_port : numeric_port
  end
  ##
  # The default port for this URI's scheme.
  # This method always returns the default port for the URI's scheme,
  # regardless of the presence of an explicit port in the URI.
  #
  # @return [Integer] The default port.
  def default_port
    raw_scheme = self.scheme
    # Without a scheme there is no default port.
    return nil unless raw_scheme
    # The mapping is looked up with the stripped, lower-cased scheme.
    URI.port_mapping[raw_scheme.strip.downcase]
  end
  1208. ##
  1209. # The combination of components that represent a site.
  1210. # Combines the scheme, user, password, host, and port components.
  1211. # Primarily useful for HTTP and HTTPS.
  1212. #
  1213. # For example, <code>"http://example.com/path?query"</code> would have a
  1214. # <code>site</code> value of <code>"http://example.com"</code>.
  1215. #
  1216. # @return [String] The components that identify a site.
  def site
    current_scheme = self.scheme
    anchor = current_scheme || self.authority
    # Neither scheme nor authority: there is no site.
    return anchor unless anchor
    # The result is cached in @site.
    @site ||= begin
      current_authority = self.authority
      scheme_part = current_scheme.nil? ? "" : "#{current_scheme}:"
      authority_part = current_authority.nil? ? "" : "//#{current_authority}"
      "#{scheme_part}#{authority_part}"
    end
  end
  1225. ##
  1226. # The normalized combination of components that represent a site.
  1227. # Combines the scheme, user, password, host, and port components.
  1228. # Primarily useful for HTTP and HTTPS.
  1229. #
  1230. # For example, <code>"http://example.com/path?query"</code> would have a
  1231. # <code>site</code> value of <code>"http://example.com"</code>.
  1232. #
  1233. # @return [String] The normalized components that identify a site.
  def normalized_site
    plain_site = self.site
    # Without a site there is nothing to normalize.
    return plain_site unless plain_site
    # The result is cached in @normalized_site.
    @normalized_site ||= begin
      scheme_value = self.normalized_scheme
      authority_value = self.normalized_authority
      scheme_part = scheme_value.nil? ? "" : "#{scheme_value}:"
      authority_part = authority_value.nil? ? "" : "//#{authority_value}"
      "#{scheme_part}#{authority_part}"
    end
  end
  1246. ##
  1247. # Sets the site value for this URI.
  1248. #
  1249. # @param [String, #to_str] new_site The new site value.
  def site=(new_site)
    site_scheme = nil
    site_authority = nil
    if new_site
      unless new_site.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_site.class} into String."
      end
      site_text = new_site.to_str
      # These two regular expressions derived from the primary parsing
      # expression. A string that does not match as a whole (for example
      # one that carries a path) leaves both components nil.
      site_scheme =
        site_text[/^(?:([^:\/?#]+):)?(?:\/\/(?:[^\/?#]*))?$/, 1]
      site_authority = site_text[
        /^(?:(?:[^:\/?#]+):)?(?:\/\/([^\/?#]*))?$/, 1
      ]
    end
    self.scheme = site_scheme
    self.authority = site_authority
  end
  1267. ##
  1268. # The path component for this URI.
  1269. #
  1270. # @return [String] The path component.
  def path
    # An unassigned @path reads as EMPTY_STR rather than nil.
    return EMPTY_STR unless instance_variable_defined?(:@path)
    @path
  end
  # Matches a path that does not start with a slash and whose first
  # segment contains a colon.
  NORMPATH = /^(?!\/)[^\/:]*:.*$/
  ##
  # The path component for this URI, normalized.
  #
  # @return [String] The path component, normalized.
  def normalized_path
    # The result is cached in @normalized_path.
    @normalized_path ||= begin
      raw_path = self.path.to_s
      if self.scheme == nil && raw_path =~ NORMPATH
        # Relative paths with colons in the first segment are ambiguous.
        # NOTE(review): "%2F" is the escape for "/", not ":" -- confirm
        # that this substitution is intended.
        raw_path = raw_path.sub(":", "%2F")
      end
      # A limit of -1 makes String#split keep trailing empty segments.
      segments = raw_path.strip.split(SLASH, -1)
      normalized_segments = segments.map do |segment|
        Addressable::URI.normalize_component(
          segment,
          Addressable::URI::CharacterClasses::PCHAR
        )
      end
      normalized = URI.normalize_path(normalized_segments.join(SLASH))
      # An empty path becomes SLASH for these four schemes.
      if normalized.empty? &&
          ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
        SLASH
      else
        normalized
      end
    end
  end
  1302. ##
  1303. # Sets the path component for this URI.
  1304. #
  1305. # @param [String, #to_str] new_path The new path component.
  def path=(new_path)
    if new_path && !new_path.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_path.class} into String."
    end
    @path = (new_path || EMPTY_STR).to_str
    # When a host is present, a non-empty path must begin with a slash.
    needs_slash = !@path.empty? && @path[0..0] != SLASH && host != nil
    @path = "/#{@path}" if needs_slash
    # Reset dependent values
    @normalized_path = nil
    @uri_string = nil
    @hash = nil
  end
  1319. ##
  1320. # The basename, if any, of the file in the path component.
  1321. #
  1322. # @return [String] The path's basename.
  def basename
    # Path cannot be nil
    file_name = File.basename(self.path)
    # Drop everything from the last ";" onward.
    file_name.gsub(/;[^\/]*$/, EMPTY_STR)
  end
  1327. ##
  1328. # The extname, if any, of the file in the path component.
  1329. # Empty string if there is no extension.
  1330. #
  1331. # @return [String] The path's extname.
  def extname
    # The extension is taken from #basename; a falsy path yields nil.
    self.path ? File.extname(self.basename) : nil
  end
  1336. ##
  1337. # The query component for this URI.
  1338. #
  1339. # @return [String] The query component.
  def query
    # An unassigned @query reads as nil.
    if instance_variable_defined?(:@query)
      @query
    else
      nil
    end
  end
  1343. ##
  1344. # The query component for this URI, normalized.
  1345. #
  1346. # @return [String] The query component, normalized.
  def normalized_query
    raw_query = self.query
    # No query component: nothing to normalize.
    return raw_query unless raw_query
    # The result is cached in @normalized_query.
    @normalized_query ||= begin
      # Make sure possible key-value pair delimiters are escaped: the "\&"
      # and "\;" entries are removed from the QUERY character class.
      pair_class = Addressable::URI::CharacterClasses::QUERY.
        sub("\\&", "").sub("\\;", "")
      # Each "&"-separated pair is normalized on its own; a limit of -1
      # makes String#split keep trailing empty pairs.
      normalized_pairs = raw_query.split("&", -1).map do |pair|
        Addressable::URI.normalize_component(pair, pair_class, '+')
      end
      normalized_pairs.join("&")
    end
  end
  1362. ##
  1363. # Sets the query component for this URI.
  1364. #
  1365. # @param [String, #to_str] new_query The new query component.
  def query=(new_query)
    query_text = nil
    if new_query
      unless new_query.respond_to?(:to_str)
        raise TypeError, "Can't convert #{new_query.class} into String."
      end
      query_text = new_query.to_str
    end
    @query = query_text
    # Reset dependent values
    @normalized_query = @uri_string = @hash = nil
  end
  1376. ##
  1377. # Converts the query component to a Hash value.
  1378. #
  1379. # @param [Class] return_type The return type desired. Value must be either
  1380. # `Hash` or `Array`.
  1381. #
  1382. # @return [Hash, Array] The query string parsed as a Hash or Array object.
  1383. #
  1384. # @example
  1385. # Addressable::URI.parse("?one=1&two=2&three=3").query_values
  1386. # #=> {"one" => "1", "two" => "2", "three" => "3"}
  1387. # Addressable::URI.parse("?one=two&one=three").query_values(Array)
  1388. # #=> [["one", "two"], ["one", "three"]]
  1389. # Addressable::URI.parse("?one=two&one=three").query_values(Hash)
  1390. # #=> {"one" => "three"}
  1391. def query_values(return_type=Hash)
  1392. empty_accumulator = Array == return_type ? [] : {}
  1393. if return_type != Hash && return_type != Array
  1394. raise ArgumentError, "Invalid return type. Must be Hash or Array."
  1395. end
  1396. return nil if self.query == nil
  1397. split_query = (self.query.split("&").map do |pair|
  1398. pair.split("=", 2) if pair && !pair.empty?
  1399. end).compact
  1400. return split_query.inject(empty_accumulator.dup) do |accu, pair|
  1401. # I'd rather use key/value identifiers instead of array lookups,
  1402. # but in this case I really want to maintain the exact pair structure,
  1403. # so it's best to m…

Large files files are truncated, but you can click here to view the full file