PageRenderTime 61ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/uri/generic.rb

http://github.com/ruby/ruby
Ruby | 1567 lines | 748 code | 146 blank | 673 comment | 174 complexity | e33b4954b006fda449ab21729e9ff038 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0
  1. # frozen_string_literal: true
  2. # = uri/generic.rb
  3. #
  4. # Author:: Akira Yamada <akira@ruby-lang.org>
  5. # License:: You can redistribute it and/or modify it under the same terms as Ruby.
  6. #
  7. # See URI for general documentation
  8. #
  9. require_relative 'common'
  10. autoload :IPSocket, 'socket'
  11. autoload :IPAddr, 'ipaddr'
  12. module URI
  13. #
  14. # Base class for all URI classes.
  15. # Implements generic URI syntax as per RFC 2396.
  16. #
  17. class Generic
  18. include URI
  19. #
  20. # A Default port of nil for URI::Generic.
  21. #
  22. DEFAULT_PORT = nil
  23. #
  24. # Returns default port.
  25. #
  26. def self.default_port
  27. self::DEFAULT_PORT
  28. end
  29. #
  30. # Returns default port.
  31. #
  32. def default_port
  33. self.class.default_port
  34. end
  35. #
  36. # An Array of the available components for URI::Generic.
  37. #
  # Component names in positional order; ::build and #initialize rely on
  # this ordering when given an Array.
  COMPONENT = %i[
    scheme
    userinfo host port registry
    path opaque
    query
    fragment
  ].freeze
  45. #
  46. # Components of the URI in the order.
  47. #
  48. def self.component
  49. self::COMPONENT
  50. end
  51. USE_REGISTRY = false # :nodoc:
  52. def self.use_registry # :nodoc:
  53. self::USE_REGISTRY
  54. end
  55. #
  56. # == Synopsis
  57. #
  58. # See ::new.
  59. #
  60. # == Description
  61. #
  62. # First tries to create a new URI::Generic instance using
  63. # URI::Generic::build. If URI::InvalidComponentError is raised,
  64. # it escapes the URI components with DEFAULT_PARSER.escape and tries again.
  65. #
  def self.build2(args)
    # First attempt: build from the components exactly as given.
    self.build(args)
  rescue InvalidComponentError
    # Second attempt: percent-escape the components and try once more.
    # Only String values are escaped; nil and non-String values (for example
    # an Integer port) are passed through untouched, in both the Array and
    # the Hash form. Errors raised by this retry propagate to the caller.
    escape = lambda { |x| x.is_a?(String) ? DEFAULT_PARSER.escape(x) : x }

    case args
    when Array
      self.build(args.collect(&escape))
    when Hash
      tmp = {}
      args.each { |key, value| tmp[key] = escape.call(value) }
      self.build(tmp)
    end
  end
  91. #
  92. # == Synopsis
  93. #
  94. # See ::new.
  95. #
  96. # == Description
  97. #
  98. # Creates a new URI::Generic instance from components of URI::Generic
  99. # with check. Components are: scheme, userinfo, host, port, registry, path,
  100. # opaque, query, and fragment. You can provide arguments either by an Array or a Hash.
  101. # See ::new for hash keys to use or for order of array items.
  102. #
  def self.build(args)
    if args.kind_of?(Array) &&
        args.size == ::URI::Generic::COMPONENT.size
      # Positional form: one entry per URI::Generic::COMPONENT, in order.
      tmp = args.dup
    elsif args.kind_of?(Hash)
      # Keyed form: missing keys become nil (Hash defaults are not consulted).
      tmp = ::URI::Generic::COMPONENT.collect do |c|
        args.include?(c) ? args[c] : nil
      end
    else
      # Inside a class method `self` is already the URI class being built, so
      # name it (and its component list) directly; `self.class` is just Class.
      raise ArgumentError,
        "expected Array of or Hash of components of #{self} (#{self.component.join(', ')})"
    end

    # Trailing arguments for ::new: parser = nil, arg_check = true.
    tmp << nil
    tmp << true
    return self.new(*tmp)
  end
  124. #
  125. # == Args
  126. #
  127. # +scheme+::
  128. # Protocol scheme, i.e. 'http','ftp','mailto' and so on.
  129. # +userinfo+::
  130. # User name and password, i.e. 'sdmitry:bla'.
  131. # +host+::
  132. # Server host name.
  133. # +port+::
  134. # Server port.
  135. # +registry+::
  136. # Registry of naming authorities.
  137. # +path+::
  138. # Path on server.
  139. # +opaque+::
  140. # Opaque part.
  141. # +query+::
  142. # Query data.
  143. # +fragment+::
  144. # Part of the URI after '#' character.
  145. # +parser+::
  146. # Parser for internal use [URI::DEFAULT_PARSER by default].
  147. # +arg_check+::
  148. # Check arguments [false by default].
  149. #
  150. # == Description
  151. #
  152. # Creates a new URI::Generic instance from ``generic'' components without check.
  153. #
  def initialize(scheme,
                 userinfo, host, port, registry,
                 path, opaque,
                 query,
                 fragment,
                 parser = DEFAULT_PARSER,
                 arg_check = false)
    # Start from a fully-nil state so the setters below can read any
    # instance variable safely.
    @scheme = nil
    @user = nil
    @password = nil
    @host = nil
    @port = nil
    @path = nil
    @query = nil
    @opaque = nil
    @fragment = nil
    # The default parser is stored as nil; #parser substitutes DEFAULT_PARSER.
    @parser = parser == DEFAULT_PARSER ? nil : parser

    if arg_check
      # Validating path: go through the public, checking setters.
      self.scheme = scheme
      self.userinfo = userinfo
      self.hostname = host
      self.port = port
      self.path = path
      self.query = query
      self.opaque = opaque
      self.fragment = fragment
    else
      # Non-validating path: use the set_* setters directly. query and
      # fragment still go through their public setters here.
      self.set_scheme(scheme)
      self.set_userinfo(userinfo)
      self.set_host(host)
      self.set_port(port)
      self.set_path(path)
      self.query = query
      self.set_opaque(opaque)
      self.fragment=(fragment)
    end
    # Any truthy registry argument is rejected.
    if registry
      raise InvalidURIError,
        "the scheme #{@scheme} does not accept registry part: #{registry} (or bad hostname?)"
    end

    @scheme&.freeze
    self.set_path('') if !@path && !@opaque # (see RFC2396 Section 5.2)
    # Fill in the class's default port when none was supplied.
    self.set_port(self.default_port) if self.default_port && !@port
  end
  198. #
  199. # Returns the scheme component of the URI.
  200. #
  201. # URI("http://foo/bar/baz").scheme #=> "http"
  202. #
  203. attr_reader :scheme
  204. # Returns the host component of the URI.
  205. #
  206. # URI("http://foo/bar/baz").host #=> "foo"
  207. #
  208. # It returns nil if no host component exists.
  209. #
  210. # URI("mailto:foo@example.org").host #=> nil
  211. #
  212. # The component does not contain the port number.
  213. #
  214. # URI("http://foo:8080/bar/baz").host #=> "foo"
  215. #
  216. # Since IPv6 addresses are wrapped with brackets in URIs,
  217. # this method returns IPv6 addresses wrapped with brackets.
  218. # This form is not appropriate to pass to socket methods such as TCPSocket.open.
  219. # If unwrapped host names are required, use the #hostname method.
  220. #
  221. # URI("http://[::1]/bar/baz").host #=> "[::1]"
  222. # URI("http://[::1]/bar/baz").hostname #=> "::1"
  223. #
  224. attr_reader :host
  225. # Returns the port component of the URI.
  226. #
  227. # URI("http://foo/bar/baz").port #=> 80
  228. # URI("http://foo:8080/bar/baz").port #=> 8080
  229. #
  230. attr_reader :port
  231. def registry # :nodoc:
  232. nil
  233. end
  234. # Returns the path component of the URI.
  235. #
  236. # URI("http://foo/bar/baz").path #=> "/bar/baz"
  237. #
  238. attr_reader :path
  239. # Returns the query component of the URI.
  240. #
  241. # URI("http://foo/bar/baz?search=FooBar").query #=> "search=FooBar"
  242. #
  243. attr_reader :query
  244. # Returns the opaque part of the URI.
  245. #
  246. # URI("mailto:foo@example.org").opaque #=> "foo@example.org"
  247. # URI("http://foo/bar/baz").opaque #=> nil
  248. #
  249. # The portion of the path that does not make use of the slash '/'.
  250. # The path typically refers to an absolute path or an opaque part.
  251. # (See RFC2396 Section 3 and 5.2.)
  252. #
  253. attr_reader :opaque
  254. # Returns the fragment component of the URI.
  255. #
  256. # URI("http://foo/bar/baz?search=FooBar#ponies").fragment #=> "ponies"
  257. #
  258. attr_reader :fragment
  259. # Returns the parser to be used.
  260. #
  261. # Unless a URI::Parser is defined, DEFAULT_PARSER is used.
  262. #
  def parser
    # @parser may be undefined or nil (the constructor stores nil for the
    # default parser); both cases resolve to DEFAULT_PARSER.
    return DEFAULT_PARSER unless defined?(@parser)

    @parser || DEFAULT_PARSER
  end
  270. # Replaces self by other URI object.
  271. #
  def replace!(oth)
    # Only objects of exactly the same class can be copied from.
    if self.class != oth.class
      raise ArgumentError, "expected #{self.class} object"
    end

    # Copy each component through its public setter, in component order.
    component.each do |c|
      value = oth.__send__(c)
      # The component list includes :registry, but registry= raises
      # unconditionally and #registry returns nil. Assigning a nil registry
      # can never succeed, so skip it; a non-nil registry still goes through
      # the setter and raises as before.
      next if c == :registry && value.nil?

      self.__send__("#{c}=", value)
    end
  end
  280. private :replace!
  281. #
  282. # Components of the URI in the order.
  283. #
  284. def component
  285. self.class.component
  286. end
  287. #
  288. # Checks the scheme +v+ component against the URI::Parser Regexp for :SCHEME.
  289. #
  def check_scheme(v)
    # nil/false means "no scheme" and is always acceptable.
    return true unless v
    return true if parser.regexp[:SCHEME] =~ v

    raise InvalidComponentError,
      "bad component(expected scheme component): #{v}"
  end
  297. private :check_scheme
  298. # Protected setter for the scheme component +v+.
  299. #
  300. # See also URI::Generic.scheme=.
  301. #
  def set_scheme(v)
    # Schemes are stored lower-cased; nil is stored as nil.
    @scheme = v.nil? ? nil : v.downcase
  end
  305. protected :set_scheme
  306. #
  307. # == Args
  308. #
  309. # +v+::
  310. # String
  311. #
  312. # == Description
  313. #
  314. # Public setter for the scheme component +v+
  315. # (with validation).
  316. #
  317. # See also URI::Generic.check_scheme.
  318. #
  319. # == Usage
  320. #
  321. # require 'uri'
  322. #
  323. # uri = URI.parse("http://my.example.com")
  324. # uri.scheme = "https"
  325. # uri.to_s #=> "https://my.example.com"
  326. #
  327. def scheme=(v)
  328. check_scheme(v)
  329. set_scheme(v)
  330. v
  331. end
  332. #
  333. # Checks the +user+ and +password+.
  334. #
  335. # If +password+ is not provided, then +user+ is
  336. # split, using URI::Generic.split_userinfo, to
  337. # pull +user+ and +password+.
  338. #
  339. # See also URI::Generic.check_user, URI::Generic.check_password.
  340. #
  def check_userinfo(user, password = nil)
    # With no explicit password, +user+ is treated as a combined
    # 'user:password' string and split.
    user, password = split_userinfo(user) unless password

    check_user(user)
    check_password(password, user)
    true
  end
  349. private :check_userinfo
  350. #
  351. # Checks the user +v+ component for RFC2396 compliance
  352. # and against the URI::Parser Regexp for :USERINFO.
  353. #
  354. # Can not have a registry or opaque component defined,
  355. # with a user component defined.
  356. #
  def check_user(v)
    # The opaque conflict is reported even when +v+ is nil.
    raise InvalidURIError, "can not set user with opaque" if @opaque

    # nil/false is passed back unchanged without further checks.
    return v unless v
    return true if parser.regexp[:USERINFO] =~ v

    raise InvalidComponentError,
      "bad component(expected userinfo component or user component): #{v}"
  end
  369. private :check_user
  370. #
  371. # Checks the password +v+ component for RFC2396 compliance
  372. # and against the URI::Parser Regexp for :USERINFO.
  373. #
  374. # Can not have a registry or opaque component defined,
  375. # with a user component defined.
  376. #
  def check_password(v, user = @user)
    # The opaque conflict is reported even when +v+ is nil.
    raise InvalidURIError, "can not set password with opaque" if @opaque

    # nil/false is passed back unchanged without further checks.
    return v unless v

    # A password is only meaningful alongside a user.
    raise InvalidURIError, "password component depends user component" unless user
    return true if parser.regexp[:USERINFO] =~ v

    raise InvalidComponentError, "bad password component"
  end
  393. private :check_password
  394. #
  395. # Sets userinfo, argument is string like 'name:pass'.
  396. #
  def userinfo=(userinfo)
    # A nil argument is a no-op: nothing is validated or changed.
    unless userinfo.nil?
      # The splat lets callers pass either 'user:pass' or [user, pass].
      check_userinfo(*userinfo)
      set_userinfo(*userinfo)
    end
  end
  405. #
  406. # == Args
  407. #
  408. # +v+::
  409. # String
  410. #
  411. # == Description
  412. #
  413. # Public setter for the +user+ component
  414. # (with validation).
  415. #
  416. # See also URI::Generic.check_user.
  417. #
  418. # == Usage
  419. #
  420. # require 'uri'
  421. #
  422. # uri = URI.parse("http://john:S3nsit1ve@my.example.com")
  423. # uri.user = "sam"
  424. # uri.to_s #=> "http://sam:S3nsit1ve@my.example.com"
  425. #
  def user=(user)
    # Validate first (raises on a bad user or when an opaque part is set),
    # then store; the existing password is kept by set_user.
    check_user(user)
    set_user(user)
    # returns user
  end
  431. #
  432. # == Args
  433. #
  434. # +v+::
  435. # String
  436. #
  437. # == Description
  438. #
  439. # Public setter for the +password+ component
  440. # (with validation).
  441. #
  442. # See also URI::Generic.check_password.
  443. #
  444. # == Usage
  445. #
  446. # require 'uri'
  447. #
  448. # uri = URI.parse("http://john:S3nsit1ve@my.example.com")
  449. # uri.password = "V3ry_S3nsit1ve"
  450. # uri.to_s #=> "http://john:V3ry_S3nsit1ve@my.example.com"
  451. #
  def password=(password)
    # Validate against the current @user (check_password's default), then store.
    check_password(password)
    set_password(password)
    # returns password
  end
  457. # Protected setter for the +user+ component, and +password+ if available
  458. # (without validation).
  459. #
  460. # See also URI::Generic.userinfo=.
  461. #
  def set_userinfo(user, password = nil)
    # Without an explicit password, +user+ is split as 'user:password'.
    name, secret = password ? [user, password] : split_userinfo(user)

    @user = name
    # An absent password leaves any previously stored password in place.
    @password = secret if secret

    return @user, @password
  end
  470. protected :set_userinfo
  471. # Protected setter for the user component +v+.
  472. #
  473. # See also URI::Generic.user=.
  474. #
  475. def set_user(v)
  476. set_userinfo(v, @password)
  477. v
  478. end
  479. protected :set_user
  480. # Protected setter for the password component +v+.
  481. #
  482. # See also URI::Generic.password=.
  483. #
  def set_password(v)
    # Stores +v+ as-is; no validation happens here.
    @password = v
    # returns v
  end
  488. protected :set_password
  489. # Returns the userinfo +ui+ as <code>[user, password]</code>
  490. # if properly formatted as 'user:password'.
  def split_userinfo(ui)
    return [nil, nil] unless ui

    # Split on the first ':' only, so the password may itself contain ':'.
    name, secret = ui.split(':', 2)
    [name, secret]
  end
  496. private :split_userinfo
  497. # Escapes 'user:password' +v+ based on RFC 1738 section 3.1.
  498. def escape_userpass(v)
  499. parser.escape(v, /[@:\/]/o) # RFC 1738 section 3.1 #/
  500. end
  501. private :escape_userpass
  502. # Returns the userinfo, either as 'user' or 'user:password'.
  def userinfo
    # No user means no userinfo at all, even if a password is stored.
    return nil if @user.nil?
    return @user if @password.nil?

    @user + ':' + @password
  end
  512. # Returns the user component.
  def user
    # Plain reader for @user (nil when no user is set).
    @user
  end
  516. # Returns the password component.
  def password
    # Plain reader for @password (nil when no password is set).
    @password
  end
  520. #
  521. # Checks the host +v+ component for RFC2396 compliance
  522. # and against the URI::Parser Regexp for :HOST.
  523. #
  524. # Can not have a registry or opaque component defined,
  525. # with a host component defined.
  526. #
  def check_host(v)
    # nil/false is passed back unchanged; the opaque conflict is only
    # checked for a real value.
    return v unless v

    raise InvalidURIError, "can not set host with registry or opaque" if @opaque
    return true if parser.regexp[:HOST] =~ v

    raise InvalidComponentError,
      "bad component(expected host component): #{v}"
  end
  538. private :check_host
  539. # Protected setter for the host component +v+.
  540. #
  541. # See also URI::Generic.host=.
  542. #
  def set_host(v)
    # Stores +v+ as-is; no validation happens here.
    @host = v
  end
  546. protected :set_host
  547. #
  548. # == Args
  549. #
  550. # +v+::
  551. # String
  552. #
  553. # == Description
  554. #
  555. # Public setter for the host component +v+
  556. # (with validation).
  557. #
  558. # See also URI::Generic.check_host.
  559. #
  560. # == Usage
  561. #
  562. # require 'uri'
  563. #
  564. # uri = URI.parse("http://my.example.com")
  565. # uri.host = "foo.com"
  566. # uri.to_s #=> "http://foo.com"
  567. #
  568. def host=(v)
  569. check_host(v)
  570. set_host(v)
  571. v
  572. end
  573. # Extract the host part of the URI and unwrap brackets for IPv6 addresses.
  574. #
  575. # This method is the same as URI::Generic#host except
  576. # brackets for IPv6 (and future IP) addresses are removed.
  577. #
  578. # uri = URI("http://[::1]/bar")
  579. # uri.hostname #=> "::1"
  580. # uri.host #=> "[::1]"
  581. #
  def hostname
    raw = self.host
    # Strip one pair of enclosing brackets (IPv6 literal form), if present.
    bracketed = /\A\[(.*)\]\z/.match(raw)
    bracketed ? bracketed[1] : raw
  end
  586. # Sets the host part of the URI as the argument with brackets for IPv6 addresses.
  587. #
  588. # This method is the same as URI::Generic#host= except
  589. # the argument can be a bare IPv6 address.
  590. #
  591. # uri = URI("http://foo/bar")
  592. # uri.hostname = "::1"
  593. # uri.to_s #=> "http://[::1]/bar"
  594. #
  595. # If the argument seems to be an IPv6 address,
  596. # it is wrapped with brackets.
  597. #
  def hostname=(v)
    # Anything containing ':' that is not already bracketed is treated as
    # an IPv6 literal and wrapped in brackets.
    already_bracketed = /\A\[.*\]\z/ =~ v
    v = "[#{v}]" if !already_bracketed && /:/ =~ v
    self.host = v
  end
  602. #
  603. # Checks the port +v+ component for RFC2396 compliance
  604. # and against the URI::Parser Regexp for :PORT.
  605. #
  606. # Can not have a registry or opaque component defined,
  607. # with a port component defined.
  608. #
  def check_port(v)
    # nil/false is passed back unchanged; the opaque conflict is only
    # checked for a real value.
    return v unless v

    raise InvalidURIError, "can not set port with registry or opaque" if @opaque
    # Integers are accepted outright; anything else must match the PORT pattern.
    return true if v.kind_of?(Integer) || parser.regexp[:PORT] =~ v

    raise InvalidComponentError,
      "bad component(expected port component): #{v.inspect}"
  end
  620. private :check_port
  621. # Protected setter for the port component +v+.
  622. #
  623. # See also URI::Generic.port=.
  624. #
  def set_port(v)
    # Non-Integer values are normalised: an empty value becomes nil,
    # anything else is converted with to_i. nil and Integers pass through.
    if v && !v.kind_of?(Integer)
      v = v.empty? ? nil : v.to_i
    end
    @port = v
  end
  629. protected :set_port
  630. #
  631. # == Args
  632. #
  633. # +v+::
  634. # String
  635. #
  636. # == Description
  637. #
  638. # Public setter for the port component +v+
  639. # (with validation).
  640. #
  641. # See also URI::Generic.check_port.
  642. #
  643. # == Usage
  644. #
  645. # require 'uri'
  646. #
  647. # uri = URI.parse("http://my.example.com")
  648. # uri.port = 8080
  649. # uri.to_s #=> "http://my.example.com:8080"
  650. #
  651. def port=(v)
  652. check_port(v)
  653. set_port(v)
  654. port
  655. end
  656. def check_registry(v) # :nodoc:
  657. raise InvalidURIError, "can not set registry"
  658. end
  659. private :check_registry
  660. def set_registry(v) #:nodoc:
  661. raise InvalidURIError, "can not set registry"
  662. end
  663. protected :set_registry
  664. def registry=(v)
  665. raise InvalidURIError, "can not set registry"
  666. end
  667. #
  668. # Checks the path +v+ component for RFC2396 compliance
  669. # and against the URI::Parser Regexp
  670. # for :ABS_PATH and :REL_PATH.
  671. #
  672. # Can not have a opaque component defined,
  673. # with a path component defined.
  674. #
  def check_path(v)
    # raise if both hier and opaque are not nil, because:
    #     absoluteURI   = scheme ":" ( hier_part | opaque_part )
    #     hier_part     = ( net_path | abs_path ) [ "?" query ]
    raise InvalidURIError, "path conflicts with opaque" if v && @opaque

    # nil and the empty path need no pattern check.
    return true if !v || v == ''

    absolute = parser.regexp[:ABS_PATH] =~ v

    # If scheme is ftp, path may be relative.
    # See RFC 1738 section 3.2.2, and RFC 2396.
    if @scheme && @scheme != "ftp"
      unless absolute
        raise InvalidComponentError,
          "bad component(expected absolute path component): #{v}"
      end
    elsif !absolute && parser.regexp[:REL_PATH] !~ v
      raise InvalidComponentError,
        "bad component(expected relative path component): #{v}"
    end

    true
  end
  699. private :check_path
  700. # Protected setter for the path component +v+.
  701. #
  702. # See also URI::Generic.path=.
  703. #
  def set_path(v)
    # Stores +v+ as-is; no validation happens here.
    @path = v
  end
  707. protected :set_path
  708. #
  709. # == Args
  710. #
  711. # +v+::
  712. # String
  713. #
  714. # == Description
  715. #
  716. # Public setter for the path component +v+
  717. # (with validation).
  718. #
  719. # See also URI::Generic.check_path.
  720. #
  721. # == Usage
  722. #
  723. # require 'uri'
  724. #
  725. # uri = URI.parse("http://my.example.com/pub/files")
  726. # uri.path = "/faq/"
  727. # uri.to_s #=> "http://my.example.com/faq/"
  728. #
  729. def path=(v)
  730. check_path(v)
  731. set_path(v)
  732. v
  733. end
  734. #
  735. # == Args
  736. #
  737. # +v+::
  738. # String
  739. #
  740. # == Description
  741. #
  742. # Public setter for the query component +v+.
  743. #
  744. # == Usage
  745. #
  746. # require 'uri'
  747. #
  748. # uri = URI.parse("http://my.example.com/?id=25")
  749. # uri.query = "id=1"
  750. # uri.to_s #=> "http://my.example.com/?id=1"
  751. #
  def query=(v)
    # nil/false clears the query; this happens before the opaque check.
    return @query = nil unless v
    raise InvalidURIError, "query conflicts with opaque" if @opaque

    # Work on a private copy: if to_str handed back the very same object,
    # dup it so the in-place edits below do not touch the caller's string.
    x = v.to_str
    v = x.dup if x.equal? v
    # Best-effort transcode to UTF-8; failures are deliberately ignored.
    v.encode!(Encoding::UTF_8) rescue nil
    # Drop tabs, carriage returns and newlines.
    v.delete!("\t\r\n")
    # Switch to a byte-wise view for the regexp work below.
    v.force_encoding(Encoding::ASCII_8BIT)
    # Reject a '%' followed by two non-hex characters.
    raise InvalidURIError, "invalid percent escape: #{$1}" if /(%\H\H)/n.match(v)
    # Percent-encode every byte that is neither the start of a valid %XX
    # escape nor in the allowed character class.
    v.gsub!(/(?!%\h\h|[!$-&(-;=?-_a-~])./n.freeze){'%%%02X' % $&.ord}
    # After encoding, the string is tagged as US-ASCII.
    v.force_encoding(Encoding::US_ASCII)
    @query = v
  end
  765. #
  766. # Checks the opaque +v+ component for RFC2396 compliance and
  767. # against the URI::Parser Regexp for :OPAQUE.
  768. #
  769. # Can not have a host, port, user, or path component defined,
  770. # with an opaque component defined.
  771. #
  def check_opaque(v)
    # nil/false is passed back unchanged without further checks.
    return v unless v

    # raise if both hier and opaque are not nil, because:
    #     absoluteURI   = scheme ":" ( hier_part | opaque_part )
    #     hier_part     = ( net_path | abs_path ) [ "?" query ]
    if @host || @port || @user || @path  # userinfo = @user + ':' + @password
      raise InvalidURIError,
        "can not set opaque with host, port, userinfo or path"
    end
    return true if parser.regexp[:OPAQUE] =~ v

    raise InvalidComponentError,
      "bad component(expected opaque component): #{v}"
  end
  786. private :check_opaque
  787. # Protected setter for the opaque component +v+.
  788. #
  789. # See also URI::Generic.opaque=.
  790. #
  def set_opaque(v)
    # Stores +v+ as-is; no validation happens here.
    @opaque = v
  end
  794. protected :set_opaque
  795. #
  796. # == Args
  797. #
  798. # +v+::
  799. # String
  800. #
  801. # == Description
  802. #
  803. # Public setter for the opaque component +v+
  804. # (with validation).
  805. #
  806. # See also URI::Generic.check_opaque.
  807. #
  808. def opaque=(v)
  809. check_opaque(v)
  810. set_opaque(v)
  811. v
  812. end
  813. #
  814. # Checks the fragment +v+ component against the URI::Parser Regexp for :FRAGMENT.
  815. #
  816. #
  817. # == Args
  818. #
  819. # +v+::
  820. # String
  821. #
  822. # == Description
  823. #
  824. # Public setter for the fragment component +v+
  825. # (with validation).
  826. #
  827. # == Usage
  828. #
  829. # require 'uri'
  830. #
  831. # uri = URI.parse("http://my.example.com/?id=25#time=1305212049")
  832. # uri.fragment = "time=1305212086"
  833. # uri.to_s #=> "http://my.example.com/?id=25#time=1305212086"
  834. #
  def fragment=(v)
    # nil/false clears the fragment.
    return @fragment = nil unless v

    # Work on a private copy: if to_str handed back the very same object,
    # dup it so the in-place edits below do not touch the caller's string.
    x = v.to_str
    v = x.dup if x.equal? v
    # Best-effort transcode to UTF-8; failures are deliberately ignored.
    v.encode!(Encoding::UTF_8) rescue nil
    # Drop tabs, carriage returns and newlines.
    v.delete!("\t\r\n")
    # Switch to a byte-wise view for the regexp work below.
    v.force_encoding(Encoding::ASCII_8BIT)
    # Percent-encode every byte that is neither the start of a valid %XX
    # escape nor in the range '!'..'~'.
    v.gsub!(/(?!%\h\h|[!-~])./n){'%%%02X' % $&.ord}
    # After encoding, the string is tagged as US-ASCII.
    v.force_encoding(Encoding::US_ASCII)
    @fragment = v
  end
  846. #
  847. # Returns true if URI is hierarchical.
  848. #
  849. # == Description
  850. #
  851. # URI has components listed in order of decreasing significance from left to right,
  852. # see RFC3986 https://tools.ietf.org/html/rfc3986 1.2.3.
  853. #
  854. # == Usage
  855. #
  856. # require 'uri'
  857. #
  858. # uri = URI.parse("http://my.example.com/")
  859. # uri.hierarchical?
  860. # #=> true
  861. # uri = URI.parse("mailto:joe@example.com")
  862. # uri.hierarchical?
  863. # #=> false
  864. #
  def hierarchical?
    # Hierarchical means a path component is present (even an empty one).
    @path ? true : false
  end
  872. #
  873. # Returns true if URI has a scheme (e.g. http:// or https://) specified.
  874. #
  def absolute?
    # Absolute means a scheme is present.
    @scheme ? true : false
  end
  882. alias absolute absolute?
  883. #
  884. # Returns true if URI does not have a scheme (e.g. http:// or https://) specified.
  885. #
  886. def relative?
  887. !absolute?
  888. end
  889. #
  890. # Returns an Array of the path split on '/'.
  891. #
  892. def split_path(path)
  893. path.split("/", -1)
  894. end
  895. private :split_path
  896. #
  897. # Merges a base path +base+, with relative path +rel+,
  898. # returns a modified base path.
  899. #
  def merge_path(base, rel)

    # RFC2396, Section 5.2, 5)
    # RFC2396, Section 5.2, 6)
    base_path = split_path(base)
    rel_path  = split_path(rel)

    # RFC2396, Section 5.2, 6), a)
    # Collapse every '..' in the base together with the segment before it.
    base_path << '' if base_path.last == '..'
    while i = base_path.index('..')
      base_path.slice!(i - 1, 2)
    end

    # A leading empty segment means +rel+ starts with '/': discard the base.
    if (first = rel_path.first) and first.empty?
      base_path.clear
      rel_path.shift
    end

    # RFC2396, Section 5.2, 6), c)
    # RFC2396, Section 5.2, 6), d)
    rel_path.push('') if rel_path.last == '.' || rel_path.last == '..'
    rel_path.delete('.')

    # RFC2396, Section 5.2, 6), e)
    # Resolve '..' against preceding segments of +rel+ itself; leading '..'
    # entries that cannot be resolved here are kept in +tmp+.
    tmp = []
    rel_path.each do |x|
      if x == '..' &&
          !(tmp.empty? || tmp.last == '..')
        tmp.pop
      else
        tmp << x
      end
    end

    add_trailer_slash = !tmp.empty?
    if base_path.empty?
      base_path = [''] # keep '/' for root directory
    elsif add_trailer_slash
      # Drop the last base segment; the relative part replaces it.
      base_path.pop
    end
    # Apply remaining leading '..' to the base, then append the rest verbatim.
    while x = tmp.shift
      if x == '..'
        # RFC2396, Section 4
        # a .. or . in an absolute path has no special meaning
        base_path.pop if base_path.size > 1
      else
        # if x == '..'
        #   valid absolute (but abnormal) path "/../..."
        # else
        #   valid absolute path
        # end
        base_path << x
        tmp.each {|t| base_path << t}
        add_trailer_slash = false
        break
      end
    end
    base_path.push('') if add_trailer_slash

    return base_path.join('/')
  end
  954. private :merge_path
  955. #
  956. # == Args
  957. #
  958. # +oth+::
  959. # URI or String
  960. #
  961. # == Description
  962. #
  963. # Destructive form of #merge.
  964. #
  965. # == Usage
  966. #
  967. # require 'uri'
  968. #
  969. # uri = URI.parse("http://my.example.com")
  970. # uri.merge!("/main.rbx?page=1")
  971. # uri.to_s # => "http://my.example.com/main.rbx?page=1"
  972. #
  def merge!(oth)
    merged = merge(oth)
    # nil signals that merging changed nothing.
    return nil if self == merged

    replace!(merged)
    self
  end
  982. #
  983. # == Args
  984. #
  985. # +oth+::
  986. # URI or String
  987. #
  988. # == Description
  989. #
  990. # Merges two URIs.
  991. #
  992. # == Usage
  993. #
  994. # require 'uri'
  995. #
  996. # uri = URI.parse("http://my.example.com")
  997. # uri.merge("/main.rbx?page=1")
  998. # # => "http://my.example.com/main.rbx?page=1"
  999. #
  def merge(oth)
    rel = parser.send(:convert_to_uri, oth)

    # An absolute reference replaces the base entirely.
    if rel.absolute?
      #raise BadURIError, "both URI are absolute" if absolute?
      # hmm... should return oth for usability?
      return rel
    end

    unless self.absolute?
      raise BadURIError, "both URI are relative"
    end

    # All changes are made on a copy; self is left untouched.
    base = self.dup

    authority = rel.userinfo || rel.host || rel.port

    # RFC2396, Section 5.2, 2)
    # Reference to the current document: only the fragment may change.
    if (rel.path.nil? || rel.path.empty?) && !authority && !rel.query
      base.fragment=(rel.fragment) if rel.fragment
      return base
    end

    base.query = nil
    base.fragment=(nil)

    # RFC2396, Section 5.2, 4)
    if !authority
      base.set_path(merge_path(base.path, rel.path)) if base.path && rel.path
    else
      # RFC2396, Section 5.2, 4)
      base.set_path(rel.path) if rel.path
    end

    # RFC2396, Section 5.2, 7)
    base.set_userinfo(rel.userinfo) if rel.userinfo
    base.set_host(rel.host)         if rel.host
    base.set_port(rel.port)         if rel.port
    base.query = rel.query       if rel.query
    base.fragment=(rel.fragment) if rel.fragment

    return base
  end # merge
  1034. alias + merge
  1035. # :stopdoc:
  def route_from_path(src, dst)
    # RFC2396, Section 4.2: identical paths need no relative part.
    return '' if src === dst

    # dst has abnormal absolute path,
    # like "/./", "/../", "/x/../", ...
    return dst.dup if %r{(?:\A|/)\.\.?(?:/|\z)} === dst

    from_segments = src.scan(%r{[^/]*/})
    to_segments = dst.scan(%r{[^/]*/?})

    # discard same parts
    until to_segments.empty? || to_segments.first != from_segments.first
      from_segments.shift
      to_segments.shift
    end

    remainder = to_segments.join

    # Each leftover source directory costs one '../' step upwards.
    return '../' * from_segments.size + remainder unless from_segments.empty?

    return './' if remainder.empty?
    # A ':' in the first segment would read as a scheme, so prefix with './'
    # (see RFC2396 Section 5).
    return './' + remainder if to_segments.first.include?(':')

    remainder
  end
  1066. private :route_from_path
  1067. # :startdoc:
  1068. # :stopdoc:
  # Returns a two-element result. When both elements compare equal,
  # #route_from returns the second one as the final answer; otherwise
  # #route_from computes the relative path between the two.
  def route_from0(oth)
    oth = parser.send(:convert_to_uri, oth)
    # Both sides must be absolute URIs.
    if self.relative?
      raise BadURIError,
        "relative URI: #{self}"
    end
    if oth.relative?
      raise BadURIError,
        "relative URI: #{oth}"
    end

    # Different schemes: no relative form, hand back self and a copy.
    if self.scheme != oth.scheme
      return self, self.dup
    end
    rel = URI::Generic.new(nil,     # it is relative URI
                           self.userinfo, self.host, self.port,
                           nil, self.path, self.opaque,
                           self.query, self.fragment, parser)

    # Authority differs (host compared case-insensitively).
    if rel.userinfo != oth.userinfo ||
        rel.host.to_s.downcase != oth.host.to_s.downcase ||
        rel.port != oth.port

      if self.userinfo.nil? && self.host.nil?
        return self, self.dup
      end

      rel.set_port(nil) if rel.port == oth.default_port
      return rel, rel
    end
    # Same authority: drop it from the relative result.
    rel.set_userinfo(nil)
    rel.set_host(nil)
    rel.set_port(nil)

    # Same path (or same opaque part): only query/fragment can differ.
    if rel.path && rel.path == oth.path
      rel.set_path('')
      rel.query = nil if rel.query == oth.query
      return rel, rel
    elsif rel.opaque && rel.opaque == oth.opaque
      rel.set_opaque('')
      rel.query = nil if rel.query == oth.query
      return rel, rel
    end

    # you can modify `rel', but can not `oth'.
    return oth, rel
  end
  1110. private :route_from0
  1111. # :startdoc:
  1112. #
  1113. # == Args
  1114. #
  1115. # +oth+::
  1116. # URI or String
  1117. #
  1118. # == Description
  1119. #
  1120. # Calculates relative path from oth to self.
  1121. #
  1122. # == Usage
  1123. #
  1124. # require 'uri'
  1125. #
  1126. # uri = URI.parse('http://my.example.com/main.rbx?page=1')
  1127. # uri.route_from('http://my.example.com')
  1128. # #=> #<URI::Generic /main.rbx?page=1>
  1129. #
  def route_from(oth)
    # you can modify `rel', but can not `oth'.
    begin
      oth, rel = route_from0(oth)
    rescue => e
      # Re-raise as a fresh exception of the same class and message.
      raise e.class, e.message
    end

    # Equal elements mean route_from0 already produced the final answer.
    return rel if oth == rel

    rel.set_path(route_from_path(oth.path, self.path))
    # "./?foo" -> "?foo"
    rel.set_path('') if rel.path == './' && self.query

    rel
  end
  1147. alias - route_from
  1148. #
  1149. # == Args
  1150. #
  1151. # +oth+::
  1152. # URI or String
  1153. #
  1154. # == Description
  1155. #
  1156. # Calculates relative path to oth from self.
  1157. #
  1158. # == Usage
  1159. #
  1160. # require 'uri'
  1161. #
  1162. # uri = URI.parse('http://my.example.com')
  1163. # uri.route_to('http://my.example.com/main.rbx?page=1')
  1164. # #=> #<URI::Generic /main.rbx?page=1>
  1165. #
  def route_to(oth)
    # Convert +oth+ through the parser, then ask the result for its
    # route starting from self.
    destination = parser.send(:convert_to_uri, oth)
    destination.route_from(self)
  end
  1169. #
  1170. # Returns normalized URI.
  1171. #
  1172. # require 'uri'
  1173. #
  1174. # URI("HTTP://my.EXAMPLE.com").normalize
  1175. # #=> #<URI::HTTP http://my.example.com/>
  1176. #
  1177. # Normalization here means:
  1178. #
  1179. # * scheme and host are converted to lowercase,
  1180. # * an empty path component is set to "/".
  1181. #
  def normalize
    # Normalize a copy in place and return that copy; the receiver is
    # not modified.
    dup.tap(&:normalize!)
  end
  1187. #
  1188. # Destructive version of #normalize.
  1189. #
  def normalize!
    # A present-but-empty path becomes "/".
    set_path('/') if path&.empty?

    # Scheme and host are only rewritten when lowercasing changes them.
    lowered_scheme = scheme&.downcase
    set_scheme(lowered_scheme) if lowered_scheme && lowered_scheme != scheme

    lowered_host = host&.downcase
    set_host(lowered_host) if lowered_host && lowered_host != host
  end
  1201. #
  1202. # Constructs String from URI.
  1203. #
  def to_s
    out = +''
    out << @scheme << ':' if @scheme

    if @opaque
      out << @opaque
    else
      # The file and postgres schemes keep the "//" marker even when
      # there is no host.
      out << '//' if @host || %w[file postgres].include?(@scheme)

      info = self.userinfo
      out << info << '@' if info

      out << @host if @host

      # The port is only written when it differs from the default.
      out << ':' << @port.to_s if @port && @port != self.default_port

      out << @path
      out << '?' << @query if @query
    end

    out << '#' << @fragment if @fragment
    out
  end
  1239. #
  1240. # Compares two URIs.
  1241. #
  def ==(oth)
    # Objects of a different class are never equal.
    return false unless self.class == oth.class

    # Compare component lists of normalized copies.
    self.normalize.component_ary == oth.normalize.component_ary
  end
  def hash
    # Hash of the component list (not normalized).
    component_ary.hash
  end
  def eql?(oth)
    return false unless self.class == oth.class

    # Same parser and component lists that are eql? element by element.
    parser == oth.parser && self.component_ary.eql?(oth.component_ary)
  end
  1257. =begin
  1258. --- URI::Generic#===(oth)
  1259. =end
  1260. # def ===(oth)
  1261. # raise NotImplementedError
  1262. # end
  1263. =begin
  1264. =end
  1265. # Returns an Array of the components defined from the COMPONENT Array.
  def component_ary
    # One value per entry of +component+, in the same order.
    component.map { |name| send(name) }
  end
  1271. protected :component_ary
  1272. # == Args
  1273. #
  1274. # +components+::
  1275. # Multiple Symbol arguments defined in URI::HTTP.
  1276. #
  1277. # == Description
  1278. #
  1279. # Selects specified components from URI.
  1280. #
  1281. # == Usage
  1282. #
  1283. # require 'uri'
  1284. #
  1285. # uri = URI.parse('http://myuser:mypass@my.example.com/test.rbx')
  1286. # uri.select(:userinfo, :host, :path)
  1287. # # => ["myuser:mypass", "my.example.com", "/test.rbx"]
  1288. #
  def select(*components)
    components.map do |name|
      # Only names listed in +component+ may be selected.
      unless component.include?(name)
        raise ArgumentError,
              "expected of components of #{self.class} (#{self.class.component.join(', ')})"
      end

      send(name)
    end
  end
  def inspect
    # Class name followed by the string form of the object.
    format('#<%s %s>', self.class, self)
  end
  1302. #
  1303. # == Args
  1304. #
  1305. # +oth+::
  1306. # URI or String
  1307. #
  1308. # == Description
  1309. #
  1310. # Attempts to parse other URI +oth+,
  1311. # returns [parsed_oth, self].
  1312. #
  1313. # == Usage
  1314. #
  1315. # require 'uri'
  1316. #
  1317. # uri = URI.parse("http://my.example.com")
  1318. # uri.coerce("http://foo.com")
  1319. # #=> [#<URI::HTTP http://foo.com>, #<URI::HTTP http://my.example.com>]
  1320. #
  def coerce(oth)
    parsed =
      if String === oth
        parser.parse(oth)
      else
        # Anything that is not a String is delegated to the superclass.
        super
        oth
      end

    [parsed, self]
  end
  1330. # Returns a proxy URI.
  1331. # The proxy URI is obtained from environment variables such as http_proxy,
  1332. # ftp_proxy, no_proxy, etc.
  1333. # If there is no proper proxy, nil is returned.
  1334. #
  1335. # If the optional parameter +env+ is specified, it is used instead of ENV.
  1336. #
  1337. # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
  1338. # are examined, too.
  1339. #
  1340. # But http_proxy and HTTP_PROXY are treated specially under CGI environment.
  1341. # It's because HTTP_PROXY may be set by Proxy: header.
  1342. # So HTTP_PROXY is not used.
  1343. # http_proxy is not used either if the variable is case insensitive.
  1344. # CGI_HTTP_PROXY can be used instead.
  def find_proxy(env=ENV)
    raise BadURIError, "relative URI: #{self}" if self.relative?

    var = "#{self.scheme.downcase}_proxy"

    if var == 'http_proxy' && env.include?('REQUEST_METHOD') # CGI?
      # HTTP_PROXY conflicts with *_proxy for proxy settings and
      # HTTP_* for header information in CGI, so it has to be used
      # with care.
      candidates = env.reject { |key, _| /\Ahttp_proxy\z/i !~ key }
      proxy_uri =
        case candidates.size
        when 0 # no proxy setting anyway.
          nil
        when 1
          key, = candidates.first
          # http_proxy is safe to use because ENV is case sensitive.
          key == 'http_proxy' && env[key.upcase].nil? ? env[var] : nil
        else # http_proxy is safe to use because ENV is case sensitive.
          env.to_hash[var]
        end
      # Use CGI_HTTP_PROXY. cf. libwww-perl.
      proxy_uri ||= env["CGI_#{var.upcase}"]
    elsif var == 'http_proxy'
      proxy_uri = env[var]
      unless proxy_uri
        proxy_uri = env[var.upcase]
        warn('The environment variable HTTP_PROXY is discouraged. Use http_proxy.', uplevel: 1) if proxy_uri
      end
    else
      proxy_uri = env[var] || env[var.upcase]
    end

    return nil if proxy_uri.nil? || proxy_uri.empty?

    addr = nil
    if self.hostname
      begin
        addr = IPSocket.getaddress(self.hostname)
      rescue SocketError
        # Resolution failed; continue without an address.
      end
      # Addresses starting with "127." or equal to "::1" never use a proxy.
      return nil if addr && /\A127\.|\A::1\z/ =~ addr
    end

    no_proxy = env['no_proxy'] || env['NO_PROXY']
    if no_proxy && !URI::Generic.use_proxy?(self.hostname, addr, self.port, no_proxy)
      return nil
    end

    URI.parse(proxy_uri)
  end
  def self.use_proxy?(hostname, addr, port, no_proxy) # :nodoc:
    # Decides whether a proxy should be used for the given destination.
    #
    # +hostname+:: destination host name; may be nil for a URI without host
    # +addr+::     resolved address of the destination as a String, or nil
    # +port+::     destination port
    # +no_proxy+:: exclusion list; entries are separated by commas and/or
    #              whitespace and look like "host", ".suffix" or
    #              "host:port"
    #
    # Returns false as soon as one entry matches, true otherwise.

    # to_s guards against a nil hostname, which would otherwise raise
    # NoMethodError on downcase.
    hostname = hostname.to_s.downcase
    dothostname = ".#{hostname}"

    no_proxy.scan(/([^:,\s]+)(?::(\d+))?/) do |p_host, p_port|
      # An entry with a port only applies to that port.
      next if p_port && port != p_port.to_i

      pattern = p_host.downcase
      if pattern.start_with?('.')
        # ".example.com" matches any name ending in that suffix.
        return false if hostname.end_with?(pattern)
      else
        # "example.com" matches the name itself and its subdomains; the
        # leading dot keeps "notexample.com" from matching.
        return false if dothostname.end_with?(".#{pattern}")
      end

      next unless addr

      begin
        # The entry may also be an IP address or network.
        return false if IPAddr.new(p_host).include?(addr)
      rescue IPAddr::InvalidAddressError
        # Not an IP address; try the next entry.
        next
      end
    end

    true
  end
  1418. end
  1419. end