PageRenderTime 63ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/ruby/lib/uri/common.rb

http://android-ruby.googlecode.com/
Ruby | 727 lines | 400 code | 43 blank | 284 comment | 2 complexity | 911e8345b7075186f13fb6defc39544d MD5 | raw file
Possible License(s): LGPL-2.1, AGPL-3.0, 0BSD, Unlicense, GPL-2.0, BSD-3-Clause
  1. # = uri/common.rb
  2. #
  3. # Author:: Akira Yamada <akira@ruby-lang.org>
  4. # Revision:: $Id: common.rb 22760 2009-03-04 09:21:12Z yugui $
  5. # License::
  6. # You can redistribute it and/or modify it under the same term as Ruby.
  7. #
  8. module URI
  module REGEXP
    #
    # Building blocks for the URI-parsing patterns.
    #
    # ALPHA, ALNUM, HEX, UNRESERVED and RESERVED are character-class bodies
    # (they are meant to be embedded inside "[...]"); ESCAPED, DOMLABEL,
    # TOPLABEL and HOSTNAME are complete sub-patterns.
    #
    module PATTERN
      # :stopdoc:

      # RFC 2396 (URI Generic Syntax)
      # RFC 2732 (IPv6 Literal Addresses in URL's)
      # RFC 2373 (IPv6 Addressing Architecture)

      # alpha = lowalpha | upalpha
      ALPHA = 'a-zA-Z'
      # alphanum = alpha | digit
      ALNUM = ALPHA + '\d'

      # hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
      #       "a" | "b" | "c" | "d" | "e" | "f"
      HEX = 'a-fA-F\d'
      # escaped = "%" hex hex
      ESCAPED = '%[' + HEX + ']{2}'

      # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
      # unreserved = alphanum | mark
      UNRESERVED = "-_.!~*'()" + ALNUM
      # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
      #            "$" | ","                          (RFC 2396)
      # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
      #            "$" | "," | "[" | "]"              (RFC 2732)
      RESERVED = ';/?:@&=+$,\[\]'

      # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
      DOMLABEL = '(?:[' + ALNUM + '](?:[-' + ALNUM + ']*[' + ALNUM + '])?)'
      # toplabel = alpha | alpha *( alphanum | "-" ) alphanum
      TOPLABEL = '(?:[' + ALPHA + '](?:[-' + ALNUM + ']*[' + ALNUM + '])?)'
      # hostname = *( domainlabel "." ) toplabel [ "." ]
      HOSTNAME = '(?:' + DOMLABEL + '\.)*' + TOPLABEL + '\.?'

      # :startdoc:
    end # PATTERN

    # :startdoc:
  end # REGEXP
  46. class Parser
  47. include REGEXP
  48. #
  49. # == Synopsis
  50. #
  51. # URI::Parser.new([opts])
  52. #
  53. # == Args
  54. #
  55. # The constructor accepts a hash as options for parser.
  56. # Keys of options are pattern names of URI components
  57. # and values of options are pattern strings.
  58. # The constructor generates a set of regexps for parsing URIs.
  59. #
  60. # You can use the following keys:
  61. #
  62. # * <tt>:ESCAPED</tt> (URI::PATTERN::ESCAPED in default)
  63. # * <tt>:UNRESERVED</tt> (URI::PATTERN::UNRESERVED in default)
  64. # * <tt>:DOMLABEL</tt> (URI::PATTERN::DOMLABEL in default)
  65. # * <tt>:TOPLABEL</tt> (URI::PATTERN::TOPLABEL in default)
  66. # * <tt>:HOSTNAME</tt> (URI::PATTERN::HOSTNAME in default)
  67. #
  68. # == Examples
  69. #
  70. # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
  71. # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
  72. # URI.parse(u.to_s) #=> raises URI::InvalidURIError
  73. #
  74. # s = "http://example.com/ABCD"
  75. # u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
  76. # u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
  77. # u1 == u2 #=> true
  78. # u1.eql?(u2) #=> false
  79. #
  def initialize(opts = {})
    # Pattern strings first (honouring any overrides in +opts+); the table
    # and every value in it are frozen before the regexps are built from it.
    @pattern = initialize_pattern(opts)
    @pattern.each_value(&:freeze)
    @pattern.freeze

    # Compiled regexps, frozen the same way.
    @regexp = initialize_regexp(@pattern)
    @regexp.each_value(&:freeze)
    @regexp.freeze
  end
  88. attr_reader :pattern, :regexp
  # Breaks +uri+ into its components and returns them as the nine-element
  # array
  #
  #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  #
  # Components that are absent are nil, except that +path+ becomes '' when
  # neither a path nor an opaque part is present.
  #
  # Raises InvalidURIError when +uri+ matches neither the absolute nor the
  # relative URI pattern.
  def split(uri)
    case uri
    when ''
      # null uri: every component stays nil (path is defaulted below)

    when @regexp[:ABS_URI]
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      # opaque_part   = uric_no_slash *uric
      # abs_path      = "/" path_segments
      # net_path      = "//" authority [ abs_path ]
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      scheme, opaque, userinfo, host, port,
        registry, path, query, fragment = Regexp.last_match.captures

      unless scheme
        raise InvalidURIError,
          "bad URI(absolute but no scheme): #{uri}"
      end
      unless opaque || path || host || registry
        raise InvalidURIError,
          "bad URI(absolute but no path): #{uri}"
      end

    when @regexp[:REL_URI]
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      # net_path      = "//" authority [ abs_path ]
      # abs_path      = "/" path_segments
      # rel_path      = rel_segment [ abs_path ]
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      userinfo, host, port, registry,
        rel_segment, abs_path, query, fragment = Regexp.last_match.captures

      # A relative reference has neither scheme nor opaque part; its path is
      # whatever was captured of rel_segment and abs_path, in that order.
      path =
        if rel_segment && abs_path
          rel_segment + abs_path
        else
          rel_segment || abs_path
        end

    else
      raise InvalidURIError, "bad URI(is not URI?): #{uri}"
    end

    path = '' unless path || opaque # (see RFC2396 Section 5.2)

    [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  end
  # Splits +uri+ and builds a URI object from the components.
  #
  # The class registered in URI.scheme_list under the upper-cased scheme is
  # used when there is one; otherwise (including when there is no scheme)
  # Generic is used. This parser is passed as the last constructor argument.
  def parse(uri)
    scheme, userinfo, host, port,
      registry, path, opaque, query, fragment = split(uri)

    uri_class =
      if scheme && URI.scheme_list.include?(scheme.upcase)
        URI.scheme_list[scheme.upcase]
      else
        Generic
      end

    uri_class.new(scheme, userinfo, host, port,
                  registry, path, opaque, query,
                  fragment, self)
  end
  # Parses the first string and merges each following string into the
  # result, left to right; returns the final merged URI object.
  def join(*str)
    base = parse(str[0])
    str[1 .. -1].inject(base) { |merged, ref| merged.merge(ref) }
  end
  # Scans +str+ for URIs using make_regexp(schemes).
  #
  # Without a block, returns an array of the matched substrings. With a
  # block, yields each matched substring and returns nil.
  def extract(str, schemes = nil, &block)
    uri_pattern = make_regexp(schemes)

    unless block_given?
      found = []
      str.scan(uri_pattern) { found << $& }
      return found
    end

    str.scan(uri_pattern) { yield $& }
    nil
  end
  # Returns a regexp matching absolute URIs. With no +schemes+ this is the
  # precompiled @regexp[:ABS_URI_REF]; otherwise a new extended-mode regexp
  # is built that additionally requires the match to start with one of
  # +schemes+ followed by ":".
  def make_regexp(schemes = nil)
    return @regexp[:ABS_URI_REF] unless schemes

    /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
  end
  # Returns a copy of +str+ in which every substring matched by +unsafe+ is
  # replaced by the %XX (upper-case hex) encoding of each of its bytes. The
  # result is tagged as US-ASCII.
  #
  # +unsafe+ defaults to @regexp[:UNSAFE]. When it is not a Regexp it is
  # treated as a set of characters and turned into a character class.
  def escape(str, unsafe = @regexp[:UNSAFE])
    matcher =
      if unsafe.kind_of?(Regexp)
        unsafe
      else
        # perhaps unsafe is String object
        Regexp.new("[#{Regexp.quote(unsafe)}]", false)
      end

    encoded = str.gsub(matcher) do |chunk|
      chunk.each_byte.map { |byte| sprintf('%%%02X', byte) }.join
    end
    encoded.force_encoding(Encoding::US_ASCII)
  end
  # Returns a copy of +str+ in which every match of +escaped+ (by default
  # @regexp[:ESCAPED]) is replaced by the single byte whose hex value is
  # given by the two characters following the first one ("%41" -> "A").
  # The result carries the encoding of +str+.
  def unescape(str, escaped = @regexp[:ESCAPED])
    decoded = str.gsub(escaped) do |sequence|
      [sequence[1, 2].hex].pack('C')
    end
    decoded.force_encoding(str.encoding)
  end
  # Unbound Kernel#to_s, kept so that #inspect can call it directly.
  @@to_s = Kernel.instance_method(:to_s)

  # Returns what Kernel#to_s returns for this object.
  # NOTE(review): presumably this keeps the large pattern/regexp tables out
  # of inspect output -- confirm intent.
  def inspect
    kernel_to_s = @@to_s.bind(self)
    kernel_to_s.call
  end
  204. private
  # Builds the table of pattern strings (keyed by symbol) from which the
  # parser's regexps are compiled.
  #
  # +opts+ may override :ESCAPED, :UNRESERVED, :RESERVED, :DOMLABEL,
  # :TOPLABEL and :HOSTNAME; anything not given falls back to the matching
  # PATTERN constant (HOSTNAME is derived from the domain/top labels).
  # +opts+ is only read, never modified, so a frozen or shared hash is safe
  # to pass.
  #
  # The returned hash includes :PORT, which initialize_regexp reads when it
  # builds the port validator; leaving it out made that validator match the
  # empty string only.
  def initialize_pattern(opts = {})
    ret = {}
    ret[:ESCAPED] = escaped = opts[:ESCAPED] || PATTERN::ESCAPED
    ret[:UNRESERVED] = unreserved = opts[:UNRESERVED] || PATTERN::UNRESERVED
    ret[:RESERVED] = reserved = opts[:RESERVED] || PATTERN::RESERVED
    ret[:DOMLABEL] = domlabel = opts[:DOMLABEL] || PATTERN::DOMLABEL
    ret[:TOPLABEL] = toplabel = opts[:TOPLABEL] || PATTERN::TOPLABEL
    ret[:HOSTNAME] = hostname = opts[:HOSTNAME]

    # RFC 2396 (URI Generic Syntax)
    # RFC 2732 (IPv6 Literal Addresses in URL's)
    # RFC 2373 (IPv6 Addressing Architecture)

    # uric = reserved | unreserved | escaped
    ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
    # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    #                 "&" | "=" | "+" | "$" | ","
    ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
    # query = *uric
    ret[:QUERY] = query = "#{uric}*"
    # fragment = *uric
    ret[:FRAGMENT] = fragment = "#{uric}*"

    # hostname = *( domainlabel "." ) toplabel [ "." ]
    unless hostname
      ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?"
    end

    # RFC 2373, APPENDIX B:
    # IPv6address = hexpart [ ":" IPv4address ]
    # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
    # hexseq = hex4 *( ":" hex4)
    # hex4 = 1*4HEXDIG
    #
    # XXX: This definition has a flaw. "::" + IPv4address must be
    # allowed too. Here is a replacement.
    #
    # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
    # hex4 = 1*4HEXDIG
    hex4 = "[#{PATTERN::HEX}]{1,4}"
    # lastpart = hex4 | IPv4address
    lastpart = "(?:#{hex4}|#{ipv4addr})"
    # hexseq1 = *( hex4 ":" ) hex4
    hexseq1 = "(?:#{hex4}:)*#{hex4}"
    # hexseq2 = *( hex4 ":" ) lastpart
    hexseq2 = "(?:#{hex4}:)*#{lastpart}"
    # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
    ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"

    # IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
    # unused

    # ipv6reference = "[" IPv6address "]" (RFC 2732)
    ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"

    # host = hostname | IPv4address
    # host = hostname | IPv4address | IPv6reference (RFC 2732)
    ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
    # port = *digit
    # Stored under :PORT because initialize_regexp looks it up there.
    ret[:PORT] = port = '\d*'
    # hostport = host [ ":" port ]
    ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"

    # userinfo = *( unreserved | escaped |
    #               ";" | ":" | "&" | "=" | "+" | "$" | "," )
    ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"

    # pchar = unreserved | escaped |
    #         ":" | "@" | "&" | "=" | "+" | "$" | ","
    pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
    # param = *pchar
    param = "#{pchar}*"
    # segment = *pchar *( ";" param )
    segment = "#{pchar}*(?:;#{param})*"
    # path_segments = segment *( "/" segment )
    ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"

    # server = [ [ userinfo "@" ] hostport ]
    server = "(?:#{userinfo}@)?#{hostport}"
    # reg_name = 1*( unreserved | escaped | "$" | "," |
    #                ";" | ":" | "@" | "&" | "=" | "+" )
    ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
    # authority = server | reg_name
    authority = "(?:#{server}|#{reg_name})"

    # rel_segment = 1*( unreserved | escaped |
    #                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
    ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"

    # scheme = alpha *( alpha | digit | "+" | "-" | "." )
    ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}\\d]*"

    # abs_path = "/" path_segments
    ret[:ABS_PATH] = abs_path = "/#{path_segments}"
    # rel_path = rel_segment [ abs_path ]
    ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
    # net_path = "//" authority [ abs_path ]
    ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"

    # hier_part = ( net_path | abs_path ) [ "?" query ]
    ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
    # opaque_part = uric_no_slash *uric
    ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"

    # absoluteURI = scheme ":" ( hier_part | opaque_part )
    ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
    # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
    ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"

    # The X_ patterns are written for Regexp::EXTENDED: whitespace is
    # ignored and the (?# ...) groups are comments naming each capture.
    ret[:X_ABS_URI] = "
      (#{scheme}):                           (?# 1: scheme)
      (?:
         (#{opaque_part})                    (?# 2: opaque)
      |
         (?:(?:
           //(?:
               (?:(?:(#{userinfo})@)?        (?# 3: userinfo)
                 (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
             |
               (#{reg_name})                 (?# 6: registry)
             )
           |
           (?!//))                           (?# XXX: '//' is the mark for hostport)
           (#{abs_path})?                    (?# 7: path)
         )(?:\\?(#{query}))?                 (?# 8: query)
      )
      (?:\\#(#{fragment}))?                  (?# 9: fragment)
    "

    ret[:X_REL_URI] = "
      (?:
        (?:
          //
          (?:
            (?:(#{userinfo})@)?       (?# 1: userinfo)
            (#{host})?(?::(\\d*))?    (?# 2: host, 3: port)
          |
            (#{reg_name})             (?# 4: registry)
          )
        )
      |
        (#{rel_segment})              (?# 5: rel_segment)
      )?
      (#{abs_path})?                  (?# 6: abs_path)
      (?:\\?(#{query}))?              (?# 7: query)
      (?:\\#(#{fragment}))?           (?# 8: fragment)
    "

    ret
  end
  # Compiles the pattern strings in +pattern+ into the table of regexps
  # (keyed by symbol) used by split, extract, escape/unescape and for
  # validating individual components.
  #
  # Every validating regexp is anchored with \A and \z. The line anchors
  # ^ and $ used previously match at any line boundary, so a multi-line
  # string was accepted as soon as one of its lines matched.
  #
  # :ABS_URI and :REL_URI additionally tolerate surrounding whitespace, so
  # input carrying a trailing newline (which "$" used to let through) is
  # still accepted; the whitespace lies outside every capture group.
  def initialize_regexp(pattern)
    ret = {}

    # for URI::split
    ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
    ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)

    # for URI::extract
    ret[:URI_REF] = Regexp.new(pattern[:URI_REF])
    ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
    ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)

    # for URI::escape/unescape
    ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
    ret[:UNSAFE] = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")

    # for Generic#initialize
    # port = *digit; fall back to that when the table carries no :PORT
    # entry, so the validator never degenerates to "empty string only".
    port = pattern[:PORT] || '\d*'
    ret[:SCHEME] = Regexp.new("\\A#{pattern[:SCHEME]}\\z")
    ret[:USERINFO] = Regexp.new("\\A#{pattern[:USERINFO]}\\z")
    ret[:HOST] = Regexp.new("\\A#{pattern[:HOST]}\\z")
    ret[:PORT] = Regexp.new("\\A#{port}\\z")
    ret[:OPAQUE] = Regexp.new("\\A#{pattern[:OPAQUE_PART]}\\z")
    ret[:REGISTRY] = Regexp.new("\\A#{pattern[:REG_NAME]}\\z")
    ret[:ABS_PATH] = Regexp.new("\\A#{pattern[:ABS_PATH]}\\z")
    ret[:REL_PATH] = Regexp.new("\\A#{pattern[:REL_PATH]}\\z")
    ret[:QUERY] = Regexp.new("\\A#{pattern[:QUERY]}\\z")
    ret[:FRAGMENT] = Regexp.new("\\A#{pattern[:FRAGMENT]}\\z")

    ret
  end
  366. end # class Parser
  # Parser built with the default patterns (no overrides).
  DEFAULT_PARSER = Parser.new

  # Publish each pattern string as REGEXP::PATTERN::<NAME>, leaving alone
  # any constant that PATTERN already defines.
  DEFAULT_PARSER.pattern.each_pair do |name, source|
    next if REGEXP::PATTERN.const_defined?(name)
    REGEXP::PATTERN.const_set(name, source)
  end

  # Publish each compiled regexp as a constant of the enclosing module.
  DEFAULT_PARSER.regexp.each_pair do |name, compiled|
    const_set(name, compiled)
  end
  module Util # :nodoc:
    # Builds a component hash for +klass+ from +array_hash+.
    #
    # +array_hash+ is either an Array holding one value per entry of
    # klass.component after the first (in that order), or a Hash of
    # component => value. Values are cloned where possible. The :scheme
    # entry is always set to the lower-cased last segment of klass.to_s.
    #
    # Raises ArgumentError for any other argument, including an Array of
    # the wrong length.
    def make_components_hash(klass, array_hash)
      components = {}

      # Store a private copy of +value+; values that refuse to be cloned
      # (TypeError) are stored as they are.
      store = lambda do |name, value|
        begin
          components[name] = value.clone
        rescue TypeError
          components[name] = value
        end
      end

      if array_hash.kind_of?(Array) &&
          array_hash.size == klass.component.size - 1
        klass.component[1..-1].each_with_index do |name, index|
          store.call(name, array_hash[index])
        end
      elsif array_hash.kind_of?(Hash)
        array_hash.each do |name, value|
          store.call(name, value)
        end
      else
        raise ArgumentError,
          "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
      end

      components[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
      components
    end
    module_function :make_components_hash
  end
  module Escape
    #
    # == Synopsis
    #
    #   URI.escape(str [, unsafe])
    #
    # == Args
    #
    # +str+::
    #   String to escape.
    # +unsafe+::
    #   Regexp that matches all symbols that must be replaced with codes.
    #   By default uses <tt>REGEXP::UNSAFE</tt>.
    #   When this argument is a String, it represents a character set.
    #
    # == Description
    #
    # Escapes the string, replacing all unsafe characters with codes.
    # All arguments are handed unchanged to DEFAULT_PARSER.escape.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   p enc_uri
    #   # => "http://example.com/?a=%09%0D"
    #
    #   p URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    #   p URI.escape("@?@!", "!?")
    #   # => "@%3F@%21"
    #
    def escape(*arg)
      DEFAULT_PARSER.escape(*arg)
    end
    alias_method :encode, :escape

    #
    # == Synopsis
    #
    #   URI.unescape(str)
    #
    # == Args
    #
    # +str+::
    #   String to unescape.
    #
    # == Description
    #
    # Reverses URI.escape. All arguments are handed unchanged to
    # DEFAULT_PARSER.unescape.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   p enc_uri
    #   # => "http://example.com/?a=%09%0D"
    #
    #   p URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    def unescape(*arg)
      DEFAULT_PARSER.unescape(*arg)
    end
    alias_method :decode, :unescape
  end
  # Make escape/unescape (and their aliases) callable on the module itself,
  # and bring the REGEXP namespace into the module.
  extend Escape
  include REGEXP

  # Table of scheme-specific classes; Parser#parse looks entries up by
  # upper-cased scheme name.
  @@schemes = {}

  # Returns the table itself (not a copy).
  def self.scheme_list
    @@schemes
  end
  474. #
  475. # Base class for all URI exceptions.
  476. #
  class Error < StandardError
  end

  #
  # Not a URI.
  #
  class InvalidURIError < Error
  end

  #
  # Not a URI component.
  #
  class InvalidComponentError < Error
  end

  #
  # URI is valid, bad usage is not.
  #
  class BadURIError < Error
  end
  490. #
  491. # == Synopsis
  492. #
  493. # URI::split(uri)
  494. #
  495. # == Args
  496. #
  497. # +uri+::
  498. # String with URI.
  499. #
  500. # == Description
  501. #
  502. # Splits the string on following parts and returns array with result:
  503. #
  504. # * Scheme
  505. # * Userinfo
  506. # * Host
  507. # * Port
  508. # * Registry
  509. # * Path
  510. # * Opaque
  511. # * Query
  512. # * Fragment
  513. #
  514. # == Usage
  515. #
  516. # require 'uri'
  517. #
  518. # p URI.split("http://www.ruby-lang.org/")
  519. # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
  520. #
  def self.split(uri)
    # Module-level convenience: the shared default parser does the work.
    DEFAULT_PARSER.split(uri)
  end
  524. #
  525. # == Synopsis
  526. #
  527. # URI::parse(uri_str)
  528. #
  529. # == Args
  530. #
  531. # +uri_str+::
  532. # String with URI.
  533. #
  534. # == Description
  535. #
  536. # Creates one of the URI's subclasses instance from the string.
  537. #
  538. # == Raises
  539. #
  540. # URI::InvalidURIError
  541. # Raised if URI given is not a correct one.
  542. #
  543. # == Usage
  544. #
  545. # require 'uri'
  546. #
  547. # uri = URI.parse("http://www.ruby-lang.org/")
  548. # p uri
  549. # # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
  550. # p uri.scheme
  551. # # => "http"
  552. # p uri.host
  553. # # => "www.ruby-lang.org"
  554. #
  def self.parse(uri)
    # Module-level convenience: the shared default parser does the work.
    DEFAULT_PARSER.parse(uri)
  end
  558. #
  559. # == Synopsis
  560. #
  561. # URI::join(str[, str, ...])
  562. #
  563. # == Args
  564. #
  565. # +str+::
  566. # String(s) to work with
  567. #
  568. # == Description
  569. #
  570. # Joins URIs.
  571. #
  572. # == Usage
  573. #
  574. # require 'uri'
  575. #
  576. # p URI.join("http://localhost/","main.rbx")
  577. # # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>
  578. #
  def self.join(*str)
    # Module-level convenience: every argument is forwarded to the shared
    # default parser.
    DEFAULT_PARSER.join(*str)
  end
  582. #
  583. # == Synopsis
  584. #
  585. # URI::extract(str[, schemes][,&blk])
  586. #
  587. # == Args
  588. #
  589. # +str+::
  590. # String to extract URIs from.
  591. # +schemes+::
  592. # Limit URI matching to specific schemes.
  593. #
  594. # == Description
  595. #
  596. # Extracts URIs from a string. If block given, iterates through all matched URIs.
  597. # Returns nil if block given or array with matches.
  598. #
  599. # == Usage
  600. #
  601. # require "uri"
  602. #
  603. # URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  604. # # => ["http://foo.example.org/bla", "mailto:test@example.com"]
  605. #
  def self.extract(str, schemes = nil, &block)
    # Module-level convenience: arguments and block are forwarded to the
    # shared default parser.
    DEFAULT_PARSER.extract(str, schemes, &block)
  end
  609. #
  610. # == Synopsis
  611. #
  612. # URI::regexp([match_schemes])
  613. #
  614. # == Args
  615. #
  616. # +match_schemes+::
  617. # Array of schemes. If given, resulting regexp matches to URIs
  618. # whose scheme is one of the match_schemes.
  619. #
  620. # == Description
  621. # Returns a Regexp object which matches to URI-like strings.
  622. # The Regexp object returned by this method includes arbitrary
  623. # number of capture group (parentheses). Never rely on its number.
  624. #
  625. # == Usage
  626. #
  627. # require 'uri'
  628. #
  629. # # extract first URI from html_string
  630. # html_string.slice(URI.regexp)
  631. #
  632. # # remove ftp URIs
  633. # html_string.sub(URI.regexp(['ftp']), '')
  634. #
  635. # # You should not rely on the number of parentheses
  636. # html_string.scan(URI.regexp) do |*matches|
  637. # p $&
  638. # end
  639. #
  def self.regexp(schemes = nil)
    # Module-level convenience: the shared default parser builds the regexp.
    DEFAULT_PARSER.make_regexp(schemes)
  end
  643. end
  module Kernel
    # Shorthand for URI.parse(uri_str); returns whatever URI.parse returns.
    #
    # Registered with module_function, so it can be called both as a bare
    # URI(...) and as Kernel.URI(...).
    #
    # This method was introduced in 1.8.2.
    def URI(uri_str) # :doc:
      URI.parse(uri_str)
    end
    module_function :URI
  end