PageRenderTime 64ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb

https://github.com/johnthethird/rails
Ruby | 830 lines | 426 code | 61 blank | 343 comment | 126 complexity | d19351f0977e0819dc7fd4694cfda0e4 MD5 | raw file
  1. #--
  2. # Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
  3. # Under MIT and/or CC By license.
  4. #++
  5. module HTML
  6. # Selects HTML elements using CSS 2 selectors.
  7. #
  8. # The +Selector+ class uses CSS selector expressions to match and select
  9. # HTML elements.
  10. #
  11. # For example:
  12. # selector = HTML::Selector.new "form.login[action=/login]"
  13. # creates a new selector that matches any +form+ element with the class
  14. # +login+ and an attribute +action+ with the value <tt>/login</tt>.
  15. #
  16. # === Matching Elements
  17. #
  18. # Use the #match method to determine if an element matches the selector.
  19. #
  20. # For simple selectors, the method returns an array with that element,
  21. # or +nil+ if the element does not match. For complex selectors (see below)
  22. # the method returns an array with all matched elements, or +nil+ if no
  23. # match found.
  24. #
  25. # For example:
  26. # if selector.match(element)
  27. # puts "Element is a login form"
  28. # end
  29. #
  30. # === Selecting Elements
  31. #
  32. # Use the #select method to select all matching elements starting with
  33. # one element and going through all children in depth-first order.
  34. #
  35. # This method returns an array of all matching elements, an empty array
  36. # if no match is found
  37. #
  38. # For example:
  39. # selector = HTML::Selector.new "input[type=text]"
  40. # matches = selector.select(element)
  41. # matches.each do |match|
  42. # puts "Found text field with name #{match.attributes['name']}"
  43. # end
  44. #
  45. # === Expressions
  46. #
  47. # Selectors can match elements using any of the following criteria:
  48. # * <tt>name</tt> -- Match an element based on its name (tag name).
  49. # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
  50. # to match any element.
  51. # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
  52. # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
  53. # * <tt>.class</tt> -- Match an element based on its class name, all
  54. # class names if more than one specified.
  55. # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
  56. # * <tt>[attr=value]</tt> -- Match an element that has the specified
  57. # attribute and value. (More operators are supported see below)
  58. # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
  59. # such as <tt>:nth-child</tt> and <tt>:empty</tt>.
  60. # * <tt>:not(expr)</tt> -- Match an element that does not match the
  61. # negation expression.
  62. #
  63. # When using a combination of the above, the element name comes first
  64. # followed by identifier, class names, attributes, pseudo classes and
  65. # negation in any order. Do not separate these parts with spaces!
  66. # Space separation is used for descendant selectors.
  67. #
  68. # For example:
  69. # selector = HTML::Selector.new "form.login[action=/login]"
  70. # The matched element must be of type +form+ and have the class +login+.
  71. # It may have other classes, but the class +login+ is required to match.
  72. # It must also have an attribute called +action+ with the value
  73. # <tt>/login</tt>.
  74. #
  75. # This selector will match the following element:
  76. # <form class="login form" method="post" action="/login">
  77. # but will not match the element:
  78. # <form method="post" action="/logout">
  79. #
  80. # === Attribute Values
  81. #
  82. # Several operators are supported for matching attributes:
  83. # * <tt>name</tt> -- The element must have an attribute with that name.
  84. # * <tt>name=value</tt> -- The element must have an attribute with that
  85. # name and value.
  86. # * <tt>name^=value</tt> -- The attribute value must start with the
  87. # specified value.
  88. # * <tt>name$=value</tt> -- The attribute value must end with the
  89. # specified value.
  90. # * <tt>name*=value</tt> -- The attribute value must contain the
  91. # specified value.
  92. # * <tt>name~=word</tt> -- The attribute value must contain the specified
  93. # word (space separated).
  94. # * <tt>name|=word</tt> -- The attribute value must start with specified
  95. # word.
  96. #
  97. # For example, the following two selectors match the same element:
  98. # #my_id
  99. # [id=my_id]
  100. # and so do the following two selectors:
  101. # .my_class
  102. # [class~=my_class]
  103. #
  104. # === Alternatives, siblings, children
  105. #
  106. # Complex selectors use a combination of expressions to match elements:
  107. # * <tt>expr1 expr2</tt> -- Match any element against the second expression
  108. # if it has some parent element that matches the first expression.
  109. # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
  110. # if it is the child of an element that matches the first expression.
  111. # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
  112. # if it immediately follows an element that matches the first expression.
  113. # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
  114. # that comes after an element that matches the first expression.
  115. # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
  116. # or against the second expression.
  117. #
  118. # Since children and sibling selectors may match more than one element given
  119. # the first element, the #match method may return more than one match.
  120. #
  121. # === Pseudo classes
  122. #
  123. # Pseudo classes were introduced in CSS 3. They are most often used to select
  124. # elements in a given position:
  125. # * <tt>:root</tt> -- Match the element only if it is the root element
  126. # (no parent element).
  127. # * <tt>:empty</tt> -- Match the element only if it has no child elements,
  128. # and no text content.
  129. # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt>
  130. # as its text content (ignoring leading and trailing whitespace).
  131. # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
  132. # of its parent element.
  133. # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
  134. # of its parent element and its type.
  135. # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
  136. # of its parent element.
  137. # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
  138. # of its parent element of its type.
  139. # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
  140. # of its parent element.
  141. # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
  142. # of its parent element of its type.
  143. # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
  144. # of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
  145. # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
  146. # in each group of <tt>a</tt> child elements of its parent element.
  147. # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
  148. # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
  149. # elements of its parent element.
  150. # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
  151. # Same as <tt>:nth-child(2n+1)</tt>.
  152. # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
  153. # fourth). Same as <tt>:nth-child(2n+2)</tt>.
  154. # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
  155. # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
  156. # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
  157. # only elements of its type.
  158. # * <tt>:not(selector)</tt> -- Match the element only if the element does not
  159. # match the simple selector.
  160. #
  161. # As you can see, the <tt>:nth-child</tt> pseudo class and its variants can get quite
  162. # tricky and the CSS specification doesn't do a much better job explaining it.
  163. # But after reading the examples and trying a few combinations, it's easy to
  164. # figure out.
  165. #
  166. # For example:
  167. # table tr:nth-child(odd)
  168. # Selects every second row in the table starting with the first one.
  169. #
  170. # div p:nth-child(4)
  171. # Selects the fourth paragraph in the +div+, but not if the +div+ contains
  172. # other elements, since those are also counted.
  173. #
  174. # div p:nth-of-type(4)
  175. # Selects the fourth paragraph in the +div+, counting only paragraphs, and
  176. # ignoring all other elements.
  177. #
  178. # div p:nth-of-type(-n+4)
  179. # Selects the first four paragraphs, ignoring all others.
  180. #
  181. # And you can always select an element that matches one set of rules but
  182. # not another using <tt>:not</tt>. For example:
  183. # p:not(.post)
  184. # Matches all paragraphs that do not have the class <tt>.post</tt>.
  185. #
  186. # === Substitution Values
  187. #
  188. # You can use substitution with identifiers, class names and element values.
  189. # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
  190. # next value in the argument list following the CSS expression.
  191. #
  192. # The substitution value may be a string or a regular expression. All other
  193. # values are converted to strings.
  194. #
  195. # For example:
  196. # selector = HTML::Selector.new "#?", /^\d+$/
  197. # matches any element whose identifier consists of one or more digits.
  198. #
  199. # See http://www.w3.org/TR/css3-selectors/
  200. class Selector
  201. # An invalid selector.
  202. class InvalidSelectorError < StandardError #:nodoc:
  203. end
  204. class << self
  205. # :call-seq:
  206. # Selector.for_class(cls) => selector
  207. #
  208. # Creates a new selector for the given class name.
  209. def for_class(cls)
  210. self.new([".?", cls])
  211. end
  212. # :call-seq:
  213. # Selector.for_id(id) => selector
  214. #
  215. # Creates a new selector for the given id.
  216. def for_id(id)
  217. self.new(["#?", id])
  218. end
  219. end
  220. # :call-seq:
  221. # Selector.new(string, [values ...]) => selector
  222. #
  223. # Creates a new selector from a CSS 2 selector expression.
  224. #
  225. # The first argument is the selector expression. All other arguments
  226. # are used for value substitution.
  227. #
  228. # Raises ArgumentError if the selector expression is empty or cannot be
  228. # parsed, and InvalidSelectorError for an invalid attribute operation/value.
  229. def initialize(selector, *values)
  230. raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
  231. @source = ""
  232. values = values[0] if values.size == 1 && values[0].is_a?(Array)
  233. # We need a copy to determine if we failed to parse, and also
  234. # preserve the original pass by-ref statement.
  235. statement = selector.strip.dup
  236. # Create a simple selector, along with negation.
  237. simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
  238. @alternates = []
  239. @depends = nil
  240. # Alternative selector.
  241. if statement.sub!(/^\s*,\s*/, "")
  242. second = Selector.new(statement, values)
  243. @alternates << second
  244. # If there are alternate selectors, we group them in the top selector.
  245. if alternates = second.instance_variable_get(:@alternates)
  246. second.instance_variable_set(:@alternates, [])
  247. @alternates.concat alternates
  248. end
  249. @source << " , " << second.to_s
  250. # Sibling selector: create a dependency into second selector that will
  251. # match element immediately following this one.
  252. elsif statement.sub!(/^\s*\+\s*/, "")
  253. second = next_selector(statement, values)
  254. @depends = lambda do |element, first|
  255. if element = next_element(element)
  256. second.match(element, first)
  257. end
  258. end
  259. @source << " + " << second.to_s
  260. # Adjacent selector: create a dependency into second selector that will
  261. # match all elements following this one.
  262. elsif statement.sub!(/^\s*~\s*/, "")
  263. second = next_selector(statement, values)
  264. @depends = lambda do |element, first|
  265. matches = []
  266. while element = next_element(element)
  267. if subset = second.match(element, first)
  268. if first && !subset.empty?
  269. matches << subset.first
  270. break
  271. else
  272. matches.concat subset
  273. end
  274. end
  275. end
  276. matches.empty? ? nil : matches
  277. end
  278. @source << " ~ " << second.to_s
  279. # Child selector: create a dependency into second selector that will
  280. # match a child element of this one.
  281. elsif statement.sub!(/^\s*>\s*/, "")
  282. second = next_selector(statement, values)
  283. @depends = lambda do |element, first|
  284. matches = []
  285. element.children.each do |child|
  286. if child.tag? && subset = second.match(child, first)
  287. if first && !subset.empty?
  288. matches << subset.first
  289. break
  290. else
  291. matches.concat subset
  292. end
  293. end
  294. end
  295. matches.empty? ? nil : matches
  296. end
  297. @source << " > " << second.to_s
  298. # Descendant selector: create a dependency into second selector that
  299. # will match all descendant elements of this one. Note,
  300. elsif statement =~ /^\s+\S+/ && statement != selector
  301. second = next_selector(statement, values)
  302. @depends = lambda do |element, first|
  303. matches = []
  304. stack = element.children.reverse
  305. while node = stack.pop
  306. next unless node.tag?
  307. if subset = second.match(node, first)
  308. if first && !subset.empty?
  309. matches << subset.first
  310. break
  311. else
  312. matches.concat subset
  313. end
  314. elsif children = node.children
  315. stack.concat children.reverse
  316. end
  317. end
  318. matches.empty? ? nil : matches
  319. end
  320. @source << " " << second.to_s
  321. else
  322. # The last selector is where we check that we parsed
  323. # all the parts.
  324. unless statement.empty? || statement.strip.empty?
  325. raise ArgumentError, "Invalid selector: #{statement}"
  326. end
  327. end
  328. end
  329. # :call-seq:
  330. # match(element, first?) => array or nil
  331. #
  332. # Matches an element against the selector.
  333. #
  334. # For a simple selector this method returns an array with the
  335. # element if the element matches, nil otherwise.
  336. #
  337. # For a complex selector (sibling and descendant) this method
  338. # returns an array with all matching elements, nil if no match is
  339. # found.
  340. #
  341. # Use +first_only=true+ if you are only interested in the first element.
  342. #
  343. # For example:
  344. # if selector.match(element)
  345. # puts "Element is a login form"
  346. # end
  347. def match(element, first_only = false)
  348. # Match element if no element name or element name same as element name
  349. if matched = (!@tag_name || @tag_name == element.name)
  350. # No match if one of the attribute matches failed
  351. for attr in @attributes
  352. if element.attributes[attr[0]] !~ attr[1]
  353. matched = false
  354. break
  355. end
  356. end
  357. end
  358. # Pseudo class matches (nth-child, empty, etc).
  359. if matched
  360. for pseudo in @pseudo
  361. unless pseudo.call(element)
  362. matched = false
  363. break
  364. end
  365. end
  366. end
  367. # Negation. Same rules as above, but we fail if a match is made.
  368. if matched && @negation
  369. for negation in @negation
  370. if negation[:tag_name] == element.name
  371. matched = false
  372. else
  373. for attr in negation[:attributes]
  374. if element.attributes[attr[0]] =~ attr[1]
  375. matched = false
  376. break
  377. end
  378. end
  379. end
  380. if matched
  381. for pseudo in negation[:pseudo]
  382. if pseudo.call(element)
  383. matched = false
  384. break
  385. end
  386. end
  387. end
  388. break unless matched
  389. end
  390. end
  391. # If element matched but depends on another element (child,
  392. # sibling, etc), apply the dependent matches instead.
  393. if matched && @depends
  394. matches = @depends.call(element, first_only)
  395. else
  396. matches = matched ? [element] : nil
  397. end
  398. # If this selector is part of the group, try all the alternative
  399. # selectors (unless first_only).
  400. if !first_only || !matches
  401. @alternates.each do |alternate|
  402. break if matches && first_only
  403. if subset = alternate.match(element, first_only)
  404. if matches
  405. matches.concat subset
  406. else
  407. matches = subset
  408. end
  409. end
  410. end
  411. end
  412. matches
  413. end
  414. # :call-seq:
  415. # select(root) => array
  416. #
  417. # Selects and returns an array with all matching elements, beginning
  418. # with one node and traversing through all children depth-first.
  419. # Returns an empty array if no match is found.
  420. #
  421. # The root node may be any element in the document, or the document
  422. # itself.
  423. #
  424. # For example:
  425. # selector = HTML::Selector.new "input[type=text]"
  426. # matches = selector.select(element)
  427. # matches.each do |match|
  428. # puts "Found text field with name #{match.attributes['name']}"
  429. # end
  430. def select(root)
  431. matches = []
  432. stack = [root]
  433. while node = stack.pop
  434. if node.tag? && subset = match(node, false)
  435. subset.each do |match|
  436. matches << match unless matches.any? { |item| item.equal?(match) }
  437. end
  438. elsif children = node.children
  439. stack.concat children.reverse
  440. end
  441. end
  442. matches
  443. end
  444. # Similar to #select but returns the first matching element. Returns +nil+
  445. # if no element matches the selector.
  446. def select_first(root)
  447. stack = [root]
  448. while node = stack.pop
  449. if node.tag? && subset = match(node, true)
  450. return subset.first if !subset.empty?
  451. elsif children = node.children
  452. stack.concat children.reverse
  453. end
  454. end
  455. nil
  456. end
  457. def to_s #:nodoc:
  458. @source
  459. end
  460. # Return the next element after this one. Skips sibling text nodes.
  461. #
  462. # With the +name+ argument, returns the next element with that name,
  463. # skipping other sibling elements.
  464. def next_element(element, name = nil)
  465. if siblings = element.parent.children
  466. found = false
  467. siblings.each do |node|
  468. if node.equal?(element)
  469. found = true
  470. elsif found && node.tag?
  471. return node if (name.nil? || node.name == name)
  472. end
  473. end
  474. end
  475. nil
  476. end
  477. protected
  478. # Creates a simple selector given the statement and array of
  479. # substitution values.
  480. #
  481. # Returns a hash with the values +tag_name+, +attributes+,
  482. # +pseudo+ (classes) and +negation+.
  483. #
  484. # Called the first time with +can_negate+ true to allow
  485. # negation. Called a second time with false since negation
  486. # cannot be negated.
  487. def simple_selector(statement, values, can_negate = true)
  488. tag_name = nil
  489. attributes = []
  490. pseudo = []
  491. negation = []
  492. # Element name. (Note that in negation, this can come at
  493. # any order, but for simplicity we allow if only first).
  494. statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
  495. match.strip!
  496. tag_name = match.downcase unless match == "*"
  497. @source << match
  498. "" # Remove
  499. end
  500. # Get identifier, class, attribute name, pseudo or negation.
  501. while true
  502. # Element identifier.
  503. next if statement.sub!(/^#(\?|[\w\-]+)/) do |match|
  504. id = $1
  505. if id == "?"
  506. id = values.shift
  507. end
  508. @source << "##{id}"
  509. id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
  510. attributes << ["id", id]
  511. "" # Remove
  512. end
  513. # Class name.
  514. next if statement.sub!(/^\.([\w\-]+)/) do |match|
  515. class_name = $1
  516. @source << ".#{class_name}"
  517. class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp)
  518. attributes << ["class", class_name]
  519. "" # Remove
  520. end
  521. # Attribute value.
  522. next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do |match|
  523. name, equality, value = $1, $2, $3
  524. if value == "?"
  525. value = values.shift
  526. else
  527. # Handle single and double quotes.
  528. value.strip!
  529. if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
  530. value = value[1..-2]
  531. end
  532. end
  533. @source << "[#{name}#{equality}'#{value}']"
  534. attributes << [name.downcase.strip, attribute_match(equality, value)]
  535. "" # Remove
  536. end
  537. # Root element only.
  538. next if statement.sub!(/^:root/) do |match|
  539. pseudo << lambda do |element|
  540. element.parent.nil? || !element.parent.tag?
  541. end
  542. @source << ":root"
  543. "" # Remove
  544. end
  545. # Nth-child including last and of-type.
  546. next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
  547. reverse = $1 == "last-"
  548. of_type = $2 == "of-type"
  549. @source << ":nth-#{$1}#{$2}("
  550. case $3
  551. when "odd"
  552. pseudo << nth_child(2, 1, of_type, reverse)
  553. @source << "odd)"
  554. when "even"
  555. pseudo << nth_child(2, 2, of_type, reverse)
  556. @source << "even)"
  557. when /^(\d+|\?)$/ # b only
  558. b = ($1 == "?" ? values.shift : $1).to_i
  559. pseudo << nth_child(0, b, of_type, reverse)
  560. @source << "#{b})"
  561. when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
  562. a = ($1 == "?" ? values.shift :
  563. $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
  564. b = ($2 == "?" ? values.shift : $2).to_i
  565. pseudo << nth_child(a, b, of_type, reverse)
  566. @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
  567. else
  568. raise ArgumentError, "Invalid nth-child #{match}"
  569. end
  570. "" # Remove
  571. end
  572. # First/last child (of type).
  573. next if statement.sub!(/^:(first|last)-(child|of-type)/) do |match|
  574. reverse = $1 == "last"
  575. of_type = $2 == "of-type"
  576. pseudo << nth_child(0, 1, of_type, reverse)
  577. @source << ":#{$1}-#{$2}"
  578. "" # Remove
  579. end
  580. # Only child (of type).
  581. next if statement.sub!(/^:only-(child|of-type)/) do |match|
  582. of_type = $1 == "of-type"
  583. pseudo << only_child(of_type)
  584. @source << ":only-#{$1}"
  585. "" # Remove
  586. end
  587. # Empty: no child elements or meaningful content (whitespaces
  588. # are ignored).
  589. next if statement.sub!(/^:empty/) do |match|
  590. pseudo << lambda do |element|
  591. empty = true
  592. for child in element.children
  593. if child.tag? || !child.content.strip.empty?
  594. empty = false
  595. break
  596. end
  597. end
  598. empty
  599. end
  600. @source << ":empty"
  601. "" # Remove
  602. end
  603. # Content: match the text content of the element, stripping
  604. # leading and trailing spaces.
  605. next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do |match|
  606. content = $1
  607. if content == "?"
  608. content = values.shift
  609. elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
  610. content = content[1..-2]
  611. end
  612. @source << ":content('#{content}')"
  613. content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
  614. pseudo << lambda do |element|
  615. text = ""
  616. for child in element.children
  617. unless child.tag?
  618. text << child.content
  619. end
  620. end
  621. text.strip =~ content
  622. end
  623. "" # Remove
  624. end
  625. # Negation. Create another simple selector to handle it.
  626. if statement.sub!(/^:not\(\s*/, "")
  627. raise ArgumentError, "Double negatives are not missing feature" unless can_negate
  628. @source << ":not("
  629. negation << simple_selector(statement, values, false)
  630. raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
  631. @source << ")"
  632. next
  633. end
  634. # No match: moving on.
  635. break
  636. end
  637. # Return hash. The keys are mapped to instance variables.
  638. {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
  639. end
  640. # Create a regular expression to match an attribute value based
  641. # on the equality operator (=, ^=, |=, etc).
  642. def attribute_match(equality, value)
  643. regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
  644. case equality
  645. when "=" then
  646. # Match the attribute value in full
  647. Regexp.new("^#{regexp}$")
  648. when "~=" then
  649. # Match a space-separated word within the attribute value
  650. Regexp.new("(^|\s)#{regexp}($|\s)")
  651. when "^="
  652. # Match the beginning of the attribute value
  653. Regexp.new("^#{regexp}")
  654. when "$="
  655. # Match the end of the attribute value
  656. Regexp.new("#{regexp}$")
  657. when "*="
  658. # Match substring of the attribute value
  659. regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
  660. when "|=" then
  661. # Match the first space-separated item of the attribute value
  662. Regexp.new("^#{regexp}($|\s)")
  663. else
  664. raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
  665. # Match all attributes values (existence check)
  666. //
  667. end
  668. end
  669. # Returns a lambda that can match an element against the nth-child
  670. # pseudo class, given the following arguments:
  671. # * +a+ -- Value of a part.
  672. # * +b+ -- Value of b part.
  673. # * +of_type+ -- True to test only elements of this type (of-type).
  674. # * +reverse+ -- True to count in reverse order (last-).
  675. def nth_child(a, b, of_type, reverse)
  676. # a = 0 means select at index b, if b = 0 nothing selected
  677. return lambda { |element| false } if a == 0 && b == 0
  678. # a < 0 and b < 0 will never match against an index
  679. return lambda { |element| false } if a < 0 && b < 0
  680. b = a + b + 1 if b < 0 # b < 0 just picks last element from each group
  681. b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based
  682. lambda do |element|
  683. # Element must be inside parent element.
  684. return false unless element.parent && element.parent.tag?
  685. index = 0
  686. # Get siblings, reverse if counting from last.
  687. siblings = element.parent.children
  688. siblings = siblings.reverse if reverse
  689. # Match element name if of-type, otherwise ignore name.
  690. name = of_type ? element.name : nil
  691. found = false
  692. for child in siblings
  693. # Skip text nodes/comments.
  694. if child.tag? && (name == nil || child.name == name)
  695. if a == 0
  696. # Shortcut when a == 0 no need to go past count
  697. if index == b
  698. found = child.equal?(element)
  699. break
  700. end
  701. elsif a < 0
  702. # Only look for first b elements
  703. break if index > b
  704. if child.equal?(element)
  705. found = (index % a) == 0
  706. break
  707. end
  708. else
  709. # Otherwise, break if child found and count == an+b
  710. if child.equal?(element)
  711. found = (index % a) == b
  712. break
  713. end
  714. end
  715. index += 1
  716. end
  717. end
  718. found
  719. end
  720. end
  721. # Creates a only child lambda. Pass +of-type+ to only look at
  722. # elements of its type.
  723. def only_child(of_type)
  724. lambda do |element|
  725. # Element must be inside parent element.
  726. return false unless element.parent && element.parent.tag?
  727. name = of_type ? element.name : nil
  728. other = false
  729. for child in element.parent.children
  730. # Skip text nodes/comments.
  731. if child.tag? && (name == nil || child.name == name)
  732. unless child.equal?(element)
  733. other = true
  734. break
  735. end
  736. end
  737. end
  738. !other
  739. end
  740. end
  741. # Called to create a dependent selector (sibling, descendant, etc).
  742. # Passes the remainder of the statement that will be reduced to zero
  743. # eventually, and array of substitution values.
  744. #
  745. # This method is called from four places, so it helps to put it here
  746. # for reuse. The only logic deals with the need to detect comma
  747. # separators (alternate) and apply them to the selector group of the
  748. # top selector.
  749. def next_selector(statement, values)
  750. second = Selector.new(statement, values)
  751. # If there are alternate selectors, we group them in the top selector.
  752. if alternates = second.instance_variable_get(:@alternates)
  753. second.instance_variable_set(:@alternates, [])
  754. @alternates.concat alternates
  755. end
  756. second
  757. end
  758. end
  759. # See HTML::Selector.new
  # Shorthand for building a selector: forwards the expression and the
  # substitution values, unchanged, to Selector.new and returns the result.
  def self.selector(statement, *values)
    Selector.new(statement, *values)
  end
  class Tag
    # Builds an HTML::Selector from the CSS expression +selector+ and the
    # substitution +values+, runs its +select+ with this tag as the root,
    # and returns the result.
    def select(selector, *values)
      HTML::Selector.new(selector, values).select(self)
    end
  end
  769. end