/tools/Ruby/lib/ruby/1.8/rexml/parsers/pullparser.rb

http://github.com/agross/netopenspace · Ruby · 196 lines · 122 code · 28 blank · 46 comment · 19 complexity · cc1a9d1fc2b36e63971f5e6e43824123 MD5 · raw file

  1. require 'forwardable'
  2. require 'rexml/parseexception'
  3. require 'rexml/parsers/baseparser'
  4. require 'rexml/xmltokens'
  5. module REXML
  6. module Parsers
  # = Using the Pull Parser
  # <em>This API is experimental, and subject to change.</em>
  #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
  #  while parser.has_next?
  #    res = parser.pull
  #    puts res[1]['att'] if res.start_element? and res[0] == 'b'
  #  end
  # See the PullEvent class for information on the content of the results.
  # The data is identical to the arguments passed for the various events to
  # the StreamListener API.
  #
  # Notice that:
  #  parser = PullParser.new( "<a>BAD DOCUMENT" )
  #  while parser.has_next?
  #    res = parser.pull
  #    raise res[1] if res.error?
  #  end
  #
  # Nat Price gave me some good ideas for the API.
  class PullParser
    include XMLTokens
    extend Forwardable

    # These are forwarded untouched to the underlying BaseParser; they know
    # nothing about events buffered in this object by #peek or #unshift.
    def_delegators( :@parser, :entity )
    def_delegators( :@parser, :empty? )
    def_delegators( :@parser, :source )

    # stream:: whatever BaseParser.new accepts as its source.
    def initialize stream
      @entities = {}
      @listeners = nil
      @parser = BaseParser.new( stream )
      @my_stack = []
    end

    # Registers +listener+. The list is created lazily on first use. No
    # method of this class notifies the stored listeners.
    def add_listener( listener )
      @listeners = [] unless @listeners
      @listeners << listener
    end

    # True when an event is waiting in the look-ahead buffer (put there by
    # #peek or #unshift) or the underlying parser reports more input.
    # Checking the buffer first keeps #each from stopping early after a
    # #peek has drained the underlying parser.
    def has_next?
      !@my_stack.empty? || @parser.has_next?
    end

    # Yields every remaining event, in order, consuming each one.
    def each
      while has_next?
        yield self.pull
      end
    end

    # Returns the event +depth+ positions ahead (0 is the next event) without
    # consuming it. Missing events are pulled from the underlying parser and
    # buffered; they receive the same entity and text handling as events
    # returned by #pull, so peeking and pulling give identical results.
    def peek depth=0
      while @my_stack.length <= depth
        @my_stack.push( next_event )
      end
      @my_stack[depth]
    end

    # Consumes and returns the next event, taking it from the look-ahead
    # buffer when one is waiting there.
    def pull
      return @my_stack.shift unless @my_stack.empty?
      next_event
    end

    # Pushes +token+ onto the front of the look-ahead buffer so that the next
    # #pull (or #peek with depth 0) returns it.
    def unshift token
      @my_stack.unshift token
    end

    private

    # Pulls one raw event from the underlying parser and wraps it in a
    # PullEvent. Entity declarations whose value does not mention PUBLIC or
    # SYSTEM are remembered in @entities, and :text events get their
    # unnormalized form appended as an extra element.
    def next_event
      event = @parser.pull
      case event[0]
      when :entitydecl
        @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
      when :text
        event << @parser.unnormalize( event[1], @entities )
      end
      PullEvent.new( event )
    end
  end
  # A parsing event. The contents of the event are accessed as an +Array+,
  # and the type is given either by the ...? methods, or by calling
  # +event_type+. The contents of this object vary from event to event,
  # but are identical to the arguments passed to +StreamListener+s for each
  # event.
  class PullEvent
    # arg:: the raw event array; element 0 is the event type symbol and the
    #       remaining elements are the event's contents.
    def initialize(arg)
      @contents = arg
    end

    # Array-style access to the event's contents, i.e. everything after the
    # leading type symbol. Index 0 is the first content element. Accepts an
    # index, an index plus a length, or a Range, with the usual Array
    # meaning (negative positions count back from the last content element).
    #
    # Raises a RuntimeError when +start+ is neither a Range nor a Numeric.
    def []( start, endd=nil )
      # Index a view that excludes the type slot, so that range ends and
      # negative positions cannot be thrown off by (or reach) the type.
      payload = @contents[1..-1] || []
      if start.kind_of? Range
        payload.slice( start )
      elsif start.kind_of? Numeric
        if endd.nil?
          payload.slice( start )
        else
          payload.slice( start, endd )
        end
      else
        raise "Illegal argument #{start.inspect} (#{start.class})"
      end
    end

    # The type of this event. The predicates below test for
    # :start_element, :end_element, :text, :processing_instruction,
    # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
    # :notationdecl, :entity, :cdata, :xmldecl, and :error.
    def event_type
      @contents[0]
    end

    # Content: [ String tag_name, Hash attributes ]
    def start_element?
      @contents[0] == :start_element
    end

    # Content: [ String tag_name ]
    def end_element?
      @contents[0] == :end_element
    end

    # Content: [ String raw_text, String unnormalized_text ]
    def text?
      @contents[0] == :text
    end

    # Content: [ String text ]
    def instruction?
      @contents[0] == :processing_instruction
    end

    # Content: [ String text ]
    def comment?
      @contents[0] == :comment
    end

    # Content: [ String name, String pub_sys, String long_name, String uri ]
    def doctype?
      @contents[0] == :start_doctype
    end

    # Content: [ String text ]
    def attlistdecl?
      @contents[0] == :attlistdecl
    end

    # Content: [ String text ]
    def elementdecl?
      @contents[0] == :elementdecl
    end

    # Due to the wonders of DTDs, an entity declaration can be just about
    # anything. There's no way to normalize it; you'll have to interpret the
    # content yourself. However, the following is true:
    #
    # * If the entity declaration is an internal entity:
    #   [ String name, String value ]
    # Content: [ String text ]
    def entitydecl?
      @contents[0] == :entitydecl
    end

    # Content: [ String text ]
    def notationdecl?
      @contents[0] == :notationdecl
    end

    # Content: [ String text ]
    def entity?
      @contents[0] == :entity
    end

    # Content: [ String text ]
    def cdata?
      @contents[0] == :cdata
    end

    # Content: [ String version, String encoding, String standalone ]
    def xmldecl?
      @contents[0] == :xmldecl
    end

    # True when the event type is :error.
    def error?
      @contents[0] == :error
    end

    # Renders the event as "<type>: <contents array>".
    def inspect
      "#{@contents[0]}: #{@contents[1..-1].inspect}"
    end
  end
  167. end
  168. end