PageRenderTime 1ms CodeModel.GetById 40ms app.highlight 74ms RepoModel.GetById 26ms app.codeStats 1ms

/vendor/jruby-1.1.6RC1/lib/ruby/1.8/rexml/text.rb

https://bitbucket.org/nicksieger/advent-jruby
Ruby | 344 lines | 226 code | 31 blank | 87 comment | 25 complexity | f23f8f24872fbc11327f424c025178f0 MD5 | raw file
  1require 'rexml/entity'
  2require 'rexml/doctype'
  3require 'rexml/child'
  4require 'rexml/doctype'
  5require 'rexml/parseexception'
  6
  7module REXML
  8  # Represents text nodes in an XML document
  9  class Text < Child
 10    include Comparable
 11    # The order in which the substitutions occur
 12    SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
 13    SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
 14    # Characters which are substituted in written strings
 15    SLAICEPS = [ '<', '>', '"', "'", '&' ]
 16    SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
 17
 18    # If +raw+ is true, then REXML leaves the value alone
 19    attr_accessor :raw
 20
 21    ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
 22    NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ 
 23
 24    # Constructor
 25    # +arg+ if a String, the content is set to the String.  If a Text,
 26    # the object is shallowly cloned.  
 27    #
 28    # +respect_whitespace+ (boolean, false) if true, whitespace is
 29    # respected
 30    #
 31    # +parent+ (nil) if this is a Parent object, the parent
 32    # will be set to this.  
 33    #
 34    # +raw+ (nil) This argument can be given three values.
 35    # If true, then the value of used to construct this object is expected to 
 36    # contain no unescaped XML markup, and REXML will not change the text. If 
 37    # this value is false, the string may contain any characters, and REXML will
 38    # escape any and all defined entities whose values are contained in the
 39    # text.  If this value is nil (the default), then the raw value of the 
 40    # parent will be used as the raw value for this node.  If there is no raw
 41    # value for the parent, and no value is supplied, the default is false.
 42    # Use this field if you have entities defined for some text, and you don't
 43    # want REXML to escape that text in output.
 44    #   Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
 45    #   Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
 46    #   Text.new( "<&", false, nil, true )  #-> Parse exception
 47    #   Text.new( "&lt;&amp;", false, nil, true )  #-> "&lt;&amp;"
 48    #   # Assume that the entity "s" is defined to be "sean"
 49    #   # and that the entity    "r" is defined to be "russell"
 50    #   Text.new( "sean russell" )          #-> "&s; &r;"
 51    #   Text.new( "sean russell", false, nil, true ) #-> "sean russell"
 52    #
 53    # +entity_filter+ (nil) This can be an array of entities to match in the
 54    # supplied text.  This argument is only useful if +raw+ is set to false.
 55    #   Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
 56    #   Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
 57    # In the last example, the +entity_filter+ argument is ignored.
 58    #
 59    # +pattern+ INTERNAL USE ONLY
 60    def initialize(arg, respect_whitespace=false, parent=nil, raw=nil, 
 61      entity_filter=nil, illegal=ILLEGAL )
 62
 63      @raw = false
 64
 65      if parent
 66        super( parent )
 67        @raw = parent.raw 
 68      else
 69        @parent = nil
 70      end
 71
 72      @raw = raw unless raw.nil?
 73      @entity_filter = entity_filter
 74      @normalized = @unnormalized = nil
 75
 76      if arg.kind_of? String
 77        @string = arg.clone
 78        @string.squeeze!(" \n\t") unless respect_whitespace
 79      elsif arg.kind_of? Text
 80        @string = arg.to_s
 81        @raw = arg.raw
 82      elsif
 83        raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
 84      end
 85
 86      @string.gsub!( /\r\n?/, "\n" )
 87
 88      # check for illegal characters
 89      if @raw
 90        if @string =~ illegal
 91          raise "Illegal character '#{$1}' in raw string \"#{@string}\""
 92        end
 93      end
 94    end
 95
 96    def node_type
 97      :text
 98    end
 99
100    def empty?
101      @string.size==0
102    end
103
104
105    def clone
106      return Text.new(self)
107    end
108
109
110    # Appends text to this text node.  The text is appended in the +raw+ mode
111    # of this text node.
112    def <<( to_append )
113      @string << to_append.gsub( /\r\n?/, "\n" )
114    end
115
116
117    # +other+ a String or a Text
118    # +returns+ the result of (to_s <=> arg.to_s)
119    def <=>( other )
120      to_s() <=> other.to_s
121    end
122
123    REFERENCE = /#{Entity::REFERENCE}/
124    # Returns the string value of this text node.  This string is always
125    # escaped, meaning that it is a valid XML text node string, and all
126    # entities that can be escaped, have been inserted.  This method respects
127    # the entity filter set in the constructor.
128    #   
129    #   # Assume that the entity "s" is defined to be "sean", and that the 
130    #   # entity "r" is defined to be "russell"
131    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
132    #   t.to_s   #-> "&lt; &amp; &s; russell"
133    #   t = Text.new( "< & &s; russell", false, nil, false ) 
134    #   t.to_s   #-> "&lt; &amp; &s; russell"
135    #   u = Text.new( "sean russell", false, nil, true )
136    #   u.to_s   #-> "sean russell"
137    def to_s
138      return @string if @raw
139      return @normalized if @normalized
140
141      doctype = nil
142      if @parent
143        doc = @parent.document
144        doctype = doc.doctype if doc
145      end
146
147      @normalized = Text::normalize( @string, doctype, @entity_filter )
148    end
149
150    def inspect
151      @string.inspect
152    end
153
154    # Returns the string value of this text.  This is the text without
155    # entities, as it might be used programmatically, or printed to the
156    # console.  This ignores the 'raw' attribute setting, and any
157    # entity_filter.
158    #
159    #   # Assume that the entity "s" is defined to be "sean", and that the 
160    #   # entity "r" is defined to be "russell"
161    #   t = Text.new( "< & sean russell", false, nil, false, ['s'] ) 
162    #   t.value   #-> "< & sean russell"
163    #   t = Text.new( "< & &s; russell", false, nil, false )
164    #   t.value   #-> "< & sean russell"
165    #   u = Text.new( "sean russell", false, nil, true )
166    #   u.value   #-> "sean russell"
167    def value
168      @unnormalized if @unnormalized
169      doctype = nil
170      if @parent
171        doc = @parent.document
172        doctype = doc.doctype if doc
173      end
174      @unnormalized = Text::unnormalize( @string, doctype )
175    end
176
177    # Sets the contents of this text node.  This expects the text to be 
178    # unnormalized.  It returns self.
179    #
180    #   e = Element.new( "a" )
181    #   e.add_text( "foo" )   # <a>foo</a>
182    #   e[0].value = "bar"    # <a>bar</a>
183    #   e[0].value = "<a>"    # <a>&lt;a&gt;</a>
184    def value=( val )
185      @string = val.gsub( /\r\n?/, "\n" )
186      @unnormalized = nil
187      @normalized = nil
188      @raw = false
189    end
190 
191     def wrap(string, width, addnewline=false)
192       # Recursivly wrap string at width.
193       return string if string.length <= width
194       place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
195       if addnewline then
196         return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
197       else
198         return string[0,place] + "\n" + wrap(string[place+1..-1], width)
199       end
200     end
201
202    def indent_text(string, level=1, style="\t", indentfirstline=true)
203      return string if level < 0
204      new_string = ''
205      string.each { |line|
206        indent_string = style * level
207        new_line = (indent_string + line).sub(/[\s]+$/,'')
208        new_string << new_line
209      }
210      new_string.strip! unless indentfirstline
211      return new_string
212    end
213 
214    # == DEPRECATED
215    # See REXML::Formatters
216    #
217    def write( writer, indent=-1, transitive=false, ie_hack=false ) 
218      Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
219      formatter = if indent > -1
220          REXML::Formatters::Pretty.new( indent )
221        else
222          REXML::Formatters::Default.new
223        end
224      formatter.write( self, writer )
225    end
226
227    # FIXME
228    # This probably won't work properly
229    def xpath
230      path = @parent.xpath
231      path += "/text()"
232      return path
233    end
234
235    # Writes out text, substituting special characters beforehand.
236    # +out+ A String, IO, or any other object supporting <<( String )
237    # +input+ the text to substitute and the write out
238    #
239    #   z=utf8.unpack("U*")
240    #   ascOut=""
241    #   z.each{|r|
242    #     if r <  0x100
243    #       ascOut.concat(r.chr)
244    #     else
245    #       ascOut.concat(sprintf("&#x%x;", r))
246    #     end
247    #   }
248    #   puts ascOut
249    def write_with_substitution out, input
250      copy = input.clone
251      # Doing it like this rather than in a loop improves the speed
252      copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
253      copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
254      copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
255      copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
256      copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
257      copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
258      out << copy
259    end
260
261    # Reads text, substituting entities
262    def Text::read_with_substitution( input, illegal=nil )
263      copy = input.clone
264
265      if copy =~ illegal
266        raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
267      end if illegal
268      
269      copy.gsub!( /\r\n?/, "\n" )
270      if copy.include? ?&
271        copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
272        copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
273        copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
274        copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
275        copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
276        copy.gsub!( /&#0*((?:\d+)|(?:x[a-f0-9]+));/ ) {|m|
277          m=$1
278          #m='0' if m==''
279          m = "0#{m}" if m[0] == ?x
280          [Integer(m)].pack('U*')
281        }
282      end
283      copy
284    end
285
286    EREFERENCE = /&(?!#{Entity::NAME};)/
287    # Escapes all possible entities
288    def Text::normalize( input, doctype=nil, entity_filter=nil )
289      copy = input
290      # Doing it like this rather than in a loop improves the speed
291      #copy = copy.gsub( EREFERENCE, '&amp;' )
292      copy = copy.gsub( "&", "&amp;" )
293      if doctype
294        # Replace all ampersands that aren't part of an entity
295        doctype.entities.each_value do |entity|
296          copy = copy.gsub( entity.value, 
297            "&#{entity.name};" ) if entity.value and 
298              not( entity_filter and entity_filter.include?(entity) )
299        end
300      else
301        # Replace all ampersands that aren't part of an entity
302        DocType::DEFAULT_ENTITIES.each_value do |entity|
303          copy = copy.gsub(entity.value, "&#{entity.name};" )
304        end
305      end
306      copy
307    end
308
309    # Unescapes all possible entities
310    def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
311      rv = string.clone
312      rv.gsub!( /\r\n?/, "\n" )
313      matches = rv.scan( REFERENCE )
314      return rv if matches.size == 0
315      rv.gsub!( NUMERICENTITY ) {|m|
316        m=$1
317        m = "0#{m}" if m[0] == ?x
318        [Integer(m)].pack('U*')
319      }
320      matches.collect!{|x|x[0]}.compact!
321      if matches.size > 0
322        if doctype
323          matches.each do |entity_reference|
324            unless filter and filter.include?(entity_reference)
325              entity_value = doctype.entity( entity_reference )
326              re = /&#{entity_reference};/
327              rv.gsub!( re, entity_value ) if entity_value
328            end
329          end
330        else
331          matches.each do |entity_reference|
332            unless filter and filter.include?(entity_reference)
333              entity_value = DocType::DEFAULT_ENTITIES[ entity_reference ]
334              re = /&#{entity_reference};/
335              rv.gsub!( re, entity_value.value ) if entity_value
336            end
337          end
338        end
339        rv.gsub!( /&amp;/, '&' )
340      end
341      rv
342    end
343  end
344end