PageRenderTime 99ms CodeModel.GetById 26ms app.highlight 67ms RepoModel.GetById 2ms app.codeStats 0ms

/vendor/bundle/jruby/2.1/gems/rack-1.5.2/lib/rack/utils/okjson.rb

https://github.com/delowong/logstash
Ruby | 599 lines | 454 code | 68 blank | 77 comment | 61 complexity | f17ace4c2356122013157e31b538f36a MD5 | raw file
  1# encoding: UTF-8
  2#
  3# Copyright 2011, 2012 Keith Rarick
  4#
  5# Permission is hereby granted, free of charge, to any person obtaining a copy
  6# of this software and associated documentation files (the "Software"), to deal
  7# in the Software without restriction, including without limitation the rights
  8# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9# copies of the Software, and to permit persons to whom the Software is
 10# furnished to do so, subject to the following conditions:
 11#
 12# The above copyright notice and this permission notice shall be included in
 13# all copies or substantial portions of the Software.
 14#
 15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21# THE SOFTWARE.
 22
 23# See https://github.com/kr/okjson for updates.
 24# Imported from the above repo @ d4e8643ad92e14b37d11326855499c7e4108ed17
 25# Namespace modified for vendoring under Rack::Utils
 26
 27require 'stringio'
 28
 29# Some parts adapted from
 30# http://golang.org/src/pkg/json/decode.go and
 31# http://golang.org/src/pkg/utf8/utf8.go
 32module Rack::Utils::OkJson
 33  Upstream = 'LTD7LBKLZWFF7OZK'
 34  extend self
 35
 36
 37  # Decodes a json document in string s and
 38  # returns the corresponding ruby value.
 39  # String s must be valid UTF-8. If you have
 40  # a string in some other encoding, convert
 41  # it first.
 42  #
 43  # String values in the resulting structure
 44  # will be UTF-8.
 45  def decode(s)
 46    ts = lex(s)
 47    v, ts = textparse(ts)
 48    if ts.length > 0
 49      raise Error, 'trailing garbage'
 50    end
 51    v
 52  end
 53
 54
 55  # Parses a "json text" in the sense of RFC 4627.
 56  # Returns the parsed value and any trailing tokens.
 57  # Note: this is almost the same as valparse,
 58  # except that it does not accept atomic values.
 59  def textparse(ts)
 60    if ts.length < 0
 61      raise Error, 'empty'
 62    end
 63
 64    typ, _, val = ts[0]
 65    case typ
 66    when '{' then objparse(ts)
 67    when '[' then arrparse(ts)
 68    else
 69      raise Error, "unexpected #{val.inspect}"
 70    end
 71  end
 72
 73
 74  # Parses a "value" in the sense of RFC 4627.
 75  # Returns the parsed value and any trailing tokens.
 76  def valparse(ts)
 77    if ts.length < 0
 78      raise Error, 'empty'
 79    end
 80
 81    typ, _, val = ts[0]
 82    case typ
 83    when '{' then objparse(ts)
 84    when '[' then arrparse(ts)
 85    when :val,:str then [val, ts[1..-1]]
 86    else
 87      raise Error, "unexpected #{val.inspect}"
 88    end
 89  end
 90
 91
 92  # Parses an "object" in the sense of RFC 4627.
 93  # Returns the parsed value and any trailing tokens.
 94  def objparse(ts)
 95    ts = eat('{', ts)
 96    obj = {}
 97
 98    if ts[0][0] == '}'
 99      return obj, ts[1..-1]
100    end
101
102    k, v, ts = pairparse(ts)
103    obj[k] = v
104
105    if ts[0][0] == '}'
106      return obj, ts[1..-1]
107    end
108
109    loop do
110      ts = eat(',', ts)
111
112      k, v, ts = pairparse(ts)
113      obj[k] = v
114
115      if ts[0][0] == '}'
116        return obj, ts[1..-1]
117      end
118    end
119  end
120
121
122  # Parses a "member" in the sense of RFC 4627.
123  # Returns the parsed values and any trailing tokens.
124  def pairparse(ts)
125    (typ, _, k), ts = ts[0], ts[1..-1]
126    if typ != :str
127      raise Error, "unexpected #{k.inspect}"
128    end
129    ts = eat(':', ts)
130    v, ts = valparse(ts)
131    [k, v, ts]
132  end
133
134
135  # Parses an "array" in the sense of RFC 4627.
136  # Returns the parsed value and any trailing tokens.
137  def arrparse(ts)
138    ts = eat('[', ts)
139    arr = []
140
141    if ts[0][0] == ']'
142      return arr, ts[1..-1]
143    end
144
145    v, ts = valparse(ts)
146    arr << v
147
148    if ts[0][0] == ']'
149      return arr, ts[1..-1]
150    end
151
152    loop do
153      ts = eat(',', ts)
154
155      v, ts = valparse(ts)
156      arr << v
157
158      if ts[0][0] == ']'
159        return arr, ts[1..-1]
160      end
161    end
162  end
163
164
165  def eat(typ, ts)
166    if ts[0][0] != typ
167      raise Error, "expected #{typ} (got #{ts[0].inspect})"
168    end
169    ts[1..-1]
170  end
171
172
173  # Scans s and returns a list of json tokens,
174  # excluding white space (as defined in RFC 4627).
175  def lex(s)
176    ts = []
177    while s.length > 0
178      typ, lexeme, val = tok(s)
179      if typ == nil
180        raise Error, "invalid character at #{s[0,10].inspect}"
181      end
182      if typ != :space
183        ts << [typ, lexeme, val]
184      end
185      s = s[lexeme.length..-1]
186    end
187    ts
188  end
189
190
191  # Scans the first token in s and
192  # returns a 3-element list, or nil
193  # if s does not begin with a valid token.
194  #
195  # The first list element is one of
196  # '{', '}', ':', ',', '[', ']',
197  # :val, :str, and :space.
198  #
199  # The second element is the lexeme.
200  #
201  # The third element is the value of the
202  # token for :val and :str, otherwise
203  # it is the lexeme.
204  def tok(s)
205    case s[0]
206    when ?{  then ['{', s[0,1], s[0,1]]
207    when ?}  then ['}', s[0,1], s[0,1]]
208    when ?:  then [':', s[0,1], s[0,1]]
209    when ?,  then [',', s[0,1], s[0,1]]
210    when ?[  then ['[', s[0,1], s[0,1]]
211    when ?]  then [']', s[0,1], s[0,1]]
212    when ?n  then nulltok(s)
213    when ?t  then truetok(s)
214    when ?f  then falsetok(s)
215    when ?"  then strtok(s)
216    when Spc then [:space, s[0,1], s[0,1]]
217    when ?\t then [:space, s[0,1], s[0,1]]
218    when ?\n then [:space, s[0,1], s[0,1]]
219    when ?\r then [:space, s[0,1], s[0,1]]
220    else          numtok(s)
221    end
222  end
223
224
225  def nulltok(s);  s[0,4] == 'null'  ? [:val, 'null',  nil]   : [] end
226  def truetok(s);  s[0,4] == 'true'  ? [:val, 'true',  true]  : [] end
227  def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end
228
229
230  def numtok(s)
231    m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
232    if m && m.begin(0) == 0
233      if m[3] && !m[2]
234        [:val, m[0], Integer(m[1])*(10**Integer(m[3][1..-1]))]
235      elsif m[2]
236        [:val, m[0], Float(m[0])]
237      else
238        [:val, m[0], Integer(m[0])]
239      end
240    else
241      []
242    end
243  end
244
245
246  def strtok(s)
247    m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
248    if ! m
249      raise Error, "invalid string literal at #{abbrev(s)}"
250    end
251    [:str, m[0], unquote(m[0])]
252  end
253
254
255  def abbrev(s)
256    t = s[0,10]
257    p = t['`']
258    t = t[0,p] if p
259    t = t + '...' if t.length < s.length
260    '`' + t + '`'
261  end
262
263
264  # Converts a quoted json string literal q into a UTF-8-encoded string.
265  # The rules are different than for Ruby, so we cannot use eval.
266  # Unquote will raise an error if q contains control characters.
267  def unquote(q)
268    q = q[1...-1]
269    a = q.dup # allocate a big enough string
270    rubydoesenc = false
271    # In ruby >= 1.9, a[w] is a codepoint, not a byte.
272    if a.class.method_defined?(:force_encoding)
273      a.force_encoding('UTF-8')
274      rubydoesenc = true
275    end
276    r, w = 0, 0
277    while r < q.length
278      c = q[r]
279      case true
280      when c == ?\\
281        r += 1
282        if r >= q.length
283          raise Error, "string literal ends with a \"\\\": \"#{q}\""
284        end
285
286        case q[r]
287        when ?",?\\,?/,?'
288          a[w] = q[r]
289          r += 1
290          w += 1
291        when ?b,?f,?n,?r,?t
292          a[w] = Unesc[q[r]]
293          r += 1
294          w += 1
295        when ?u
296          r += 1
297          uchar = begin
298            hexdec4(q[r,4])
299          rescue RuntimeError => e
300            raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
301          end
302          r += 4
303          if surrogate? uchar
304            if q.length >= r+6
305              uchar1 = hexdec4(q[r+2,4])
306              uchar = subst(uchar, uchar1)
307              if uchar != Ucharerr
308                # A valid pair; consume.
309                r += 6
310              end
311            end
312          end
313          if rubydoesenc
314            a[w] = '' << uchar
315            w += 1
316          else
317            w += ucharenc(a, w, uchar)
318          end
319        else
320          raise Error, "invalid escape char #{q[r]} in \"#{q}\""
321        end
322      when c == ?", c < Spc
323        raise Error, "invalid character in string literal \"#{q}\""
324      else
325        # Copy anything else byte-for-byte.
326        # Valid UTF-8 will remain valid UTF-8.
327        # Invalid UTF-8 will remain invalid UTF-8.
328        # In ruby >= 1.9, c is a codepoint, not a byte,
329        # in which case this is still what we want.
330        a[w] = c
331        r += 1
332        w += 1
333      end
334    end
335    a[0,w]
336  end
337
338
339  # Encodes unicode character u as UTF-8
340  # bytes in string a at position i.
341  # Returns the number of bytes written.
342  def ucharenc(a, i, u)
343    case true
344    when u <= Uchar1max
345      a[i] = (u & 0xff).chr
346      1
347    when u <= Uchar2max
348      a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
349      a[i+1] = (Utagx | (u&Umaskx)).chr
350      2
351    when u <= Uchar3max
352      a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
353      a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
354      a[i+2] = (Utagx | (u&Umaskx)).chr
355      3
356    else
357      a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
358      a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
359      a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
360      a[i+3] = (Utagx | (u&Umaskx)).chr
361      4
362    end
363  end
364
365
366  def hexdec4(s)
367    if s.length != 4
368      raise Error, 'short'
369    end
370    (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
371  end
372
373
374  def subst(u1, u2)
375    if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
376      return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself
377    end
378    return Ucharerr
379  end
380
381
382  def surrogate?(u)
383    Usurr1 <= u && u < Usurr3
384  end
385
386
387  def nibble(c)
388    case true
389    when ?0 <= c && c <= ?9 then c.ord - ?0.ord
390    when ?a <= c && c <= ?z then c.ord - ?a.ord + 10
391    when ?A <= c && c <= ?Z then c.ord - ?A.ord + 10
392    else
393      raise Error, "invalid hex code #{c}"
394    end
395  end
396
397
398  # Encodes x into a json text. It may contain only
399  # Array, Hash, String, Numeric, true, false, nil.
400  # (Note, this list excludes Symbol.)
401  # X itself must be an Array or a Hash.
402  # No other value can be encoded, and an error will
403  # be raised if x contains any other value, such as
404  # Nan, Infinity, Symbol, and Proc, or if a Hash key
405  # is not a String.
406  # Strings contained in x must be valid UTF-8.
407  def encode(x)
408    case x
409    when Hash    then objenc(x)
410    when Array   then arrenc(x)
411    else
412      raise Error, 'root value must be an Array or a Hash'
413    end
414  end
415
416
417  def valenc(x)
418    case x
419    when Hash    then objenc(x)
420    when Array   then arrenc(x)
421    when String  then strenc(x)
422    when Numeric then numenc(x)
423    when true    then "true"
424    when false   then "false"
425    when nil     then "null"
426    else
427      raise Error, "cannot encode #{x.class}: #{x.inspect}"
428    end
429  end
430
431
432  def objenc(x)
433    '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
434  end
435
436
437  def arrenc(a)
438    '[' + a.map{|x| valenc(x)}.join(',') + ']'
439  end
440
441
442  def keyenc(k)
443    case k
444    when String then strenc(k)
445    else
446      raise Error, "Hash key is not a string: #{k.inspect}"
447    end
448  end
449
450
451  def strenc(s)
452    t = StringIO.new
453    t.putc(?")
454    r = 0
455
456    # In ruby >= 1.9, s[r] is a codepoint, not a byte.
457    rubydoesenc = s.class.method_defined?(:encoding)
458
459    while r < s.length
460      case s[r]
461      when ?"  then t.print('\\"')
462      when ?\\ then t.print('\\\\')
463      when ?\b then t.print('\\b')
464      when ?\f then t.print('\\f')
465      when ?\n then t.print('\\n')
466      when ?\r then t.print('\\r')
467      when ?\t then t.print('\\t')
468      else
469        c = s[r]
470        case true
471        when rubydoesenc
472          begin
473            c.ord # will raise an error if c is invalid UTF-8
474            t.write(c)
475          rescue
476            t.write(Ustrerr)
477          end
478        when Spc <= c && c <= ?~
479          t.putc(c)
480        else
481          n = ucharcopy(t, s, r) # ensure valid UTF-8 output
482          r += n - 1 # r is incremented below
483        end
484      end
485      r += 1
486    end
487    t.putc(?")
488    t.string
489  end
490
491
492  def numenc(x)
493    if ((x.nan? || x.infinite?) rescue false)
494      raise Error, "Numeric cannot be represented: #{x}"
495    end
496    "#{x}"
497  end
498
499
500  # Copies the valid UTF-8 bytes of a single character
501  # from string s at position i to I/O object t, and
502  # returns the number of bytes copied.
503  # If no valid UTF-8 char exists at position i,
504  # ucharcopy writes Ustrerr and returns 1.
505  def ucharcopy(t, s, i)
506    n = s.length - i
507    raise Utf8Error if n < 1
508
509    c0 = s[i].ord
510
511    # 1-byte, 7-bit sequence?
512    if c0 < Utagx
513      t.putc(c0)
514      return 1
515    end
516
517    raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
518
519    raise Utf8Error if n < 2 # need continuation byte
520    c1 = s[i+1].ord
521    raise Utf8Error if c1 < Utagx || Utag2 <= c1
522
523    # 2-byte, 11-bit sequence?
524    if c0 < Utag3
525      raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
526      t.putc(c0)
527      t.putc(c1)
528      return 2
529    end
530
531    # need second continuation byte
532    raise Utf8Error if n < 3
533
534    c2 = s[i+2].ord
535    raise Utf8Error if c2 < Utagx || Utag2 <= c2
536
537    # 3-byte, 16-bit sequence?
538    if c0 < Utag4
539      u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
540      raise Utf8Error if u <= Uchar2max
541      t.putc(c0)
542      t.putc(c1)
543      t.putc(c2)
544      return 3
545    end
546
547    # need third continuation byte
548    raise Utf8Error if n < 4
549    c3 = s[i+3].ord
550    raise Utf8Error if c3 < Utagx || Utag2 <= c3
551
552    # 4-byte, 21-bit sequence?
553    if c0 < Utag5
554      u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
555      raise Utf8Error if u <= Uchar3max
556      t.putc(c0)
557      t.putc(c1)
558      t.putc(c2)
559      t.putc(c3)
560      return 4
561    end
562
563    raise Utf8Error
564  rescue Utf8Error
565    t.write(Ustrerr)
566    return 1
567  end
568
569
570  class Utf8Error < ::StandardError
571  end
572
573
574  class Error < ::StandardError
575  end
576
577
578  Utagx = 0x80 # 1000 0000
579  Utag2 = 0xc0 # 1100 0000
580  Utag3 = 0xe0 # 1110 0000
581  Utag4 = 0xf0 # 1111 0000
582  Utag5 = 0xF8 # 1111 1000
583  Umaskx = 0x3f # 0011 1111
584  Umask2 = 0x1f # 0001 1111
585  Umask3 = 0x0f # 0000 1111
586  Umask4 = 0x07 # 0000 0111
587  Uchar1max = (1<<7) - 1
588  Uchar2max = (1<<11) - 1
589  Uchar3max = (1<<16) - 1
590  Ucharerr = 0xFFFD # unicode "replacement char"
591  Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
592  Usurrself = 0x10000
593  Usurr1 = 0xd800
594  Usurr2 = 0xdc00
595  Usurr3 = 0xe000
596
597  Spc = ' '[0]
598  Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
599end