PageRenderTime 60ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/vendor/ruby/1.9.1/gems/multi_json-1.5.0/lib/multi_json/vendor/okjson.rb

https://github.com/KaylaStuebbe/uwsp-virtual-tour-server
Ruby | 602 lines | 459 code | 68 blank | 75 comment | 61 complexity | 86ae32036e8a9a8066cf525e88b280f6 MD5 | raw file
Possible License(s): Apache-2.0, MIT, GPL-2.0, BSD-3-Clause
  1. # encoding: UTF-8
  2. #
  3. # Copyright 2011, 2012 Keith Rarick
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. # THE SOFTWARE.
  22. # See https://github.com/kr/okjson for updates.
  23. require 'stringio'
  24. # Some parts adapted from
  25. # http://golang.org/src/pkg/json/decode.go and
  26. # http://golang.org/src/pkg/utf8/utf8.go
  module MultiJson
    # OkJson is a small, dependency-free JSON encoder/decoder.
    # All methods are module functions (the module extends itself), so they
    # are called as MultiJson::OkJson.decode(...) / MultiJson::OkJson.encode(...).
    # Every failure caused by bad input is reported as OkJson::Error.
    module OkJson
      extend self

      # Decodes a json document in string s and
      # returns the corresponding ruby value.
      # String s must be valid UTF-8. If you have
      # a string in some other encoding, convert
      # it first.
      #
      # String values in the resulting structure
      # will be UTF-8.
      #
      # Raises Error if s is not a valid json text or if
      # anything other than white space follows it.
      def decode(s)
        ts = lex(s)
        v, ts = textparse(ts)
        raise Error, 'trailing garbage' unless ts.empty?
        v
      end

      # Parses a "json text" in the sense of RFC 4627.
      # Returns the parsed value and any trailing tokens.
      # Note: this is almost the same as valparse,
      # except that it does not accept atomic values.
      # Raises Error on an empty token list or an unexpected token.
      def textparse(ts)
        raise Error, 'empty' if ts.empty?
        typ, _, val = ts[0]
        case typ
        when '{' then objparse(ts)
        when '[' then arrparse(ts)
        else
          raise Error, "unexpected #{val.inspect}"
        end
      end

      # Parses a "value" in the sense of RFC 4627.
      # Returns the parsed value and any trailing tokens.
      # Raises Error on an empty token list or an unexpected token.
      def valparse(ts)
        raise Error, 'empty' if ts.empty?
        typ, _, val = ts[0]
        case typ
        when '{' then objparse(ts)
        when '[' then arrparse(ts)
        when :val, :str then [val, ts[1..-1]]
        else
          raise Error, "unexpected #{val.inspect}"
        end
      end

      # Returns the type (first element) of the first token in ts.
      # Raises Error, rather than failing on nil, when the token list
      # has run out; context names the construct being parsed and is
      # only used in the error message.
      def peektype(ts, context)
        raise Error, "unexpected end of #{context}" if ts.nil? || ts.empty?
        ts[0][0]
      end

      # Parses an "object" in the sense of RFC 4627.
      # Returns the parsed value and any trailing tokens.
      # Raises Error if the object is malformed or truncated.
      def objparse(ts)
        ts = eat('{', ts)
        obj = {}
        return [obj, ts[1..-1]] if peektype(ts, 'object') == '}'

        k, v, ts = pairparse(ts)
        obj[k] = v
        # Every further member must be introduced by a comma.
        while peektype(ts, 'object') != '}'
          ts = eat(',', ts)
          k, v, ts = pairparse(ts)
          obj[k] = v
        end
        [obj, ts[1..-1]]
      end

      # Parses a "member" in the sense of RFC 4627.
      # Returns the parsed values and any trailing tokens
      # as [key, value, remaining_tokens].
      # Raises Error if the key is not a string token.
      def pairparse(ts)
        raise Error, 'unexpected end of object' if ts.empty?
        (typ, _, k), ts = ts[0], ts[1..-1]
        raise Error, "unexpected #{k.inspect}" if typ != :str
        ts = eat(':', ts)
        v, ts = valparse(ts)
        [k, v, ts]
      end

      # Parses an "array" in the sense of RFC 4627.
      # Returns the parsed value and any trailing tokens.
      # Raises Error if the array is malformed or truncated.
      def arrparse(ts)
        ts = eat('[', ts)
        arr = []
        return [arr, ts[1..-1]] if peektype(ts, 'array') == ']'

        v, ts = valparse(ts)
        arr << v
        # Every further element must be introduced by a comma.
        while peektype(ts, 'array') != ']'
          ts = eat(',', ts)
          v, ts = valparse(ts)
          arr << v
        end
        [arr, ts[1..-1]]
      end

      # Consumes the first token of ts, which must have type typ,
      # and returns the remaining tokens.
      # Raises Error if the token list is exhausted or the type differs.
      def eat(typ, ts)
        raise Error, "expected #{typ} (got end of input)" if ts.empty?
        if ts[0][0] != typ
          raise Error, "expected #{typ} (got #{ts[0].inspect})"
        end
        ts[1..-1]
      end

      # Scans s and returns a list of json tokens,
      # excluding white space (as defined in RFC 4627).
      # Raises Error at the first position that starts no valid token.
      def lex(s)
        ts = []
        while s.length > 0
          typ, lexeme, val = tok(s)
          if typ == nil
            raise Error, "invalid character at #{s[0,10].inspect}"
          end
          if typ != :space
            ts << [typ, lexeme, val]
          end
          s = s[lexeme.length..-1]
        end
        ts
      end

      # Scans the first token in s and
      # returns a 3-element list, or an empty list
      # if s does not begin with a valid token.
      #
      # The first list element is one of
      # '{', '}', ':', ',', '[', ']',
      # :val, :str, and :space.
      #
      # The second element is the lexeme.
      #
      # The third element is the value of the
      # token for :val and :str, otherwise
      # it is the lexeme.
      def tok(s)
        case s[0]
        when ?{ then ['{', s[0,1], s[0,1]]
        when ?} then ['}', s[0,1], s[0,1]]
        when ?: then [':', s[0,1], s[0,1]]
        when ?, then [',', s[0,1], s[0,1]]
        when ?[ then ['[', s[0,1], s[0,1]]
        when ?] then [']', s[0,1], s[0,1]]
        when ?n then nulltok(s)
        when ?t then truetok(s)
        when ?f then falsetok(s)
        when ?" then strtok(s)
        when Spc then [:space, s[0,1], s[0,1]]
        when ?\t then [:space, s[0,1], s[0,1]]
        when ?\n then [:space, s[0,1], s[0,1]]
        when ?\r then [:space, s[0,1], s[0,1]]
        else numtok(s)
        end
      end

      # Literal tokens: each returns a :val token when s starts with the
      # literal and an empty list (meaning "no token") otherwise.
      def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end
      def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end
      def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end

      # Scans a json number at the start of s.
      # Returns a :val token, or an empty list if s does not start
      # with a number.
      #
      # The value is an Integer when the number has no fraction and no
      # negative exponent, and a Float otherwise.
      def numtok(s)
        # Group 1 includes the sign so that "-1e2" keeps it.
        m = /\A(-?(?:[1-9][0-9]+|[0-9]))([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
        return [] unless m

        if m[2]
          [:val, m[0], Float(m[0])]
        elsif m[3]
          # to_i(10) rather than Integer(): Integer("010") would be
          # read as octal.
          exp = m[3][1..-1].to_i(10)
          if exp < 0
            # An Integer times 10**negative would be a Rational.
            [:val, m[0], Float(m[0])]
          else
            [:val, m[0], Integer(m[1]) * (10**exp)]
          end
        else
          [:val, m[0], Integer(m[0])]
        end
      end

      # Scans a json string literal at the start of s.
      # Returns a :str token whose value is the unquoted string.
      # Raises Error if s does not start with a valid string literal.
      def strtok(s)
        # Anchored with \A: an unanchored search could skip an invalid
        # literal and match a later quote pair, while lex would still
        # advance from position 0.
        m = /\A"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
        if ! m
          raise Error, "invalid string literal at #{abbrev(s)}"
        end
        [:str, m[0], unquote(m[0])]
      end

      # Returns at most the first 10 characters of s, cut before any
      # backquote, wrapped in backquotes for use in error messages.
      # '...' is appended when the excerpt is shorter than s.
      def abbrev(s)
        t = s[0,10]
        p = t.index('`') # position of the first backquote, if any
        t = t[0,p] if p
        t = t + '...' if t.length < s.length
        '`' + t + '`'
      end

      # Converts a quoted json string literal q into a UTF-8-encoded string.
      # The rules are different than for Ruby, so we cannot use eval.
      # Unquote will raise an error if q contains control characters.
      #
      # A \u escape holding a surrogate is combined with a directly
      # following \u low surrogate; a lone or mismatched surrogate
      # decodes to the unicode replacement character.
      def unquote(q)
        q = q[1...-1]
        a = q.dup # allocate a big enough string
        rubydoesenc = false
        # In ruby >= 1.9, a[w] is a codepoint, not a byte.
        if a.class.method_defined?(:force_encoding)
          a.force_encoding('UTF-8')
          rubydoesenc = true
        end
        # r is the read position in q, w the write position in a.
        r, w = 0, 0
        while r < q.length
          c = q[r]
          case true
          when c == ?\\
            r += 1
            if r >= q.length
              raise Error, "string literal ends with a \"\\\": \"#{q}\""
            end
            case q[r]
            when ?", ?\\, ?/, ?'
              a[w] = q[r]
              r += 1
              w += 1
            when ?b, ?f, ?n, ?r, ?t
              a[w] = Unesc[q[r]]
              r += 1
              w += 1
            when ?u
              r += 1
              uchar = begin
                hexdec4(q[r,4])
              rescue Error => e
                raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
              end
              r += 4
              if surrogate?(uchar)
                low = q[r, 6]
                pair = Ucharerr
                # Only a complete "\uXXXX" escape may supply the low half.
                if low && low.length == 6 && low[0, 2] == '\\u'
                  lo = begin
                    hexdec4(low[2, 4])
                  rescue Error
                    nil
                  end
                  pair = subst(uchar, lo) if lo
                end
                # A valid pair; consume the second escape as well.
                r += 6 if pair != Ucharerr
                uchar = pair
              end
              if rubydoesenc
                a[w] = [uchar].pack('U')
                w += 1
              else
                w += ucharenc(a, w, uchar)
              end
            else
              raise Error, "invalid escape char #{q[r]} in \"#{q}\""
            end
          when c == ?", c < Spc
            raise Error, "invalid character in string literal \"#{q}\""
          else
            # Copy anything else byte-for-byte.
            # Valid UTF-8 will remain valid UTF-8.
            # Invalid UTF-8 will remain invalid UTF-8.
            # In ruby >= 1.9, c is a codepoint, not a byte,
            # in which case this is still what we want.
            a[w] = c
            r += 1
            w += 1
          end
        end
        a[0,w]
      end

      # Encodes unicode character u as UTF-8
      # bytes in string a at position i.
      # Returns the number of bytes written.
      # Only used by unquote when strings are byte-indexed
      # (no String#force_encoding).
      def ucharenc(a, i, u)
        case true
        when u <= Uchar1max
          a[i] = (u & 0xff).chr
          1
        when u <= Uchar2max
          a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
          a[i+1] = (Utagx | (u&Umaskx)).chr
          2
        when u <= Uchar3max
          a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
          a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
          a[i+2] = (Utagx | (u&Umaskx)).chr
          3
        else
          a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
          a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
          a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
          a[i+3] = (Utagx | (u&Umaskx)).chr
          4
        end
      end

      # Converts the 4 hex digits in s to an Integer.
      # Raises Error if s is not exactly 4 characters long or
      # contains a character that is not a hex digit.
      def hexdec4(s)
        if s.length != 4
          raise Error, 'short'
        end
        (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
      end

      # Combines high surrogate u1 and low surrogate u2 into the code
      # point they encode. Returns Ucharerr if they do not form a pair.
      def subst(u1, u2)
        if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
          # Parenthesised explicitly: in Ruby "+" binds tighter than "|".
          return (((u1-Usurr1)<<10) | (u2-Usurr2)) + Usurrself
        end
        return Ucharerr
      end

      # Reports whether u lies in the surrogate range [Usurr1, Usurr3).
      def surrogate?(u)
        Usurr1 <= u && u < Usurr3
      end

      # Returns the numeric value (0..15) of hex digit c.
      # Raises Error if c is not one of 0-9, a-f, A-F.
      def nibble(c)
        case true
        when ?0 <= c && c <= ?9 then c.ord - ?0.ord
        when ?a <= c && c <= ?f then c.ord - ?a.ord + 10
        when ?A <= c && c <= ?F then c.ord - ?A.ord + 10
        else
          raise Error, "invalid hex code #{c}"
        end
      end

      # Encodes x into a json text. It may contain only
      # Array, Hash, String, Numeric, true, false, nil.
      # (Note, this list excludes Symbol.)
      # X itself must be an Array or a Hash.
      # No other value can be encoded, and an error will
      # be raised if x contains any other value, such as
      # Nan, Infinity, Symbol, and Proc, or if a Hash key
      # is not a String.
      # (Exception: valenc delegates to #to_json for any other
      # nested value that responds to it.)
      # Strings contained in x must be valid UTF-8.
      def encode(x)
        case x
        when Hash then objenc(x)
        when Array then arrenc(x)
        else
          raise Error, 'root value must be an Array or a Hash'
        end
      end

      # Encodes a single value x (at any nesting depth) as json.
      # Values of other classes are encoded with their own #to_json
      # when they respond to it; otherwise Error is raised.
      def valenc(x)
        case x
        when Hash then objenc(x)
        when Array then arrenc(x)
        when String then strenc(x)
        when Numeric then numenc(x)
        when true then "true"
        when false then "false"
        when nil then "null"
        else
          if x.respond_to?(:to_json)
            x.to_json
          else
            raise Error, "cannot encode #{x.class}: #{x.inspect}"
          end
        end
      end

      # Encodes Hash x as a json object.
      def objenc(x)
        '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
      end

      # Encodes Array a as a json array.
      def arrenc(a)
        '[' + a.map{|x| valenc(x)}.join(',') + ']'
      end

      # Encodes Hash key k, which must be a String; raises Error otherwise.
      def keyenc(k)
        case k
        when String then strenc(k)
        else
          raise Error, "Hash key is not a string: #{k.inspect}"
        end
      end

      # Encodes String s as a quoted json string literal.
      # '"', '\' and the control characters with short escapes are
      # backslash-escaped, any other character below U+0020 is written
      # as \u00XX, and invalid UTF-8 is replaced by Ustrerr.
      def strenc(s)
        t = StringIO.new
        t.putc(?")
        r = 0
        # In ruby >= 1.9, s[r] is a codepoint, not a byte.
        rubydoesenc = s.class.method_defined?(:encoding)
        while r < s.length
          case s[r]
          when ?" then t.print('\\"')
          when ?\\ then t.print('\\\\')
          when ?\b then t.print('\\b')
          when ?\f then t.print('\\f')
          when ?\n then t.print('\\n')
          when ?\r then t.print('\\r')
          when ?\t then t.print('\\t')
          else
            c = s[r]
            case true
            when rubydoesenc
              begin
                # c.ord will raise an error if c is invalid UTF-8
                if c.ord < Spc.ord
                  # json forbids raw control characters in strings
                  c = "\\u%04x" % c.ord
                end
                t.write(c)
              rescue
                t.write(Ustrerr)
              end
            when c < Spc
              # byte-indexed strings: c is an Integer here
              t.write("\\u%04x" % c)
            when Spc <= c && c <= ?~
              t.putc(c)
            else
              n = ucharcopy(t, s, r) # ensure valid UTF-8 output
              r += n - 1 # r is incremented below
            end
          end
          r += 1
        end
        t.putc(?")
        t.string
      end

      # Encodes Numeric x using its string interpolation.
      # Raises Error for NaN and infinite values, which json cannot express.
      def numenc(x)
        # Not every Numeric implements nan?/infinite?, hence respond_to?.
        if (x.respond_to?(:nan?) && x.nan?) ||
           (x.respond_to?(:infinite?) && x.infinite?)
          raise Error, "Numeric cannot be represented: #{x}"
        end
        "#{x}"
      end

      # Copies the valid UTF-8 bytes of a single character
      # from string s at position i to I/O object t, and
      # returns the number of bytes copied.
      # If no valid UTF-8 char exists at position i,
      # ucharcopy writes Ustrerr and returns 1.
      # strenc only reaches this method when strings are byte-indexed
      # (its rubydoesenc branch is tried first).
      def ucharcopy(t, s, i)
        n = s.length - i
        raise Utf8Error if n < 1
        c0 = s[i].ord
        # 1-byte, 7-bit sequence?
        if c0 < Utagx
          t.putc(c0)
          return 1
        end
        raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
        raise Utf8Error if n < 2 # need continuation byte
        c1 = s[i+1].ord
        raise Utf8Error if c1 < Utagx || Utag2 <= c1
        # 2-byte, 11-bit sequence?
        if c0 < Utag3
          # reject overlong encodings
          raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
          t.putc(c0)
          t.putc(c1)
          return 2
        end
        # need second continuation byte
        raise Utf8Error if n < 3
        c2 = s[i+2].ord
        raise Utf8Error if c2 < Utagx || Utag2 <= c2
        # 3-byte, 16-bit sequence?
        if c0 < Utag4
          u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
          raise Utf8Error if u <= Uchar2max
          t.putc(c0)
          t.putc(c1)
          t.putc(c2)
          return 3
        end
        # need third continuation byte
        raise Utf8Error if n < 4
        c3 = s[i+3].ord
        raise Utf8Error if c3 < Utagx || Utag2 <= c3
        # 4-byte, 21-bit sequence?
        if c0 < Utag5
          u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
          raise Utf8Error if u <= Uchar3max
          t.putc(c0)
          t.putc(c1)
          t.putc(c2)
          t.putc(c3)
          return 4
        end
        raise Utf8Error
      rescue Utf8Error
        t.write(Ustrerr)
        return 1
      end

      # Raised and rescued inside ucharcopy to signal an invalid UTF-8
      # sequence.
      class Utf8Error < ::StandardError
      end

      # Raised for every decoding or encoding failure caused by the input.
      class Error < ::StandardError
      end

      # UTF-8 lead/continuation byte tags and payload masks.
      Utagx = 0x80 # 1000 0000
      Utag2 = 0xc0 # 1100 0000
      Utag3 = 0xe0 # 1110 0000
      Utag4 = 0xf0 # 1111 0000
      Utag5 = 0xF8 # 1111 1000
      Umaskx = 0x3f # 0011 1111
      Umask2 = 0x1f # 0001 1111
      Umask3 = 0x0f # 0000 1111
      Umask4 = 0x07 # 0000 0111
      # Largest code point representable in 1, 2 and 3 UTF-8 bytes.
      Uchar1max = (1<<7) - 1
      Uchar2max = (1<<11) - 1
      Uchar3max = (1<<16) - 1
      Ucharerr = 0xFFFD # unicode "replacement char"
      Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
      # Surrogate ranges: high is [Usurr1, Usurr2), low is [Usurr2, Usurr3).
      Usurrself = 0x10000
      Usurr1 = 0xd800
      Usurr2 = 0xdc00
      Usurr3 = 0xe000
      Spc = ' '[0]
      # Maps the letter of a short escape to the character it denotes.
      Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
    end
  end