PageRenderTime 27ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/NekoKun.Serialization.RubyMarshal/RubyMarshalReader.cs

https://bitbucket.org/nekokun/nekokun
C# | 706 lines | 541 code | 28 blank | 137 comment | 81 complexity | 0fc38155fb68b672975b5c8020800c04 MD5 | raw file
Possible License(s): MIT, CC-BY-SA-3.0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using System.IO;
  5. using System.Text.RegularExpressions;
  6. namespace NekoKun.Serialization.RubyMarshal
  7. {
  8. class RubyMarshalReader
  9. {
  // Source stream; single bytes are read from it directly (see ReadByte).
  private readonly Stream m_stream;
  // Binary view over m_stream used for length-prefixed and multi-byte reads.
  private readonly BinaryReader m_reader;
  // Object table: index -> object already registered, used to resolve Link tags.
  private readonly Dictionary<int, object> m_objects;
  // Symbol table: index -> symbol already read, used to resolve SymbolLink tags.
  private readonly Dictionary<int, RubySymbol> m_symbols;
  // Compatibility table (object -> replacement). Nothing in this file adds entries;
  // Entry0 and Leave only look entries up and Leave removes them.
  private readonly Dictionary<object, object> m_compat_tbl;
  // Optional post-processing hook applied to loaded values; this file only ever sets it to null.
  private Converter<object, object> m_proc;

  /// <summary>
  /// Creates a reader over <paramref name="input"/>.
  /// </summary>
  /// <param name="input">Readable stream that holds the marshalled data.</param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="input"/> cannot be read.</exception>
  public RubyMarshalReader(Stream input)
  {
      if (input == null)
      {
          // The single-string overload treats its argument as the parameter name,
          // so the name and the message are passed separately.
          throw new ArgumentNullException("input", "instance of IO needed");
      }
      if (!input.CanRead)
      {
          throw new ArgumentException("instance of IO needed", "input");
      }

      this.m_stream = input;
      this.m_reader = new BinaryReader(input, Encoding.ASCII);
      this.m_objects = new Dictionary<int, object>();
      this.m_symbols = new Dictionary<int, RubySymbol>();
      this.m_compat_tbl = new Dictionary<object, object>();
      this.m_proc = null;
  }
  /// <summary>
  /// Reads the two-byte version header and then the top-level value.
  /// </summary>
  /// <returns>The value produced by <see cref="ReadObject"/> for the data after the header.</returns>
  /// <exception cref="InvalidDataException">
  /// The stream ends inside the header, the major version differs from
  /// RubyMarshal.MarshalMajor, or the minor version exceeds RubyMarshal.MarshalMinor.
  /// </exception>
  public object Load()
  {
      int major = ReadByte();
      int minor = ReadByte();

      // ReadByte reports end of stream as -1. Without this check a one-byte stream
      // would pass the minor-version test below, and an empty stream would be
      // reported as an "incompatible" version -1.
      if (major < 0 || minor < 0)
      {
          throw new InvalidDataException("marshal data too short");
      }

      if (major != RubyMarshal.MarshalMajor || minor > RubyMarshal.MarshalMinor)
      {
          throw new InvalidDataException(string.Format("incompatible marshal file format (can't be read)\n\tformat version {0}.{1} required; {2}.{3} given", RubyMarshal.MarshalMajor, RubyMarshal.MarshalMinor, major, minor));
      }

      return ReadObject();
  }
  /// <summary>
  /// Reads the next byte straight from the underlying stream.
  /// C counterpart: static int r_byte(struct load_arg *arg)
  /// </summary>
  /// <returns>The byte value as an int, or -1 when the stream has no more data (Stream.ReadByte semantics).</returns>
  public int ReadByte()
  {
      int next = m_stream.ReadByte();
      return next;
  }
  /// <summary>
  /// Decodes a variable-length integer.
  /// C counterpart: static long r_long(struct load_arg *arg)
  /// </summary>
  /// <remarks>
  /// The first (signed) byte selects the form:
  /// 0 is zero; 5..127 is the value plus 5; -128..-5 is the value minus 5;
  /// 1..4 is the number of little-endian bytes of a non-negative value;
  /// -4..-1 is (negated) the number of little-endian bytes of a negative value.
  /// </remarks>
  /// <returns>The decoded integer.</returns>
  public int ReadLong()
  {
      sbyte head = m_reader.ReadSByte();

      // Single-byte forms.
      if (head == 0)
      {
          return 0;
      }
      if (head > 4)
      {
          return head - 5;
      }
      if (head < -4)
      {
          return head + 5;
      }

      // Multi-byte forms: |head| payload bytes follow, least significant first.
      bool negative = head < 0;
      int byteCount = negative ? -head : head;
      int accumulator = 0;
      for (int position = 0; position < byteCount; position++)
      {
          int payload = m_reader.ReadByte();
          // Negative values are accumulated as the complement of each byte.
          int contribution = negative ? 0xff - payload : payload;
          accumulator += contribution << (8 * position);
      }

      return negative ? (-accumulator - 1) : accumulator;
  }
  /// <summary>
  /// Reads exactly <paramref name="len"/> bytes.
  /// C counterpart: static VALUE r_bytes0(long len, struct load_arg *arg)
  /// </summary>
  /// <param name="len">Number of bytes to read.</param>
  /// <returns>A new array holding the <paramref name="len"/> bytes read.</returns>
  /// <exception cref="EndOfStreamException">The stream ended before <paramref name="len"/> bytes were available.</exception>
  public byte[] ReadBytes0(int len)
  {
      byte[] buffer = this.m_reader.ReadBytes(len);

      // BinaryReader.ReadBytes returns a shorter array instead of throwing when the
      // stream runs out, which would let truncated data through unnoticed.
      if (buffer.Length != len)
      {
          throw new EndOfStreamException(string.Format("marshal data too short ({0} of {1} bytes available)", buffer.Length, len));
      }

      return buffer;
  }
  /// <summary>
  /// Reads a length-prefixed byte sequence: a ReadLong length followed by that many bytes.
  /// C counterpart: #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
  /// </summary>
  /// <returns>The bytes returned by <see cref="ReadBytes0"/> for the decoded length.</returns>
  public byte[] ReadBytes()
  {
      int length = ReadLong();
      return ReadBytes0(length);
  }
  /// <summary>
  /// Reads a symbol-table index and returns the symbol stored under it.
  /// C counterpart: static ID r_symlink(struct load_arg *arg)
  /// </summary>
  /// <returns>The entry of the symbol table at the decoded index.</returns>
  /// <exception cref="InvalidDataException">The index is not present in the symbol table.</exception>
  public RubySymbol ReadSymbolLink()
  {
      int index = ReadLong();

      // A single lookup also rejects negative indices, which the former
      // "index >= Count" test let through to the dictionary indexer.
      RubySymbol symbol;
      if (!this.m_symbols.TryGetValue(index, out symbol))
      {
          throw new InvalidDataException("bad symbol");
      }

      return symbol;
  }
  /// <summary>
  /// Reads a symbol body (length-prefixed bytes), registers it in the symbol table and returns it.
  /// C counterpart: static ID r_symreal(struct load_arg *arg, int ivar)
  /// </summary>
  /// <param name="ivar">True when the symbol is followed by instance variables (read here to find its encoding).</param>
  /// <returns>The symbol obtained from RubySymbol.GetSymbol for the decoded name.</returns>
  public RubySymbol ReadSymbolReal(bool ivar)
  {
      byte[] raw = ReadBytes();

      // Reserve the table slot before reading any instance variables so that
      // symbols read while processing them receive later indices.
      int slot = m_symbols.Count;
      m_symbols.Add(slot, null);

      Encoding encoding = Encoding.UTF8;
      if (ivar)
      {
          int remaining = ReadLong();
          while (remaining-- > 0)
          {
              RubySymbol name = ReadSymbol();
              Encoding candidate = GetEncoding(name, ReadObject());
              // GetEncoding returns null for instance variables that do not describe
              // an encoding; keep the current encoding in that case instead of
              // overwriting it with null.
              if (candidate != null)
              {
                  encoding = candidate;
              }
          }
      }

      RubySymbol id = RubySymbol.GetSymbol(new RubyString(raw, encoding));
      m_symbols[slot] = id;
      return id;
  }
  /// <summary>
  /// Maps an instance-variable name/value pair to a text encoding, if it describes one.
  /// C counterpart: static int id2encidx(ID id, VALUE val)
  /// </summary>
  /// <param name="id">Instance-variable name.</param>
  /// <param name="val">Instance-variable value.</param>
  /// <returns>
  /// For RubyMarshal.IDs.encoding: the encoding named by the RubyString value.
  /// For RubyMarshal.IDs.E with a bool value: UTF-8 when true, Encoding.Default when false.
  /// Otherwise null.
  /// </returns>
  public Encoding GetEncoding(RubySymbol id, object val)
  {
      if (id == RubyMarshal.IDs.encoding)
      {
          RubyString encodingName = (RubyString)val;
          return Encoding.GetEncoding(encodingName.Text);
      }

      bool isShortFlag = id == RubyMarshal.IDs.E;
      if (isShortFlag && val is bool)
      {
          return (bool)val ? Encoding.UTF8 : Encoding.Default;
      }

      return null;
  }
  /// <summary>
  /// Reads a symbol given either inline or as a link to an earlier symbol.
  /// C counterpart: static ID r_symbol(struct load_arg *arg)
  /// </summary>
  /// <returns>The symbol that was read or looked up.</returns>
  /// <exception cref="InvalidDataException">
  /// The tag is neither a symbol nor a symbol link, or a symbol link follows an instance-variable tag.
  /// </exception>
  public RubySymbol ReadSymbol()
  {
      bool sawIvarTag = false;
      while (true)
      {
          int tag = ReadByte();
          switch (tag)
          {
              case RubyMarshal.Types.InstanceVariable:
                  // Remember the prefix and look at the next tag.
                  sawIvarTag = true;
                  continue;

              case RubyMarshal.Types.Symbol:
                  return ReadSymbolReal(sawIvarTag);

              case RubyMarshal.Types.SymbolLink:
                  if (sawIvarTag)
                  {
                      throw new InvalidDataException("dump format error (symlink with encoding)");
                  }
                  return ReadSymbolLink();

              default:
                  throw new InvalidDataException(String.Format("dump format error for symbol(0x{0:X2})", tag));
          }
      }
  }
  /// <summary>
  /// Reads a symbol; simply forwards to <see cref="ReadSymbol"/>.
  /// C counterpart: static VALUE r_unique(struct load_arg *arg)
  /// </summary>
  /// <returns>The symbol returned by <see cref="ReadSymbol"/>.</returns>
  public RubySymbol ReadUnique()
  {
      RubySymbol symbol = ReadSymbol();
      return symbol;
  }
  // UTF-8 decoder configured to throw on invalid bytes; used only to validate data in ReadString.
  private static readonly Encoding s_strictUtf8 = new UTF8Encoding(false, true);

  /// <summary>
  /// Reads a length-prefixed byte string and guesses its encoding.
  /// C counterpart: static VALUE r_string(struct load_arg *arg)
  /// </summary>
  /// <returns>
  /// A RubyString over the raw bytes whose Encoding is UTF-8 when the bytes are a
  /// complete, valid UTF-8 sequence and Encoding.Default otherwise.
  /// </returns>
  public RubyString ReadString()
  {
      byte[] raw = ReadBytes();
      RubyString result = new RubyString(raw);

      bool isValidUtf8;
      try
      {
          // Encoding.GetCharCount treats the array as the complete input, so an
          // unfinished multi-byte sequence at the end is rejected too. The previous
          // Decoder.GetCharCount call did not flush and accepted such data.
          s_strictUtf8.GetCharCount(raw);
          isValidUtf8 = true;
      }
      catch (DecoderFallbackException)
      {
          // Only a decoding failure means "not UTF-8"; other exceptions propagate.
          isValidUtf8 = false;
      }

      result.Encoding = isValidUtf8 ? Encoding.UTF8 : Encoding.Default;
      return result;
  }
  /// <summary>
  /// Reserves the next object-table slot with a null placeholder.
  /// C counterpart: static st_index_t r_prepare(struct load_arg *arg)
  /// </summary>
  /// <returns>The index of the reserved slot (the table size before the call).</returns>
  public int Prepare()
  {
      int reservedSlot = m_objects.Count;
      m_objects.Add(reservedSlot, null);
      return reservedSlot;
  }
  /// <summary>
  /// Stores an object in the object table under a given index.
  /// C counterpart: static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
  /// </summary>
  /// <param name="v">Object being registered.</param>
  /// <param name="num">Object-table index to write (added if absent, replaced if present).</param>
  /// <returns><paramref name="v"/> itself, even when a compatibility replacement was stored.</returns>
  public object Entry0(object v, int num)
  {
      // When the compatibility table knows a replacement for v, the replacement is
      // what later links resolve to; otherwise v is stored as is.
      object stored;
      if (!m_compat_tbl.TryGetValue(v, out stored))
      {
          stored = v;
      }

      // The indexer adds the key when missing and overwrites it when present.
      m_objects[num] = stored;
      return v;
  }
  /// <summary>
  /// Stores an object in the object table under the next free index (the current table size).
  /// C counterpart: #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
  /// </summary>
  /// <param name="v">Object being registered.</param>
  /// <returns>The value returned by <see cref="Entry0"/>.</returns>
  public object Entry(object v)
  {
      int nextIndex = m_objects.Count;
      return Entry0(v, nextIndex);
  }
  /// <summary>
  /// Finishes loading a value: swaps in its compatibility replacement if one is
  /// registered, then passes the result through the optional m_proc hook.
  /// C counterpart: static VALUE r_leave(VALUE v, struct load_arg *arg)
  /// </summary>
  /// <param name="v">The value that has just been read.</param>
  /// <returns>The final value to hand to the caller.</returns>
  public object Leave(object v)
  {
      object replacement;
      bool hasReplacement = m_compat_tbl.TryGetValue(v, out replacement);
      if (hasReplacement)
      {
          // TODO: ?? MarshalCompat - the C code also runs a per-allocator loader here:
          // if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
          //     marshal_compat_t *compat = (marshal_compat_t*)data;
          //     compat->loader(real_obj, v);
          // }
          m_compat_tbl.Remove(v);
          v = replacement;
      }

      return m_proc == null ? v : m_proc(v);
  }
  /// <summary>
  /// Reads a counted list of instance variables (name, value pairs) and applies them to an object.
  /// C counterpart: static void r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
  /// </summary>
  /// <param name="obj">
  /// Receiver. Only a RubyObject is modified; for any other object the pairs are still
  /// read from the stream but discarded.
  /// </param>
  /// <param name="has_encoding">Set to true when at least one pair describes an encoding; never reset to false here.</param>
  public void ReadInstanceVariable(object obj, ref bool has_encoding)
  {
      int count = ReadLong();
      RubyObject target = obj as RubyObject;

      for (int remaining = count; remaining > 0; remaining--)
      {
          RubySymbol name = ReadSymbol();
          object value = ReadObject();
          Encoding encoding = GetEncoding(name, value);

          if (encoding == null)
          {
              // Ordinary instance variable.
              if (target != null)
              {
                  target.InstanceVariable[name] = value;
              }
              continue;
          }

          // Encoding marker: becomes the object's encoding instead of an instance variable.
          if (target != null)
          {
              target.Encoding = encoding;
          }
          has_encoding = true;
      }
  }
  /// <summary>
  /// Reads instance variables into <paramref name="obj"/>, ignoring whether an encoding was found.
  /// </summary>
  /// <param name="obj">Receiver passed on to the two-argument overload.</param>
  public void ReadInstanceVariable(object obj)
  {
      bool unusedHasEncoding = false;
      ReadInstanceVariable(obj, ref unusedHasEncoding);
  }
  /// <summary>
  /// Appends a list of modules to an object's ExtendModules collection.
  /// C counterpart: static VALUE append_extmod(VALUE obj, VALUE extmod)
  /// </summary>
  /// <param name="obj">Object to extend; left untouched unless it is a RubyObject.</param>
  /// <param name="extmod">Modules to append.</param>
  /// <returns><paramref name="obj"/>.</returns>
  public object AppendExtendedModule(object obj, List<RubyModule> extmod)
  {
      RubyObject target = obj as RubyObject;
      if (target == null)
      {
          return obj;
      }

      target.ExtendModules.AddRange(extmod);
      return obj;
  }
  /// <summary>
  /// Reads one value with no instance-variable flag and no pending extension modules.
  /// C counterpart: static VALUE r_object(struct load_arg *arg)
  /// </summary>
  /// <returns>The value that was read.</returns>
  public object ReadObject()
  {
      bool unusedIvp = false;
      object value = ReadObject0(false, ref unusedIvp, null);
      return value;
  }
  /// <summary>
  /// Reads one value, sharing the caller's instance-variable flag (hasivp is passed as true).
  /// </summary>
  /// <param name="ivp">Instance-variable flag forwarded by reference to the full overload.</param>
  /// <param name="extmod">Pending extension modules, or null.</param>
  /// <returns>The value that was read.</returns>
  public object ReadObject0(ref bool ivp, List<RubyModule> extmod)
  {
      object value = ReadObject0(true, ref ivp, extmod);
      return value;
  }
  /// <summary>
  /// Reads one value without an instance-variable flag (hasivp is passed as false).
  /// </summary>
  /// <param name="extmod">Pending extension modules, or null.</param>
  /// <returns>The value that was read.</returns>
  public object ReadObject0(List<RubyModule> extmod)
  {
      bool unusedIvp = false;
      object value = ReadObject0(false, ref unusedIvp, extmod);
      return value;
  }
  /// <summary>
  /// Parses the textual payload of a Float value.
  /// </summary>
  /// <param name="text">"inf", "-inf", "nan" or a decimal number, optionally followed by a NUL and trailing data.</param>
  /// <returns>The parsed double.</returns>
  private static double ParseMarshalFloat(string text)
  {
      if (text == "inf")
      {
          return double.PositiveInfinity;
      }
      if (text == "-inf")
      {
          return double.NegativeInfinity;
      }
      if (text == "nan")
      {
          return double.NaN;
      }

      // Anything from the first NUL onwards is dropped. The char overload of IndexOf
      // is an ordinal search; the string overload is culture-sensitive and can
      // report index 0 for "\0" on ICU-based runtimes.
      int terminator = text.IndexOf('\0');
      if (terminator >= 0)
      {
          text = text.Remove(terminator);
      }

      // The data always uses '.' as the decimal separator, so parsing must not
      // depend on the current culture.
      return Convert.ToDouble(text, System.Globalization.CultureInfo.InvariantCulture);
  }

  /// <summary>
  /// Reads one value: consumes a type tag and dispatches on it.
  /// C counterpart: static VALUE r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
  /// </summary>
  /// <param name="hasivp">True when <paramref name="ivp"/> carries a flag supplied by the caller (the C code passes a non-null pointer).</param>
  /// <param name="ivp">
  /// Instance-variable flag. The Regexp, UserDefined and Symbol branches set it to false
  /// (when <paramref name="hasivp"/> is true) so the caller does not read instance variables again.
  /// </param>
  /// <param name="extmod">Modules collected from enclosing Extended tags, or null. The list is emptied once its modules have been applied.</param>
  /// <returns>The value that was read.</returns>
  /// <exception cref="InvalidDataException">Unknown or unsupported type tag, or a link to an unregistered object.</exception>
  public object ReadObject0(bool hasivp, ref bool ivp, List<RubyModule> extmod)
  {
      object v = null;
      int type = ReadByte();
      switch (type)
      {
          case RubyMarshal.Types.Link:
              {
                  // Reference to an object that was registered earlier.
                  int id = ReadLong();
                  object link;
                  if (!this.m_objects.TryGetValue(id, out link))
                  {
                      throw new InvalidDataException("dump format error (unlinked)");
                  }
                  v = link;
                  if (this.m_proc != null)
                  {
                      v = this.m_proc(v);
                  }
              }
              break;

          case RubyMarshal.Types.InstanceVariable:
              {
                  // The wrapped value may consume the instance variables itself,
                  // in which case it clears the flag.
                  bool ivar = true;
                  v = ReadObject0(ref ivar, extmod);
                  bool hasenc = false;
                  if (ivar)
                  {
                      ReadInstanceVariable(v, ref hasenc);
                  }
              }
              break;

          case RubyMarshal.Types.Extended:
              {
                  RubyModule m = RubyModule.GetModule(ReadUnique());
                  if (extmod == null)
                  {
                      extmod = new List<RubyModule>();
                  }
                  extmod.Add(m);
                  v = ReadObject0(extmod);
                  RubyObject extended = v as RubyObject;
                  if (extended != null)
                  {
                      extended.ExtendModules.AddRange(extmod);
                  }
                  // Empty the shared list so that an enclosing Extended tag does not
                  // append the same modules a second time.
                  extmod.Clear();
              }
              break;

          case RubyMarshal.Types.UserClass:
              {
                  RubyClass c = RubyClass.GetClass(ReadUnique());
                  v = ReadObject0(extmod);
                  RubyObject subclassed = v as RubyObject;
                  if (subclassed != null)
                  {
                      subclassed.ClassName = c.Symbol;
                  }
              }
              break;

          case RubyMarshal.Types.Nil:
              v = RubyNil.Instance;
              v = Leave(v);
              break;

          case RubyMarshal.Types.True:
              v = true;
              v = Leave(v);
              break;

          case RubyMarshal.Types.False:
              v = false;
              v = Leave(v);
              break;

          case RubyMarshal.Types.Fixnum:
              v = ReadLong();
              v = Leave(v);
              break;

          case RubyMarshal.Types.Float:
              {
                  RubyString fstr = ReadString();
                  double d = ParseMarshalFloat(fstr.Text);
                  v = new RubyFloat(d);
                  v = Entry(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Bignum:
              {
                  // Sign byte: '+' (0x2b) or '-' (0x2d); anything else is stored as 0.
                  int sign;
                  switch (ReadByte())
                  {
                      case 0x2b:
                          sign = 1;
                          break;
                      case 0x2d:
                          sign = -1;
                          break;
                      default:
                          sign = 0;
                          break;
                  }

                  // The length counts 16-bit units; they are packed into 32-bit words,
                  // with a final 16-bit read when the count is odd.
                  int shortCount = ReadLong();
                  int fullWords = shortCount / 2;
                  int totalWords = (shortCount + 1) / 2;
                  uint[] data = new uint[totalWords];
                  for (int i = 0; i < fullWords; i++)
                  {
                      data[i] = m_reader.ReadUInt32();
                  }
                  if (fullWords != totalWords)
                  {
                      data[fullWords] = m_reader.ReadUInt16();
                  }

                  v = new RubyBignum(sign, data);
                  v = Entry(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.String:
              v = Entry(ReadString());
              v = Leave(v);
              break;

          case RubyMarshal.Types.Regexp:
              {
                  RubyString str = ReadString();
                  int options = ReadByte();
                  bool has_encoding = false;
                  // Reserve the table slot before the instance variables are read.
                  int idx = Prepare();
                  if (hasivp)
                  {
                      ReadInstanceVariable(str, ref has_encoding);
                      ivp = false;
                  }
                  // TODO: 1.8 compatibility - when has_encoding is false the C code strips
                  // escapes that are undefined in 1.8 from the pattern; not implemented here.
                  v = Entry0(new RubyRegexp(str, (RubyRegexpOptions)options), idx);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Array:
              {
                  int len = ReadLong();
                  RubyArray ary = new RubyArray();
                  v = ary;
                  // Register before reading the elements so they can link back to the array.
                  v = Entry(v);
                  while (len-- > 0)
                  {
                      ary.Push(ReadObject());
                  }
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Hash:
          case RubyMarshal.Types.HashWithDefault:
              {
                  int len = ReadLong();
                  RubyHash hash = new RubyHash();
                  v = hash;
                  // Register before reading the entries so they can link back to the hash.
                  v = Entry(v);
                  while (len-- > 0)
                  {
                      object key = ReadObject();
                      object value = ReadObject();
                      hash.Add(key, value);
                  }
                  if (type == RubyMarshal.Types.HashWithDefault)
                  {
                      hash.DefaultValue = ReadObject();
                  }
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Struct:
              {
                  int idx = Prepare();
                  RubyStruct obj = new RubyStruct();
                  RubySymbol klass = ReadUnique();
                  obj.ClassName = klass;
                  int len = ReadLong();
                  v = obj;
                  v = Entry0(v, idx);
                  // Members are stored as (symbol, value) pairs.
                  while (len-- > 0)
                  {
                      RubySymbol key = ReadSymbol();
                      object value = ReadObject();
                      obj.InstanceVariable[key] = value;
                  }
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.UserDefined:
              {
                  RubySymbol klass = ReadUnique();
                  RubyString data = ReadString();
                  if (hasivp)
                  {
                      ReadInstanceVariable(data);
                      ivp = false;
                  }
                  // The payload is kept as raw bytes; it is not interpreted here.
                  RubyUserdefinedDumpObject obj = new RubyUserdefinedDumpObject();
                  obj.ClassName = klass;
                  obj.DumpedObject = data.Raw;
                  v = obj;
                  v = Entry(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.UserMarshal:
              {
                  RubySymbol klass = ReadUnique();
                  FuzzyUserdefinedMarshalDumpObject obj = new FuzzyUserdefinedMarshalDumpObject();
                  v = obj;
                  if (extmod != null)
                  {
                      AppendExtendedModule(obj, extmod);
                  }
                  v = Entry(v);
                  object data = ReadObject();
                  obj.ClassName = klass;
                  obj.DumpedObject = data;
                  v = Leave(v);
                  if (extmod != null)
                  {
                      extmod.Clear();
                  }
              }
              break;

          case RubyMarshal.Types.Object:
              {
                  int idx = Prepare();
                  RubyObject obj = new RubyObject();
                  RubySymbol klass = ReadUnique();
                  obj.ClassName = klass;
                  v = obj;
                  v = Entry0(v, idx);
                  ReadInstanceVariable(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Class:
              {
                  RubyString str = ReadString();
                  v = RubyClass.GetClass(RubySymbol.GetSymbol(str));
                  v = Entry(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Module:
              {
                  RubyString str = ReadString();
                  v = RubyModule.GetModule(RubySymbol.GetSymbol(str));
                  v = Entry(v);
                  v = Leave(v);
              }
              break;

          case RubyMarshal.Types.Symbol:
              if (hasivp)
              {
                  v = ReadSymbolReal(ivp);
                  ivp = false;
              }
              else
              {
                  v = ReadSymbolReal(false);
              }
              v = Leave(v);
              break;

          case RubyMarshal.Types.SymbolLink:
              v = ReadSymbolLink();
              break;

          // TODO: Data support (C: allocate by class, r_entry, call _load_data, r_leave).
          case RubyMarshal.Types.Data:
          // TODO: ModuleOld support (C: rb_path_to_class on r_bytes, r_entry, r_leave).
          case RubyMarshal.Types.ModuleOld:
          default:
              // Unsupported and unknown tags are both reported as format errors.
              throw new InvalidDataException(string.Format("dump format error(0x{0:X2})", type));
      }
      return v;
  }
  676. }
  677. }