PageRenderTime 59ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/Python/Product/Analysis/Unpickler.cs

https://gitlab.com/SplatoonModdingHub/PTVS
C# | 599 lines | 479 code | 88 blank | 32 comment | 50 complexity | ac2d69be02f75ada175929d5e7457ded MD5 | raw file
  1. // Python Tools for Visual Studio
  2. // Copyright(c) Microsoft Corporation
  3. // All rights reserved.
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the License); you may not use
  6. // this file except in compliance with the License. You may obtain a copy of the
  7. // License at http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
  10. // OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
  11. // IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  12. // MERCHANTABLITY OR NON-INFRINGEMENT.
  13. //
  14. // See the Apache Version 2.0 License for specific language governing
  15. // permissions and limitations under the License.
  16. using System;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.IO;
  20. using System.Numerics;
  21. using System.Text;
  22. using Microsoft.PythonTools.Parsing;
  23. namespace Microsoft.PythonTools.Intellisense {
  24. public static class Unpickle {
  /// <summary>
  /// Unpickles a Python pickle stream. Python dicts are returned as
  /// Dictionary of string to object, tuples as object arrays, and lists as
  /// List of object. Classes are not supported.
  /// </summary>
  /// <param name="file">Stream positioned at the first opcode of the pickle data.</param>
  /// <returns>The object left on top of the unpickler stack when the STOP opcode is read.</returns>
  /// <exception cref="System.ArgumentNullException"><paramref name="file"/> is null.</exception>
  /// <exception cref="System.Text.DecoderFallbackException"></exception>
  /// <exception cref="System.ArgumentException"></exception>
  /// <exception cref="System.InvalidOperationException"></exception>
  /// <exception cref="System.IO.EndOfStreamException">The stream ended before a STOP opcode was read.</exception>
  public static object Load(Stream file) {
      // Fail at the API boundary instead of with a NullReferenceException on the first read.
      if (file == null) {
          throw new ArgumentNullException("file");
      }
      return new UnpicklerObject(file).Load();
  }
  /// <summary>
  /// Thin wrapper over a <see cref="Stream"/> that exposes the primitive reads the
  /// unpickler needs: fixed-size blocks, newline-terminated lines, single bytes and
  /// little-endian 32-bit integers.
  /// </summary>
  internal class FileInput {
      private readonly Stream _stream;

      public FileInput(Stream file) {
          _stream = file;
      }

      /// <summary>
      /// Reads exactly <paramref name="size"/> bytes and returns them as a string in
      /// which every byte becomes the character with the same code point.
      /// </summary>
      /// <exception cref="EndOfStreamException">Fewer than size bytes remain.</exception>
      /// <exception cref="ArgumentOutOfRangeException">size is negative.</exception>
      public string Read(int size) {
          byte[] bytes = ReadBytes(size);
          StringBuilder res = new StringBuilder(size);
          for (int i = 0; i < bytes.Length; i++) {
              res.Append((char)bytes[i]);
          }
          return res.ToString();
      }

      /// <summary>
      /// Reads exactly <paramref name="size"/> bytes from the stream.
      /// </summary>
      /// <exception cref="EndOfStreamException">Fewer than size bytes remain.</exception>
      /// <exception cref="ArgumentOutOfRangeException">size is negative.</exception>
      public byte[] ReadBytes(int size) {
          if (size < 0) {
              // Lengths come from the (possibly corrupt) stream itself.
              throw new ArgumentOutOfRangeException("size", "negative length in pickle stream");
          }

          byte[] bytes = new byte[size];
          int filled = 0;
          // Stream.Read may return fewer bytes than requested without being at the
          // end of the stream, so keep reading until the buffer is full.
          while (filled < size) {
              int read = _stream.Read(bytes, filled, size - filled);
              if (read <= 0) {
                  throw new EndOfStreamException("end of stream while reading");
              }
              filled += read;
          }
          return bytes;
      }

      /// <summary>
      /// Reads up to and including the next '\n'. A '\r' is dropped and the byte
      /// following it is taken instead. If the stream ends first, whatever was read
      /// so far is returned without a trailing newline.
      /// </summary>
      public string ReadLine() {
          StringBuilder res = new StringBuilder();
          for (; ; ) {
              int curByte = _stream.ReadByte();
              if (curByte == '\r') {
                  curByte = _stream.ReadByte();
              }
              if (curByte == -1) {
                  // End of stream (possibly right after a '\r'): never append the -1 marker.
                  break;
              }
              res.Append((char)curByte);
              if (curByte == '\n') {
                  break;
              }
          }
          return res.ToString();
      }

      /// <summary>
      /// Reads a newline-terminated line and returns it without the newline.
      /// </summary>
      /// <exception cref="EndOfStreamException">The stream ended before a newline was found.</exception>
      public string ReadLineNoNewLine() {
          string raw = ReadLine();
          if (raw.Length == 0 || raw[raw.Length - 1] != '\n') {
              throw new EndOfStreamException("unexpected EOF while unpickling");
          }
          return raw.Substring(0, raw.Length - 1);
      }

      /// <summary>
      /// Reads a single byte.
      /// </summary>
      /// <exception cref="EndOfStreamException">No byte remains.</exception>
      public byte ReadChar() {
          int res = _stream.ReadByte();
          if (res == -1) {
              throw new EndOfStreamException("unexpected EOF while unpickling");
          }
          return (byte)res;
      }

      /// <summary>
      /// Reads four bytes and combines them as a little-endian 32-bit integer.
      /// </summary>
      public int ReadInt() {
          byte[] c = ReadBytes(4);
          return (int)c[0] |
              ((int)c[1]) << 8 |
              ((int)c[2]) << 16 |
              ((int)c[3]) << 24;
      }
  }
  98. #region Opcode constants
  99. #endregion
  /// <summary>
  /// Executes the opcodes of a pickle stream against a value stack and a memo table.
  /// Dicts are built as Dictionary of string to object, tuples as object arrays and
  /// lists as List of object; a GLOBAL reference is replaced by null.
  /// </summary>
  class UnpicklerObject {
      // Sentinel pushed by MARK; also used as the "empty slot" filler in the memo table.
      private static readonly object _mark = new object();
      // Boxed once so every boolean pushed on the stack shares the same object.
      private static readonly object False = false;
      private static readonly object True = true;

      private FileInput _file;
      private List<object> _stack;
      private List<object> _privMemo;

      public UnpicklerObject() {
          _privMemo = new List<object>(200);
      }

      public UnpicklerObject(Stream file)
          : this() {
          _file = new FileInput(file);
      }

      /// <summary>
      /// Reads opcodes one byte at a time and dispatches each to its handler until
      /// STOP is read, then returns the value on top of the stack.
      /// </summary>
      /// <exception cref="InvalidOperationException">An unknown opcode or malformed stack was encountered.</exception>
      public object Load() {
          _stack = new List<object>(32);

          for (; ; ) {
              var opcode = _file.ReadChar();

              switch (opcode) {
                  case Opcode.Append: LoadAppend(); break;
                  case Opcode.Appends: LoadAppends(); break;
                  case Opcode.BinFloat: LoadBinFloat(); break;
                  case Opcode.BinGet: LoadBinGet(); break;
                  case Opcode.BinInt: LoadBinInt(); break;
                  case Opcode.BinInt1: LoadBinInt1(); break;
                  case Opcode.BinInt2: LoadBinInt2(); break;
                  case Opcode.BinPut: LoadBinPut(); break;
                  case Opcode.BinString: LoadBinString(); break;
                  case Opcode.BinUnicode: LoadBinUnicode(); break;
                  case Opcode.Dict: LoadDict(); break;
                  case Opcode.Dup: LoadDup(); break;
                  case Opcode.EmptyDict: LoadEmptyDict(); break;
                  case Opcode.EmptyList: LoadEmptyList(); break;
                  case Opcode.EmptyTuple: LoadEmptyTuple(); break;
                  case Opcode.Float: LoadFloat(); break;
                  case Opcode.Get: LoadGet(); break;
                  case Opcode.Int: LoadInt(); break;
                  case Opcode.List: LoadList(); break;
                  case Opcode.Long: LoadLong(); break;
                  case Opcode.Long1: LoadLong1(); break;
                  case Opcode.Long4: LoadLong4(); break;
                  case Opcode.LongBinGet: LoadLongBinGet(); break;
                  case Opcode.LongBinPut: LoadLongBinPut(); break;
                  case Opcode.Mark: LoadMark(); break;
                  case Opcode.NewFalse: LoadNewFalse(); break;
                  case Opcode.NewTrue: LoadNewTrue(); break;
                  case Opcode.NoneValue: LoadNoneValue(); break;
                  case Opcode.Pop: LoadPop(); break;
                  case Opcode.PopMark: LoadPopMark(); break;
                  case Opcode.Proto: LoadProto(); break;
                  case Opcode.Put: LoadPut(); break;
                  case Opcode.SetItem: LoadSetItem(); break;
                  case Opcode.SetItems: LoadSetItems(); break;
                  case Opcode.ShortBinstring: LoadShortBinstring(); break;
                  case Opcode.String: LoadString(); break;
                  case Opcode.Tuple: LoadTuple(); break;
                  case Opcode.Tuple1: LoadTuple1(); break;
                  case Opcode.Tuple2: LoadTuple2(); break;
                  case Opcode.Tuple3: LoadTuple3(); break;
                  case Opcode.Unicode: LoadUnicode(); break;
                  case Opcode.Global: LoadGlobal(); break;
                  case Opcode.Stop: return PopStack();
                  default: throw new InvalidOperationException(String.Format("invalid opcode: {0}", opcode));
              }
          }
      }

      /// <summary>
      /// GLOBAL: consumes the module and attribute lines and pushes null in place of
      /// the referenced object. Asserts in debug builds because globals are unexpected.
      /// </summary>
      private void LoadGlobal() {
          string module = ReadLineNoNewline();
          string attr = ReadLineNoNewline();
          Debug.Fail(String.Format("unexpected global in pickle stream {0}.{1}", module, attr));
          _stack.Add(null); // no support for actually loading the globals...
      }

      /// <summary>Removes and returns the top of the stack.</summary>
      private object PopStack() {
          var res = _stack[_stack.Count - 1];
          _stack.RemoveAt(_stack.Count - 1);
          return res;
      }

      /// <summary>Returns the top of the stack without removing it.</summary>
      private object PeekStack() {
          return _stack[_stack.Count - 1];
      }

      /// <summary>
      /// Copies every stack entry from <paramref name="start"/> to the top into a new array.
      /// </summary>
      public object[] StackGetSliceAsArray(int start) {
          object[] res = new object[_stack.Count - start];
          _stack.CopyTo(start, res, 0, res.Length);
          return res;
      }

      /// <summary>
      /// Returns the value stored under <paramref name="key"/> in the memo table.
      /// </summary>
      /// <exception cref="InvalidOperationException">Nothing was stored under that key.</exception>
      private object MemoGet(int key) {
          // A negative key can never have been stored; report it like any other missing key
          // instead of letting the list indexer throw.
          if (key >= 0 && key < _privMemo.Count) {
              object value = _privMemo[key];
              if (!ReferenceEquals(value, _mark)) {
                  return value;
              }
          }
          throw new InvalidOperationException(String.Format("memo key {0} not found", key));
      }

      /// <summary>
      /// Stores <paramref name="value"/> under <paramref name="key"/>, padding the
      /// memo table with the mark sentinel up to that index if needed.
      /// </summary>
      private void MemoPut(int key, object value) {
          while (key >= _privMemo.Count) {
              _privMemo.Add(_mark);
          }
          _privMemo[key] = value;
      }

      /// <summary>
      /// Returns the stack index of the topmost mark.
      /// </summary>
      /// <exception cref="InvalidOperationException">The stack contains no mark.</exception>
      private int GetMarkIndex() {
          int i = _stack.Count - 1;
          // Scan all the way down to index 0 so a missing mark is detected rather than
          // index 0 being reported as the mark.
          while (i >= 0 && !ReferenceEquals(_stack[i], _mark)) {
              i -= 1;
          }
          if (i < 0) {
              throw new InvalidOperationException("mark not found");
          }
          return i;
      }

      /// <summary>Reads <paramref name="size"/> bytes as a string of one char per byte.</summary>
      private string Read(int size) {
          string res = _file.Read(size);
          if (res.Length < size) {
              throw new EndOfStreamException("unexpected EOF while unpickling");
          }
          return res;
      }

      /// <summary>
      /// Reads a newline-terminated line and returns it without the newline.
      /// </summary>
      /// <exception cref="EndOfStreamException">The stream ended before a newline was found.</exception>
      private string ReadLineNoNewline() {
          string raw = _file.ReadLine();
          // ReadLine returns a line without '\n' only when the stream ran out; stripping
          // the last character in that case would drop real data (or fail on "").
          if (raw.Length == 0 || raw[raw.Length - 1] != '\n') {
              throw new EndOfStreamException("unexpected EOF while unpickling");
          }
          return raw.Substring(0, raw.Length - 1);
      }

      /// <summary>Parses a text-encoded float line; the result is a boxed double.</summary>
      private object ReadFloatString() {
          // Pickle text is machine-generated, so parse it independently of the current culture.
          return Double.Parse(ReadLineNoNewline(), System.Globalization.CultureInfo.InvariantCulture);
      }

      /// <summary>Reads an 8-byte IEEE 754 double stored in big-endian order.</summary>
      private double ReadFloat64() {
          var bytes = _file.ReadBytes(8);
          // The pickle format stores BINFLOAT big-endian; BitConverter uses the host order.
          if (BitConverter.IsLittleEndian) {
              Array.Reverse(bytes);
          }
          return BitConverter.ToDouble(bytes, 0);
      }

      /// <summary>
      /// Parses a text-encoded integer line. The literals "00" and "01" yield the
      /// boxed booleans false and true; anything else is parsed as a 32-bit integer.
      /// </summary>
      private object ReadIntFromString() {
          string raw = ReadLineNoNewline();
          if ("00" == raw) {
              return False;
          }
          if ("01" == raw) {
              return True;
          }
          return Int32.Parse(raw, System.Globalization.CultureInfo.InvariantCulture);
      }

      /// <summary>Reads a little-endian 32-bit integer.</summary>
      private int ReadInt32() {
          return _file.ReadInt();
      }

      /// <summary>
      /// Parses a text-encoded long line, ignoring a trailing "L" suffix; the result
      /// is a boxed BigInteger.
      /// </summary>
      private object ReadLongFromString() {
          var i = ReadLineNoNewline();
          if (i.EndsWith("L", StringComparison.Ordinal)) {
              i = i.Substring(0, i.Length - 1);
          }
          return BigInteger.Parse(i, System.Globalization.CultureInfo.InvariantCulture);
      }

      /// <summary>Reads <paramref name="size"/> bytes and builds a BigInteger from them.</summary>
      private object ReadLong(int size) {
          var bytes = _file.ReadBytes(size);
          return new BigInteger(bytes);
      }

      /// <summary>Reads one byte; returned as char so it converts implicitly to int.</summary>
      private char ReadUInt8() {
          return (char)_file.ReadChar();
      }

      /// <summary>Reads a little-endian unsigned 16-bit integer.</summary>
      private ushort ReadUInt16() {
          var bytes = _file.ReadBytes(2);
          // Assemble explicitly so the result does not depend on the host byte order.
          return (ushort)(bytes[0] | (bytes[1] << 8));
      }

      /// <summary>Removes the entry at <paramref name="markIndex"/> and everything above it.</summary>
      private void PopMark(int markIndex) {
          _stack.RemoveRange(markIndex, _stack.Count - markIndex);
      }

      /// <summary>
      /// Interpret everything from markIndex to the top of the stack as a sequence
      /// of key, value, key, value, etc. Set dict[key] = value for each. Pop
      /// everything from markIndex up when done.
      /// </summary>
      private void SetItems(Dictionary<string, object> dict, int markIndex) {
          for (int i = markIndex + 1; i < _stack.Count; i += 2) {
              dict[(string)_stack[i]] = _stack[i + 1];
          }
          PopMark(markIndex);
      }

      /// <summary>APPEND: pops an item and appends it to the list now on top of the stack.</summary>
      private void LoadAppend() {
          object item = PopStack();
          List<object> seq = PeekStack() as List<object>;
          if (seq == null) {
              throw new InvalidOperationException();
          }
          seq.Add(item);
      }

      /// <summary>
      /// APPENDS: appends every item above the topmost mark to the list just below
      /// the mark, then pops the mark and the items.
      /// </summary>
      /// <exception cref="InvalidOperationException">There is no list just below the mark.</exception>
      private void LoadAppends() {
          int markIndex = GetMarkIndex();
          // Same failure type as APPEND when the target is missing or not a list.
          List<object> seq = markIndex > 0 ? _stack[markIndex - 1] as List<object> : null;
          if (seq == null) {
              throw new InvalidOperationException("while executing APPENDS, expected list below last mark");
          }
          for (int i = markIndex + 1; i < _stack.Count; i++) {
              seq.Add(_stack[i]);
          }
          PopMark(markIndex);
      }

      /// <summary>BINFLOAT: pushes a binary-encoded double.</summary>
      private void LoadBinFloat() {
          _stack.Add(ReadFloat64());
      }

      /// <summary>BINGET: pushes the memo entry whose key is the next byte.</summary>
      private void LoadBinGet() {
          _stack.Add(MemoGet(ReadUInt8()));
      }

      /// <summary>BININT: pushes a 4-byte integer.</summary>
      private void LoadBinInt() {
          _stack.Add(ReadInt32());
      }

      /// <summary>BININT1: pushes a 1-byte unsigned integer as int.</summary>
      private void LoadBinInt1() {
          _stack.Add((int)ReadUInt8());
      }

      /// <summary>BININT2: pushes a 2-byte unsigned integer as int.</summary>
      private void LoadBinInt2() {
          _stack.Add((int)ReadUInt16());
      }

      /// <summary>BINPUT: stores the top of the stack in the memo under a 1-byte key.</summary>
      private void LoadBinPut() {
          MemoPut(ReadUInt8(), PeekStack());
      }

      /// <summary>BINSTRING: pushes a string whose byte length is given by a 4-byte integer.</summary>
      private void LoadBinString() {
          _stack.Add(Read(ReadInt32()));
      }

      /// <summary>BINUNICODE: pushes a UTF-8 decoded string whose byte length is a 4-byte integer.</summary>
      private void LoadBinUnicode() {
          var bytes = _file.ReadBytes(ReadInt32());
          _stack.Add(Encoding.UTF8.GetString(bytes));
      }

      /// <summary>DICT: replaces the mark and the key/value pairs above it with a new dictionary.</summary>
      private void LoadDict() {
          int markIndex = GetMarkIndex();
          int pairCount = (_stack.Count - 1 - markIndex) / 2;
          Dictionary<string, object> dict = new Dictionary<string, object>(pairCount);
          SetItems(dict, markIndex);
          _stack.Add(dict);
      }

      /// <summary>DUP: pushes the top of the stack again.</summary>
      private void LoadDup() {
          _stack.Add(PeekStack());
      }

      /// <summary>EMPTY_DICT: pushes a new empty dictionary.</summary>
      private void LoadEmptyDict() {
          _stack.Add(new Dictionary<string, object>());
      }

      /// <summary>EMPTY_LIST: pushes a new empty list.</summary>
      private void LoadEmptyList() {
          _stack.Add(new List<object>());
      }

      /// <summary>EMPTY_TUPLE: pushes a new zero-length object array.</summary>
      private void LoadEmptyTuple() {
          _stack.Add(new object[0]);
      }

      /// <summary>FLOAT: pushes a text-encoded double.</summary>
      private void LoadFloat() {
          _stack.Add(ReadFloatString());
      }

      /// <summary>GET: pushes the memo entry whose key is given as a text integer.</summary>
      private void LoadGet() {
          _stack.Add(MemoGet((int)ReadIntFromString()));
      }

      /// <summary>INT: pushes a text-encoded integer or boolean.</summary>
      private void LoadInt() {
          _stack.Add(ReadIntFromString());
      }

      /// <summary>LIST: replaces the mark and the items above it with a new list of those items.</summary>
      private void LoadList() {
          int markIndex = GetMarkIndex();
          var list = new List<object>(StackGetSliceAsArray(markIndex + 1));
          PopMark(markIndex);
          _stack.Add(list);
      }

      /// <summary>LONG: pushes a text-encoded BigInteger.</summary>
      private void LoadLong() {
          _stack.Add(ReadLongFromString());
      }

      /// <summary>LONG1: pushes a BigInteger whose byte length is given by the next byte.</summary>
      private void LoadLong1() {
          int size = ReadUInt8();
          if (size == 4) {
              _stack.Add((BigInteger)ReadInt32());
          } else {
              _stack.Add(ReadLong(size));
          }
      }

      /// <summary>LONG4: pushes a BigInteger whose byte length is given by a 4-byte integer.</summary>
      private void LoadLong4() {
          _stack.Add(ReadLong(ReadInt32()));
      }

      /// <summary>LONG_BINGET: pushes the memo entry whose key is a 4-byte integer.</summary>
      private void LoadLongBinGet() {
          _stack.Add(MemoGet((int)ReadInt32()));
      }

      /// <summary>LONG_BINPUT: stores the top of the stack in the memo under a 4-byte key.</summary>
      private void LoadLongBinPut() {
          MemoPut(ReadInt32(), PeekStack());
      }

      /// <summary>MARK: pushes the mark sentinel.</summary>
      private void LoadMark() {
          _stack.Add(_mark);
      }

      /// <summary>NEWFALSE: pushes the shared boxed false.</summary>
      private void LoadNewFalse() {
          _stack.Add(False);
      }

      /// <summary>NEWTRUE: pushes the shared boxed true.</summary>
      private void LoadNewTrue() {
          _stack.Add(True);
      }

      /// <summary>NONE: pushes null.</summary>
      private void LoadNoneValue() {
          _stack.Add(null);
      }

      /// <summary>POP: discards the top of the stack.</summary>
      private void LoadPop() {
          PopStack();
      }

      /// <summary>POP_MARK: discards the topmost mark and everything above it.</summary>
      private void LoadPopMark() {
          PopMark(GetMarkIndex());
      }

      /// <summary>PROTO: reads the protocol byte and rejects protocols above 2.</summary>
      private void LoadProto() {
          int proto = ReadUInt8();
          if (proto > 2) {
              throw new ArgumentException(String.Format("unsupported pickle protocol: {0}", proto));
          }
          // discard result
      }

      /// <summary>PUT: stores the top of the stack in the memo under a text-encoded key.</summary>
      private void LoadPut() {
          MemoPut((int)ReadIntFromString(), PeekStack());
      }

      /// <summary>SETITEM: pops a value and a key and stores them in the dictionary now on top.</summary>
      private void LoadSetItem() {
          object value = PopStack();
          object key = PopStack();
          Dictionary<string, object> dict = PeekStack() as Dictionary<string, object>;
          if (dict == null) {
              throw new InvalidOperationException(
                  String.Format(
                      "while executing SETITEM, expected dict at stack[-3], but got {0}",
                      PeekStack()
                  )
              );
          }
          dict[(string)key] = value;
      }

      /// <summary>
      /// SETITEMS: stores the key/value pairs above the topmost mark in the dictionary
      /// just below the mark, then pops the mark and the pairs.
      /// </summary>
      /// <exception cref="InvalidOperationException">There is no dictionary just below the mark.</exception>
      private void LoadSetItems() {
          int markIndex = GetMarkIndex();
          // A mark at the very bottom of the stack has nothing below it.
          object target = markIndex > 0 ? _stack[markIndex - 1] : null;
          Dictionary<string, object> dict = target as Dictionary<string, object>;
          if (dict == null) {
              throw new InvalidOperationException(
                  String.Format(
                      "while executing SETITEMS, expected dict below last mark, but got {0}",
                      target
                  )
              );
          }
          SetItems(dict, markIndex);
      }

      /// <summary>SHORT_BINSTRING: pushes a string whose byte length is given by the next byte.</summary>
      private void LoadShortBinstring() {
          _stack.Add(Read(ReadUInt8()));
      }

      /// <summary>
      /// STRING: reads a quoted string literal line, checks that it starts and ends
      /// with the same quote character, and pushes the parsed contents.
      /// </summary>
      private void LoadString() {
          string repr = ReadLineNoNewline();
          bool quoted = repr.Length >= 2 && (
              repr[0] == '"' && repr[repr.Length - 1] == '"' ||
              repr[0] == '\'' && repr[repr.Length - 1] == '\''
          );
          if (!quoted) {
              throw new ArgumentException(String.Format("while executing STRING, expected string that starts and ends with quotes {0}", repr));
          }
          _stack.Add(LiteralParser.ParseString(repr.Substring(1, repr.Length - 2), false, false));
      }

      /// <summary>TUPLE: replaces the mark and the items above it with an array of those items.</summary>
      private void LoadTuple() {
          int markIndex = GetMarkIndex();
          var tuple = StackGetSliceAsArray(markIndex + 1);
          PopMark(markIndex);
          _stack.Add(tuple);
      }

      /// <summary>TUPLE1: replaces the top item with a one-element array.</summary>
      private void LoadTuple1() {
          object item0 = PopStack();
          _stack.Add(new object[] { item0 });
      }

      /// <summary>TUPLE2: replaces the top two items with a two-element array, in stack order.</summary>
      private void LoadTuple2() {
          object item1 = PopStack();
          object item0 = PopStack();
          _stack.Add(new object[] { item0, item1 });
      }

      /// <summary>TUPLE3: replaces the top three items with a three-element array, in stack order.</summary>
      private void LoadTuple3() {
          object item2 = PopStack();
          object item1 = PopStack();
          object item0 = PopStack();
          _stack.Add(new object[] { item0, item1, item2 });
      }

      /// <summary>UNICODE: reads a line and pushes it parsed as a unicode literal.</summary>
      private void LoadUnicode() {
          _stack.Add(LiteralParser.ParseString(ReadLineNoNewline(), false, true));
      }
  }
  455. }
  /// <summary>
  /// Byte values of the pickle opcodes. Not every constant declared here is
  /// dispatched by the unpickler.
  /// </summary>
  internal static class Opcode {
      // Stack and stream control.
      public const byte Mark = (byte)'(';
      public const byte Stop = (byte)'.';
      public const byte Pop = (byte)'0';
      public const byte PopMark = (byte)'1';
      public const byte Dup = (byte)'2';

      // Scalars.
      public const byte NoneValue = (byte)'N';
      public const byte Int = (byte)'I';
      public const byte BinInt = (byte)'J';
      public const byte BinInt1 = (byte)'K';
      public const byte BinInt2 = (byte)'M';
      public const byte Long = (byte)'L';
      public const byte Float = (byte)'F';
      public const byte BinFloat = (byte)'G';

      // Strings.
      public const byte String = (byte)'S';
      public const byte BinString = (byte)'T';
      public const byte ShortBinstring = (byte)'U';
      public const byte Unicode = (byte)'V';
      public const byte BinUnicode = (byte)'X';

      // Containers.
      public const byte EmptyTuple = (byte)')';
      public const byte Tuple = (byte)'t';
      public const byte EmptyList = (byte)']';
      public const byte List = (byte)'l';
      public const byte Append = (byte)'a';
      public const byte Appends = (byte)'e';
      public const byte EmptyDict = (byte)'}';
      public const byte Dict = (byte)'d';
      public const byte SetItem = (byte)'s';
      public const byte SetItems = (byte)'u';

      // Memo access.
      public const byte Get = (byte)'g';
      public const byte BinGet = (byte)'h';
      public const byte LongBinGet = (byte)'j';
      public const byte Put = (byte)'p';
      public const byte BinPut = (byte)'q';
      public const byte LongBinPut = (byte)'r';

      // Globals, instances and persistent ids.
      public const byte Global = (byte)'c';
      public const byte Inst = (byte)'i';
      public const byte Obj = (byte)'o';
      public const byte Build = (byte)'b';
      public const byte Reduce = (byte)'R';
      public const byte PersId = (byte)'P';
      public const byte BinPersid = (byte)'Q';

      // Opcodes with byte values 0x80 through 0x8b.
      public const byte Proto = 0x80;
      public const byte NewObj = 0x81;
      public const byte Ext1 = 0x82;
      public const byte Ext2 = 0x83;
      public const byte Ext4 = 0x84;
      public const byte Tuple1 = 0x85;
      public const byte Tuple2 = 0x86;
      public const byte Tuple3 = 0x87;
      public const byte NewTrue = 0x88;
      public const byte NewFalse = 0x89;
      public const byte Long1 = 0x8a;
      public const byte Long4 = 0x8b;
  }
  511. }