PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/Python/Product/Analysis/Pickler.cs

https://gitlab.com/SplatoonModdingHub/PTVS
C# | 730 lines | 523 code | 124 blank | 83 comment | 121 complexity | 80919e799575e8082fe7d910f8651f15 MD5 | raw file
  1. // Python Tools for Visual Studio
  2. // Copyright(c) Microsoft Corporation
  3. // All rights reserved.
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the License); you may not use
  6. // this file except in compliance with the License. You may obtain a copy of the
  7. // License at http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
  10. // OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
  11. // IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  12. // MERCHANTABLITY OR NON-INFRINGEMENT.
  13. //
  14. // See the Apache Version 2.0 License for specific language governing
  15. // permissions and limitations under the License.
  16. using System;
  17. using System.Collections;
  18. using System.Collections.Generic;
  19. using System.Diagnostics;
  20. using System.IO;
  21. using System.Linq;
  22. using System.Numerics;
  23. using System.Text;
  24. namespace Microsoft.PythonTools.Intellisense {
  25. /// <summary>
  26. /// Implements a subset of the Python pickling protocol for saving out intellisense databases. Supports dictionary of str, object,
  27. /// object arrays, and list of object as well as primitive types.
  28. /// </summary>
  29. public class Pickler {
  // Bounds of the printable ASCII range (space through tilde).
  private const char LowestPrintableChar = ' ';
  private const char HighestPrintableChar = '~';

  // Line terminator used by the text-based pickle argument formats.
  private const string Newline = "\n";

  // Signature shared by every type-specific save routine held in the dispatch table.
  private delegate void PickleFunction(Pickler/*!*/ pickler, object value);

  // Maps a runtime type to the routine that knows how to pickle it.
  private static readonly Dictionary<Type, PickleFunction> _dispatchTable;

  // max elements that can be set/appended at a time using SETITEMS/APPENDS
  private const int _batchSize = 1000;

  // Destination for all emitted pickle bytes.
  private FileOutput _file;

  // internal fast memo which we can use if the user doesn't access memo
  private Dictionary<object, int> _privMemo;
  /// <summary>
  /// Populates the dispatch table with the save routine for each supported
  /// container type (dictionary, object array and list).
  /// </summary>
  static Pickler() {
      _dispatchTable = new Dictionary<Type, PickleFunction> {
          { typeof(Dictionary<string, object>), SaveDict },
          { typeof(object[]), SaveTuple },
          { typeof(List<object>), SaveList }
      };
  }
  45. #region Public API
  /// <summary>
  /// Creates a pickler that writes its output to <paramref name="output"/>.
  /// </summary>
  /// <param name="output">Stream that receives the pickled bytes; must not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="output"/> is null.</exception>
  public Pickler(FileStream output) {
      if (output == null) {
          // Fail here rather than with a NullReferenceException on the first write.
          throw new ArgumentNullException("output");
      }

      _file = new FileOutput(output);
      // The memo is keyed by object identity, not by value equality.
      _privMemo = new Dictionary<object, int>(256, ReferenceEqualityComparer.Instance);
  }
  /// <summary>
  /// Writes a complete pickle for <paramref name="obj"/>: the protocol header,
  /// the serialized object, and finally the STOP opcode.
  /// </summary>
  /// <param name="obj">Root object to serialize.</param>
  public void Dump(object obj) {
      this.WriteProto();
      this.Save(obj);
      this.Write(Opcode.Stop);
  }
  /// <summary>
  /// Assigns the next free memo index to <paramref name="obj"/> unless it
  /// already has one.
  /// </summary>
  private void Memoize(object obj) {
      if (_privMemo.ContainsKey(obj)) {
          return;
      }

      // The index is the number of entries present before this one is added.
      _privMemo.Add(obj, _privMemo.Count);
  }
  /// <summary>
  /// Records <paramref name="obj"/> in the memo under the next free index and
  /// returns that index. The object is expected not to be memoized yet.
  /// </summary>
  private int MemoizeNew(object obj) {
      Debug.Assert(!_privMemo.ContainsKey(obj));

      int index = _privMemo.Count;
      _privMemo[obj] = index;
      return index;
  }
  /// <summary>
  /// Returns true when <paramref name="obj"/> already has a memo entry.
  /// </summary>
  private bool MemoContains(object obj) {
      int ignoredIndex;
      return _privMemo.TryGetValue(obj, out ignoredIndex);
  }
  /// <summary>
  /// If <paramref name="obj"/> is already memoized, emits a GET for its memo
  /// index and returns true; otherwise writes nothing and returns false.
  /// </summary>
  private bool TryWriteFastGet(object obj) {
      int memoIndex;
      bool alreadySaved = _privMemo.TryGetValue(obj, out memoIndex);
      if (alreadySaved) {
          WriteGetOrPut(true, memoIndex);
      }
      return alreadySaved;
  }
  77. #endregion
  78. #region Save functions
  /// <summary>
  /// Serializes <paramref name="obj"/>, choosing the save routine from its
  /// runtime type. Previously memoized objects are emitted as a GET.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// The runtime type of <paramref name="obj"/> has no registered save routine.
  /// </exception>
  private void Save(object obj) {
      // Several types are never memoized; handle those first.
      if (obj == null) {
          SaveNone(this, obj);
          return;
      }
      if (obj is int) {
          SaveInteger(this, obj);
          return;
      }
      if (obj is BigInteger) {
          SaveLong(this, obj);
          return;
      }
      if (obj is bool) {
          SaveBoolean(this, obj);
          return;
      }
      if (obj is double) {
          SaveFloat(this, obj);
          return;
      }

      // Everything else is memoizable: reuse an earlier occurrence if there is one.
      if (TryWriteFastGet(obj)) {
          return;
      }

      if (obj is string) {
          // Strings are common, so they bypass the dispatch table.
          SaveUnicode(this, obj);
          return;
      }

      PickleFunction saver;
      Type runtimeType = obj.GetType();
      if (!_dispatchTable.TryGetValue(runtimeType, out saver)) {
          throw new InvalidOperationException(String.Format("Unsavable type: {0}", runtimeType));
      }
      saver(this, obj);
  }
  /// <summary>
  /// Emits the NEWTRUE or NEWFALSE opcode for a boxed <see cref="bool"/>.
  /// </summary>
  private static void SaveBoolean(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(bool), "arg must be bool");

      bool flag = (bool)obj;
      pickler.Write(flag ? Opcode.NewTrue : Opcode.NewFalse);
  }
  /// <summary>
  /// Saves a <c>Dictionary&lt;string, object&gt;</c>: emits an empty dict,
  /// stores it in the memo, then writes its items in batches.
  /// </summary>
  private static void SaveDict(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(Dictionary<string, object>), "arg must be dict");
      Debug.Assert(!pickler.MemoContains(obj));

      // The dictionary is memoized before its items are written, so an item
      // that refers back to it is emitted as a GET.
      int memoIndex = pickler.MemoizeNew(obj);
      pickler.Write(Opcode.EmptyDict);
      pickler.WritePut(memoIndex);

      var items = (Dictionary<string, object>)obj;
      pickler.BatchSetItems(items);
  }
  /// <summary>
  /// Saves a boxed <see cref="double"/> as a BINFLOAT opcode followed by its
  /// 8-byte binary encoding.
  /// </summary>
  private static void SaveFloat(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(double), "arg must be float");

      pickler.Write(Opcode.BinFloat);
      pickler.WriteFloat64(obj);
  }
  /// <summary>
  /// Saves a boxed <see cref="int"/> using the smallest opcode that can hold
  /// it: BININT1 for 0..255, BININT2 for 0..65535, otherwise BININT.
  /// </summary>
  private static void SaveInteger(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(int), "arg must be int");

      if (IsUInt8(obj)) {
          pickler.Write(Opcode.BinInt1);
          pickler.WriteUInt8(obj);
          return;
      }

      if (IsUInt16(obj)) {
          pickler.Write(Opcode.BinInt2);
          pickler.WriteUInt16(obj);
          return;
      }

      // Negative values and anything above 65535 use the full 4-byte form.
      pickler.Write(Opcode.BinInt);
      pickler.WriteInt32(obj);
  }
  /// <summary>
  /// Saves a <c>List&lt;object&gt;</c>: emits an empty list, stores it in the
  /// memo, then appends its elements in batches.
  /// </summary>
  private static void SaveList(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(List<object>), "arg must be list");
      Debug.Assert(!pickler.MemoContains(obj));

      // The list is memoized before its elements are written, so an element
      // that refers back to it is emitted as a GET.
      int memoIndex = pickler.MemoizeNew(obj);
      pickler.Write(Opcode.EmptyList);
      pickler.WritePut(memoIndex);

      IEnumerable elements = (IEnumerable)obj;
      IEnumerator cursor = elements.GetEnumerator();
      pickler.BatchAppends(cursor);
  }
  // Bounds of the 32-bit range, used to choose the compact encodings in SaveLong.
  private static readonly BigInteger MaxInt = Int32.MaxValue;
  private static readonly BigInteger MinInt = Int32.MinValue;

  /// <summary>
  /// Saves a boxed <see cref="BigInteger"/> as LONG1 or LONG4: a byte count
  /// followed by the value's bytes, least significant byte first.
  /// </summary>
  private static void SaveLong(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(BigInteger), "arg must be long");
      BigInteger number = (BigInteger)obj;

      // Zero is written as LONG1 with an empty payload.
      if (number.IsZero) {
          pickler.Write(Opcode.Long1);
          pickler.WriteUInt8(0);
          return;
      }

      bool fitsInInt32 = number >= MinInt && number <= MaxInt;
      if (!fitsInInt32) {
          // Large values: let BigInteger produce the bytes and pick the
          // length prefix size from the payload length.
          byte[] payload = number.ToByteArray();
          if (payload.Length < 256) {
              pickler.Write(Opcode.Long1);
              pickler.WriteUInt8(payload.Length);
          } else {
              pickler.Write(Opcode.Long4);
              pickler.WriteInt32(payload.Length);
          }
          for (int i = 0; i < payload.Length; i++) {
              pickler.WriteUInt8(payload[i]);
          }
          return;
      }

      // Values in the 32-bit range are written as 1, 2 or 4 payload bytes.
      pickler.Write(Opcode.Long1);
      int small = (int)number;
      if (IsInt8(small)) {
          pickler.WriteUInt8(1);
          pickler._file.Write((char)(byte)small);
      } else if (IsInt16(small)) {
          pickler.WriteUInt8(2);
          pickler.WriteUInt8(small & 0xff);
          pickler.WriteUInt8((small >> 8) & 0xff);
      } else {
          pickler.WriteUInt8(4);
          pickler.WriteInt32(small);
      }
  }
  /// <summary>
  /// Emits the NONE opcode for a null reference.
  /// </summary>
  private static void SaveNone(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj == null, "arg must be None");

      pickler.Write(Opcode.NoneValue);
  }
  /// <summary>
  /// Saves an <c>object[]</c> as a pickle tuple. Arrays of up to three
  /// elements use the dedicated fixed-size opcodes; longer ones are wrapped
  /// in MARK ... TUPLE. Non-empty tuples are memoized after they are built.
  /// </summary>
  private static void SaveTuple(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(object[]), "arg must be array");
      Debug.Assert(!pickler.MemoContains(obj));

      object[] items = (object[])obj;
      int count = items.Length;

      byte tupleOpcode;
      switch (count) {
          case 0:
              tupleOpcode = Opcode.EmptyTuple;
              break;
          case 1:
              tupleOpcode = Opcode.Tuple1;
              break;
          case 2:
              tupleOpcode = Opcode.Tuple2;
              break;
          case 3:
              tupleOpcode = Opcode.Tuple3;
              break;
          default:
              tupleOpcode = Opcode.Tuple;
              break;
      }

      // Only the general TUPLE opcode needs a preceding MARK.
      bool usesMark = count > 3;
      if (usesMark) {
          pickler.Write(Opcode.Mark);
      }

      foreach (object item in items) {
          pickler.Save(item);
      }

      if (count == 0) {
          // Empty tuples are never memoized.
          pickler.Write(tupleOpcode);
          return;
      }

      if (pickler.MemoContains(obj)) {
          // Recursive tuple: it was memoized while its elements were being
          // saved, so discard the elements just written and fetch it instead.
          // NOTE(review): for count > 3 the MARK written above is not popped
          // here - confirm whether the reader tolerates that.
          for (int popped = 0; popped < count; popped++) {
              pickler.Write(Opcode.Pop);
          }
          pickler.WriteGet(obj);
          return;
      }

      pickler.Write(tupleOpcode);
      pickler.Memoize(items);
      pickler.WritePut(items);
  }
  /// <summary>
  /// Saves a string as BINUNICODE followed by its length-prefixed UTF-8
  /// bytes, then stores it in the memo with a PUT.
  /// </summary>
  private static void SaveUnicode(Pickler/*!*/ pickler, object obj) {
      Debug.Assert(obj.GetType() == typeof(string), "arg must be unicode");
      Debug.Assert(!pickler.MemoContains(obj));

      // Reserve the memo slot first; the PUT itself follows the string data.
      int memoIndex = pickler.MemoizeNew(obj);
      pickler.Write(Opcode.BinUnicode);
      pickler.WriteUnicodeStringUtf8(obj);
      pickler.WritePut(memoIndex);
  }
  234. #endregion
  235. #region Output encoding
  /// <summary>
  /// Write value in pickle floatnl format: the round-trip ("R") decimal text
  /// of the double followed by a newline.
  /// </summary>
  /// <param name="value">A boxed <see cref="double"/>.</param>
  private void WriteFloatAsString(object value) {
      Debug.Assert(value.GetType() == typeof(double));

      // Format with the invariant culture so the decimal separator is always
      // '.' no matter what the current thread culture is.
      string text = ((double)value).ToString("R", System.Globalization.CultureInfo.InvariantCulture);
      Write(text);
      Write(Newline);
  }
  /// <summary>
  /// Write value in pickle float8 format: the 8 bytes of the double, most
  /// significant byte first.
  /// </summary>
  /// <param name="value">A boxed <see cref="double"/>.</param>
  private void WriteFloat64(object value) {
      Debug.Assert(value.GetType() == typeof(double));

      byte[] bytes = BitConverter.GetBytes((double)value);
      // BitConverter returns host byte order; only flip when the host is
      // little-endian so the output is big-endian on every platform.
      if (BitConverter.IsLittleEndian) {
          Array.Reverse(bytes);
      }
      foreach (byte b in bytes) {
          Write(b);
      }
  }
  /// <summary>
  /// Write value in pickle uint1 format. Accepts a boxed int, BigInteger or
  /// byte; any other type is rejected.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// <paramref name="value"/> is not one of the accepted types.
  /// </exception>
  private void WriteUInt8(object value) {
      Debug.Assert(IsUInt8(value));

      char encoded;
      if (value is int) {
          encoded = (char)(int)value;
      } else if (value is BigInteger) {
          encoded = (char)(int)(BigInteger)value;
      } else if (value is byte) {
          // TODO: Shouldn't be here
          encoded = (char)(byte)value;
      } else {
          throw new InvalidOperationException();
      }
      Write(encoded);
  }

  /// <summary>
  /// Write an int in pickle uint1 format (a single byte).
  /// </summary>
  private void WriteUInt8(int value) {
      char encoded = (char)value;
      _file.Write(encoded);
  }
  /// <summary>
  /// Write value in pickle uint2 format: two bytes, low byte first.
  /// </summary>
  /// <param name="value">A boxed <see cref="int"/>.</param>
  private void WriteUInt16(object value) {
      Debug.Assert(IsUInt16(value));

      int number = (int)value;
      int lowByte = number & 0xff;
      int highByte = (number >> 8) & 0xff;
      WriteUInt8(lowByte);
      WriteUInt8(highByte);
  }
  /// <summary>
  /// Write value in pickle int4 format.
  /// </summary>
  /// <param name="value">A boxed <see cref="int"/>.</param>
  private void WriteInt32(object value) {
      WriteInt32((int)value);
  }

  /// <summary>
  /// Write an int in pickle int4 format by handing it to the output file.
  /// </summary>
  private void WriteInt32(int val) {
      this._file.Write(val);
  }
  /// <summary>
  /// Write value in pickle decimalnl_short format: its decimal text followed
  /// by a newline.
  /// </summary>
  /// <param name="value">The boxed number to write.</param>
  private void WriteIntAsString(object value) {
      // Convertible values (int, long, ...) are formatted with the invariant
      // culture so the sign is always ASCII; anything else keeps its own
      // ToString() result.
      IConvertible convertible = value as IConvertible;
      string text = convertible != null
          ? convertible.ToString(System.Globalization.CultureInfo.InvariantCulture)
          : value.ToString();
      Write(text);
      Write(Newline);
  }

  /// <summary>
  /// Write value in pickle decimalnl_short format: its decimal text followed
  /// by a newline.
  /// </summary>
  private void WriteIntAsString(int value) {
      // Invariant culture keeps the negative sign an ASCII '-' in every locale.
      Write(value.ToString(System.Globalization.CultureInfo.InvariantCulture));
      Write(Newline);
  }
  /// <summary>
  /// Write value in pickle decimalnl_long format: its decimal text followed
  /// by a newline.
  /// </summary>
  /// <param name="value">A boxed <see cref="BigInteger"/>.</param>
  private void WriteLongAsString(object value) {
      Debug.Assert(value.GetType() == typeof(BigInteger));

      // Format with the invariant culture so the sign is always an ASCII '-'.
      // Non-BigInteger values (a contract violation) keep their own ToString().
      string text = value is BigInteger
          ? ((BigInteger)value).ToString(System.Globalization.CultureInfo.InvariantCulture)
          : value.ToString();
      Write(text);
      Write(Newline);
  }
  /// <summary>
  /// Write value in pickle unicodestring4 format: a 4-byte length followed
  /// by the string data.
  /// </summary>
  /// <param name="value">The string to write.</param>
  private void WriteUnicodeStringUtf8(object value) {
      Debug.Assert(value.GetType() == typeof(string));
      string text = (string)value;

      bool asciiOnly = text.All(ch => ch < 128);
      if (asciiOnly) {
          // No re-encoding needed: the chars are written directly and the
          // char count is used as the length.
          WriteInt32(text.Length);
          Write(text);
          return;
      }

      // Non-ASCII content has to be re-encoded as UTF-8; the length prefix is
      // the encoded byte count.
      byte[] utf8 = Encoding.UTF8.GetBytes(text);
      WriteInt32(utf8.Length);
      _file.Write(utf8);
  }
  331. #endregion
  332. #region Type checking
  /// <summary>
  /// Return true if value is appropriate for formatting in pickle uint1 format.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// <paramref name="value"/> is not a boxed <see cref="int"/>.
  /// </exception>
  private static bool IsUInt8(object value) {
      if (!(value is int)) {
          throw new InvalidOperationException("expected int");
      }
      return IsUInt8((int)value);
  }

  /// <summary>
  /// Return true if value lies in the range 0..255.
  /// </summary>
  private static bool IsUInt8(int value) {
      return 0 <= value && value <= Byte.MaxValue;
  }
  /// <summary>
  /// Return true if value fits in a signed byte (-128..127).
  /// </summary>
  private static bool IsInt8(int value) {
      bool outOfRange = value < SByte.MinValue || value > SByte.MaxValue;
      return !outOfRange;
  }
  /// <summary>
  /// Return true if value is appropriate for formatting in pickle uint2 format.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// <paramref name="value"/> is not a boxed <see cref="int"/>.
  /// </exception>
  private static bool IsUInt16(object value) {
      if (!(value is int)) {
          throw new InvalidOperationException("expected int");
      }
      return IsUInt16((int)value);
  }

  /// <summary>
  /// Return true if value lies in the range 0..65535.
  /// </summary>
  private static bool IsUInt16(int value) {
      return 0 <= value && value <= UInt16.MaxValue;
  }
  /// <summary>
  /// Return true if value fits in a signed 16-bit integer (-32768..32767).
  /// </summary>
  private static bool IsInt16(int value) {
      bool outOfRange = value < Int16.MinValue || value > Int16.MaxValue;
      return !outOfRange;
  }
  363. #endregion
  364. #region Output generation helpers
  /// <summary>
  /// Writes a string to the output file.
  /// </summary>
  private void Write(string data) {
      this._file.Write(data);
  }

  /// <summary>
  /// Writes a single character to the output file.
  /// </summary>
  private void Write(char data) {
      this._file.Write(data);
  }

  /// <summary>
  /// Writes a single byte (for example an opcode) to the output file.
  /// </summary>
  private void Write(byte data) {
      // Route through the char overload of the output file.
      char asChar = (char)data;
      this._file.Write(asChar);
  }
  /// <summary>
  /// Emits a GET opcode for the memo index of an already memoized object.
  /// </summary>
  private void WriteGet(object obj) {
      Debug.Assert(MemoContains(obj));

      int memoIndex = _privMemo[obj];
      WriteGetOrPut(true, memoIndex);
  }
  /// <summary>
  /// Emits a GET (when <paramref name="isGet"/> is true) or a PUT for the
  /// memo index of an already memoized object.
  /// </summary>
  private void WriteGetOrPut(object obj, bool isGet) {
      Debug.Assert(MemoContains(obj));

      int memoIndex = _privMemo[obj];
      WriteGetOrPut(isGet, memoIndex);
  }
  /// <summary>
  /// Emits a GET or PUT whose memo index is the first element of
  /// <paramref name="tup"/>. Indexes in 0..255 use the one-byte form; others
  /// use the four-byte form.
  /// </summary>
  private void WriteGetOrPut(bool isGet, object[] tup) {
      object memoIndex = tup[0];

      if (IsUInt8(memoIndex)) {
          byte shortOpcode = isGet ? Opcode.BinGet : Opcode.BinPut;
          Write(shortOpcode);
          WriteUInt8(memoIndex);
          return;
      }

      byte longOpcode = isGet ? Opcode.LongBinGet : Opcode.LongBinPut;
      Write(longOpcode);
      WriteInt32(memoIndex);
  }
  /// <summary>
  /// Emits a GET (when <paramref name="isGet"/> is true) or a PUT for the
  /// given memo index. Indexes in 0..255 use the one-byte BINGET/BINPUT
  /// form; everything else uses the four-byte LONG_BINGET/LONG_BINPUT form.
  /// </summary>
  /// <param name="isGet">True to emit a GET, false to emit a PUT.</param>
  /// <param name="index">Memo index to reference.</param>
  private void WriteGetOrPut(bool isGet, int index) {
      // The one-byte form holds at most 255. Index 256 must take the long
      // form; narrowed to a byte it would wrap and alias memo slot 0.
      if (IsUInt8(index)) {
          Write(isGet ? Opcode.BinGet : Opcode.BinPut);
          WriteUInt8(index);
      } else {
          Write(isGet ? Opcode.LongBinGet : Opcode.LongBinPut);
          WriteInt32(index);
      }
  }
  /// <summary>
  /// Emits a PUT opcode for the memo index of an already memoized object.
  /// </summary>
  private void WritePut(object obj) {
      const bool isGet = false;
      WriteGetOrPut(obj, isGet);
  }

  /// <summary>
  /// Emits a PUT opcode for the given memo index.
  /// </summary>
  private void WritePut(int index) {
      const bool isGet = false;
      WriteGetOrPut(isGet, index);
  }
  /// <summary>
  /// Emits the PROTO opcode followed by the protocol version byte (2).
  /// </summary>
  private void WriteProto() {
      const int protocolVersion = 2;

      Write(Opcode.Proto);
      WriteUInt8(protocolVersion);
  }
  /// <summary>
  /// Emit a series of opcodes that will append all items produced by the
  /// enumerator to the object at the top of the stack. Uses APPENDS where
  /// possible, but appends no more than BatchSize items at a time.
  /// </summary>
  private void BatchAppends(IEnumerator enumerator) {
      if (!enumerator.MoveNext()) {
          // Nothing to append.
          return;
      }

      // One-item lookahead: 'pending' is always the item read but not yet
      // saved, so a single trailing item can use APPEND instead of APPENDS.
      object pending = enumerator.Current;
      int inBatch = 0;

      while (enumerator.MoveNext()) {
          object ready = pending;
          pending = enumerator.Current;

          if (inBatch == _batchSize) {
              // Current batch is full: close it before starting the next.
              Write(Opcode.Appends);
              inBatch = 0;
          }
          if (inBatch == 0) {
              Write(Opcode.Mark);
          }

          Save(ready);
          inBatch++;
      }

      if (inBatch == _batchSize) {
          Write(Opcode.Appends);
          inBatch = 0;
      }

      Save(pending);
      inBatch++;

      // A lone final item needs no MARK and is added with APPEND.
      Write(inBatch > 1 ? Opcode.Appends : Opcode.Append);
  }
  /// <summary>
  /// Emit a series of opcodes that will set all (key, value) pairs of
  /// <paramref name="dict"/> in the object at the top of the stack. Uses
  /// SETITEMS where possible, but sets no more than BatchSize pairs at a time.
  /// </summary>
  private void BatchSetItems(Dictionary<string, object> dict) {
      using (var cursor = dict.GetEnumerator()) {
          if (!cursor.MoveNext()) {
              // Empty dictionary: nothing to emit.
              return;
          }

          // One-pair lookahead: 'pending' is always the pair read but not yet
          // saved, so a single trailing pair can use SETITEM.
          KeyValuePair<string, object> pending = cursor.Current;
          int inBatch = 0;

          while (cursor.MoveNext()) {
              KeyValuePair<string, object> ready = pending;
              pending = cursor.Current;

              if (inBatch == _batchSize) {
                  // Current batch is full: close it before starting the next.
                  Write(Opcode.SetItems);
                  inBatch = 0;
              }
              if (inBatch == 0) {
                  Write(Opcode.Mark);
              }

              Save(ready.Key);
              Save(ready.Value);
              inBatch++;
          }

          if (inBatch == _batchSize) {
              Write(Opcode.SetItems);
              inBatch = 0;
          }

          Save(pending.Key);
          Save(pending.Value);
          inBatch++;

          // A lone final pair needs no MARK and is stored with SETITEM.
          Write(inBatch > 1 ? Opcode.SetItems : Opcode.SetItem);
      }
  }
  /// <summary>
  /// Emit a series of opcodes that will set all (key, value) pairs produced
  /// by the enumerator in the object at the top of the stack. Each item must
  /// be an <c>object[]</c> holding the key at index 0 and the value at
  /// index 1. Uses SETITEMS where possible, but sets no more than BatchSize
  /// pairs at a time.
  /// </summary>
  private void BatchSetItems(IEnumerator enumerator) {
      if (!enumerator.MoveNext()) {
          // No pairs: nothing to emit.
          return;
      }

      // One-pair lookahead: the pending key/value are always the pair read
      // but not yet saved, so a single trailing pair can use SETITEM.
      object[] pair = (object[])enumerator.Current;
      object pendingKey = pair[0];
      object pendingValue = pair[1];
      int inBatch = 0;

      while (enumerator.MoveNext()) {
          object readyKey = pendingKey;
          object readyValue = pendingValue;

          pair = (object[])enumerator.Current;
          pendingKey = pair[0];
          pendingValue = pair[1];

          if (inBatch == _batchSize) {
              // Current batch is full: close it before starting the next.
              Write(Opcode.SetItems);
              inBatch = 0;
          }
          if (inBatch == 0) {
              Write(Opcode.Mark);
          }

          Save(readyKey);
          Save(readyValue);
          inBatch++;
      }

      if (inBatch == _batchSize) {
          Write(Opcode.SetItems);
          inBatch = 0;
      }

      Save(pendingKey);
      Save(pendingValue);
      inBatch++;

      // A lone final pair needs no MARK and is stored with SETITEM.
      Write(inBatch > 1 ? Opcode.SetItems : Opcode.SetItem);
  }
  552. #endregion
  553. #region Other private helper methods
  /// <summary>
  /// Builds a "Can't pickle ..." message for <paramref name="obj"/>,
  /// optionally followed by a formatted detail, and throws it. Although the
  /// declared return type is <see cref="Exception"/>, this method always
  /// throws and never returns.
  /// </summary>
  /// <exception cref="InvalidOperationException">Always thrown.</exception>
  private Exception CannotPickle(object obj, string format, params object[] args) {
      string message = "Can't pickle " + obj.ToString();
      if (format != null) {
          message = message + ": " + String.Format(format, args);
      }
      throw new InvalidOperationException(message);
  }
  /// <summary>
  /// Thin byte-oriented writer over a <see cref="FileStream"/>. Each char is
  /// cast to a single byte, and 32-bit integers are written as four bytes
  /// with the least significant byte first.
  /// </summary>
  internal class FileOutput {
      // Scratch buffer reused for every 32-bit write.
      private readonly byte[] _int32Buffer = new byte[4];
      private readonly FileStream _stream;

      public FileOutput(FileStream writer) {
          _stream = writer;
      }

      /// <summary>Writes the whole byte array.</summary>
      public void Write(byte[] data) {
          _stream.Write(data, 0, data.Length);
      }

      /// <summary>Writes each char of the string as one byte.</summary>
      public void Write(string data) {
          foreach (char ch in data) {
              _stream.WriteByte((byte)ch);
          }
      }

      /// <summary>Writes a 32-bit integer, least significant byte first.</summary>
      public virtual void Write(int data) {
          for (int i = 0; i < 4; i++) {
              _int32Buffer[i] = (byte)((data >> (8 * i)) & 0xff);
          }
          _stream.Write(_int32Buffer, 0, 4);
      }

      /// <summary>Writes one char as one byte.</summary>
      public virtual void Write(char data) {
          _stream.WriteByte((byte)data);
      }
  }
  593. #endregion
  /// <summary>
  /// Equality comparer that treats two objects as equal only when they are
  /// the same instance, and hashes by identity rather than by any overridden
  /// <c>GetHashCode</c>.
  /// </summary>
  class ReferenceEqualityComparer : IEqualityComparer<object> {
      // Shared singleton; readonly so it cannot be swapped out at runtime.
      public static readonly ReferenceEqualityComparer Instance = new ReferenceEqualityComparer();

      #region IEqualityComparer<object> Members

      /// <summary>Returns true only when both references are the same instance.</summary>
      public new bool Equals(object x, object y) {
          return Object.ReferenceEquals(x, y);
      }

      /// <summary>Returns the identity-based hash code of the object.</summary>
      public int GetHashCode(object obj) {
          return System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(obj);
      }

      #endregion
  }
  605. }
  606. }