PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/Servicing/1.0/Release/Product/Python/Analysis/Parsing/LiteralParser.cs

#
C# | 572 lines | 548 code | 8 blank | 16 comment | 39 complexity | d81ebc61fdccca20744763836fa01aa7 MD5 | raw file
Possible License(s): Apache-2.0, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * vspython@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. * ***************************************************************************/
  14. using System;
  15. using System.Collections.Generic;
  16. using System.Diagnostics;
  17. using System.Globalization;
  18. using System.Numerics;
  19. using System.Text;
  20. namespace Microsoft.PythonTools.Parsing {
  /// <summary>
  /// Converts the source text of Python literals (string, bytes, integer,
  /// floating point and imaginary tokens) into the corresponding .NET values.
  /// </summary>
  internal static class LiteralParser {
      /// <summary>
      /// Decodes the body of a Python string literal held in a <see cref="String"/>.
      /// </summary>
      /// <param name="text">Literal body, without quotes or prefix.</param>
      /// <param name="isRaw">True for raw (r"...") literals.</param>
      /// <param name="isUni">True when \u and \U escapes should be recognised.</param>
      /// <returns>The decoded string.</returns>
      public static string ParseString(string text, bool isRaw, bool isUni) {
          return ParseString(text.ToCharArray(), 0, text.Length, isRaw, isUni, false);
      }

      /// <summary>
      /// Decodes the body of a Python string literal stored in the window
      /// [<paramref name="start"/>, <paramref name="start"/> + <paramref name="length"/>)
      /// of <paramref name="text"/>. Characters outside that window are never inspected.
      /// </summary>
      /// <param name="text">Buffer holding the literal body.</param>
      /// <param name="start">Index of the first character of the body.</param>
      /// <param name="length">Number of characters in the body.</param>
      /// <param name="isRaw">True for raw literals; backslashes are then kept verbatim.</param>
      /// <param name="isUni">True when \u and \U escapes should be recognised.</param>
      /// <param name="normalizeLineEndings">True to turn "\r" and "\r\n" into "\n".</param>
      /// <returns>The decoded string.</returns>
      /// <exception cref="ArgumentException">A non-raw literal ends with a lone backslash.</exception>
      /// <exception cref="DecoderFallbackException">
      /// A \u or \U escape is truncated, malformed, or names a code point above U+10FFFF.
      /// </exception>
      public static string ParseString(char[] text, int start, int length, bool isRaw, bool isUni, bool normalizeLineEndings) {
          Debug.Assert(text != null);
          if (isRaw && !isUni && !normalizeLineEndings) {
              // Nothing can change: hand back the window as-is.
              return new String(text, start, length);
          }

          // The builder is created lazily so that literals without escapes cost one allocation.
          StringBuilder buf = null;
          int i = start;
          int l = start + length;
          int val;
          while (i < l) {
              char ch = text[i++];
              if ((!isRaw || isUni) && ch == '\\') {
                  if (buf == null) {
                      buf = new StringBuilder(length);
                      buf.Append(text, start, i - start - 1);
                  }
                  if (i >= l) {
                      if (isRaw) {
                          buf.Append('\\');
                          break;
                      }
                      throw new ArgumentException("Trailing \\ in string");
                  }

                  ch = text[i++];
                  if (isRaw) {
                      // Raw literal: the backslash and the following character are kept verbatim.
                      buf.Append('\\');
                      buf.Append(ch);
                      continue;
                  }

                  if (ch == 'u' || ch == 'U') {
                      if (!isUni) {
                          buf.Append('\\');
                          buf.Append(ch);
                          continue;
                      }

                      int len = (ch == 'u') ? 4 : 8;
                      // The digits are read as 16-bit halves so that eight hex digits cannot
                      // overflow an int; 'high' stays zero for the four digit form.
                      int high = 0;
                      if (i + len > l
                          || (len == 8 && !TryParseInt(text, i, 4, 16, out high))
                          || !TryParseInt(text, i + len - 4, 4, 16, out val)) {
                          throw new System.Text.DecoderFallbackException(String.Format(@"'unicodeescape' codec can't decode bytes in position {0}: truncated \uXXXX escape", i));
                      }
                      if (high > 0x10) {
                          // Beyond U+10FFFF there is no such character.
                          throw new System.Text.DecoderFallbackException(String.Format(@"'unicodeescape' codec can't decode bytes in position {0}: illegal Unicode character", i));
                      }
                      if (high == 0) {
                          buf.Append((char)val);
                      } else {
                          // Supplementary plane: must be emitted as a surrogate pair.
                          buf.Append(Char.ConvertFromUtf32((high << 16) | val));
                      }
                      i += len;
                      continue;
                  }

                  switch (ch) {
                      case 'a': buf.Append('\a'); continue;
                      case 'b': buf.Append('\b'); continue;
                      case 'f': buf.Append('\f'); continue;
                      case 'n': buf.Append('\n'); continue;
                      case 'r': buf.Append('\r'); continue;
                      case 't': buf.Append('\t'); continue;
                      case 'v': buf.Append('\v'); continue;
                      case '\\': buf.Append('\\'); continue;
                      case '\'': buf.Append('\''); continue;
                      case '\"': buf.Append('\"'); continue;
                      case '\r':
                          // Backslash-newline is a line continuation and produces nothing.
                          if (i < l && text[i] == '\n') {
                              i++;
                          }
                          continue;
                      case '\n': continue;
                      case 'x': //hex
                          if (i + 2 > l || !TryParseInt(text, i, 2, 16, out val)) {
                              goto default;
                          }
                          buf.Append((char)val);
                          i += 2;
                          continue;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                          buf.Append((char)ParseOctalEscape(ch, text, ref i, l));
                          continue;
                      default:
                          // Unknown escapes are preserved, backslash included.
                          buf.Append("\\");
                          buf.Append(ch);
                          continue;
                  }
              } else if (ch == '\r' && normalizeLineEndings) {
                  if (buf == null) {
                      buf = new StringBuilder(length);
                      buf.Append(text, start, i - start - 1);
                  }
                  // normalize line endings
                  if (i < l && text[i] == '\n') {
                      i++;
                  }
                  buf.Append('\n');
              } else if (buf != null) {
                  buf.Append(ch);
              }
          }

          if (buf != null) {
              return buf.ToString();
          }
          return new String(text, start, length);
      }

      /// <summary>
      /// Decodes the body of a Python bytes literal stored in the window
      /// [<paramref name="start"/>, <paramref name="start"/> + <paramref name="length"/>)
      /// of <paramref name="text"/>. Characters outside that window are never inspected.
      /// </summary>
      /// <param name="text">Buffer holding the literal body.</param>
      /// <param name="start">Index of the first character of the body.</param>
      /// <param name="length">Number of characters in the body.</param>
      /// <param name="isRaw">True for raw literals; backslashes are then kept verbatim.</param>
      /// <param name="normalizeLineEndings">True to turn "\r" and "\r\n" into "\n".</param>
      /// <returns>The decoded characters, one list element per character.</returns>
      /// <exception cref="ArgumentException">A non-raw literal ends with a lone backslash.</exception>
      internal static List<char> ParseBytes(char[] text, int start, int length, bool isRaw, bool normalizeLineEndings) {
          Debug.Assert(text != null);
          List<char> buf = new List<char>(length);
          int i = start;
          int l = start + length;
          int val;
          while (i < l) {
              char ch = text[i++];
              if (!isRaw && ch == '\\') {
                  if (i >= l) {
                      throw new ArgumentException("Trailing \\ in string");
                  }
                  ch = text[i++];
                  switch (ch) {
                      case 'a': buf.Add('\a'); continue;
                      case 'b': buf.Add('\b'); continue;
                      case 'f': buf.Add('\f'); continue;
                      case 'n': buf.Add('\n'); continue;
                      case 'r': buf.Add('\r'); continue;
                      case 't': buf.Add('\t'); continue;
                      case 'v': buf.Add('\v'); continue;
                      case '\\': buf.Add('\\'); continue;
                      case '\'': buf.Add('\''); continue;
                      case '\"': buf.Add('\"'); continue;
                      case '\r':
                          // Backslash-newline is a line continuation and produces nothing.
                          if (i < l && text[i] == '\n') {
                              i++;
                          }
                          continue;
                      case '\n': continue;
                      case 'x': //hex
                          if (i + 2 > l || !TryParseInt(text, i, 2, 16, out val)) {
                              goto default;
                          }
                          buf.Add((char)val);
                          i += 2;
                          continue;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                          buf.Add((char)ParseOctalEscape(ch, text, ref i, l));
                          continue;
                      default:
                          // Unknown escapes are preserved, backslash included.
                          buf.Add('\\');
                          buf.Add(ch);
                          continue;
                  }
              } else if (ch == '\r' && normalizeLineEndings) {
                  // normalize line endings
                  if (i < l && text[i] == '\n') {
                      i++;
                  }
                  buf.Add('\n');
              } else {
                  buf.Add(ch);
              }
          }
          return buf;
      }

      /// <summary>
      /// Reads the remainder of an octal escape whose first digit has already been
      /// consumed. At most two further octal digits are taken, and none past
      /// <paramref name="end"/>.
      /// </summary>
      /// <param name="first">The octal digit that followed the backslash.</param>
      /// <param name="text">Buffer being decoded.</param>
      /// <param name="index">Position after <paramref name="first"/>; advanced past every digit used.</param>
      /// <param name="end">Exclusive upper bound of the literal body.</param>
      /// <returns>The numeric value of the escape.</returns>
      private static int ParseOctalEscape(char first, char[] text, ref int index, int end) {
          int value = first - '0';
          int digit;
          for (int extra = 0; extra < 2 && index < end && HexValue(text[index], out digit) && digit < 8; extra++) {
              value = value * 8 + digit;
              index++;
          }
          return value;
      }

      /// <summary>
      /// Maps a digit character to its numeric value. Accepts ASCII digits,
      /// the digits U+0660 to U+0669, and the letters a-z / A-Z as 10 to 35.
      /// </summary>
      /// <param name="ch">Character to classify.</param>
      /// <param name="value">The digit value, or -1 when <paramref name="ch"/> is not a digit.</param>
      /// <returns>True when <paramref name="ch"/> is a digit in some base up to 36.</returns>
      private static bool HexValue(char ch, out int value) {
          if (ch >= '0' && ch <= '9') {
              value = ch - '0';
          } else if (ch >= '\u0660' && ch <= '\u0669') {
              value = ch - '\u0660';
          } else if (ch >= 'a' && ch <= 'z') {
              value = ch - 'a' + 10;
          } else if (ch >= 'A' && ch <= 'Z') {
              value = ch - 'A' + 10;
          } else {
              value = -1;
              return false;
          }
          return true;
      }

      /// <summary>
      /// Maps a digit character to its numeric value, throwing when it is not a digit.
      /// </summary>
      /// <exception cref="ArgumentException"><paramref name="ch"/> is not a digit.</exception>
      private static int HexValue(char ch) {
          int value;
          if (!HexValue(ch, out value)) {
              throw new ArgumentException("bad char for integer value: " + ch);
          }
          return value;
      }

      /// <summary>
      /// Maps a digit character to its numeric value in base <paramref name="b"/>.
      /// </summary>
      /// <exception cref="ArgumentException">
      /// <paramref name="ch"/> is not a digit, or is not valid in base <paramref name="b"/>.
      /// </exception>
      private static int CharValue(char ch, int b) {
          int val = HexValue(ch);
          if (val >= b) {
              throw new ArgumentException(String.Format("bad char for the integer value: '{0}' (base {1})", ch, b));
          }
          return val;
      }

      /// <summary>
      /// Attempts to parse <paramref name="text"/> as a 32-bit integer in base
      /// <paramref name="b"/>, reporting overflow through the return value
      /// instead of an exception.
      /// </summary>
      /// <param name="text">Digits only, most significant first.</param>
      /// <param name="b">Numeric base.</param>
      /// <param name="ret">The parsed value when the method returns true.</param>
      /// <returns>False when the value does not fit in an <see cref="Int32"/>.</returns>
      /// <exception cref="ArgumentException">A character is not a digit of base <paramref name="b"/>.</exception>
      private static bool ParseInt(string text, int b, out int ret) {
          ret = 0;
          long m = 1;
          for (int i = text.Length - 1; i >= 0; i--) {
              // Work in 64 bits so that overflow can be detected without throwing.
              long lret = (long)ret + m * CharValue(text[i], b);
              if (lret < Int32.MinValue || lret > Int32.MaxValue) {
                  return false;
              }
              ret = (int)lret;

              // The next place value only matters while digits remain; checking it after the
              // most significant digit would reject values that actually fit.
              if (i > 0) {
                  m *= b;
                  if (m < Int32.MinValue || m > Int32.MaxValue) {
                      return false;
                  }
              }
          }
          return true;
      }

      /// <summary>
      /// Attempts to parse exactly <paramref name="length"/> digits of base
      /// <paramref name="b"/> starting at <paramref name="start"/>.
      /// </summary>
      /// <param name="text">Buffer holding the digits.</param>
      /// <param name="start">Index of the first digit.</param>
      /// <param name="length">Number of digits required.</param>
      /// <param name="b">Numeric base.</param>
      /// <param name="value">The parsed value, or 0 when the method returns false.</param>
      /// <returns>
      /// False when the buffer is too short, a character is not a digit of the
      /// base, or the value does not fit in an <see cref="Int32"/>.
      /// </returns>
      private static bool TryParseInt(char[] text, int start, int length, int b, out int value) {
          value = 0;
          if (start + length > text.Length) {
              return false;
          }
          long total = 0;
          for (int i = start, end = start + length; i < end; i++) {
              int onechar;
              if (!HexValue(text[i], out onechar) || onechar >= b) {
                  return false;
              }
              total = total * b + onechar;
              if (total > Int32.MaxValue) {
                  // Would wrap around if narrowed to int.
                  return false;
              }
          }
          value = (int)total;
          return true;
      }

      /// <summary>
      /// Parses an unsigned integer literal body in base <paramref name="b"/>.
      /// </summary>
      /// <param name="text">Digits only (any base prefix already removed).</param>
      /// <param name="b">Numeric base; must not be zero.</param>
      /// <returns>A boxed <see cref="Int32"/> when the value fits, otherwise a boxed <see cref="BigInteger"/>.</returns>
      public static object ParseInteger(string text, int b) {
          Debug.Assert(b != 0);
          int iret;
          if (ParseInt(text, b, out iret)) {
              return iret;
          }
          BigInteger ret = ParseBigInteger(text, b);
          if (ret >= Int32.MinValue && ret <= Int32.MaxValue) {
              return (int)ret;
          }
          return ret;
      }

      /// <summary>
      /// Parses an integer that may carry surrounding whitespace, a sign and,
      /// when <paramref name="b"/> is zero, a 0x / 0o / 0b prefix.
      /// </summary>
      /// <param name="text">Text to parse.</param>
      /// <param name="b">Numeric base from 2 to 36, or zero to infer it from the prefix.</param>
      /// <returns>A boxed <see cref="Int32"/>, or a boxed <see cref="BigInteger"/> on 32-bit overflow.</returns>
      /// <exception cref="ArgumentException">The base is out of range or the text is not a valid integer.</exception>
      public static object ParseIntegerSign(string text, int b) {
          int start = 0, end = text.Length, saveb = b;
          short sign = 1;
          if (b < 0 || b == 1 || b > 36) {
              throw new ArgumentException("base must be >= 2 and <= 36");
          }
          ParseIntegerStart(text, ref b, ref start, end, ref sign);

          int ret = 0;
          try {
              int saveStart = start;
              for (; ; ) {
                  int digit;
                  if (start >= end) {
                      if (saveStart == start) {
                          // Ran out of text without seeing a single digit.
                          throw new ArgumentException("Invalid integer literal");
                      }
                      break;
                  }
                  if (!HexValue(text[start], out digit)) {
                      break;
                  }
                  if (!(digit < b)) {
                      if (text[start] == 'l' || text[start] == 'L') {
                          break;
                      }
                      throw new ArgumentException("Invalid integer literal");
                  }
                  checked {
                      // include sign here so that System.Int32.MinValue won't overflow
                      ret = ret * b + sign * digit;
                  }
                  start++;
              }
          } catch (OverflowException) {
              // Too large for 32 bits: start over with arbitrary precision and the caller's base.
              return ParseBigIntegerSign(text, saveb);
          }

          ParseIntegerEnd(text, start, end);
          return ret;
      }

      /// <summary>
      /// Consumes leading whitespace, an optional sign and, when
      /// <paramref name="b"/> is zero, the base prefix.
      /// </summary>
      /// <param name="text">Text being parsed.</param>
      /// <param name="b">Base; zero on entry means "infer", and it is replaced by 2, 8, 10 or 16.</param>
      /// <param name="start">Current position; advanced to the first digit.</param>
      /// <param name="end">Exclusive upper bound of the text.</param>
      /// <param name="sign">Set to -1 when a minus sign is consumed.</param>
      private static void ParseIntegerStart(string text, ref int b, ref int start, int end, ref short sign) {
          // Skip whitespace
          while (start < end && Char.IsWhiteSpace(text, start)) {
              start++;
          }

          // Sign?
          if (start < end) {
              switch (text[start]) {
                  case '-':
                      sign = -1;
                      goto case '+';
                  case '+':
                      start++;
                      break;
              }
          }

          // Skip whitespace
          while (start < end && Char.IsWhiteSpace(text, start)) {
              start++;
          }

          // Determine base
          if (b == 0) {
              if (start < end && text[start] == '0') {
                  // Hex, oct, or bin
                  if (++start < end) {
                      switch (text[start]) {
                          case 'x':
                          case 'X':
                              start++;
                              b = 16;
                              break;
                          case 'o':
                          case 'O':
                              b = 8;
                              start++;
                              break;
                          case 'b':
                          case 'B':
                              start++;
                              b = 2;
                              break;
                      }
                  }

                  if (b == 0) {
                      // Keep the leading zero
                      start--;
                      b = 8;
                  }
              } else {
                  b = 10;
              }
          }
      }

      /// <summary>
      /// Verifies that only whitespace remains from <paramref name="start"/> onwards.
      /// </summary>
      /// <exception cref="ArgumentException">A non-whitespace character follows the number.</exception>
      private static void ParseIntegerEnd(string text, int start, int end) {
          // Skip whitespace
          while (start < end && Char.IsWhiteSpace(text, start)) {
              start++;
          }

          if (start < end) {
              throw new ArgumentException("invalid integer number literal");
          }
      }

      /// <summary>
      /// Parses an unsigned integer literal body of arbitrary size in base
      /// <paramref name="b"/>. A single trailing 'l' or 'L' is ignored.
      /// </summary>
      /// <param name="text">Digits only (any base prefix already removed).</param>
      /// <param name="b">Numeric base; must not be zero.</param>
      /// <returns>The parsed value; zero for empty text.</returns>
      /// <exception cref="ArgumentException">A character is not a digit of base <paramref name="b"/>.</exception>
      public static BigInteger ParseBigInteger(string text, int b) {
          Debug.Assert(b != 0);
          BigInteger ret = BigInteger.Zero;
          BigInteger m = BigInteger.One;

          if (text.Length != 0) {
              int i = text.Length - 1;
              if (text[i] == 'l' || text[i] == 'L') {
                  i -= 1;
              }

              int groupMax = 7;
              if (b <= 10) {
                  groupMax = 9;// 2 147 483 647
              }

              while (i >= 0) {
                  // extract digits in a batch; 64-bit arithmetic is required because
                  // b^7 exceeds Int32.MaxValue for every base above 21
                  long smallMultiplier = 1;
                  long groupValue = 0;
                  for (int j = 0; j < groupMax && i >= 0; j++) {
                      groupValue += CharValue(text[i--], b) * smallMultiplier;
                      smallMultiplier *= b;
                  }

                  ret += m * groupValue;
                  if (i >= 0) {
                      m *= smallMultiplier;
                  }
              }
          }

          return ret;
      }

      /// <summary>
      /// Arbitrary precision counterpart of <see cref="ParseIntegerSign"/>; a
      /// trailing 'l' or 'L' suffix is accepted.
      /// </summary>
      /// <param name="text">Text to parse.</param>
      /// <param name="b">Numeric base from 2 to 36, or zero to infer it from the prefix.</param>
      /// <returns>The parsed value.</returns>
      /// <exception cref="ArgumentException">The base is out of range or the text is not a valid integer.</exception>
      public static BigInteger ParseBigIntegerSign(string text, int b) {
          int start = 0, end = text.Length;
          short sign = 1;
          if (b < 0 || b == 1 || b > 36) {
              throw new ArgumentException("base must be >= 2 and <= 36");
          }
          ParseIntegerStart(text, ref b, ref start, end, ref sign);

          BigInteger ret = BigInteger.Zero;
          int saveStart = start;
          for (; ; ) {
              int digit;
              if (start >= end) {
                  if (start == saveStart) {
                      // Ran out of text without seeing a single digit.
                      throw new ArgumentException("Invalid integer literal");
                  }
                  break;
              }
              if (!HexValue(text[start], out digit)) {
                  break;
              }
              if (!(digit < b)) {
                  if (text[start] == 'l' || text[start] == 'L') {
                      break;
                  }
                  throw new ArgumentException("Invalid integer literal");
              }
              ret = ret * b + digit;
              start++;
          }

          if (start < end && (text[start] == 'l' || text[start] == 'L')) {
              start++;
          }

          ParseIntegerEnd(text, start, end);
          return sign < 0 ? -ret : ret;
      }

      /// <summary>
      /// Parses a floating point literal, mapping overflow to an infinity of the
      /// matching sign.
      /// </summary>
      /// <param name="text">Text to parse.</param>
      /// <returns>The parsed value.</returns>
      /// <exception cref="ArgumentException">The text ends with a NUL character.</exception>
      public static double ParseFloat(string text) {
          try {
              //
              // Strings that end with '\0' is the specific case that CLR libraries allow,
              // however Python doesn't. Since we use CLR floating point number parser,
              // we must check explicitly for the strings that end with '\0'
              //
              if (text != null && text.Length > 0 && text[text.Length - 1] == '\0') {
                  throw new ArgumentException("null byte in float literal");
              }
              return ParseFloatNoCatch(text);
          } catch (OverflowException) {
              return HasLeadingMinus(text) ? Double.NegativeInfinity : Double.PositiveInfinity;
          }
      }

      /// <summary>
      /// Parses a floating point literal, recognising the nan / inf spellings
      /// and preserving the sign of zero. Overflow is left to the caller.
      /// </summary>
      private static double ParseFloatNoCatch(string text) {
          string s = ReplaceUnicodeDigits(text);
          // Invariant casing: culture-sensitive lowering does not map 'I' to 'i' everywhere.
          switch (s.ToLowerInvariant().TrimStart()) {
              case "nan":
              case "+nan":
              case "-nan":
                  return double.NaN;
              case "inf":
              case "+inf":
                  return double.PositiveInfinity;
              case "-inf":
                  return double.NegativeInfinity;
              default:
                  // pass NumberStyles to disallow ,'s in float strings.
                  double res = double.Parse(s, NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture);
                  return (res == 0.0 && HasLeadingMinus(text)) ? NegativeZero : res;
          }
      }

      /// <summary>
      /// Tells whether the first non-whitespace character of
      /// <paramref name="text"/> is a minus sign.
      /// </summary>
      private static bool HasLeadingMinus(string text) {
          return text.TrimStart().StartsWith("-", StringComparison.Ordinal);
      }

      internal const double NegativeZero = -0.0;

      /// <summary>
      /// Replaces the digits U+0660 to U+0669 with their ASCII equivalents so
      /// that the CLR number parser accepts them. Returns the same instance
      /// when there is nothing to replace.
      /// </summary>
      private static string ReplaceUnicodeDigits(string text) {
          StringBuilder replacement = null;
          for (int i = 0; i < text.Length; i++) {
              char current = text[i];
              if (current >= '\u0660' && current <= '\u0669') {
                  if (replacement == null) {
                      replacement = new StringBuilder(text);
                  }
                  replacement[i] = (char)(current - '\u0660' + '0');
              }
          }
          return replacement != null ? replacement.ToString() : text;
      }

      // ParseComplex helpers
      private static char[] signs = new char[] { '+', '-' };

      private static Exception ExnMalformed() {
          return new ArgumentException("complex() arg is a malformed string");
      }

      /// <summary>
      /// Parses an imaginary literal such as "2.5j". The final character (the
      /// suffix) is dropped and the rest is parsed as the imaginary part.
      /// </summary>
      /// <param name="text">Literal text including its one character suffix.</param>
      /// <returns>A complex number whose real part is zero.</returns>
      public static Complex ParseImaginary(string text) {
          try {
              // NumberStyles.Float keeps this in line with ParseFloat: no thousands separators.
              return new Complex(0.0, double.Parse(
                  text.Substring(0, text.Length - 1),
                  NumberStyles.Float,
                  System.Globalization.CultureInfo.InvariantCulture
              ));
          } catch (OverflowException) {
              return new Complex(0, HasLeadingMinus(text) ? Double.NegativeInfinity : Double.PositiveInfinity);
          }
      }
  }
  516. }