PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/IronPython_Main/Languages/IronPython/IronPython/Runtime/LiteralParser.cs

#
C# | 650 lines | 622 code | 11 blank | 17 comment | 38 complexity | 7bb571a9c1cf0404c489b5cfb1b0edf4 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections.Generic;
  17. using System.Diagnostics;
  18. using System.Globalization;
  19. using System.Text;
  20. using IronPython.Runtime.Exceptions;
  21. using IronPython.Runtime.Operations;
  22. using Microsoft.Scripting.Runtime;
  23. using Microsoft.Scripting.Utils;
  24. #if CLR2
  25. using Microsoft.Scripting.Math;
  26. using Complex = Microsoft.Scripting.Math.Complex64;
  27. #else
  28. using System.Numerics;
  29. #endif
  30. namespace IronPython.Runtime {
  31. /// <summary>
  32. /// Parses the text of Python string, bytes, integer, float and complex literals into runtime values.
  33. /// </summary>
  34. public static class LiteralParser {
  /// <summary>
  /// Convenience overload that decodes an entire string as a literal body.
  /// </summary>
  /// <param name="text">Literal body to decode.</param>
  /// <param name="isRaw">Forwarded unchanged as the raw-literal flag.</param>
  /// <param name="isUni">Forwarded unchanged as the unicode-literal flag.</param>
  /// <returns>The decoded string; line endings are not normalized.</returns>
  public static string ParseString(string text, bool isRaw, bool isUni) {
      char[] chars = text.ToCharArray();
      return ParseString(chars, 0, chars.Length, isRaw, isUni, false);
  }
  /// <summary>
  /// Decodes the body of a string literal stored in a slice of a character buffer,
  /// expanding backslash escapes and optionally normalizing line endings.
  /// </summary>
  /// <param name="text">Buffer holding the literal body; must not be null.</param>
  /// <param name="start">Index of the first character of the slice.</param>
  /// <param name="length">Number of characters in the slice.</param>
  /// <param name="isRaw">
  /// True for raw literals. Backslash escapes are kept verbatim, except that \u and \U
  /// are still decoded when <paramref name="isUni"/> is also true.
  /// </param>
  /// <param name="isUni">True to decode \uXXXX and \UXXXXXXXX escapes.</param>
  /// <param name="normalizeLineEndings">True to rewrite "\r\n" and a bare "\r" as "\n".</param>
  /// <returns>The decoded string.</returns>
  /// <remarks>
  /// Throws the exception built by PythonOps.ValueError when a non-raw literal ends in a
  /// lone backslash, and the one built by PythonOps.UnicodeEncodeError when a \u or \U
  /// escape is truncated, malformed, or names a value outside U+0000..U+10FFFF.
  /// Escape digits and the "\r\n" look-ahead never read past the end of the slice.
  /// </remarks>
  public static string ParseString(char[] text, int start, int length, bool isRaw, bool isUni, bool normalizeLineEndings) {
      Debug.Assert(text != null);

      // Nothing can change in this configuration, so copy the slice verbatim.
      if (isRaw && !isUni && !normalizeLineEndings) return new String(text, start, length);

      // The builder is created lazily, on the first character that needs rewriting.
      StringBuilder buf = null;
      int i = start;
      int l = start + length;
      int val;
      while (i < l) {
          char ch = text[i++];
          if ((!isRaw || isUni) && ch == '\\') {
              if (buf == null) {
                  buf = new StringBuilder(length);
                  // Copy everything that preceded this backslash.
                  buf.Append(text, start, i - start - 1);
              }

              if (i >= l) {
                  if (isRaw) {
                      buf.Append('\\');
                      break;
                  } else {
                      throw PythonOps.ValueError("Trailing \\ in string");
                  }
              }
              ch = text[i++];

              if (ch == 'u' || ch == 'U') {
                  int len = (ch == 'u') ? 4 : 8;
                  int max = 16;
                  if (isUni) {
                      // The digits must lie inside the slice, not merely inside the buffer.
                      if (i + len <= l && TryParseInt(text, i, len, max, out val)) {
                          // A negative value means the 8-digit parse wrapped around.
                          if (val < 0 || val > 0x10FFFF) {
                              throw PythonOps.UnicodeEncodeError(@"'unicodeescape' codec can't decode bytes in position {0}: illegal Unicode character", i);
                          }
                          if (val < 0x10000) {
                              buf.Append((char)val);
                          } else {
                              // Supplementary-plane code points need a surrogate pair.
                              buf.Append(Char.ConvertFromUtf32(val));
                          }
                          i += len;
                      } else {
                          throw PythonOps.UnicodeEncodeError(@"'unicodeescape' codec can't decode bytes in position {0}: truncated \uXXXX escape", i);
                      }
                  } else {
                      // Not a unicode literal: \u has no special meaning.
                      buf.Append('\\');
                      buf.Append(ch);
                  }
              } else {
                  if (isRaw) {
                      // Raw literal: keep the escape exactly as written.
                      buf.Append('\\');
                      buf.Append(ch);
                      continue;
                  }
                  switch (ch) {
                      case 'a': buf.Append('\a'); continue;
                      case 'b': buf.Append('\b'); continue;
                      case 'f': buf.Append('\f'); continue;
                      case 'n': buf.Append('\n'); continue;
                      case 'r': buf.Append('\r'); continue;
                      case 't': buf.Append('\t'); continue;
                      case 'v': buf.Append('\v'); continue;
                      case '\\': buf.Append('\\'); continue;
                      case '\'': buf.Append('\''); continue;
                      case '\"': buf.Append('\"'); continue;
                      // Backslash-newline is a line continuation and produces no output.
                      case '\r': if (i < l && text[i] == '\n') i++; continue;
                      case '\n': continue;
                      case 'x': //hex
                          if (i + 2 > l) {
                              goto default;
                          }
                          if (!TryParseInt(text, i, 2, 16, out val)) {
                              goto default;
                          }
                          buf.Append((char)val);
                          i += 2;
                          continue;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7': {
                              // Octal escape: one to three octal digits.
                              int onechar;
                              val = ch - '0';
                              if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                                  val = val * 8 + onechar;
                                  i++;
                                  if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                                      val = val * 8 + onechar;
                                      i++;
                                  }
                              }
                          }

                          buf.Append((char)val);
                          continue;
                      default:
                          // Unknown escape: keep the backslash and the character.
                          buf.Append("\\");
                          buf.Append(ch);
                          continue;
                  }
              }
          } else if (ch == '\r' && normalizeLineEndings) {
              if (buf == null) {
                  buf = new StringBuilder(length);
                  buf.Append(text, start, i - start - 1);
              }

              // normalize line endings; only look ahead inside the slice
              if (i < l && text[i] == '\n') {
                  i++;
              }
              buf.Append('\n');
          } else if (buf != null) {
              buf.Append(ch);
          }
      }

      if (buf != null) {
          return buf.ToString();
      }
      // No character needed rewriting.
      return new String(text, start, length);
  }
  /// <summary>
  /// Decodes the body of a bytes literal stored in a slice of a character buffer into
  /// a list of bytes, expanding backslash escapes unless the literal is raw.
  /// </summary>
  /// <param name="text">Buffer holding the literal body; must not be null.</param>
  /// <param name="start">Index of the first character of the slice.</param>
  /// <param name="length">Number of characters in the slice.</param>
  /// <param name="isRaw">True to leave backslash sequences untouched.</param>
  /// <param name="normalizeLineEndings">True to rewrite "\r\n" and a bare "\r" as "\n".</param>
  /// <returns>The decoded bytes; each character is narrowed with a (byte) cast.</returns>
  /// <remarks>
  /// Throws the exception built by PythonOps.ValueError when a non-raw literal ends in a
  /// lone backslash. Escape digits and the "\r\n" look-ahead never read past the slice.
  /// </remarks>
  internal static List<byte> ParseBytes(char[] text, int start, int length, bool isRaw, bool normalizeLineEndings) {
      Debug.Assert(text != null);

      List<byte> buf = new List<byte>(length);

      int i = start;
      int l = start + length;
      int val;
      while (i < l) {
          char ch = text[i++];
          if (!isRaw && ch == '\\') {
              if (i >= l) {
                  throw PythonOps.ValueError("Trailing \\ in string");
              }
              ch = text[i++];
              switch (ch) {
                  case 'a': buf.Add((byte)'\a'); continue;
                  case 'b': buf.Add((byte)'\b'); continue;
                  case 'f': buf.Add((byte)'\f'); continue;
                  case 'n': buf.Add((byte)'\n'); continue;
                  case 'r': buf.Add((byte)'\r'); continue;
                  case 't': buf.Add((byte)'\t'); continue;
                  case 'v': buf.Add((byte)'\v'); continue;
                  case '\\': buf.Add((byte)'\\'); continue;
                  case '\'': buf.Add((byte)'\''); continue;
                  case '\"': buf.Add((byte)'\"'); continue;
                  // Backslash-newline is a line continuation and produces no output.
                  case '\r': if (i < l && text[i] == '\n') i++; continue;
                  case '\n': continue;
                  case 'x': //hex
                      // Both hex digits must lie inside the slice, not merely inside the buffer.
                      if (i + 2 > l) {
                          goto default;
                      }
                      if (!TryParseInt(text, i, 2, 16, out val)) {
                          goto default;
                      }
                      buf.Add((byte)val);
                      i += 2;
                      continue;
                  case '0':
                  case '1':
                  case '2':
                  case '3':
                  case '4':
                  case '5':
                  case '6':
                  case '7': {
                          // Octal escape: one to three octal digits.
                          int onechar;
                          val = ch - '0';
                          if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                              val = val * 8 + onechar;
                              i++;
                              if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                                  val = val * 8 + onechar;
                                  i++;
                              }
                          }
                      }

                      buf.Add((byte)val);
                      continue;
                  default:
                      // Unknown escape: keep the backslash and the character.
                      buf.Add((byte)'\\');
                      buf.Add((byte)ch);
                      continue;
              }
          } else if (ch == '\r' && normalizeLineEndings) {
              // normalize line endings; only look ahead inside the slice
              if (i < l && text[i] == '\n') {
                  i++;
              }
              buf.Add((byte)'\n');
          } else {
              buf.Add((byte)ch);
          }
      }

      return buf;
  }
  /// <summary>
  /// Maps a digit character to its numeric value. Despite the name, letters are
  /// accepted all the way to 'z'/'Z', giving values 0 through 35.
  /// </summary>
  /// <param name="ch">Character to classify.</param>
  /// <param name="value">
  /// 0-9 for '0'-'9' and for U+0660-U+0669, 10-35 for 'a'-'z' or 'A'-'Z', otherwise -1.
  /// </param>
  /// <returns>True when <paramref name="ch"/> is one of the recognized digits.</returns>
  private static bool HexValue(char ch, out int value) {
      if (ch >= '0' && ch <= '9') {
          value = ch - '0';
      } else if (ch >= '\x660' && ch <= '\x669') {
          // U+0660..U+0669 are accepted as an alternative set of decimal digits.
          value = ch - '\x660';
      } else if (ch >= 'a' && ch <= 'z') {
          value = ch - 'a' + 10;
      } else if (ch >= 'A' && ch <= 'Z') {
          value = ch - 'A' + 10;
      } else {
          value = -1;
          return false;
      }
      return true;
  }
  /// <summary>
  /// Returns the numeric value of a digit character, throwing when it is not a digit.
  /// </summary>
  /// <param name="ch">Character to convert.</param>
  /// <returns>The value produced by the two-argument HexValue overload.</returns>
  private static int HexValue(char ch) {
      int digit;
      if (HexValue(ch, out digit)) {
          return digit;
      }
      throw new ValueErrorException("bad char for integer value: " + ch);
  }
  /// <summary>
  /// Returns the numeric value of a digit character, throwing a ValueErrorException
  /// when the digit is not valid in the given base.
  /// </summary>
  /// <param name="ch">Digit character.</param>
  /// <param name="b">Base the digit must be smaller than.</param>
  /// <returns>The digit value, always less than <paramref name="b"/>.</returns>
  private static int CharValue(char ch, int b) {
      int digit = HexValue(ch);
      if (digit < b) {
          return digit;
      }
      throw new ValueErrorException(String.Format("bad char for the integer value: '{0}' (base {1})", ch, b));
  }
  /// <summary>
  /// Fast path that tries to parse an unsigned run of digits as a 32-bit integer.
  /// </summary>
  /// <param name="text">Digits only; processed from the last character to the first.</param>
  /// <param name="b">Numeric base.</param>
  /// <param name="ret">
  /// The parsed value on success. On failure it holds whatever was accumulated so far
  /// and must not be used.
  /// </param>
  /// <returns>
  /// True when the value fits in an Int32. False reports overflow through the return
  /// value instead of an exception, so the caller can fall back to a wider type.
  /// </returns>
  /// <remarks>
  /// A character that is not a digit of base <paramref name="b"/> still throws, because
  /// CharValue throws for it.
  /// </remarks>
  private static bool ParseInt(string text, int b, out int ret) {
      ret = 0;
      long m = 1;     // place value of the digit being processed
      for (int i = text.Length - 1; i >= 0; i--) {
          // Accumulate in 64 bits so overflow is detected without throwing.
          long lret = (long)ret + m * CharValue(text[i], b);
          if (Int32.MinValue <= lret && lret <= Int32.MaxValue) {
              ret = (int)lret;
          } else {
              return false;
          }

          // The most significant digit has been consumed; the next place value is never
          // used, so it must not be allowed to reject an in-range number.
          if (i == 0) {
              break;
          }

          m *= b;
          if (Int32.MinValue > m || m > Int32.MaxValue) {
              return false;
          }
      }
      return true;
  }
  /// <summary>
  /// Attempts to parse a fixed number of digits from a character buffer.
  /// </summary>
  /// <param name="text">Buffer to read from.</param>
  /// <param name="start">Index of the first digit.</param>
  /// <param name="length">Exact number of digits required.</param>
  /// <param name="b">Numeric base; every digit must be smaller than it.</param>
  /// <param name="value">
  /// The parsed value on success; a partial accumulation (or zero) on failure.
  /// </param>
  /// <returns>
  /// False when the requested range runs past the end of the buffer or contains a
  /// character that is not a digit of the given base; true otherwise.
  /// </returns>
  private static bool TryParseInt(char[] text, int start, int length, int b, out int value) {
      value = 0;
      int end = start + length;
      if (end > text.Length) {
          return false;
      }
      for (int pos = start; pos < end; pos++) {
          int digit;
          if (!HexValue(text[pos], out digit) || digit >= b) {
              return false;
          }
          value = value * b + digit;
      }
      return true;
  }
  /// <summary>
  /// Parses an unsigned run of digits, returning a boxed Int32 when the value fits
  /// and a BigInteger otherwise.
  /// </summary>
  /// <param name="text">Digits to parse.</param>
  /// <param name="b">Numeric base; asserted to be non-zero.</param>
  /// <returns>The boxed Int32 from ScriptingRuntimeHelpers.Int32ToObject, or a BigInteger.</returns>
  public static object ParseInteger(string text, int b) {
      Debug.Assert(b != 0);
      int small;
      if (ParseInt(text, b, out small)) {
          return ScriptingRuntimeHelpers.Int32ToObject(small);
      }

      // The fast path gave up; parse with arbitrary precision and narrow if possible.
      BigInteger big = ParseBigInteger(text, b);
      if (big.AsInt32(out small)) {
          return ScriptingRuntimeHelpers.Int32ToObject(small);
      }
      return big;
  }
  /// <summary>
  /// Parses an optionally signed integer with optional surrounding whitespace,
  /// returning a boxed Int32 or, on 32-bit overflow, the result of ParseBigIntegerSign.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <param name="b">Base from 2 to 36, or 0 to infer the base from the text's prefix.</param>
  /// <returns>A boxed Int32, or whatever ParseBigIntegerSign returns on overflow.</returns>
  /// <remarks>
  /// Throws ValueErrorException for an invalid base, for text without digits, and for
  /// a digit that is too large for the base (other than 'l'/'L', which ends the digits).
  /// </remarks>
  public static object ParseIntegerSign(string text, int b) {
      int start = 0, end = text.Length, saveb = b;
      short sign = 1;

      if (b < 0 || b == 1 || b > 36) {
          throw new ValueErrorException("base must be >= 2 and <= 36");
      }

      ParseIntegerStart(text, ref b, ref start, end, ref sign);

      int ret = 0;
      try {
          int firstDigit = start;
          while (start < end) {
              int digit;
              if (!HexValue(text[start], out digit)) {
                  break;
              }
              if (digit >= b) {
                  if (text[start] == 'l' || text[start] == 'L') {
                      break;
                  }
                  throw new ValueErrorException("Invalid integer literal");
              }

              checked {
                  // include sign here so that System.Int32.MinValue won't overflow
                  ret = ret * b + sign * digit;
              }
              start++;
          }

          // Reaching the end of the text without consuming a single digit is an error.
          if (start >= end && start == firstDigit) {
              throw new ValueErrorException("Invalid integer literal");
          }
      } catch (OverflowException) {
          // Too large for Int32: start over from the original text and base.
          return ParseBigIntegerSign(text, saveb);
      }

      ParseIntegerEnd(text, start, end);

      return ScriptingRuntimeHelpers.Int32ToObject(ret);
  }
  /// <summary>
  /// Consumes the prefix of an integer: leading whitespace, an optional sign,
  /// more whitespace and, when the base is 0, a radix prefix.
  /// </summary>
  /// <param name="text">Text being parsed.</param>
  /// <param name="b">
  /// Base. When 0 on entry it is replaced by 16, 8 or 2 for a "0x", "0o" or "0b" prefix
  /// (either case), 8 for any other leading '0', and 10 otherwise. A non-zero base is
  /// left unchanged and no prefix is skipped.
  /// </param>
  /// <param name="start">Current position; advanced past everything consumed.</param>
  /// <param name="end">Exclusive end position.</param>
  /// <param name="sign">Set to -1 when a '-' is consumed; otherwise left unchanged.</param>
  private static void ParseIntegerStart(string text, ref int b, ref int start, int end, ref short sign) {
      // Skip whitespace
      while (start < end && Char.IsWhiteSpace(text[start])) {
          start++;
      }

      // Sign?
      if (start < end) {
          char first = text[start];
          if (first == '-') {
              sign = -1;
              start++;
          } else if (first == '+') {
              start++;
          }
      }

      // Skip whitespace
      while (start < end && Char.IsWhiteSpace(text[start])) {
          start++;
      }

      // Determine base
      if (b != 0) {
          return;
      }
      if (start >= end || text[start] != '0') {
          b = 10;
          return;
      }

      // Hex, oct, or bin
      int next = start + 1;
      if (next < end) {
          switch (text[next]) {
              case 'x':
              case 'X':
                  b = 16;
                  break;
              case 'o':
              case 'O':
                  b = 8;
                  break;
              case 'b':
              case 'B':
                  b = 2;
                  break;
          }
      }

      if (b != 0) {
          // Skip both characters of the radix prefix.
          start = next + 1;
      } else {
          // Keep the leading zero
          b = 8;
      }
  }
  /// <summary>
  /// Verifies that only whitespace remains between <paramref name="start"/> and
  /// <paramref name="end"/>, throwing a ValueErrorException otherwise.
  /// </summary>
  /// <param name="text">Text being parsed.</param>
  /// <param name="start">Position just past the last consumed character.</param>
  /// <param name="end">Exclusive end position.</param>
  private static void ParseIntegerEnd(string text, int start, int end) {
      for (int pos = start; pos < end; pos++) {
          // Any non-whitespace character after the digits is trailing garbage.
          if (!Char.IsWhiteSpace(text[pos])) {
              throw new ValueErrorException("invalid integer number literal");
          }
      }
  }
  /// <summary>
  /// Parses an unsigned run of digits, optionally followed by a single 'l' or 'L'
  /// suffix, into a BigInteger.
  /// </summary>
  /// <param name="text">
  /// Digits to parse. A final 'l'/'L' is always treated as a suffix and dropped, even in
  /// bases where 'l' would be a valid digit. Empty text yields zero.
  /// </param>
  /// <param name="b">Numeric base; asserted to be non-zero.</param>
  /// <returns>The parsed value.</returns>
  /// <remarks>
  /// Throws (via CharValue) when a character is not a digit of base <paramref name="b"/>.
  /// </remarks>
  public static BigInteger ParseBigInteger(string text, int b) {
      Debug.Assert(b != 0);
      BigInteger ret = BigInteger.Zero;
      BigInteger m = BigInteger.One;   // place value of the group being added

      int i = text.Length - 1;
      // Guarded so that empty text does not index before the start of the string.
      if (i >= 0 && (text[i] == 'l' || text[i] == 'L')) i -= 1;

      int groupMax = 7;
      if (b <= 10) groupMax = 9;// 2 147 483 647

      while (i >= 0) {
          // extract digits in a batch
          // 64-bit accumulators: a 7-digit group in base 36 is up to 36^7 - 1, which
          // fits in neither int nor uint.
          long smallMultiplier = 1;
          long groupValue = 0;

          for (int j = 0; j < groupMax && i >= 0; j++) {
              groupValue += CharValue(text[i--], b) * smallMultiplier;
              smallMultiplier *= b;
          }

          // this is more generous than needed
          ret += m * (BigInteger)groupValue;
          if (i >= 0) m = m * (BigInteger)smallMultiplier;
      }

      return ret;
  }
  /// <summary>
  /// Parses an optionally signed integer with optional surrounding whitespace and an
  /// optional trailing 'l'/'L' into a BigInteger.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <param name="b">Base from 2 to 36, or 0 to infer the base from the text's prefix.</param>
  /// <returns>The parsed value, negated when a '-' sign was present.</returns>
  /// <remarks>
  /// Throws ValueErrorException for an invalid base, for text without digits, for a digit
  /// too large for the base, and for trailing non-whitespace characters.
  /// </remarks>
  public static BigInteger ParseBigIntegerSign(string text, int b) {
      int start = 0, end = text.Length;
      short sign = 1;

      if (b < 0 || b == 1 || b > 36) {
          throw new ValueErrorException("base must be >= 2 and <= 36");
      }

      ParseIntegerStart(text, ref b, ref start, end, ref sign);

      BigInteger ret = BigInteger.Zero;
      int firstDigit = start;
      while (start < end) {
          int digit;
          if (!HexValue(text[start], out digit)) {
              break;
          }
          if (digit >= b) {
              if (text[start] == 'l' || text[start] == 'L') {
                  break;
              }
              throw new ValueErrorException("Invalid integer literal");
          }
          ret = ret * b + digit;
          start++;
      }

      // Reaching the end of the text without consuming a single digit is an error.
      if (start >= end && start == firstDigit) {
          throw new ValueErrorException("Invalid integer literal");
      }

      // Step over a single long-integer suffix.
      if (start < end && (text[start] == 'l' || text[start] == 'L')) {
          start++;
      }

      ParseIntegerEnd(text, start, end);

      return sign < 0 ? -ret : ret;
  }
  /// <summary>
  /// Parses a floating point literal, mapping an OverflowException from the parser to
  /// an infinity of the matching sign.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <returns>
  /// The parsed value, or negative/positive infinity on overflow depending on whether
  /// the left-stripped text starts with "-".
  /// </returns>
  public static double ParseFloat(string text) {
      try {
          //
          // The CLR parser tolerates a trailing '\0' but Python does not, so a string
          // ending in '\0' has to be rejected explicitly before parsing.
          //
          bool endsWithNul = text != null && text.Length > 0 && text[text.Length - 1] == '\0';
          if (endsWithNul) {
              throw PythonOps.ValueError("null byte in float literal");
          }
          return ParseFloatNoCatch(text);
      } catch (OverflowException) {
          if (text.lstrip().StartsWith("-")) {
              return Double.NegativeInfinity;
          }
          return Double.PositiveInfinity;
      }
  }
  /// <summary>
  /// Parses a floating point literal without translating parser exceptions.
  /// </summary>
  /// <param name="text">Text to parse; U+0660-U+0669 digits are mapped to ASCII first.</param>
  /// <returns>
  /// NaN or an infinity for the lower-cased, left-stripped spellings "nan", "+nan", "-nan",
  /// "inf", "+inf" and "-inf"; otherwise the invariant-culture parse of the text, with
  /// a zero result replaced by DoubleOps.NegativeZero when the text starts with "-".
  /// </returns>
  private static double ParseFloatNoCatch(string text) {
      string s = ReplaceUnicodeDigits(text);
      string keyword = s.lower().lstrip();

      if (keyword == "nan" || keyword == "+nan" || keyword == "-nan") {
          return double.NaN;
      }
      if (keyword == "inf" || keyword == "+inf") {
          return double.PositiveInfinity;
      }
      if (keyword == "-inf") {
          return double.NegativeInfinity;
      }

      // pass NumberStyles to disallow ,'s in float strings.
      double res = double.Parse(s, NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture);
      if (res == 0.0 && text.lstrip().StartsWith("-")) {
          return DoubleOps.NegativeZero;
      }
      return res;
  }
  /// <summary>
  /// Replaces every character in the range U+0660-U+0669 with the ASCII digit of the
  /// same value.
  /// </summary>
  /// <param name="text">Text to scan.</param>
  /// <returns>
  /// The input itself when it contains no such character, otherwise a new string with
  /// the replacements applied.
  /// </returns>
  private static string ReplaceUnicodeDigits(string text) {
      // Copied lazily so text without such digits costs no allocation.
      char[] rewritten = null;
      for (int i = 0; i < text.Length; i++) {
          char c = text[i];
          if (c < '\x660' || c > '\x669') {
              continue;
          }
          if (rewritten == null) {
              rewritten = text.ToCharArray();
          }
          rewritten[i] = (char)(c - '\x660' + '0');
      }
      return rewritten == null ? text : new String(rewritten);
  }
  // ParseComplex helpers
  // Sign characters searched for when splitting a complex literal; readonly because
  // the shared array reference is never reassigned.
  private static readonly char[] signs = new char[] { '+', '-' };
  /// <summary>
  /// Builds (but does not throw) the exception reported for a malformed complex string.
  /// </summary>
  /// <returns>The exception produced by PythonOps.ValueError.</returns>
  private static Exception ExnMalformed() {
      Exception malformed = PythonOps.ValueError("complex() arg is a malformed string");
      return malformed;
  }
  /// <summary>
  /// Parses the string form of a complex number such as "1+2j", "3j", "(1-j)" or "2.5".
  /// </summary>
  /// <param name="s">Text to parse; surrounding whitespace and case are ignored.</param>
  /// <returns>The parsed complex value.</returns>
  /// <remarks>
  /// Throws the exception built by PythonOps.ValueError when the text is empty, contains
  /// an inner space, overflows while parsing, or is otherwise malformed. Any other
  /// exception raised while parsing is reported as "malformed".
  /// </remarks>
  public static Complex ParseComplex(string s) {
      // remove no-meaning spaces and convert to lowercase
      // Invariant lower-casing: a culture-sensitive ToLower() would, under a Turkish
      // culture for example, turn "INF" into a string that no longer matches "inf".
      string text = s.Trim().ToLowerInvariant();
      if (String.IsNullOrEmpty(text)) {
          throw PythonOps.ValueError("complex() arg is an empty string");
      }
      if (text.IndexOf(' ') != -1) {
          throw ExnMalformed();
      }

      // remove 1 layer of parens
      if (text.StartsWith("(") && text.EndsWith(")")) {
          text = text.Substring(1, text.Length - 2);
      }

      try {
          int len = text.Length;

          string real, imag;

          if (text[len - 1] == 'j') {
              // last sign delimits real and imaginary...
              int signPos = text.LastIndexOfAny(signs);
              // ... unless it's after 'e', so we bypass up to 2 of those here
              for (int i = 0; signPos > 0 && text[signPos - 1] == 'e'; i++) {
                  if (i == 2) {
                      // too many 'e's
                      throw ExnMalformed();
                  }
                  signPos = text.Substring(0, signPos - 1).LastIndexOfAny(signs);
              }

              // no real component
              if (signPos < 0) {
                  // A bare "j" means 1j.
                  return MathUtils.MakeImaginary((len == 1) ? 1 : ParseFloatNoCatch(text.Substring(0, len - 1)));
              }

              real = text.Substring(0, signPos);
              imag = text.Substring(signPos, len - signPos - 1);
              if (imag.Length == 1) {
                  imag += "1"; // convert +/- to +1/-1
              }
          } else {
              // 'j' delimits real and imaginary
              string[] splitText = text.Split(new char[] { 'j' });

              // no imaginary component
              if (splitText.Length == 1) {
                  return MathUtils.MakeReal(ParseFloatNoCatch(text));
              }

              // there should only be one j
              if (splitText.Length != 2) {
                  throw ExnMalformed();
              }
              // Imaginary part written first: the real part is what follows the 'j'.
              real = splitText[1];
              imag = splitText[0];

              // a sign must follow the 'j'
              if (!(real.StartsWith("+") || real.StartsWith("-"))) {
                  throw ExnMalformed();
              }
          }

          return new Complex(String.IsNullOrEmpty(real) ? 0 : ParseFloatNoCatch(real), ParseFloatNoCatch(imag));
      } catch (OverflowException) {
          throw PythonOps.ValueError("complex() literal too large to convert");
      } catch {
          // Every other failure, including the malformed errors raised above, is
          // reported uniformly as a malformed string.
          throw ExnMalformed();
      }
  }
  /// <summary>
  /// Parses an imaginary literal by dropping its final character and parsing the rest
  /// as a double with the invariant culture's number format.
  /// </summary>
  /// <param name="text">Literal text; the last character is discarded unexamined.</param>
  /// <returns>
  /// A complex number with zero real part; the imaginary part is positive infinity
  /// when parsing overflows.
  /// </returns>
  public static Complex ParseImaginary(string text) {
      try {
          string digits = text.Substring(0, text.Length - 1);
          double magnitude = double.Parse(
              digits,
              System.Globalization.CultureInfo.InvariantCulture.NumberFormat
          );
          return MathUtils.MakeImaginary(magnitude);
      } catch (OverflowException) {
          return new Complex(0, Double.PositiveInfinity);
      }
  }
  582. }
  583. }