PageRenderTime 51ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/DICK.B1/IronPython/Runtime/LiteralParser.cs

https://bitbucket.org/williamybs/uidipythontool
C# | 619 lines | 592 code | 9 blank | 18 comment | 35 complexity | 28d7ab60e1c4636b5eecdd5200790386 MD5 | raw file
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections.Generic;
  17. using System.Diagnostics;
  18. using System.Globalization;
  19. using System.Text;
  20. using IronPython.Runtime.Operations;
  21. using Microsoft.Scripting.Utils;
  22. #if CLR2
  23. using Microsoft.Scripting.Math;
  24. using Complex = Microsoft.Scripting.Math.Complex64;
  25. #else
  26. using System.Numerics;
  27. #endif
  28. namespace IronPython.Runtime {
  29. /// <summary>
  30. /// Parses the text of Python literals (strings, bytes, integers, floats and complex numbers) into runtime values.
  31. /// </summary>
  32. public static class LiteralParser {
  /// <summary>
  /// Decodes the escapes in a string literal body, treating the text as a
  /// complete literal. Forwards to the four-argument overload.
  /// </summary>
  /// <param name="text">Literal body to decode.</param>
  /// <param name="isRaw">True when the literal is a raw string.</param>
  /// <param name="isUni">True when the literal is a unicode string.</param>
  /// <returns>The decoded string.</returns>
  public static string ParseString(string text, bool isRaw, bool isUni) {
      // This overload always describes a finished literal.
      const bool complete = true;
      return ParseString(text, isRaw, isUni, complete);
  }
  /// <summary>
  /// Decodes the backslash escapes found in the body of a Python string literal.
  /// </summary>
  /// <param name="text">Literal body to decode; must not be null.</param>
  /// <param name="isRaw">
  /// True for raw literals. A raw non-unicode literal is returned untouched; a raw
  /// unicode literal only has its \u and \U escapes decoded.
  /// </param>
  /// <param name="isUni">True to decode \uXXXX and \UXXXXXXXX escapes.</param>
  /// <param name="complete">
  /// False when <paramref name="text"/> may be cut short: a lone trailing backslash
  /// is then dropped instead of being reported.
  /// </param>
  /// <returns>The decoded string.</returns>
  /// <remarks>
  /// Throws the exception built by PythonOps.ValueError for a trailing backslash in a
  /// complete, non-raw literal, and the one built by PythonOps.UnicodeEncodeError for
  /// a truncated or out-of-range \u / \U escape.
  /// </remarks>
  public static string ParseString(string text, bool isRaw, bool isUni, bool complete) {
      Debug.Assert(text != null);
      if (isRaw && !isUni) return text;
      //PERFORMANCE-ISSUE ??? maybe optimize for the 0-escapes case
      StringBuilder buf = new StringBuilder(text.Length);
      int i = 0;
      int l = text.Length;
      int val;
      while (i < l) {
          char ch = text[i++];
          if (ch == '\\') {
              if (i >= l) {
                  // The backslash is the last character of the text.
                  if (!complete) {
                      break;
                  } else if (isRaw) {
                      buf.Append('\\');
                      break;
                  } else {
                      throw PythonOps.ValueError("Trailing \\ in string");
                  }
              }
              ch = text[i++];
              if (ch == 'u' || ch == 'U') {
                  int len = (ch == 'u') ? 4 : 8;
                  int max = 16;
                  if (isUni) {
                      if (TryParseInt(text, i, len, max, out val)) {
                          // Eight hex digits can exceed the Unicode range, and values of
                          // 0x80000000 and above come back negative from the int
                          // accumulator (assuming the default unchecked context).
                          if (val < 0 || val > 0x10FFFF) {
                              throw PythonOps.UnicodeEncodeError(@"'unicodeescape' codec can't decode bytes in position {0}: illegal Unicode character", i);
                          }
                          if (val < 0x10000) {
                              buf.Append((char)val);
                          } else {
                              // Supplementary-plane code points need a surrogate pair;
                              // a plain (char) cast would silently drop the high bits.
                              buf.Append(Char.ConvertFromUtf32(val));
                          }
                          i += len;
                      } else {
                          throw PythonOps.UnicodeEncodeError(@"'unicodeescape' codec can't decode bytes in position {0}: truncated \uXXXX escape", i);
                      }
                  } else {
                      // Not a unicode literal: keep the escape verbatim.
                      buf.Append('\\');
                      buf.Append(ch);
                  }
              } else {
                  if (isRaw) {
                      // Raw literals keep every other escape verbatim.
                      buf.Append('\\');
                      buf.Append(ch);
                      continue;
                  }
                  switch (ch) {
                      case 'a': buf.Append('\a'); continue;
                      case 'b': buf.Append('\b'); continue;
                      case 'f': buf.Append('\f'); continue;
                      case 'n': buf.Append('\n'); continue;
                      case 'r': buf.Append('\r'); continue;
                      case 't': buf.Append('\t'); continue;
                      case 'v': buf.Append('\v'); continue;
                      case '\\': buf.Append('\\'); continue;
                      case '\'': buf.Append('\''); continue;
                      case '\"': buf.Append('\"'); continue;
                      // Backslash-newline is a line continuation and produces nothing.
                      case '\r': if (i < l && text[i] == '\n') i++; continue;
                      case '\n': continue;
                      case 'x': //hex
                          if (!TryParseInt(text, i, 2, 16, out val)) {
                              goto default;
                          }
                          buf.Append((char)val);
                          i += 2;
                          continue;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7': {
                              // Octal escape: up to two more octal digits may follow.
                              int onechar;
                              val = ch - '0';
                              if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                                  val = val * 8 + onechar;
                                  i++;
                                  if (i < l && HexValue(text[i], out onechar) && onechar < 8) {
                                      val = val * 8 + onechar;
                                      i++;
                                  }
                              }
                          }
                          buf.Append((char)val);
                          continue;
                      default:
                          // Unrecognised escape: keep the backslash and the character.
                          buf.Append("\\");
                          buf.Append(ch);
                          continue;
                  }
              }
          } else {
              buf.Append(ch);
          }
      }
      return buf.ToString();
  }
  /// <summary>
  /// Converts the body of a Python bytes literal into a list of byte values,
  /// decoding backslash escapes unless the literal is raw.
  /// </summary>
  /// <param name="text">Literal body to convert; must not be null.</param>
  /// <param name="isRaw">True to copy every character through without decoding escapes.</param>
  /// <param name="complete">
  /// False when <paramref name="text"/> may be cut short: a lone trailing backslash
  /// then ends parsing quietly instead of being reported via PythonOps.ValueError.
  /// </param>
  /// <returns>The bytes of the literal; each character or escape value is narrowed with a byte cast.</returns>
  internal static List<byte> ParseBytes(string text, bool isRaw, bool complete) {
      Debug.Assert(text != null);
      //PERFORMANCE-ISSUE ??? maybe optimize for the 0-escapes case
      List<byte> bytes = new List<byte>(text.Length);
      int pos = 0;
      int length = text.Length;

      while (pos < length) {
          char current = text[pos++];

          // Anything that does not start an escape is copied through as-is.
          if (isRaw || current != '\\') {
              bytes.Add((byte)current);
              continue;
          }

          if (pos >= length) {
              // The backslash is the last character of the text.
              if (!complete) {
                  break;
              }
              throw PythonOps.ValueError("Trailing \\ in string");
          }

          char escape = text[pos++];
          int code;
          switch (escape) {
              case 'a': bytes.Add((byte)'\a'); break;
              case 'b': bytes.Add((byte)'\b'); break;
              case 'f': bytes.Add((byte)'\f'); break;
              case 'n': bytes.Add((byte)'\n'); break;
              case 'r': bytes.Add((byte)'\r'); break;
              case 't': bytes.Add((byte)'\t'); break;
              case 'v': bytes.Add((byte)'\v'); break;
              case '\\': bytes.Add((byte)'\\'); break;
              case '\'': bytes.Add((byte)'\''); break;
              case '\"': bytes.Add((byte)'\"'); break;
              case '\r':
                  // Line continuation; also swallow the '\n' of a "\r\n" pair.
                  if (pos < length && text[pos] == '\n') {
                      pos++;
                  }
                  break;
              case '\n':
                  // Line continuation produces no output.
                  break;
              case 'x':
                  if (TryParseInt(text, pos, 2, 16, out code)) {
                      bytes.Add((byte)code);
                      pos += 2;
                  } else {
                      // Not followed by two hex digits: keep the escape verbatim.
                      bytes.Add((byte)'\\');
                      bytes.Add((byte)escape);
                  }
                  break;
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                  // Octal escape: the first digit plus up to two more octal digits.
                  code = escape - '0';
                  for (int extra = 0; extra < 2 && pos < length; extra++) {
                      int digit;
                      if (!HexValue(text[pos], out digit) || digit >= 8) {
                          break;
                      }
                      code = code * 8 + digit;
                      pos++;
                  }
                  bytes.Add((byte)code);
                  break;
              default:
                  // Unrecognised escape: keep the backslash and the character.
                  bytes.Add((byte)'\\');
                  bytes.Add((byte)escape);
                  break;
          }
      }
      return bytes;
  }
  /// <summary>
  /// Maps a digit character to its numeric value without throwing.
  /// </summary>
  /// <param name="ch">
  /// Character to convert. Accepted are '0'-'9', the digits U+0660-U+0669, and the
  /// letters 'a'-'z' / 'A'-'Z' (which map to 10-35).
  /// </param>
  /// <param name="value">Receives the digit value, or -1 when the character is not accepted.</param>
  /// <returns>True when <paramref name="ch"/> was recognised as a digit.</returns>
  private static bool HexValue(char ch, out int value) {
      if (ch >= '0' && ch <= '9') {
          value = ch - '0';
      } else if (ch >= '\u0660' && ch <= '\u0669') {
          value = ch - '\u0660';
      } else if (ch >= 'a' && ch <= 'z') {
          value = ch - 'a' + 10;
      } else if (ch >= 'A' && ch <= 'Z') {
          value = ch - 'A' + 10;
      } else {
          value = -1;
          return false;
      }
      return true;
  }
  /// <summary>
  /// Maps a digit character to its numeric value, throwing when it is not a digit.
  /// </summary>
  /// <param name="ch">Character to convert.</param>
  /// <returns>The digit value reported by the two-argument overload.</returns>
  /// <exception cref="ArgumentException">The character is not a recognised digit.</exception>
  private static int HexValue(char ch) {
      int digit;
      if (HexValue(ch, out digit)) {
          return digit;
      }
      throw new ArgumentException("bad char for integer value: " + ch);
  }
  /// <summary>
  /// Returns the value of a digit character after checking it is valid for a base.
  /// </summary>
  /// <param name="ch">Digit character to convert.</param>
  /// <param name="b">Numeric base; the digit value must be strictly below it.</param>
  /// <returns>The digit value.</returns>
  /// <exception cref="ArgumentException">
  /// The character is not a digit, or its value is not below <paramref name="b"/>.
  /// </exception>
  private static int CharValue(char ch, int b) {
      int digit = HexValue(ch);
      if (digit < b) {
          return digit;
      }
      throw new ArgumentException(String.Format("bad char for the integer value: '{0}' (base {1})", ch, b));
  }
  /// <summary>
  /// Tries to parse an unsigned run of digits as a 32-bit integer.
  /// </summary>
  /// <param name="text">Digits to parse, most significant first; an empty string yields 0.</param>
  /// <param name="b">Numeric base of the digits.</param>
  /// <param name="ret">Receives the parsed value; holds a partial result when false is returned.</param>
  /// <returns>True when the whole text was consumed and the value fits in an Int32.</returns>
  /// <exception cref="ArgumentException">Thrown by CharValue when a character is not a valid digit for the base.</exception>
  private static bool ParseInt(string text, int b, out int ret) {
      ret = 0;
      long m = 1; // place value of the digit currently being processed
      for (int i = text.Length - 1; i >= 0; i--) {
          // Accumulate in a long and range-check by hand instead of relying on
          // an overflow exception: throwing it (and loading its resources) is
          // expensive.
          long lret = (long)ret + m * CharValue(text[i], b);
          if (Int32.MinValue <= lret && lret <= Int32.MaxValue) {
              ret = (int)lret;
          } else {
              return false;
          }
          // The next place value only matters if another digit remains. Checking
          // it after the final digit used to reject values that fit in an Int32
          // (e.g. "1000000000" in base 10).
          if (i > 0) {
              m *= b;
              if (Int32.MinValue > m || m > Int32.MaxValue) {
                  return false;
              }
          }
      }
      return true;
  }
  /// <summary>
  /// Tries to parse a fixed-width run of digits taken from inside a string.
  /// </summary>
  /// <param name="text">String containing the digits.</param>
  /// <param name="start">Index of the first digit.</param>
  /// <param name="length">Exact number of digits required.</param>
  /// <param name="b">Numeric base; every digit value must be below it.</param>
  /// <param name="value">Receives the parsed value; holds a partial result when false is returned.</param>
  /// <returns>
  /// False when fewer than <paramref name="length"/> characters are available or a
  /// character is not a valid digit for the base; true otherwise.
  /// </returns>
  private static bool TryParseInt(string text, int start, int length, int b, out int value) {
      value = 0;
      int end = start + length;
      if (end > text.Length) {
          return false;
      }
      int pos = start;
      while (pos < end) {
          int digit;
          if (!HexValue(text[pos], out digit) || digit >= b) {
              return false;
          }
          value = value * b + digit;
          pos++;
      }
      return true;
  }
  /// <summary>
  /// Parses an unsigned integer literal, returning the smallest representation that holds it.
  /// </summary>
  /// <param name="text">Digits of the literal.</param>
  /// <param name="b">Numeric base; must not be 0.</param>
  /// <returns>
  /// A boxed Int32 when the value fits in 32 bits, otherwise a boxed BigInteger.
  /// </returns>
  public static object ParseInteger(string text, int b) {
      Debug.Assert(b != 0);
      int small;
      if (ParseInt(text, b, out small)) {
          return small;
      }
      // The fast path gave up; parse with arbitrary precision and narrow the
      // result again if it turns out to fit after all.
      BigInteger big = ParseBigInteger(text, b);
      if (big.AsInt32(out small)) {
          return small;
      }
      return big;
  }
  /// <summary>
  /// Parses integer text that may carry surrounding whitespace, a sign and
  /// (for base 0) a radix prefix.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <param name="b">Numeric base from 2 to 36, or 0 to detect the base from the text.</param>
  /// <returns>
  /// A boxed Int32, or the result of ParseBigIntegerSign when the value overflows 32 bits.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// The base is out of range, no digits are present, a digit is invalid for the
  /// base, or unexpected characters follow the digits.
  /// </exception>
  public static object ParseIntegerSign(string text, int b) {
      int pos = 0, limit = text.Length, requestedBase = b;
      short sign = 1;
      if (b < 0 || b == 1 || b > 36) {
          throw new ArgumentException("base must be >= 2 and <= 36");
      }
      ParseIntegerStart(text, ref b, ref pos, limit, ref sign);

      int result = 0;
      int firstDigit = pos;
      try {
          while (true) {
              if (pos >= limit) {
                  // Ran off the end: fine as long as a digit was seen.
                  if (pos == firstDigit) {
                      throw new ArgumentException("Invalid integer literal");
                  }
                  break;
              }

              char current = text[pos];
              int digit;
              if (!HexValue(current, out digit)) {
                  break;
              }
              if (digit >= b) {
                  // An 'l'/'L' stops the digit run; ParseIntegerEnd below deals with it.
                  if (current == 'l' || current == 'L') {
                      break;
                  }
                  throw new ArgumentException("Invalid integer literal");
              }

              // include sign here so that System.Int32.MinValue won't overflow
              result = checked(result * b + sign * digit);
              pos++;
          }
      } catch (OverflowException) {
          // Too large for an Int32: start over with arbitrary precision.
          return ParseBigIntegerSign(text, requestedBase);
      }

      ParseIntegerEnd(text, pos, limit);
      return result;
  }
  /// <summary>
  /// Consumes the leading part of integer text: whitespace, an optional sign,
  /// more whitespace and, when the base is 0, a radix prefix.
  /// </summary>
  /// <param name="text">Text being parsed.</param>
  /// <param name="b">
  /// Numeric base. When 0 on entry it is replaced by the detected base: 16 for
  /// "0x"/"0X", 8 for "0o"/"0O" or any other leading '0', 2 for "0b"/"0B", else 10.
  /// </param>
  /// <param name="start">Current index into <paramref name="text"/>; advanced past everything consumed.</param>
  /// <param name="end">Index one past the last character to examine.</param>
  /// <param name="sign">Set to -1 when a '-' sign is consumed; otherwise left unchanged.</param>
  private static void ParseIntegerStart(string text, ref int b, ref int start, int end, ref short sign) {
      // Skip whitespace
      while (start < end && Char.IsWhiteSpace(text, start)) start++;

      // Sign?
      if (start < end) {
          switch (text[start]) {
              case '-':
                  sign = -1;
                  goto case '+';
              case '+':
                  start++;
                  break;
          }
      }

      // Skip whitespace
      while (start < end && Char.IsWhiteSpace(text, start)) start++;

      // Determine base
      if (b == 0) {
          if (start < end && text[start] == '0') {
              // Hex, oct, or bin
              b = 8;
              int next = start + 1;
              if (next < end) {
                  char marker = text[next];
                  if (marker == 'x' || marker == 'X') {
                      b = 16;
                      start = next + 1;
                  } else if (marker == 'o' || marker == 'O') {
                      start = next + 1;
                  } else if (marker == 'b' || marker == 'B') {
                      b = 2;
                      start = next + 1;
                  }
              }
              // Without a radix letter the '0' is left in place so it is parsed
              // as a digit. Consuming it made the literal "0" look like it had
              // no digits at all, which callers reject as invalid.
          } else {
              b = 10;
          }
      }
  }
  /// <summary>
  /// Verifies that only whitespace remains after the digits of an integer.
  /// </summary>
  /// <param name="text">Text being parsed.</param>
  /// <param name="start">Index of the first character after the digits.</param>
  /// <param name="end">Index one past the last character to examine.</param>
  /// <exception cref="ArgumentException">A non-whitespace character is found before <paramref name="end"/>.</exception>
  private static void ParseIntegerEnd(string text, int start, int end) {
      for (int pos = start; pos < end; pos++) {
          if (!Char.IsWhiteSpace(text, pos)) {
              throw new ArgumentException("invalid integer number literal");
          }
      }
  }
  /// <summary>
  /// Parses an unsigned run of digits, optionally followed by an 'l'/'L' suffix,
  /// into an arbitrary-precision integer.
  /// </summary>
  /// <param name="text">Digits to parse, most significant first. Empty text (or a lone suffix) yields zero.</param>
  /// <param name="b">Numeric base of the digits; must not be 0.</param>
  /// <returns>The parsed value.</returns>
  /// <exception cref="ArgumentException">Thrown by CharValue when a character is not a valid digit for the base.</exception>
  public static BigInteger ParseBigInteger(string text, int b) {
      Debug.Assert(b != 0);
      BigInteger ret = BigInteger.Zero;
      BigInteger m = BigInteger.One;
      int i = text.Length - 1;
      // Guard the index so empty text does not read text[-1].
      if (i >= 0 && (text[i] == 'l' || text[i] == 'L')) i -= 1;

      // Digits are folded into native integers a group at a time. The group
      // size is the largest digit count (capped at 9) for which b^count still
      // fits in an Int32, so neither the group value nor its multiplier can
      // overflow. A fixed size of 7 overflowed for bases 22 through 36.
      int groupMax = 1;
      for (long cap = b; groupMax < 9 && cap * b <= Int32.MaxValue; cap *= b) {
          groupMax++;
      }

      while (i >= 0) {
          // extract digits in a batch, least significant digit first
          int smallMultiplier = 1;
          uint uval = 0;
          for (int j = 0; j < groupMax && i >= 0; j++) {
              uval = (uint)(CharValue(text[i--], b) * smallMultiplier + uval);
              smallMultiplier *= b;
          }
          ret += m * (BigInteger)uval;
          // Scale the place value only when more digits remain.
          if (i >= 0) m = m * (smallMultiplier);
      }
      return ret;
  }
  /// <summary>
  /// Parses integer text that may carry surrounding whitespace, a sign, a radix
  /// prefix (for base 0) and a trailing 'l'/'L' into an arbitrary-precision integer.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <param name="b">Numeric base from 2 to 36, or 0 to detect the base from the text.</param>
  /// <returns>The parsed value, negated when a '-' sign was present.</returns>
  /// <exception cref="ArgumentException">
  /// The base is out of range, no digits are present, a digit is invalid for the
  /// base, or unexpected characters follow the number.
  /// </exception>
  public static BigInteger ParseBigIntegerSign(string text, int b) {
      int pos = 0, limit = text.Length;
      short sign = 1;
      if (b < 0 || b == 1 || b > 36) {
          throw new ArgumentException("base must be >= 2 and <= 36");
      }
      ParseIntegerStart(text, ref b, ref pos, limit, ref sign);

      BigInteger magnitude = BigInteger.Zero;
      int firstDigit = pos;
      while (true) {
          if (pos >= limit) {
              // Ran off the end: fine as long as a digit was seen.
              if (pos == firstDigit) {
                  throw new ArgumentException("Invalid integer literal");
              }
              break;
          }

          char current = text[pos];
          int digit;
          if (!HexValue(current, out digit)) {
              break;
          }
          if (digit >= b) {
              // An 'l'/'L' that is not a digit in this base ends the digit run.
              if (current == 'l' || current == 'L') {
                  break;
              }
              throw new ArgumentException("Invalid integer literal");
          }

          magnitude = magnitude * b + digit;
          pos++;
      }

      // Step over the suffix character, if that is what stopped the loop.
      if (pos < limit && (text[pos] == 'l' || text[pos] == 'L')) {
          pos++;
      }
      ParseIntegerEnd(text, pos, limit);

      if (sign < 0) {
          return -magnitude;
      }
      return magnitude;
  }
  /// <summary>
  /// Parses float text, mapping overflow to a signed infinity.
  /// </summary>
  /// <param name="text">Text to parse.</param>
  /// <returns>
  /// The parsed value; on OverflowException, negative infinity when the text
  /// (after lstrip) starts with "-", positive infinity otherwise.
  /// </returns>
  public static double ParseFloat(string text) {
      try {
          // The CLR parser tolerates a trailing '\0' but Python does not, so
          // that case has to be rejected before handing the text over.
          bool endsWithNul = text != null && text.Length > 0 && text[text.Length - 1] == '\0';
          if (endsWithNul) {
              throw PythonOps.ValueError("null byte in float literal");
          }
          return ParseFloatNoCatch(text);
      } catch (OverflowException) {
          if (text.lstrip().StartsWith("-")) {
              return Double.NegativeInfinity;
          }
          return Double.PositiveInfinity;
      }
  }
  /// <summary>
  /// Parses float text without translating parser exceptions.
  /// </summary>
  /// <param name="text">Text to parse; digits U+0660-U+0669 are mapped to ASCII first.</param>
  /// <returns>
  /// NaN or a signed infinity for the "nan"/"inf" spellings (case-insensitive,
  /// leading whitespace ignored), negative zero for a zero result whose text
  /// starts with "-", otherwise the value produced by double.Parse.
  /// </returns>
  private static double ParseFloatNoCatch(string text) {
      string s = ReplaceUnicodeDigits(text);
      string keyword = s.lower().lstrip();

      if (keyword == "nan" || keyword == "+nan" || keyword == "-nan") {
          return double.NaN;
      }
      if (keyword == "inf" || keyword == "+inf") {
          return double.PositiveInfinity;
      }
      if (keyword == "-inf") {
          return double.NegativeInfinity;
      }

      // pass NumberStyles to disallow ,'s in float strings.
      double res = double.Parse(s, NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture);
      if (res == 0.0 && text.lstrip().StartsWith("-")) {
          // Keep the sign of a negative zero.
          return DoubleOps.NegativeZero;
      }
      return res;
  }
  /// <summary>
  /// Replaces the digits U+0660-U+0669 with the ASCII digits '0'-'9'.
  /// </summary>
  /// <param name="text">Text to convert.</param>
  /// <returns>
  /// The same string instance when nothing needed replacing, otherwise a new
  /// string with the digits substituted.
  /// </returns>
  private static string ReplaceUnicodeDigits(string text) {
      char[] rewritten = null;
      for (int i = 0; i < text.Length; i++) {
          char current = text[i];
          if (current < '\u0660' || current > '\u0669') {
              continue;
          }
          // Copy lazily so untouched strings cost no allocation.
          if (rewritten == null) {
              rewritten = text.ToCharArray();
          }
          rewritten[i] = (char)(current - '\u0660' + '0');
      }
      return rewritten == null ? text : new string(rewritten);
  }
  // Helpers used by ParseComplex.

  /// <summary>Sign characters that ParseComplex searches for when splitting the text.</summary>
  private static char[] signs = { '+', '-' };

  /// <summary>Builds the exception ParseComplex raises for text it cannot parse.</summary>
  private static Exception ExnMalformed() {
      Exception malformed = PythonOps.ValueError("complex() arg is a malformed string");
      return malformed;
  }
  /// <summary>
  /// Parses the string form of a complex number.
  /// </summary>
  /// <param name="s">
  /// Text to parse. Surrounding whitespace and one layer of parentheses are
  /// ignored and case is not significant. Accepted shapes are a plain real
  /// ("1.5"), a plain imaginary ("2j"), real followed by imaginary ("1+2j") and
  /// imaginary followed by a signed real ("2j+1").
  /// </param>
  /// <returns>The parsed complex value.</returns>
  /// <remarks>
  /// Failures are reported with the exceptions built by PythonOps.ValueError:
  /// one message for empty input, one for values too large to convert, and the
  /// "malformed string" message for everything else.
  /// </remarks>
  public static Complex ParseComplex(string s) {
      // remove no-meaning spaces and convert to lowercase. The invariant
      // culture is used so the result does not depend on the current culture's
      // casing rules (a culture-sensitive ToLower can turn 'I' into a dotless
      // i, which breaks "INF").
      string text = s.Trim().ToLowerInvariant();
      if (String.IsNullOrEmpty(text)) {
          throw PythonOps.ValueError("complex() arg is an empty string");
      }
      if (text.IndexOf(' ') != -1) {
          throw ExnMalformed();
      }
      // remove 1 layer of parens
      if (text.StartsWith("(") && text.EndsWith(")")) {
          text = text.Substring(1, text.Length - 2);
      }
      try {
          int len = text.Length;
          string real, imag;
          if (text[len - 1] == 'j') {
              // last sign delimits real and imaginary...
              int signPos = text.LastIndexOfAny(signs);
              // ... unless it's after 'e', so we bypass up to 2 of those here
              for (int i = 0; signPos > 0 && text[signPos - 1] == 'e'; i++) {
                  if (i == 2) {
                      // too many 'e's
                      throw ExnMalformed();
                  }
                  signPos = text.Substring(0, signPos - 1).LastIndexOfAny(signs);
              }
              // no real component
              if (signPos < 0) {
                  // A bare "j" counts as 1j.
                  return MathUtils.MakeImaginary((len == 1) ? 1 : ParseFloatNoCatch(text.Substring(0, len - 1)));
              }
              real = text.Substring(0, signPos);
              imag = text.Substring(signPos, len - signPos - 1);
              if (imag.Length == 1) {
                  imag += "1"; // convert +/- to +1/-1
              }
          } else {
              // 'j' delimits real and imaginary
              string[] splitText = text.Split(new char[] { 'j' });
              // no imaginary component
              if (splitText.Length == 1) {
                  return MathUtils.MakeReal(ParseFloatNoCatch(text));
              }
              // there should only be one j
              if (splitText.Length != 2) {
                  throw ExnMalformed();
              }
              real = splitText[1];
              imag = splitText[0];
              // a sign must follow the 'j'
              if (!(real.StartsWith("+") || real.StartsWith("-"))) {
                  throw ExnMalformed();
              }
          }
          // An empty real part (e.g. "+2j") is treated as zero.
          return new Complex(String.IsNullOrEmpty(real) ? 0 : ParseFloatNoCatch(real), ParseFloatNoCatch(imag));
      } catch (OverflowException) {
          throw PythonOps.ValueError("complex() literal too large to convert");
      } catch {
          // Any other failure (bad number format, index out of range on text
          // such as "()", ...) is reported as a malformed argument.
          throw ExnMalformed();
      }
  }
  /// <summary>
  /// Parses an imaginary literal into a complex value with a zero real part.
  /// </summary>
  /// <param name="text">
  /// Literal text; the final character is dropped before the rest is parsed as a
  /// double (presumably the 'j' suffix; confirm against the caller).
  /// </param>
  /// <returns>
  /// The imaginary value, or a complex value with a positive-infinity imaginary
  /// part when parsing overflows.
  /// </returns>
  public static Complex ParseImaginary(string text) {
      try {
          // NumberStyles.Float keeps thousands separators out, matching how
          // ParseFloatNoCatch parses floats; the style-less overload allows them.
          return MathUtils.MakeImaginary(double.Parse(
              text.Substring(0, text.Length - 1),
              NumberStyles.Float,
              System.Globalization.CultureInfo.InvariantCulture.NumberFormat
              ));
      } catch (OverflowException) {
          return new Complex(0, Double.PositiveInfinity);
      }
  }
  554. }
  555. }