PageRenderTime 52ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/ExprObjModel/SchemeLexParse.cs

http://swscheme.codeplex.com
C# | 1140 lines | 1029 code | 78 blank | 33 comment | 190 complexity | 9ecbfdbe60267aa62bd74710731b9f70 MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. This file is part of Sunlit World Scheme
  3. http://swscheme.codeplex.com/
  4. Copyright (c) 2010 by Edward Kiser (edkiser@gmail.com)
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License along
  14. with this program; if not, write to the Free Software Foundation, Inc.,
  15. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. */
  17. using System;
  18. using System.Collections.Generic;
  19. using System.Linq;
  20. using System.Text;
  21. using System.Text.RegularExpressions;
  22. using ExprObjModel.Lexing;
  23. using BigMath;
  24. using ControlledWindowLib;
  25. namespace ExprObjModel
  26. {
  // Every kind of lexeme the scanner can report.
  // The declaration order fixes the numeric values; SchemeScanner.Scan casts the
  // matcher's accept code to this enum, so members must not be reordered.
  public enum LexemeType
  {
      // Whitespace, openers, closer and dot.
      Whitespace,
      LeftParen,
      PoundLeftParen,
      PoundSLeftParen,
      PoundMLeftParen,
      RightParen,
      Dot,

      // Further "#name(" openers.
      PoundSigLeftParen,
      PoundMsgLeftParen,
      PoundVector3LeftParen,
      PoundVertex3LeftParen,
      PoundVector2LeftParen,
      PoundVertex2LeftParen,
      PoundQuatLeftParen,

      // Quotation prefixes.
      Quote,
      QuasiQuote,
      Unquote,
      UnquoteSplicing,

      // Pieces of a string literal.
      BeginString,
      CharEscape,
      HexEscape,
      OctEscape,
      UnicodeEscape,
      StrChars,
      EndString,

      // Delimiters of a |bar-quoted| symbol.
      BeginSymbol,
      EndSymbol,

      // Network and GUID literals.
      IPV4EndPoint,
      IPV4Address,
      IPV6EndPoint,
      IPV6Address,
      Guid,

      // Atoms.
      Symbol,
      Integer,
      HexInteger,
      OctalInteger,
      Double,
      Char,
      HexChar,
      BooleanTrue,
      BooleanFalse,

      // Pieces of a fraction literal.
      Numerator,
      FractionBar,
      Denominator,

      // Pieces of a line comment.
      BeginComment,
      CommentChars,
      EndComment,

      // Scanner status results.
      EndOfInput,
      LexicalError,
  }
  // One scanned lexeme: its classification together with the matched text.
  public struct ScanResult
  {
      // Kind of lexeme that was recognized.
      public LexemeType type;

      // Text of the lexeme; SchemeScanner.Scan stores "" here for EndOfInput and
      // the single offending character for LexicalError.
      public string str;
  }
  // Modal scanner for Scheme source text. Five matchers ("modes") are built once
  // in the static constructor; the instance only remembers which mode is active.
  // Mode 0 = top level, 1 = inside a string, 2 = inside a |symbol|,
  // 3 = inside a comment, 4 = inside a fraction (after a numerator).
  // NOTE(review): the matchers are static and are queried for AcceptCode / Match /
  // MatchLength after Matches() returns, so they appear to carry per-match state
  // shared by every scanner instance -- confirm before scanning concurrently.
  public class SchemeScanner
  {
      private static IMatcher<LexemeType>[] modes;

      private int mode;

      // Builds the matcher table. A single factory is reused: each mode's rules are
      // registered, a matcher is taken, and the factory is cleared for the next mode.
      // The registration order inside each mode is kept exactly as written.
      static SchemeScanner()
      {
          modes = new IMatcher<LexemeType>[5];
          CompoundMatcherFactory<LexemeType> factory = new CompoundMatcherFactory<LexemeType>();

          AddTopLevelRules(factory);
          modes[0] = factory.GetMatcher();

          // ----- in string -----
          factory.Clear();
          AddStringRules(factory);
          modes[1] = factory.GetMatcher();

          // ----- in symbol -----
          factory.Clear();
          AddSymbolRules(factory);
          modes[2] = factory.GetMatcher();

          // ----- in comment -----
          factory.Clear();
          AddCommentRules(factory);
          modes[3] = factory.GetMatcher();

          // ----- in fraction -----
          factory.Clear();
          AddFractionRules(factory);
          modes[4] = factory.GetMatcher();
      }

      public SchemeScanner()
      {
          mode = 0;
      }

      // Rules for mode 0: everything that can appear outside strings, bar-quoted
      // symbols, comments and fractions.
      private static void AddTopLevelRules(CompoundMatcherFactory<LexemeType> factory)
      {
          factory.AddRegex(@"\G\s+", LexemeType.Whitespace);
          factory.AddString("(", LexemeType.LeftParen);
          factory.AddString("#(", LexemeType.PoundLeftParen);
          factory.AddString("#s(", LexemeType.PoundSLeftParen);
          factory.AddString("#m(", LexemeType.PoundMLeftParen);
          factory.AddString("#sig(", LexemeType.PoundSigLeftParen);
          factory.AddString("#msg(", LexemeType.PoundMsgLeftParen);
          factory.AddString("#vec3(", LexemeType.PoundVector3LeftParen);
          factory.AddString("#vtx3(", LexemeType.PoundVertex3LeftParen);
          factory.AddString("#vec2(", LexemeType.PoundVector2LeftParen);
          factory.AddString("#vtx2(", LexemeType.PoundVertex2LeftParen);
          factory.AddString("#quat(", LexemeType.PoundQuatLeftParen);
          factory.AddString(")", LexemeType.RightParen);
          factory.AddString(".", LexemeType.Dot);
          factory.AddString("'", LexemeType.Quote);
          factory.AddString("`", LexemeType.QuasiQuote);
          factory.AddString(",", LexemeType.Unquote);
          factory.AddString(",@", LexemeType.UnquoteSplicing);
          factory.AddString("\"", LexemeType.BeginString);
          factory.AddString("|", LexemeType.BeginSymbol);
          factory.AddString(";", LexemeType.BeginComment);
          factory.AddString("#t", LexemeType.BooleanTrue);
          factory.AddString("#f", LexemeType.BooleanFalse);
          factory.AddRegex(@"\G#g\{([0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12})\}", LexemeType.Guid);
          // A numerator is an integer that is immediately followed by '/'.
          factory.AddRegex(@"\G(\+|-)?[0-9]+(?=/)", LexemeType.Numerator);
          factory.AddRegex(@"\G(\+|-)?(([0-9]+[Ee](\+|-)?[0-9]+)|([0-9]+\.[0-9]*)([Ee](\+|-)?[0-9]+)?|(([0-9]*\.[0-9]+)([Ee](\+|-)?[0-9]+)?))", LexemeType.Double);
          factory.AddRegex(@"\G(\+|-)?[0-9]+", LexemeType.Integer);
          factory.AddRegex(@"\G(\+|-|~)?#x[0-9A-Fa-f]+", LexemeType.HexInteger);
          factory.AddRegex(@"\G#\\x[0-9A-Fa-f]{4}", LexemeType.HexChar);
          factory.AddRegex(@"\G#\\[A-Za-z]+", LexemeType.Char);
          factory.AddRegex(@"\G#\\[!-~]", LexemeType.Char);
          factory.AddRegex(@"\G[A-Za-z!$%&*+./:<=>?@^_~][A-Za-z0-9!$%&*+\-./:<=>?@^_~]*", LexemeType.Symbol);
          factory.AddRegex(@"\G-(?:[A-Za-z!$%&*+./:<=>?@^_~][A-Za-z0-9!$%&*+\-./:<=>?@^_~]*)?", LexemeType.Symbol);
          factory.AddRegex(@"\G#ipv4\[[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\]:[0-9]+", LexemeType.IPV4EndPoint);
          factory.AddRegex(@"\G#ipv4\[[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\]", LexemeType.IPV4Address);
          factory.AddRegex(@"\G#ipv6\[([0-9A-Fa-f:]*)\]:[0-9]+", LexemeType.IPV6EndPoint);
          factory.AddRegex(@"\G#ipv6\[([0-9A-Fa-f:]*)\]", LexemeType.IPV6Address);
      }

      // Rules for mode 1: escapes, runs of plain characters, and the closing quote.
      private static void AddStringRules(CompoundMatcherFactory<LexemeType> factory)
      {
          factory.AddRegex(@"\\[abtnvfre\\\""]", LexemeType.CharEscape);
          factory.AddRegex(@"\\x[0-9A-Fa-f]{2}", LexemeType.HexEscape);
          factory.AddRegex(@"\\[0-7]{3}", LexemeType.OctEscape);
          factory.AddRegex(@"\\u[0-9A-Fa-f]{4}", LexemeType.UnicodeEscape);
          factory.AddRegex(@"[ !#-\[\]-~]+", LexemeType.StrChars);
          factory.AddString("\"", LexemeType.EndString);
      }

      // Rules for mode 2: same shape as string mode, but terminated by '|'.
      private static void AddSymbolRules(CompoundMatcherFactory<LexemeType> factory)
      {
          factory.AddRegex(@"\\[abtnvfre\\\|]", LexemeType.CharEscape);
          factory.AddRegex(@"\\x[0-9A-Fa-f]{2}", LexemeType.HexEscape);
          factory.AddRegex(@"\\[0-7]{3}", LexemeType.OctEscape);
          factory.AddRegex(@"\\u[0-9A-Fa-f]{4}", LexemeType.UnicodeEscape);
          factory.AddRegex(@"[ !-\[\]-{}-~]+", LexemeType.StrChars);
          factory.AddString("|", LexemeType.EndSymbol);
      }

      // Rules for mode 3: printable text up to the newline that ends the comment.
      private static void AddCommentRules(CompoundMatcherFactory<LexemeType> factory)
      {
          factory.AddRegex(@"[ -~]+", LexemeType.CommentChars);
          factory.AddRegex(@"\n", LexemeType.EndComment);
      }

      // Rules for mode 4: the fraction bar and the denominator that follows it.
      private static void AddFractionRules(CompoundMatcherFactory<LexemeType> factory)
      {
          factory.AddRegex(@"/", LexemeType.FractionBar);
          factory.AddRegex(@"\G-?(0|[1-9][0-9]*)", LexemeType.Denominator);
      }

      // Mode transition table: which mode follows lexeme l when the scanner is in
      // the given mode. Lexemes that neither open nor close a region keep the mode.
      private static int NextMode(int mode, LexemeType l)
      {
          switch (l)
          {
              case LexemeType.BeginString:
                  return 1;
              case LexemeType.BeginSymbol:
                  return 2;
              case LexemeType.BeginComment:
                  return 3;
              case LexemeType.Numerator:
                  return 4;
              case LexemeType.EndString:
              case LexemeType.EndSymbol:
              case LexemeType.EndComment:
              case LexemeType.Denominator:
                  return 0;
              default:
                  return mode;
          }
      }

      // Puts the scanner back into top-level mode.
      public void ResetMode()
      {
          mode = 0;
      }

      // Scans one lexeme of str starting at pos.
      //   sr     - receives the lexeme type and text.
      //   newPos - receives the position just after the lexeme.
      // At pos == str.Length the result is EndOfInput and newPos == pos. When no
      // rule of the current mode matches, the result is a one-character
      // LexicalError and the mode is left unchanged.
      public void Scan(string str, int pos, out ScanResult sr, out int newPos)
      {
          if (pos == str.Length)
          {
              sr.type = LexemeType.EndOfInput;
              sr.str = "";
              newPos = pos;
              return;
          }

          IMatcher<LexemeType> matcher = modes[mode];
          if (!matcher.Matches(str, pos))
          {
              sr.type = LexemeType.LexicalError;
              sr.str = str.Substring(pos, 1);
              newPos = pos + 1;
              return;
          }

          sr.type = (LexemeType)matcher.AcceptCode;
          sr.str = matcher.Match;
          newPos = pos + matcher.MatchLength;
          mode = NextMode(mode, sr.type);
      }
  }
  // A forward-only supplier of source strings for LexemeSource.
  public interface IStringSource
  {
      // Advances to the next string; returns false when there is none.
      // parenDepth is the caller's current count of unclosed left parentheses.
      bool Next(int parenDepth);

      // The string selected by the most recent successful call to Next.
      string Current { get; }
  }
  // IStringSource that yields exactly one string.
  public class SingleString : IStringSource
  {
      private string text;

      // Number of times Next has been called so far.
      private int calls;

      public SingleString(string s)
      {
          text = s;
          calls = 0;
      }

      // True on the first call only; parenDepth is ignored.
      public bool Next(int parenDepth)
      {
          calls += 1;
          return calls == 1;
      }

      public string Current
      {
          get { return text; }
      }
  }
  // IStringSource that walks an array of strings from first to last.
  public class StringArraySource : IStringSource
  {
      private string[] items;

      // Index of the current element; -1 before the first call to Next.
      private int index;

      public StringArraySource(string[] sa)
      {
          items = sa;
          index = -1;
      }

      // Steps to the next element; returns whether the new index is inside the
      // array. parenDepth is ignored.
      public bool Next(int parenDepth)
      {
          index++;
          return !(index < 0 || index >= items.Length);
      }

      // Element at the current index (throws if the index is outside the array).
      public string Current
      {
          get { return items[index]; }
      }
  }
  // Turns the strings supplied by an IStringSource into a stream of lexemes.
  // Each supplied string is scanned with a trailing "\n" appended. The class also
  // tracks the number of currently unclosed left parentheses and passes it to the
  // string source whenever another string is requested.
  // Until Next() has been called, Current holds a default ScanResult
  // (type Whitespace, null text).
  public class LexemeSource
  {
      private int parenDepth;

      private SchemeScanner sc;
      private IStringSource ss;
      private bool endOfStrings;
      private string s;
      private int pos;
      private ScanResult sr;

      // Creates the source and immediately requests the first string from ss.
      public LexemeSource(IStringSource ss)
      {
          sc = new SchemeScanner();
          this.ss = ss;
          parenDepth = 0;
          NextString();
      }

      // Fetches the next string from the source, or records that none is left.
      private void NextString()
      {
          if (ss.Next(parenDepth))
          {
              s = ss.Current + "\n";
              endOfStrings = false;
              pos = 0;
          }
          else
          {
              endOfStrings = true;
          }
      }

      // True for every lexeme that opens a parenthesized form, i.e. "(" and all of
      // the "#name(" openers. PoundVertex2LeftParen was previously missing, so
      // "#vtx2(" did not raise the depth although its ")" lowered it.
      public static bool IsLParen(LexemeType lt)
      {
          switch (lt)
          {
              case LexemeType.LeftParen:
              case LexemeType.PoundLeftParen:
              case LexemeType.PoundMLeftParen:
              case LexemeType.PoundSLeftParen:
              case LexemeType.PoundMsgLeftParen:
              case LexemeType.PoundSigLeftParen:
              case LexemeType.PoundVector3LeftParen:
              case LexemeType.PoundVertex3LeftParen:
              case LexemeType.PoundVector2LeftParen:
              case LexemeType.PoundVertex2LeftParen:
              case LexemeType.PoundQuatLeftParen:
                  return true;
              default:
                  return false;
          }
      }

      // Advances to the next lexeme. Returns false (and sets Current to
      // EndOfInput) once the string source is exhausted, true otherwise.
      public bool Next()
      {
          while (true)
          {
              if (endOfStrings)
              {
                  sr.type = LexemeType.EndOfInput;
                  sr.str = "";
                  return false;
              }

              int newPos;
              sc.Scan(s, pos, out sr, out newPos);

              if (IsLParen(sr.type))
              {
                  ++parenDepth;
              }
              else if (sr.type == LexemeType.RightParen)
              {
                  // An unmatched ")" must not drive the depth below zero.
                  --parenDepth;
                  if (parenDepth < 0) parenDepth = 0;
              }

              if (newPos != pos)
              {
                  pos = newPos;
                  return true;
              }

              // The scanner made no progress, which it only does at the end of the
              // current string: move on to the next string and scan again.
              NextString();
          }
      }

      public ScanResult Current { get { return sr; } }
      public LexemeType CurrentType { get { return sr.type; } }
      public string CurrentString { get { return sr.str; } }
  }
  // Thrown by SchemeDataReader when the lexeme stream does not form valid data.
  public class ParsingException : ApplicationException
  {
      // Exception with the default message.
      public ParsingException()
      {
      }

      // Exception with the given message.
      public ParsingException(string message)
          : base(message)
      {
      }

      // Exception with the given message that wraps the underlying cause.
      public ParsingException(string message, Exception cause)
          : base(message, cause)
      {
      }
  }
  290. public class SchemeDataReader
  291. {
  // Lexeme stream that every Read* method of this reader consumes.
  private LexemeSource ls;

  // Creates a reader over the given lexeme source.
  public SchemeDataReader(LexemeSource ls)
  {
      this.ls = ls;
  }
  // False for whitespace and for the three comment lexemes, true for everything
  // else (including EndOfInput and LexicalError).
  private static bool IsImportant(LexemeType t)
  {
      bool skippable =
          t == LexemeType.Whitespace ||
          t == LexemeType.BeginComment ||
          t == LexemeType.CommentChars ||
          t == LexemeType.EndComment;
      return !skippable;
  }
  // True when a lexeme of type t can be the first lexeme of a datum.
  // PoundVertex2LeftParen, HexInteger and Double were previously missing even
  // though each has a lexer rule and a reader (ReadVertex2, ReadHexInteger,
  // ReadDouble) in this class.
  private static bool IsStartOfSchemeItem(LexemeType t)
  {
      switch (t)
      {
          // Openers of parenthesized forms.
          case LexemeType.LeftParen:
          case LexemeType.PoundLeftParen:
          case LexemeType.PoundSLeftParen:
          case LexemeType.PoundMLeftParen:
          case LexemeType.PoundSigLeftParen:
          case LexemeType.PoundMsgLeftParen:
          case LexemeType.PoundVector3LeftParen:
          case LexemeType.PoundVertex3LeftParen:
          case LexemeType.PoundVector2LeftParen:
          case LexemeType.PoundVertex2LeftParen:
          case LexemeType.PoundQuatLeftParen:
          // Quotation prefixes.
          case LexemeType.Quote:
          case LexemeType.QuasiQuote:
          case LexemeType.Unquote:
          case LexemeType.UnquoteSplicing:
          // Strings and symbols.
          case LexemeType.BeginString:
          case LexemeType.BeginSymbol:
          case LexemeType.Symbol:
          // Numbers, characters and booleans.
          case LexemeType.Integer:
          case LexemeType.HexInteger:
          case LexemeType.Double:
          case LexemeType.Numerator:
          case LexemeType.Char:
          case LexemeType.HexChar:
          case LexemeType.BooleanTrue:
          case LexemeType.BooleanFalse:
          // Network and GUID literals.
          case LexemeType.IPV4Address:
          case LexemeType.IPV4EndPoint:
          case LexemeType.IPV6Address:
          case LexemeType.IPV6EndPoint:
          case LexemeType.Guid:
              return true;
          default:
              return false;
      }
  }
  343. /*
  344. It is assumed that, at the start of each ReadSomething function, ls points
  345. to the FIRST lexeme of the Something.
  346. Therefore, for a list or vector, it points to the ( or #(.
  347. At the end of each ReadSomething function, ls points to the FIRST lexeme
  348. AFTER the Something. This could be End Of Input.
  349. */
  // Skips whitespace and comment lexemes; on return ls points to the first
  // important lexeme (which may be EndOfInput).
  private void ReadUnimportant()
  {
      for (;;)
      {
          if (IsImportant(ls.CurrentType))
          {
              return;
          }
          ls.Next();
      }
  }
  // Decodes a two-character escape lexeme (backslash plus selector) into the
  // character it stands for, then advances past it.
  private char ReadCharEscape()
  {
      // Selector characters and, at the same index, the characters they denote.
      const string selectors = "\\\"|abtnvfre";
      const string meanings = "\\\"|\a\b\t\n\v\f\r\u001b";

      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.CharEscape);
      string lexeme = ls.CurrentString;
      char selector = lexeme[1];

      char decoded;
      int slot = selectors.IndexOf(selector);
      if (slot >= 0)
      {
          decoded = meanings[slot];
      }
      else
      {
          // The lexer only produces the selectors listed above.
          System.Diagnostics.Debug.Assert(false);
          decoded = selector;
      }

      ls.Next();
      return decoded;
  }
  // Value of a digit character: '0'-'9' give 0-9, and letters of either case
  // give 10 upward ('A'/'a' = 10 ... 'Z'/'z' = 35). Any other character trips a
  // debug assertion and yields 0.
  private static int NumericValue(char c)
  {
      int value;
      if ('0' <= c && c <= '9')
      {
          value = c - '0';
      }
      else if ('A' <= c && c <= 'Z')
      {
          value = c - 'A' + 10;
      }
      else if ('a' <= c && c <= 'z')
      {
          value = c - 'a' + 10;
      }
      else
      {
          System.Diagnostics.Debug.Assert(false);
          value = 0;
      }
      return value;
  }
  // Decodes a "\xHH" escape lexeme (two hex digits) and advances past it.
  private char ReadHexEscape()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.HexEscape);
      string lexeme = ls.CurrentString;

      // Digits sit at indices 2 and 3, after the backslash and the 'x'.
      int code = 0;
      for (int i = 2; i <= 3; i++)
      {
          code = code * 16 + NumericValue(lexeme[i]);
      }

      ls.Next();
      return (char)code;
  }
  // Decodes a "\uHHHH" escape lexeme (four hex digits) and advances past it.
  private char ReadUnicodeEscape()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.UnicodeEscape);
      string lexeme = ls.CurrentString;

      // Digits sit at indices 2 through 5, after the backslash and the 'u'.
      int code = 0;
      for (int i = 2; i <= 5; i++)
      {
          code = code * 16 + NumericValue(lexeme[i]);
      }

      ls.Next();
      return (char)code;
  }
  // Decodes a "\OOO" escape lexeme (three octal digits) and advances past it.
  // Only the low eight bits of the value are kept.
  private char ReadOctEscape()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.OctEscape);
      string lexeme = ls.CurrentString;

      // Digits sit at indices 1 through 3, directly after the backslash.
      int code = 0;
      for (int i = 1; i <= 3; i++)
      {
          code = code * 8 + NumericValue(lexeme[i]);
      }
      code = code & 0xFF;

      ls.Next();
      return (char)code;
  }
  // Converts the current Double lexeme to a double and advances past it.
  // The lexer always writes the decimal separator as '.', so the text is parsed
  // with the invariant culture; parsing with the thread's current culture would
  // fail or misread the value where ',' is the decimal separator.
  private double ReadDouble()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Double);
      double d = double.Parse(
          ls.CurrentString,
          System.Globalization.NumberStyles.Float,
          System.Globalization.CultureInfo.InvariantCulture);
      ls.Next();
      return d;
  }
  // Reads a string literal. On entry ls points to BeginString; on exit it points
  // to the lexeme after EndString. Throws ParsingException when anything other
  // than string content or EndString is encountered.
  private SchemeString ReadString()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.BeginString);
      ls.Next(); // drop BeginString

      StringBuilder text = new StringBuilder();
      while (ls.CurrentType != LexemeType.EndString)
      {
          switch (ls.CurrentType)
          {
              case LexemeType.CharEscape:
                  text.Append(ReadCharEscape());
                  break;
              case LexemeType.HexEscape:
                  text.Append(ReadHexEscape());
                  break;
              case LexemeType.OctEscape:
                  text.Append(ReadOctEscape());
                  break;
              case LexemeType.UnicodeEscape:
                  text.Append(ReadUnicodeEscape());
                  break;
              case LexemeType.StrChars:
                  text.Append(ls.CurrentString);
                  ls.Next();
                  break;
              default:
                  throw new ParsingException("ReadString: Error parsing string");
          }
      }

      ls.Next(); // drop EndString
      return new SchemeString(text.ToString());
  }
  // Reads a |bar-quoted| symbol. On entry ls points to BeginSymbol; on exit it
  // points to the lexeme after EndSymbol. Throws ParsingException when anything
  // other than symbol content or EndSymbol is encountered.
  private Symbol ReadEscapedSymbol()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.BeginSymbol);
      ls.Next(); // drop BeginSymbol

      StringBuilder name = new StringBuilder();
      while (ls.CurrentType != LexemeType.EndSymbol)
      {
          switch (ls.CurrentType)
          {
              case LexemeType.CharEscape:
                  name.Append(ReadCharEscape());
                  break;
              case LexemeType.HexEscape:
                  name.Append(ReadHexEscape());
                  break;
              case LexemeType.OctEscape:
                  name.Append(ReadOctEscape());
                  break;
              case LexemeType.UnicodeEscape:
                  name.Append(ReadUnicodeEscape());
                  break;
              case LexemeType.StrChars:
                  name.Append(ls.CurrentString);
                  ls.Next();
                  break;
              default:
                  throw new ParsingException("ReadEscapedSymbol: Error parsing escaped symbol");
          }
      }

      ls.Next(); // drop EndSymbol
      return new Symbol(name.ToString());
  }
  // Parses the current Integer lexeme as a base-10 BigInteger and advances.
  private BigInteger ReadInteger()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Integer);
      string digits = ls.CurrentString;
      BigInteger value = BigInteger.Parse(digits, 10u);
      ls.Next();
      return value;
  }
  // Splits a hex integer lexeme into its optional prefix (+, - or ~) and digits.
  private static Regex hexIntegerRegex = new Regex(@"\G((?:\+|-|~)?)#x([0-9A-Fa-f]+)", RegexOptions.Compiled);

  // Parses the current HexInteger lexeme and advances past it.
  // A '-' prefix negates the value and a '~' prefix takes its bitwise complement.
  private BigInteger ReadHexInteger()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.HexInteger);
      Match parts = hexIntegerRegex.Match(ls.CurrentString);
      string prefix = parts.Groups[1].Value;
      BigInteger magnitude = BigInteger.Parse(parts.Groups[2].Value, 16u);

      BigInteger result;
      switch (prefix)
      {
          case "-":
              result = -magnitude;
              break;
          case "~":
              result = ~magnitude;
              break;
          default:
              System.Diagnostics.Debug.Assert(prefix == "+" || prefix == "");
              result = magnitude;
              break;
      }

      ls.Next();
      return result;
  }
  // Reads a fraction literal: Numerator, FractionBar, Denominator.
  // Returns the numerator as a BigInteger when the denominator is exactly one,
  // otherwise a BigRational. All three lexemes are consumed before the result is
  // built. Throws ParsingException when the bar or denominator is missing, or
  // when the denominator is zero.
  private object ReadRational()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Numerator);
      BigInteger n = BigInteger.Parse(ls.CurrentString, 10u);
      ls.Next();

      if (ls.CurrentType != LexemeType.FractionBar)
          throw new ParsingException("Fraction Bar Expected after Numerator");
      ls.Next();

      if (ls.CurrentType != LexemeType.Denominator)
          throw new ParsingException("Denominator Expected after Fraction Bar");
      string denominatorText = ls.CurrentString;
      BigInteger d = BigInteger.Parse(denominatorText, 10u);
      ls.Next();

      // The lexer's denominator rule accepts "0" and "-0"; these are the only
      // spellings of zero it can produce, so the lexeme text is checked directly.
      if (denominatorText == "0" || denominatorText == "-0")
          throw new ParsingException("Denominator of a fraction must not be zero");

      if (d == BigInteger.One) return n;
      return new BigRational(n, d);
  }
  // Converts the current Char lexeme ("#\" followed by a single character or a
  // character name) to a char and advances past it.
  // Names are matched with an ordinal, case-insensitive comparison so that the
  // result does not depend on the thread's culture (a culture-sensitive compare
  // can reject e.g. "NEWLINE" under Turkish casing rules).
  // Throws ParsingException for an unrecognized name.
  private char ReadCharacter()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Char);
      string lexeme = ls.CurrentString;
      string z = lexeme.Substring(2, lexeme.Length - 2); // strip the leading "#\"
      char ch;
      if (z.Length == 1) ch = z[0];
      else if (string.Equals(z, "nul", StringComparison.OrdinalIgnoreCase)) ch = (char)0;
      else if (string.Equals(z, "bel", StringComparison.OrdinalIgnoreCase)) ch = '\a';
      else if (string.Equals(z, "backspace", StringComparison.OrdinalIgnoreCase)) ch = '\b';
      else if (string.Equals(z, "tab", StringComparison.OrdinalIgnoreCase)) ch = '\t';
      else if (string.Equals(z, "newline", StringComparison.OrdinalIgnoreCase)) ch = '\n';
      else if (string.Equals(z, "vt", StringComparison.OrdinalIgnoreCase)) ch = '\v';
      else if (string.Equals(z, "page", StringComparison.OrdinalIgnoreCase)) ch = '\f';
      else if (string.Equals(z, "return", StringComparison.OrdinalIgnoreCase)) ch = '\r';
      else if (string.Equals(z, "space", StringComparison.OrdinalIgnoreCase)) ch = ' ';
      else throw new ParsingException("Unknown character "+z);
      ls.Next();
      return ch;
  }
  // Decodes a "#\xHHHH" character lexeme (four hex digits) and advances past it.
  private char ReadHexChar()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.HexChar);
      string lexeme = ls.CurrentString;

      // Digits sit at indices 3 through 6, after the "#\x" prefix.
      int code = 0;
      for (int i = 3; i <= 6; i++)
      {
          code = code * 16 + NumericValue(lexeme[i]);
      }

      ls.Next();
      return (char)code;
  }
  // Captures the 8-4-4-4-12 hex digit groups between "#g{" and "}".
  private static Regex guidRegex = new Regex(@"\G#g\{([0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12})\}", RegexOptions.Compiled);

  // Converts the current Guid lexeme to a System.Guid and advances past it.
  private Guid ReadGuid()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Guid);
      Match found = guidRegex.Match(ls.CurrentString);
      System.Diagnostics.Debug.Assert(found.Success);
      string digits = found.Groups[1].Value;
      ls.Next();
      return new Guid(digits);
  }
  // Extracts the text between "#ipv4[" and "]". Built once instead of on every
  // call.
  private static readonly Regex ipv4AddressRegex = new Regex("#ipv4\\[(.*)\\]", RegexOptions.None);

  // Converts the current IPV4Address lexeme to a System.Net.IPAddress.
  // The lexeme is consumed even when the address text fails to parse; in that
  // case the exception from IPAddress.Parse propagates to the caller.
  private object ReadIPV4Address()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.IPV4Address);
      Match m = ipv4AddressRegex.Match(ls.CurrentString);
      System.Diagnostics.Debug.Assert(m.Success);
      try
      {
          return System.Net.IPAddress.Parse(m.Groups[1].Value);
      }
      finally
      {
          ls.Next();
      }
  }
  // Extracts the address text and the port digits from "#ipv4[addr]:port".
  // Built once instead of on every call.
  private static readonly Regex ipv4EndPointRegex = new Regex("#ipv4\\[(.*)\\]:([0-9]+)", RegexOptions.None);

  // Converts the current IPV4EndPoint lexeme to a System.Net.IPEndPoint.
  // The lexeme is consumed even when the address or port is rejected; in that
  // case the exception from the parse or the IPEndPoint constructor propagates.
  private object ReadIPV4EndPoint()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.IPV4EndPoint);
      Match m = ipv4EndPointRegex.Match(ls.CurrentString);
      System.Diagnostics.Debug.Assert(m.Success);
      try
      {
          System.Net.IPAddress ipAddr = System.Net.IPAddress.Parse(m.Groups[1].Value);
          // The port is plain ASCII digits; parse it independently of culture.
          int port = int.Parse(m.Groups[2].Value, System.Globalization.CultureInfo.InvariantCulture);
          return new System.Net.IPEndPoint(ipAddr, port);
      }
      finally
      {
          ls.Next();
      }
  }
  // Extracts the text between "#ipv6[" and "]". Built once instead of on every
  // call.
  private static readonly Regex ipv6AddressRegex = new Regex("#ipv6\\[(.*)\\]", RegexOptions.None);

  // Converts the current IPV6Address lexeme to a System.Net.IPAddress.
  // The lexeme is consumed even when the address text fails to parse; in that
  // case the exception from IPAddress.Parse propagates to the caller.
  private object ReadIPV6Address()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.IPV6Address);
      Match m = ipv6AddressRegex.Match(ls.CurrentString);
      System.Diagnostics.Debug.Assert(m.Success);
      try
      {
          return System.Net.IPAddress.Parse(m.Groups[1].Value);
      }
      finally
      {
          ls.Next();
      }
  }
  // Extracts the address text and the port digits from "#ipv6[addr]:port".
  // Built once instead of on every call.
  private static readonly Regex ipv6EndPointRegex = new Regex("#ipv6\\[(.*)\\]:([0-9]+)", RegexOptions.None);

  // Converts the current IPV6EndPoint lexeme to a System.Net.IPEndPoint.
  // The lexeme is consumed even when the address or port is rejected; in that
  // case the exception from the parse or the IPEndPoint constructor propagates.
  private object ReadIPV6EndPoint()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.IPV6EndPoint);
      Match m = ipv6EndPointRegex.Match(ls.CurrentString);
      System.Diagnostics.Debug.Assert(m.Success);
      try
      {
          System.Net.IPAddress ipAddr = System.Net.IPAddress.Parse(m.Groups[1].Value);
          // The port is plain ASCII digits; parse it independently of culture.
          int port = int.Parse(m.Groups[2].Value, System.Globalization.CultureInfo.InvariantCulture);
          return new System.Net.IPEndPoint(ipAddr, port);
      }
      finally
      {
          ls.Next();
      }
  }
  // Reads a signature literal: "#sig(type)" or "#sig(type . param ...)".
  // On entry ls points to PoundSigLeftParen; on exit it points to the lexeme
  // after the closing parenthesis. The type and every parameter may be a plain
  // or a bar-quoted symbol. Throws ParsingException on any other shape,
  // including a dot that is followed by no parameters.
  private ExprObjModel.ObjectSystem.Signature ReadSignature()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundSigLeftParen);
      ls.Next();
      ReadUnimportant();

      Symbol type;
      switch (ls.CurrentType)
      {
          case LexemeType.Symbol:
              type = ReadSymbol();
              break;
          case LexemeType.BeginSymbol:
              type = ReadEscapedSymbol();
              break;
          default:
              throw new ParsingException("ReadSignature: Error reading type, expected symbol, got " + ls.CurrentType);
      }
      ReadUnimportant();

      // "#sig(type)": no parameters at all.
      if (ls.CurrentType == LexemeType.RightParen)
      {
          ls.Next();
          return new ExprObjModel.ObjectSystem.Signature(type, Enumerable.Empty<Symbol>());
      }
      if (ls.CurrentType != LexemeType.Dot)
      {
          throw new ParsingException("ReadSignature: Expected Dot or RightParen, got " + ls.CurrentType);
      }
      ls.Next();
      ReadUnimportant();

      List<Symbol> parameters = new List<Symbol>();
      while (ls.CurrentType != LexemeType.RightParen)
      {
          switch (ls.CurrentType)
          {
              case LexemeType.Symbol:
                  parameters.Add(ReadSymbol());
                  break;
              case LexemeType.BeginSymbol:
                  parameters.Add(ReadEscapedSymbol());
                  break;
              default:
                  throw new ParsingException("ReadSignature: Error reading type, expected Symbol or RightParen, got " + ls.CurrentType);
          }
          ReadUnimportant();
      }
      ls.Next(); // drop RightParen

      if (parameters.Count == 0) throw new ParsingException("ReadSignature: Dot without parameters");
      return new ExprObjModel.ObjectSystem.Signature(type, parameters);
  }
  // Reads a message literal: "#msg(type)" or "#msg(type . key value ...)".
  // On entry ls points to PoundMsgLeftParen; on exit it points to the lexeme
  // after the closing parenthesis. The type and every key may be a plain or a
  // bar-quoted symbol; each value is read with ReadItem. Throws ParsingException
  // on any other shape, including a key without a value and a dot that is
  // followed by no arguments.
  private ExprObjModel.ObjectSystem.Message<object> ReadMessage()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundMsgLeftParen);
      ls.Next();
      ReadUnimportant();

      Symbol type;
      switch (ls.CurrentType)
      {
          case LexemeType.Symbol:
              type = ReadSymbol();
              break;
          case LexemeType.BeginSymbol:
              type = ReadEscapedSymbol();
              break;
          default:
              throw new ParsingException("ReadMessage : Error reading type, expected symbol, got " + ls.CurrentType);
      }
      ReadUnimportant();

      // "#msg(type)": no arguments at all.
      if (ls.CurrentType == LexemeType.RightParen)
      {
          ls.Next();
          return new ExprObjModel.ObjectSystem.Message<object>(type, Enumerable.Empty<Tuple<Symbol, object>>());
      }
      if (ls.CurrentType != LexemeType.Dot)
      {
          throw new ParsingException("ReadMessage: Expected Dot or RightParen, got " + ls.CurrentType);
      }
      ls.Next();
      ReadUnimportant();

      List<Tuple<Symbol, object>> args = new List<Tuple<Symbol, object>>();
      while (ls.CurrentType != LexemeType.RightParen)
      {
          Symbol key;
          switch (ls.CurrentType)
          {
              case LexemeType.Symbol:
                  key = ReadSymbol();
                  break;
              case LexemeType.BeginSymbol:
                  key = ReadEscapedSymbol();
                  break;
              default:
                  throw new ParsingException("ReadMessage: Error reading key, expected symbol, got " + ls.CurrentType);
          }
          ReadUnimportant();

          // A key must be followed by a value, not by the end of the form.
          if (ls.CurrentType == LexemeType.RightParen)
          {
              throw new ParsingException("ReadMessage: Error reading value, got RightParen");
          }
          object val = ReadItem();
          ReadUnimportant();
          args.Add(new Tuple<Symbol, object>(key, val));
      }
      ls.Next(); // drop RightParen

      if (args.Count == 0) throw new ParsingException("ReadMessage: Dot without arguments");
      return new ExprObjModel.ObjectSystem.Message<object>(type, args);
  }
  // Reads a proper or dotted list. On entry ls points to LeftParen; on exit it
  // points to the lexeme after the matching RightParen.
  // The list is built behind a placeholder head cell so that appending never
  // needs a special case for the first element; the placeholder's cdr is the
  // result (the empty list for "()").
  private object ReadList()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.LeftParen);
      ConsCell head = new ConsCell("dummy", SpecialValue.EMPTY_LIST);
      ConsCell tail = head;
      ls.Next(); // drop LeftParen

      for (;;)
      {
          ReadUnimportant();
          switch (ls.CurrentType)
          {
              case LexemeType.EndOfInput:
                  throw new ParsingException("ReadList: Unexpected end of input");

              case LexemeType.RightParen:
                  ls.Next(); // drop RightParen
                  return head.cdr;

              case LexemeType.Dot:
                  ls.Next(); // drop Dot
                  // The item after the dot becomes the final cdr; exactly one item
                  // may follow before the closing parenthesis.
                  tail.cdr = ReadItem();
                  ReadUnimportant();
                  if (ls.CurrentType != LexemeType.RightParen)
                  {
                      throw new ParsingException("ReadList: Improperly dotted list");
                  }
                  ls.Next(); // drop RightParen
                  return head.cdr;

              default:
              {
                  ConsCell cell = new ConsCell();
                  cell.car = ReadItem();
                  cell.cdr = SpecialValue.EMPTY_LIST;
                  tail.cdr = cell;
                  tail = cell;
                  break;
              }
          }
      }
  }
  // Reads a vector literal "#( item ... )" into a Deque. On entry ls points to
  // PoundLeftParen; on exit it points to the lexeme after the RightParen.
  private object ReadVector()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundLeftParen);
      Deque<object> items = new Deque<object>();
      ls.Next(); // drop PoundLeftParen

      ReadUnimportant();
      while (ls.CurrentType != LexemeType.RightParen)
      {
          if (ls.CurrentType == LexemeType.EndOfInput)
          {
              throw new ParsingException("ReadVector: Unexpected end of input");
          }
          items.PushBack(ReadItem());
          ReadUnimportant();
      }

      ls.Next(); // drop RightParen
      return items;
  }
  // Reads a hash-set literal "#s( item ... )". On entry ls points to
  // PoundSLeftParen; on exit it points to the lexeme after the RightParen.
  // Throws ParsingException if an item is not hashable.
  private object ReadHashSet()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundSLeftParen);
      ls.Next();
      SchemeHashSet result = new SchemeHashSet();

      ReadUnimportant();
      while (ls.CurrentType != LexemeType.RightParen)
      {
          if (ls.CurrentType == LexemeType.EndOfInput)
          {
              throw new ParsingException("ReadHashSet: Unexpected end of input");
          }

          object item = ReadItem();
          bool hashable = Procedures.ProxyDiscovery.IsHashable(item);
          if (!hashable) throw new ParsingException("ReadHashSet: Un-hashable item in set");
          result.Add(item);

          ReadUnimportant();
      }

      ls.Next();
      return result;
  }
  // Reads a hash-map literal "#m( (key . value) ... )". On entry ls points to
  // PoundMLeftParen; on exit it points to the lexeme after the RightParen.
  // Every entry must read as a ConsCell whose car (the key) is hashable; its cdr
  // is stored as the value. Throws ParsingException otherwise.
  private object ReadHashMap()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundMLeftParen);
      ls.Next();
      SchemeHashMap result = new SchemeHashMap();

      ReadUnimportant();
      while (ls.CurrentType != LexemeType.RightParen)
      {
          if (ls.CurrentType == LexemeType.EndOfInput)
          {
              throw new ParsingException("ReadHashMap: Unexpected end of input");
          }

          object entry = ReadItem();
          if (!(entry is ConsCell)) throw new ParsingException("ReadHashMap: items must be pairs");
          ConsCell pair = (ConsCell)entry;
          bool hashableKey = Procedures.ProxyDiscovery.IsHashable(pair.car);
          if (!hashableKey) throw new ParsingException("ReadHashMap: Un-hashable key in map");
          result[pair.car] = pair.cdr;

          ReadUnimportant();
      }

      ls.Next();
      return result;
  }
  // Reads one numeric component of a vector, vertex or quaternion literal as a
  // BigRational. Leading whitespace and comments are skipped. Accepts an integer,
  // a hex integer or a fraction; anything else raises ParsingException.
  private BigRational ReadVec3Part()
  {
      ReadUnimportant();
      if (ls.CurrentType == LexemeType.Integer)
      {
          return new BigRational(ReadInteger(), BigInteger.One);
      }
      else if (ls.CurrentType == LexemeType.HexInteger)
      {
          return new BigRational(ReadHexInteger(), BigInteger.One);
      }
      else if (ls.CurrentType == LexemeType.Numerator)
      {
          // ReadRational hands back a BigInteger (not a BigRational) when the
          // denominator is one; casting that object straight to BigRational
          // throws InvalidCastException, so that case is wrapped explicitly.
          object r = ReadRational();
          if (r is BigInteger)
          {
              return new BigRational((BigInteger)r, BigInteger.One);
          }
          return (BigRational)r;
      }
      else
      {
          throw new ParsingException("Vec3 component must be integer or rational");
      }
  }
  // Reads "#vec2( x y )". On entry ls points to PoundVector2LeftParen; on exit
  // it points to the lexeme after the RightParen.
  private object ReadVector2()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundVector2LeftParen);
      ls.Next();

      BigRational x = ReadVec3Part();
      BigRational y = ReadVec3Part();
      ReadUnimportant();

      if (ls.CurrentType != LexemeType.RightParen)
      {
          throw new ParsingException("Right parenthesis expected");
      }
      ls.Next();
      return new Vector2(x, y);
  }
  // Reads "#vtx2( x y )". On entry ls points to PoundVertex2LeftParen; on exit
  // it points to the lexeme after the RightParen.
  private object ReadVertex2()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundVertex2LeftParen);
      ls.Next();

      BigRational x = ReadVec3Part();
      BigRational y = ReadVec3Part();
      ReadUnimportant();

      if (ls.CurrentType != LexemeType.RightParen)
      {
          throw new ParsingException("Right parenthesis expected");
      }
      ls.Next();
      return new Vertex2(x, y);
  }
  // Reads "#vec3( x y z )". On entry ls points to PoundVector3LeftParen; on exit
  // it points to the lexeme after the RightParen.
  private object ReadVector3()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundVector3LeftParen);
      ls.Next();

      BigRational x = ReadVec3Part();
      BigRational y = ReadVec3Part();
      BigRational z = ReadVec3Part();
      ReadUnimportant();

      if (ls.CurrentType != LexemeType.RightParen)
      {
          throw new ParsingException("Right parenthesis expected");
      }
      ls.Next();
      return new Vector3(x, y, z);
  }
  /// <summary>
  /// Reads a three-component vertex literal. The current lexeme must be the
  /// opening <c>PoundVertex3LeftParen</c> token; three numeric components
  /// and a closing parenthesis are then consumed.
  /// </summary>
  /// <returns>A new <c>Vertex3</c> built from the three components.</returns>
  /// <exception cref="ParsingException">
  /// A component is not numeric, or the closing parenthesis is missing.
  /// </exception>
  private object ReadVertex3()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundVertex3LeftParen);
      ls.Next(); // step past the opening token

      BigRational first = ReadVec3Part();
      BigRational second = ReadVec3Part();
      BigRational third = ReadVec3Part();

      ReadUnimportant();
      if (ls.CurrentType != LexemeType.RightParen)
      {
          throw new ParsingException("Right parenthesis expected");
      }

      ls.Next(); // consume the closing parenthesis
      return new Vertex3(first, second, third);
  }
  /// <summary>
  /// Reads a quaternion literal. The current lexeme must be the opening
  /// <c>PoundQuatLeftParen</c> token; four numeric components are then read
  /// in the order w, x, y, z, followed by a closing parenthesis.
  /// </summary>
  /// <returns>A new <c>Quaternion</c> built from the four components.</returns>
  /// <exception cref="ParsingException">
  /// A component is not numeric, or the closing parenthesis is missing.
  /// </exception>
  private object ReadQuaternion()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.PoundQuatLeftParen);
      ls.Next(); // step past the opening token

      // Components appear in the source text as w first, then x, y, z.
      BigRational wPart = ReadVec3Part();
      BigRational xPart = ReadVec3Part();
      BigRational yPart = ReadVec3Part();
      BigRational zPart = ReadVec3Part();

      ReadUnimportant();
      if (ls.CurrentType != LexemeType.RightParen)
      {
          throw new ParsingException("Right parenthesis expected");
      }

      ls.Next(); // consume the closing parenthesis
      return new Quaternion(wPart, xPart, yPart, zPart);
  }
  /// <summary>
  /// Wraps a datum in a two-element list whose head is a symbol, i.e.
  /// builds the structure <c>(quote-symbol obj)</c>.
  /// </summary>
  /// <param name="quote">Name used to construct the head symbol.</param>
  /// <param name="obj">The datum placed as the second list element.</param>
  /// <returns>The head cons cell of the new two-element list.</returns>
  private object Quoted(string quote, object obj)
  {
      // Arguments evaluate left to right: the symbol is created first, then
      // the tail cell holding obj, and finally the head cell joining them.
      return new ConsCell(
          new Symbol(quote),
          new ConsCell(obj, SpecialValue.EMPTY_LIST));
  }
  /// <summary>
  /// Reads the datum that follows a quote-style prefix lexeme (Quote,
  /// QuasiQuote, Unquote, or UnquoteSplicing) and wraps it with
  /// <c>Quoted</c>.
  /// </summary>
  /// <param name="quote">Symbol name to wrap the datum with.</param>
  /// <returns>The list <c>(quote-symbol datum)</c>.</returns>
  /// <exception cref="ParsingException">
  /// The input ends before a datum follows the prefix.
  /// </exception>
  private object ReadQuoted(string quote)
  {
      // Discard the prefix lexeme itself; the caller has already decided
      // which symbol name it maps to.
      ls.Next();
      ReadUnimportant();

      if (ls.CurrentType == LexemeType.EndOfInput)
      {
          throw new ParsingException("ReadQuoted: Unexpected end of input");
      }

      object datum = ReadItem();
      return Quoted(quote, datum);
  }
  /// <summary>
  /// Builds a <c>Symbol</c> from the text of the current lexeme, which must
  /// be of type <c>LexemeType.Symbol</c>, and advances the lexeme source.
  /// </summary>
  /// <returns>The symbol named by the current lexeme's string.</returns>
  private Symbol ReadSymbol()
  {
      System.Diagnostics.Debug.Assert(ls.CurrentType == LexemeType.Symbol);

      // Construct the symbol before advancing, while CurrentString still
      // refers to this lexeme.
      Symbol result = new Symbol(ls.CurrentString);
      ls.Next();
      return result;
  }
  /// <summary>
  /// Reads the next datum from the lexeme source by dispatching on the type
  /// of the current lexeme (after calling <c>ReadUnimportant()</c>).
  /// </summary>
  /// <returns>
  /// The datum that was read, or <c>null</c> when the source is at end of
  /// input.
  /// </returns>
  /// <exception cref="ParsingException">
  /// The current lexeme cannot start a datum. The offending lexeme is
  /// consumed before the exception is thrown.
  /// </exception>
  public object ReadItem()
  {
      ReadUnimportant();

      LexemeType kind = ls.CurrentType;
      switch (kind)
      {
          // Parenthesized / compound forms.
          case LexemeType.LeftParen:
              return ReadList();
          case LexemeType.PoundLeftParen:
              return ReadVector();
          case LexemeType.PoundSLeftParen:
              return ReadHashSet();
          case LexemeType.PoundMLeftParen:
              return ReadHashMap();
          case LexemeType.PoundSigLeftParen:
              return ReadSignature();
          case LexemeType.PoundMsgLeftParen:
              return ReadMessage();
          case LexemeType.PoundVector3LeftParen:
              return ReadVector3();
          case LexemeType.PoundVertex3LeftParen:
              return ReadVertex3();
          case LexemeType.PoundVector2LeftParen:
              return ReadVector2();
          case LexemeType.PoundVertex2LeftParen:
              return ReadVertex2();
          case LexemeType.PoundQuatLeftParen:
              return ReadQuaternion();

          // Quote-style prefixes.
          case LexemeType.Quote:
              return ReadQuoted("quote");
          case LexemeType.Unquote:
              return ReadQuoted("unquote");
          case LexemeType.QuasiQuote:
              return ReadQuoted("quasiquote");
          case LexemeType.UnquoteSplicing:
              return ReadQuoted("unquote-splicing");

          // Strings and symbols.
          case LexemeType.BeginString:
              return ReadString();
          case LexemeType.BeginSymbol:
              return ReadEscapedSymbol();
          case LexemeType.Symbol:
              return ReadSymbol();

          // Numbers and characters.
          case LexemeType.Integer:
              return ReadInteger();
          case LexemeType.HexInteger:
              return ReadHexInteger();
          case LexemeType.Double:
              return ReadDouble();
          case LexemeType.Char:
              return ReadCharacter();
          case LexemeType.HexChar:
              return ReadHexChar();

          // Booleans carry no payload: consume the lexeme and return the value.
          case LexemeType.BooleanTrue:
              ls.Next();
              return true;
          case LexemeType.BooleanFalse:
              ls.Next();
              return false;

          // Remaining atom kinds.
          case LexemeType.Guid:
              return ReadGuid();
          case LexemeType.Numerator:
              return ReadRational();
          case LexemeType.IPV4Address:
              return ReadIPV4Address();
          case LexemeType.IPV4EndPoint:
              return ReadIPV4EndPoint();
          case LexemeType.IPV6Address:
              return ReadIPV6Address();
          case LexemeType.IPV6EndPoint:
              return ReadIPV6EndPoint();

          // Nothing left to read.
          case LexemeType.EndOfInput:
              return null;

          default:
          {
              // Consume the offending lexeme before reporting it; its type
              // was captured above, so the message still names it.
              ls.Next();
              throw new ParsingException("ReadItem: Unexpected " + kind.ToString());
          }
      }
  }
  /// <summary>
  /// Convenience overload: reads the first datum from the given text by
  /// wrapping it in a <c>SingleString</c>-backed <c>LexemeSource</c> and a
  /// fresh <c>SchemeDataReader</c>.
  /// </summary>
  /// <param name="str">Source text to read from.</param>
  /// <returns>
  /// Whatever the instance <c>ReadItem()</c> returns for the first datum in
  /// <paramref name="str"/>.
  /// </returns>
  public static object ReadItem(string str)
  {
      SchemeDataReader reader =
          new SchemeDataReader(new LexemeSource(new SingleString(str)));
      return reader.ReadItem();
  }
  1056. }
  1057. }