PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/lisp/Reader.cs

http://github.com/toshok/shelisp
C# | 586 lines | 544 code | 34 blank | 8 comment | 47 complexity | a751732b55108d26eba1234a47b67382 MD5 | raw file
Possible License(s): GPL-3.0
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.IO;
  5. using System.Text;
  6. namespace Shelisp {
  7. public static class Reader {
  // A StreamReader over the text of a string that keeps track of how many
  // characters (UTF-16 code units, i.e. string indices) have been consumed.
  class StringPositionReader : StreamReader {
      // Encoder/decoder without a byte-order mark, so a leading U+FEFF in the
      // text is neither emitted as a preamble nor swallowed as one.
      static readonly Encoding utf8_no_bom = new UTF8Encoding (false);

      // str is the text to read.  (StreamReader (string) would have treated it
      // as a file path, so the text is wrapped in a MemoryStream instead.)
      public StringPositionReader (string str)
          : base (new MemoryStream (utf8_no_bom.GetBytes (str)), utf8_no_bom, false)
      {
          Position = 0;
      }

      // Reads one character; end of input (-1) does not advance Position.
      public override int Read ()
      {
          int c = base.Read ();
          if (c >= 0)
              Position++;
          return c;
      }

      public override int Read (char [] buffer, int index, int count)
      {
          // Position is recomputed from a snapshot so the count stays right even
          // if the base implementation routes through one of our other overrides.
          int before = Position;
          int read = base.Read (buffer, index, count);
          Position = before + read;
          return read;
      }

      public override int ReadBlock (char [] buffer, int index, int count)
      {
          // TextReader.ReadBlock is built on the virtual Read (char[], int, int);
          // the snapshot keeps those characters from being counted twice.
          int before = Position;
          int read = base.ReadBlock (buffer, index, count);
          Position = before + read;
          return read;
      }

      // Reads up to (and consumes) the next "\n", "\r" or "\r\n".  Returns the line
      // without its terminator, or null at end of input.  The terminator characters
      // are included in Position, which is why the line is assembled here instead of
      // delegating to StreamReader.ReadLine (that would hide how many were consumed).
      public override string ReadLine ()
      {
          if (Peek () < 0)
              return null;

          StringBuilder line = new StringBuilder ();
          int c;
          while ((c = base.Read ()) >= 0) {
              Position++;
              if (c == '\n')
                  break;
              if (c == '\r') {
                  if (Peek () == '\n') {
                      base.Read ();
                      Position++;
                  }
                  break;
              }
              line.Append ((char)c);
          }
          return line.ToString ();
      }

      public override string ReadToEnd ()
      {
          int before = Position;
          string rest = base.ReadToEnd ();
          Position = before + rest.Length;
          return rest;
      }

      // Number of characters consumed from the underlying text so far.
      public int Position { get; private set; }
  }

  // Reads the first Lisp object found in s, ignoring where reading stopped.
  public static Shelisp.Object ReadFromString (string s)
  {
      int unused;
      return ReadFromString (s, out unused);
  }

  // Reads the first Lisp object found in s.
  //
  // end_position receives the index into s of the first character that was not
  // consumed by the read (a character index, not a UTF-8 byte offset).
  // Returns whatever Read returns, including null when s holds no object.
  public static Shelisp.Object ReadFromString (string s, out int end_position)
  {
      using (var reader = new StringPositionReader (s)) {
          var obj = Read (reader);
          end_position = reader.Position;
          return obj;
      }
  }
  // Reads the next Lisp object from s.
  //
  // valid_end is the closing delimiter of the construct being read (')' for a
  // list, ']' for a vector, or (char)0 at top level).
  //
  // Returns the object read, or null when either the input is exhausted or the
  // closing delimiter valid_end was found (and consumed) before any object.
  // ReadList/ReadVector rely on that null as their end-of-sequence sentinel; the
  // two cases cannot be told apart by the caller.
  //
  // Throws Exception ("unexpected end of file") when the input ends right after a
  // backslash, and LispInvalidReadSyntaxException for an unknown '#' syntax.
  public static Shelisp.Object Read (StreamReader s, char valid_end = (char)0)
  {
      // characters of the symbol/number token collected so far
      StringBuilder token = new StringBuilder ();

      while (true) {
          if (s.EndOfStream) {
              // end of input terminates a pending token; otherwise nothing was read
              if (token.Length > 0)
                  return ReadSymbolLikeThing (token.ToString ());
              return null;
          }

          char ch = (char)s.Peek ();

          if (ch == '\\') {
              // a backslash makes the next character an ordinary token constituent
              // XXX unicode? control characters? what else?
              s.Read ();
              if (s.EndOfStream)
                  throw new Exception ("unexpected end of file");
              token.Append ((char)s.Read ());
              continue;
          }

          // '?' only introduces a character literal at the start of a token; inside
          // one (e.g. "foo?") it is an ordinary constituent and must not discard
          // the text collected so far.
          if (ch == '?' && token.Length == 0) {
              s.Read ();
              return ReadCharacterLiteral (s);
          }

          bool delimiter = Char.IsWhiteSpace (ch)
              || ch == valid_end
              || "\"([;'`#,".IndexOf (ch) >= 0;

          if (!delimiter) {
              token.Append ((char)s.Read ());
              continue;
          }

          // A delimiter ends the pending token.  The delimiter itself is left in the
          // stream, so the caller's next Read starts on it.
          if (token.Length > 0)
              return ReadSymbolLikeThing (token.ToString ());

          if (Char.IsWhiteSpace (ch)) {
              s.Read ();
              continue;
          }
          if (ch == '"')
              return ReadString (s);
          if (ch == '(')
              return ReadList (s);
          if (ch == '[')
              return ReadVector (s);
          if (ch == valid_end) {
              s.Read (); // consume the end character
              return null; // a special sentinel, check ReadList/ReadVector
          }

          switch (ch) {
          case ';':
              // comment, ignore the rest of the line and continue reading
              s.ReadLine ();
              continue;
          case '\'':
              s.Read (); // consume the quote and recurse
              return WrapInForm ("quote", Read (s, valid_end));
          case '`':
              s.Read (); // consume the backquote and recurse
              return WrapInForm ("`", Read (s, valid_end));
          case ',':
              s.Read (); // consume the comma
              if ((char)s.Peek () == '@') {
                  s.Read (); // consume the @ and recurse
                  return WrapInForm (",@", Read (s, valid_end));
              }
              return WrapInForm (",", Read (s, valid_end));
          default: // '#'
              s.Read (); // consume the #
              ch = (char)s.Peek ();
              switch (ch) {
              case 'x': // hexadecimal constant
                  s.Read (); // consume the x
                  return new Number (ReadHexNumber (s));
              case 'o': // octal constant
                  s.Read (); // consume the o
                  return new Number (ReadOctalNumber (s));
              case 'b': // binary constant
                  s.Read (); // consume the 'b'
                  return new Number (ReadBinaryNumber (s));
              case '0': case '1': case '2': case '3': case '4': case '5':
              case '6': case '7': case '8': case '9':
                  // the radix digits are left in the stream for ReadRadixNumber
                  return new Number (ReadRadixNumber (s));
              case '\'': // anonymous functions
                  s.Read (); // consume the '
                  return WrapInForm ("function", Read (s, valid_end));
              default:
                  throw new LispInvalidReadSyntaxException (string.Format ("#{0}", ch));
              }
          }
      }
  }

  // Builds the two-element list (HEAD FORM), where HEAD is the interned symbol head.
  private static Shelisp.Object WrapInForm (string head, Shelisp.Object form)
  {
      return new List (L.intern (head), new List (form, L.Qnil));
  }
  // Converts the text of a token into a Lisp object: an integer if it parses as
  // an Int32, otherwise a float if it parses as a Single, otherwise the interned
  // symbol with that name.
  private static Shelisp.Object ReadSymbolLikeThing (string contents)
  {
      // Lisp source is not localized: always parse with the invariant culture so
      // that "1.5" is a float regardless of the machine's decimal separator.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;

      int i;
      if (Int32.TryParse (contents, System.Globalization.NumberStyles.Integer, invariant, out i))
          return new Shelisp.Number (i);

      // Single.TryParse also accepts the words "NaN" and "Infinity"; requiring a
      // digit keeps symbols with those names from being read as numbers.
      bool has_digit = contents.IndexOfAny ("0123456789".ToCharArray ()) >= 0;

      float f;
      if (has_digit
          && Single.TryParse (contents,
                              System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                              invariant, out f))
          return new Shelisp.Number (f);

      return L.intern (contents);
  }
  // Returns value if the character literal that produced it is properly
  // terminated, i.e. the next character in s (which is only peeked, never
  // consumed) is whitespace, one of the delimiters "';()[]#?`,. or the end of input.
  // Throws LispInvalidReadSyntaxException ("?") otherwise.
  static int ReturnCharOrThrow (TextReader s, int value)
  {
      int next = s.Peek ();

      if (next < 0) {
          // End of input terminates a literal such as "?a".  The readers in this
          // file cast Read()/Peek() results to char, so a literal that was itself
          // cut off by the end of input arrives here as 0xFFFF; keep rejecting that.
          if (value == 0xFFFF)
              throw new LispInvalidReadSyntaxException ("?");
          return value;
      }

      char ch = (char)next;
      if (Char.IsWhiteSpace (ch) || "\"';()[]#?`,.".IndexOf (ch) >= 0)
          return value;

      throw new LispInvalidReadSyntaxException ("?");
  }
  // Consumes the longest run of hexadecimal digits (either case) at the front of
  // s and returns its value; returns 0 when s does not start with a hex digit.
  // The first non-digit character is left unread.
  static int ReadHexNumber (TextReader s)
  {
      int result = 0;

      while (true) {
          int c = s.Peek ();
          int digit;

          if (c >= '0' && c <= '9')
              digit = c - '0';
          else if (c >= 'A' && c <= 'F')
              digit = c - 'A' + 10;
          else if (c >= 'a' && c <= 'f')
              digit = c - 'a' + 10;
          else
              return result;

          s.Read ();
          // the shift leaves the low nibble clear, so OR-ing the digit in adds it
          result = (result << 4) | digit;
      }
  }
  // Consumes the longest run of octal digits (0-7) at the front of s and returns
  // its value; returns 0 when s does not start with an octal digit.  The first
  // non-digit character is left unread.
  static int ReadOctalNumber (TextReader s)
  {
      int total = 0;

      for (int c = s.Peek (); c >= '0' && c <= '7'; c = s.Peek ()) {
          s.Read ();
          total = (total * 8) + (c - '0');
      }

      return total;
  }
  // Consumes the longest run of binary digits (0/1) at the front of s and returns
  // its value; returns 0 when s does not start with a binary digit.  The first
  // non-digit character is left unread.
  static int ReadBinaryNumber (TextReader s)
  {
      int value = 0;

      while (true) {
          char ch = (char)s.Peek ();
          if (ch != '0' && ch != '1')
              break;
          s.Read ();
          value = (value << 1) + (ch - '0');
      }

      return value;
  }
  // Reads an integer written as RADIXrDIGITS (the part of "#24r1k" after the '#'),
  // e.g. "24r1k" is 44.  RADIX is a decimal number from 2 to 36; DIGITS use 0-9
  // and then letters of either case.  Reading stops, without consuming it, at the
  // first character that is not a digit of the radix.
  //
  // Throws FormatException when the radix is out of range, the 'r' is missing, or
  // no digit follows it.
  static int ReadRadixNumber (TextReader s)
  {
      int radix = 0;
      int c = s.Peek ();
      while (c >= '0' && c <= '9') {
          s.Read ();
          radix = (radix * 10) + (c - '0');
          // checking inside the loop also keeps a long digit run from overflowing
          if (radix > 36)
              throw new FormatException ("integer radix must be between 2 and 36");
          c = s.Peek ();
      }

      if (c != 'r' && c != 'R')
          throw new FormatException ("expected 'r' after the integer radix");
      s.Read (); // consume the r

      if (radix < 2)
          throw new FormatException ("integer radix must be between 2 and 36");

      int value = 0;
      bool any_digit = false;
      while (true) {
          c = s.Peek ();

          int digit;
          if (c >= '0' && c <= '9')
              digit = c - '0';
          else if (c >= 'a' && c <= 'z')
              digit = c - 'a' + 10;
          else if (c >= 'A' && c <= 'Z')
              digit = c - 'A' + 10;
          else
              break;

          if (digit >= radix)
              break;

          s.Read ();
          value = (value * radix) + digit;
          any_digit = true;
      }

      if (!any_digit)
          throw new FormatException ("integer constant has no digits");

      return value;
  }
  // Reads the body of a character literal (the part after the '?') and returns
  // its character code, with any modifier bits OR-ed in:
  //   alt 2**22, super 2**23, hyper 2**24, shift 2**25, control 2**26, meta 2**27.
  //
  // Throws Exception ("invalid escape sequence") when a modifier prefix is not
  // followed by '-', Exception for a \U value above 0x10ffff, and whatever
  // ReturnCharOrThrow throws when a plain character is not properly terminated.
  static int ReadCharacterLiteralAsNumber (TextReader s)
  {
      char ch = (char)s.Peek ();
      if (ch != '\\') {
          s.Read (); // read the value
          return ReturnCharOrThrow (s, ch);
      }

      // escape sequence
      s.Read (); // read the slash
      ch = (char)s.Read (); // unconditionally read the next character
      switch (ch) {
      case 'a':
      case 'A':
          // ?\A-x is alt-x; without the dash, ?\a ⇒ 7 ; control-g, C-g
          if (s.Peek () == '-')
              return ReadModifiedCharacter (s, 0x400000);
          return 7;
      case 'b': // ?\b ⇒ 8 ; backspace, <BS>, C-h
          return 8;
      case 't': // ?\t ⇒ 9 ; tab, <TAB>, C-i
          return 9;
      case 'n': // ?\n ⇒ 10 ; newline, C-j
          return 10;
      case 'v': // ?\v ⇒ 11 ; vertical tab, C-k
          return 11;
      case 'f': // ?\f ⇒ 12 ; formfeed character, C-l
          return 12;
      case 'r': // ?\r ⇒ 13 ; carriage return, <RET>, C-m
          return 13;
      case 'e': // ?\e ⇒ 27 ; escape character, <ESC>, C-[
          return 27;
      case 's':
          // ?\s-x is super-x; without the dash, ?\s ⇒ 32 ; space character, <SPC>
          if (s.Peek () == '-')
              return ReadModifiedCharacter (s, 0x800000);
          return 32;
      case 'S':
          // ?\S-x is shift-x
          if (s.Peek () == '-')
              return ReadModifiedCharacter (s, 0x2000000);
          return 32;
      case 'd': // ?\d ⇒ 127 ; delete character, <DEL>
          return 127;
      case 'x':
          return ReadHexNumber (s);
      case 'U': {
          int value = ReadHexNumber (s);
          if (value > 0x10ffff)
              throw new Exception ("unicode codepoint out of acceptable range");
          return value;
      }
      case '^':
          // ?\^I ⇒ 9: control applied to the single character that follows
          return ApplyControlModifier ((char)s.Read ());
      case 'c':
      case 'C':
          // consume the C- prefix, then read the character it applies to
          if (s.Read () != '-')
              throw new Exception ("invalid escape sequence");
          return ApplyControlModifier (ReadCharacterLiteralAsNumber (s));
      case 'm':
      case 'M':
          return ReadModifiedCharacter (s, 0x8000000);
      case 'h':
      case 'H':
          return ReadModifiedCharacter (s, 0x1000000);
      case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
          // the first octal digit has already been consumed into ch, so it has
          // to seed the value instead of being dropped
          int value = ch - '0';
          int next;
          while ((next = s.Peek ()) >= '0' && next <= '7') {
              s.Read ();
              value = (value * 8) + (next - '0');
          }
          return value;
      }
      default:
          return ReturnCharOrThrow (s, ch);
      }
  }

  // Consumes the '-' of a modifier prefix such as M-, reads the character that
  // follows and returns it with modifier_bit set.  The result stays an int so
  // modifier bits from nested prefixes (e.g. ?\M-\H-a) are not truncated away.
  static int ReadModifiedCharacter (TextReader s, int modifier_bit)
  {
      if (s.Read () != '-')
          throw new Exception ("invalid escape sequence");
      return ReadCharacterLiteralAsNumber (s) | modifier_bit;
  }

  // Applies the control modifier to the character code c: '?' becomes DEL (127),
  // letters of either case and the characters '@'..'_' map onto the ASCII control
  // codes 0..31 (so C-a and C-A are both 1), and anything else gets the 2**26
  // control bit.  Modifier bits already present in c are preserved.
  static int ApplyControlModifier (int c)
  {
      const int char_bits = 0x3FFFFF; // everything below the lowest modifier bit (2**22)
      int base_char = c & char_bits;
      int modifiers = c & ~char_bits;

      if (base_char == '?')
          return 127 | modifiers;
      if ((base_char >= '@' && base_char <= '_') || (base_char >= 'a' && base_char <= 'z'))
          return (base_char & 0x1F) | modifiers;
      return c | 0x4000000;
  }
  // Reads a character literal (the leading '?' has already been consumed by the
  // caller) and wraps its character code in a Number.  The code is passed through
  // ReturnCharOrThrow so that a literal followed by an invalid character is rejected.
  private static Shelisp.Number ReadCharacterLiteral (TextReader s)
  {
      int code = ReadCharacterLiteralAsNumber (s);
      int checked_code = ReturnCharOrThrow (s, code);
      return new Shelisp.Number (checked_code);
  }
  // Reads a string literal.  s must be positioned on the opening double quote;
  // on return the closing quote has been consumed.
  //
  // Backslash escapes: \a \b \t \n \v \f \r \e \s \d produce the corresponding
  // control/space characters, backslash-newline and backslash-space produce
  // nothing, and any other escaped character (notably \" and \\) stands for itself.
  //
  // Throws Exception ("unexpected end of file") if the input ends before the
  // closing quote.
  private static Shelisp.String ReadString (TextReader s)
  {
      StringBuilder sb = new StringBuilder ();

      // consume the initial "
      s.Read ();

      while (true) {
          int c = s.Read ();
          if (c < 0)
              throw new Exception ("unexpected end of file");

          if (c == '"')
              break; // the closing " has just been consumed

          if (c != '\\') {
              sb.Append ((char)c);
              continue;
          }

          int escaped = s.Read ();
          if (escaped < 0)
              throw new Exception ("unexpected end of file");

          switch (escaped) {
          case 'a': sb.Append ((char)7); break;
          case 'b': sb.Append ((char)8); break;
          case 't': sb.Append ((char)9); break;
          case 'n': sb.Append ((char)10); break;
          case 'v': sb.Append ((char)11); break;
          case 'f': sb.Append ((char)12); break;
          case 'r': sb.Append ((char)13); break;
          case 'e': sb.Append ((char)27); break;
          case 's': sb.Append ((char)32); break;
          case 'd': sb.Append ((char)127); break;
          case ' ':
          case '\n':
              // an escaped space or line break contributes nothing to the string
              break;
          case '\r':
              // same, for a CRLF line break
              if (s.Peek () == '\n')
                  s.Read ();
              break;
          default:
              sb.Append ((char)escaped);
              break;
          }
      }

      return new String (sb.ToString ());
  }
  // Reads a list.  s must be positioned on the opening '('.  Elements are read
  // with Read (s, ')') until it returns null.  A "." symbol marks a dotted list:
  // it may appear only once and may be followed by at most one element, otherwise
  // LispInvalidReadSyntaxException is thrown.
  private static Shelisp.Object ReadList (StreamReader s)
  {
      Debug.Print ("ReadList>");

      // consume the (
      s.Read ();

      List<Shelisp.Object> elements = new List<Shelisp.Object> ();
      bool dotted = false;     // a "." has been read
      bool tail_read = false;  // the element after the "." has been read

      for (Shelisp.Object item = Read (s, ')'); item != null; item = Read (s, ')')) {
          if (item.LispEq (L.intern ("."))) {
              if (dotted)
                  throw new LispInvalidReadSyntaxException (". in wrong context");
              dotted = true;
              continue; // the dot itself is not an element
          }

          if (dotted) {
              if (tail_read)
                  throw new LispInvalidReadSyntaxException (". in wrong context");
              tail_read = true;
          }

          Debug.Print ("+ {0}", item);
          elements.Add (item);
      }

      Shelisp.Object rv;
      if (dotted)
          rv = L.make_list_atom_tail (elements.ToArray ());
      else
          rv = L.make_list (elements.ToArray ());

      Debug.Print ("ReadList returning {0}", rv);
      return rv;
  }
  // Reads a vector.  s must be positioned on the opening '['.  Elements are read
  // with Read (s, ']') until it returns null, and are collected into a new Vector.
  private static Shelisp.Vector ReadVector (StreamReader s)
  {
      Debug.Print ("ReadVector>");

      // consume the [
      s.Read ();

      List<Shelisp.Object> objs = new List<Shelisp.Object> ();
      Shelisp.Object obj;
      while ((obj = Read (s, ']')) != null) {
          Debug.Print ("+ {0}", obj);
          objs.Add (obj);
      }

      var rv = new Vector (objs.ToArray ());
      // the trace used to say "ReadList", which made list and vector reads
      // indistinguishable in the debug output
      Debug.Print ("ReadVector returning {0}", rv);
      return rv;
  }
  // Lisp builtin stub: ignores its arguments and always returns L.Qnil.  The
  // disabled region below only sketches how the stream argument is meant to be
  // dispatched; it is not compilable as written.
  [LispBuiltin]
  public static Shelisp.Object Fread (L l, [LispOptional] Shelisp.Object stream)
  {
#if notyet
      if (L.NILP (stream))
          stream = standard_input.Eval(); // standard-input is a variable that by default is 't', so the minibuffer
      if (stream is Buffer)
          stream = ...; // read from the entire buffer
      else if (stream is Marker)
          stream = ...; // read the buffer starting at the marker. the point has no effect
      else if ((stream is List && L.Qlambda.LispEq (Fcar (l, (List)stream))) ||
               (stream is Symbol && !L.Qunbound.LispEq (((Symbol)stream).function)))
          stream = ...; // function
      else if (L.Qt.LispEq (stream))
          stream = ...; // minibuffer
#endif
      // XXX not implemented yet
      return L.Qnil;
  }
  // Lisp builtin stub: ignores str, start and end and always returns L.Qnil.
  // NOTE(review): the disabled sketch calls Reader.Read (string, out int); no such
  // overload is visible in this file (ReadFromString has that shape) -- confirm
  // before enabling it.
  [LispBuiltin]
  public static Shelisp.Object Fread_from_string (L l, Shelisp.Object str, [LispOptional] Shelisp.Object start, Shelisp.Object end)
  {
#if false
      if (!(str is String))
          throw new WrongTypeArgumentException ("stringp", str);
      Shelisp.Object obj;
      int pos;
      obj = Reader.Read ((string)(Shelisp.String)str, out pos);
      return new List (obj, (Number)pos);
#endif
      return L.Qnil;
  }
  // Reader-related variables, declared with the LispBuiltin attribute.
  // NOTE(review): presumably each surfaces as the Lisp variable named after the
  // field (lexical-binding, current-load-list, after-load-alist) -- confirm
  // against the attribute's implementation.

  // Defaults to false.
  // XXX this is meant to be buffer local
  // Fmake_variable_buffer_local (Qlexical_binding);
  [LispBuiltin (DocString = @"Whether to use lexical binding when evaluating code.
Non-nil means that the code in the current buffer should be evaluated
with lexical binding.
This variable is automatically set from the file variables of an
interpreted Lisp file read using `load'. Unlike other file local
variables, this must be set in the first line of a file.")]
  public static bool Vlexical_binding = false;

  // Starts out as L.Qnil.
  [LispBuiltin (DocString = @"Used for internal purposes by `load'.")]
  public static Shelisp.Object Vcurrent_load_list = L.Qnil;

  // Starts out as L.Qnil.
  [LispBuiltin (DocString = @"An alist of expressions to be evalled when particular files are loaded.
Each element looks like (REGEXP-OR-FEATURE FORMS...).
REGEXP-OR-FEATURE is either a regular expression to match file names, or
a symbol \(a feature name).
When `load' is run and the file-name argument matches an element's
REGEXP-OR-FEATURE, or when `provide' is run and provides the symbol
REGEXP-OR-FEATURE, the FORMS in the element are executed.
An error in FORMS does not undo the load, but does prevent execution of
the rest of the FORMS.")]
  public static Shelisp.Object Vafter_load_alist = L.Qnil;
  521. }
  522. }