/parsing/d/lexer.d

http://github.com/wilkie/djehuty · D · 972 lines · 837 code · 59 blank · 76 comment · 392 complexity · 816a0f9d6d05d927eeca06b3ec8f4a22 MD5 · raw file

  1. module parsing.d.lexer;
  2. import parsing.token;
  3. import parsing.lexer;
  4. import parsing.d.tokens;
  5. import djehuty;
  6. import data.stack;
  7. import io.console;
  // Lexer for D source text.
  //
  // Lines are pulled from a Stream one at a time and broken into Tokens by a
  // small state machine (see LexerState). Tokens can be pushed back with
  // push(); pop() hands back pushed tokens before reading further input.
  class DLexer : Lexer {
    // Creates a lexer that reads its input from `stream`.
    this(Stream stream) {
      super(stream);
      _bank = new Stack!(Token);
      _stream = stream;
    }

    // Puts `token` into the bank so that a later pop() returns it instead of
    // lexing new input.
    void push(Token token) {
      _bank.push(token);
    }

    // Returns the line most recently read from the stream.
    string line() {
      return _line;
    }

    // Returns the text of line `number` (1-based) among the lines read so far,
    // or "" when no such line has been read. Passing 0 also yields "": the
    // decrement wraps around and fails the range check.
    string line(uint number) {
      number--;
      if (number >= _lines.length) {
        return "";
      }
      return _lines[number];
    }

    // Returns the next token.
    //
    // Banked tokens (see push) are returned first. Otherwise characters are
    // consumed from the current line, reading further lines as needed, until
    // a complete token is recognized. Token.init is returned at end of input
    // and after a lexical error has been reported through _error().
    Token pop() {
      if (!_bank.empty) {
        return _bank.pop();
      }

      Token current;
      current.line = _lineNumber;
      current.column = _pos + 1;

      for (;;) {
        // Fetch the next line once the current one has been used up.
        // _lineNumber counts the lines read, so it is the 1-based number of
        // the line being lexed.
        if (_line is null || _pos >= _line.length) {
          if (!_stream.readLine(_line)) {
            return Token.init;
          }
          _lines ~= _line;
          _lineNumber++;
          _pos = 0;
          current.line++;
          current.column = 1;
        }

        // Break the line up into tokens. The position one past the last
        // character stands for the line terminator and is presented to the
        // state machine as '\n', which is whitespace and ends most tokens.
        for (; _pos <= _line.length; _pos++) {
          char chr;
          if (_pos == _line.length) {
            chr = '\n';
          }
          else {
            chr = _line[_pos];
          }

          switch (state) {
            default:
              // error
              _error("error");
              return Token.init;

            case LexerState.Normal:
              // tokenMapping has no entries past '~'; characters beyond the
              // table (e.g. non-ASCII bytes) must not be used as an index.
              if (chr < tokenMapping.length && tokenMapping[chr] != DToken.Invalid) {
                DToken newType = tokenMapping[chr];

                // Try to extend the operator collected so far with this
                // character. `goto default` means the two do not combine:
                // the collected token is returned and this character is
                // looked at again by the next pop().
                switch (current.type) {
                  case DToken.And: // &
                    if (newType == DToken.And) {
                      // &&
                      current.type = DToken.LogicalAnd;
                    }
                    else if (newType == DToken.Assign) {
                      // &=
                      current.type = DToken.AndAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Or: // |
                    if (newType == DToken.Or) {
                      // ||
                      current.type = DToken.LogicalOr;
                    }
                    else if (newType == DToken.Assign) {
                      // |=
                      current.type = DToken.OrAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Add: // +
                    if (newType == DToken.Assign) {
                      // +=
                      current.type = DToken.AddAssign;
                    }
                    else if (newType == DToken.Add) {
                      // ++
                      current.type = DToken.Increment;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Sub: // -
                    if (newType == DToken.Assign) {
                      // -=
                      current.type = DToken.SubAssign;
                    }
                    else if (newType == DToken.Sub) {
                      // --
                      current.type = DToken.Decrement;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Div: // /
                    if (newType == DToken.Assign) {
                      // /=
                      current.type = DToken.DivAssign;
                    }
                    // The comment openers below are recognized but not
                    // handled yet: the character is consumed and the token
                    // stays Div.
                    else if (newType == DToken.Add) {
                      // /+
                    }
                    else if (newType == DToken.Div) {
                      // //
                    }
                    else if (newType == DToken.Mul) {
                      // /*
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Mul: // *
                    if (newType == DToken.Assign) {
                      // *=
                      current.type = DToken.MulAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Mod: // %
                    if (newType == DToken.Assign) {
                      // %=
                      current.type = DToken.ModAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Xor: // ^
                    if (newType == DToken.Assign) {
                      // ^=
                      current.type = DToken.XorAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Cat: // ~
                    if (newType == DToken.Assign) {
                      // ~=
                      current.type = DToken.CatAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Assign: // =
                    if (newType == DToken.Assign) {
                      // ==
                      current.type = DToken.Equals;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.LessThan: // <
                    if (newType == DToken.LessThan) {
                      // <<
                      current.type = DToken.ShiftLeft;
                    }
                    else if (newType == DToken.Assign) {
                      // <=
                      current.type = DToken.LessThanEqual;
                    }
                    else if (newType == DToken.GreaterThan) {
                      // <>
                      current.type = DToken.LessThanGreaterThan;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.GreaterThan: // >
                    if (newType == DToken.GreaterThan) {
                      // >>
                      current.type = DToken.ShiftRight;
                    }
                    else if (newType == DToken.Assign) {
                      // >=
                      current.type = DToken.GreaterThanEqual;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.ShiftLeft: // <<
                    if (newType == DToken.Assign) {
                      // <<=
                      current.type = DToken.ShiftLeftAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.ShiftRight: // >>
                    if (newType == DToken.Assign) {
                      // >>=
                      current.type = DToken.ShiftRightAssign;
                    }
                    else if (newType == DToken.GreaterThan) {
                      // >>>
                      current.type = DToken.ShiftRightSigned;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.ShiftRightSigned: // >>>
                    if (newType == DToken.Assign) {
                      // >>>=
                      current.type = DToken.ShiftRightSignedAssign;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.LessThanGreaterThan: // <>
                    if (newType == DToken.Assign) {
                      // <>=
                      current.type = DToken.LessThanGreaterThanEqual;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Bang: // !
                    if (newType == DToken.LessThan) {
                      // !<
                      current.type = DToken.NotLessThan;
                    }
                    else if (newType == DToken.GreaterThan) {
                      // !>
                      current.type = DToken.NotGreaterThan;
                    }
                    else if (newType == DToken.Assign) {
                      // !=
                      current.type = DToken.NotEquals;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.NotLessThan: // !<
                    if (newType == DToken.GreaterThan) {
                      // !<>
                      current.type = DToken.NotLessThanGreaterThan;
                    }
                    else if (newType == DToken.Assign) {
                      // !<=
                      current.type = DToken.NotLessThanEqual;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.NotGreaterThan: // !>
                    if (newType == DToken.Assign) {
                      // !>=
                      current.type = DToken.NotGreaterThanEqual;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.NotLessThanGreaterThan: // !<>
                    if (newType == DToken.Assign) {
                      // !<>=
                      current.type = DToken.NotLessThanGreaterThanEqual;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Dot: // .
                    if (newType == DToken.Dot) {
                      // ..
                      current.type = DToken.Slice;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Slice: // ..
                    if (newType == DToken.Dot) {
                      // ...
                      current.type = DToken.Variadic;
                    }
                    else {
                      goto default;
                    }
                    break;
                  case DToken.Invalid:
                    // Nothing collected yet: this character starts a token.
                    current.type = tokenMapping[chr];
                    break;
                  default:
                    // The collected token cannot be extended; hand it back
                    // without consuming this character.
                    if (current.type != DToken.Invalid) {
                      current.columnEnd = _pos;
                      current.lineEnd = _lineNumber;
                      return current;
                    }
                    // _error("Unknown operator.");
                    return Token.init;
                }
                continue;
              }

              // A character that will switch states continues

              // Strings. When an operator token is pending it is returned
              // first; the opening quote is consumed either way and the next
              // pop() resumes inside the string.
              if (chr == '\'') {
                state = LexerState.String;
                inStringType = StringType.Character;
                cur_string = "";
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  _pos++;
                  return current;
                }
                continue;
              }
              else if (chr == '"') {
                state = LexerState.String;
                inStringType = StringType.DoubleQuote;
                cur_string = "";
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  _pos++;
                  return current;
                }
                continue;
              }
              else if (chr == '`') {
                state = LexerState.String;
                inStringType = StringType.WhatYouSeeQuote;
                cur_string = "";
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  _pos++;
                  return current;
                }
                continue;
              }
              // Whitespace ends a pending token; otherwise it is skipped.
              else if (chr == ' ' || chr == '\t' || chr == '\n') {
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  _pos++;
                  return current;
                }
                current.column++;
                continue;
              }
              // Identifiers
              else if ((chr >= 'a' && chr <= 'z') || (chr >= 'A' && chr <= 'Z') || chr == '_') {
                state = LexerState.Identifier;
                cur_string = "";
                // A pending token is returned without consuming this
                // character; the next pop() starts in the Identifier state.
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  return current;
                }
                goto case LexerState.Identifier;
              }
              // Numbers
              else if (chr >= '0' && chr <= '9') {
                // reset to invalid base
                cur_base = 0;
                cur_decimal = 0;
                cur_denominator = 1;
                cur_exponent = 0;

                if (current.type == DToken.Dot) {
                  // A dot directly followed by a digit (".5") starts a
                  // decimal floating point literal.
                  current.type = DToken.Invalid;
                  inDecimal = true;
                  inExponent = false;
                  cur_integer = 0;
                  cur_base = 10;
                  state = LexerState.FloatingPoint;
                  goto case LexerState.FloatingPoint;
                }
                else {
                  state = LexerState.Integer;
                  if (current.type != DToken.Invalid) {
                    current.columnEnd = _pos;
                    current.lineEnd = _lineNumber;
                    return current;
                  }
                  goto case LexerState.Integer;
                }
              }
              // Any other character is skipped.
              break;

            case LexerState.String:
              if (inEscape) {
                // Character following a backslash. Unrecognized escapes
                // (including the unimplemented \x) are added literally.
                inEscape = false;
                if (chr == 't') {
                  chr = '\t';
                }
                else if (chr == 'b') {
                  chr = '\b';
                }
                else if (chr == 'r') {
                  chr = '\r';
                }
                else if (chr == 'n') {
                  chr = '\n';
                }
                else if (chr == '0') {
                  chr = '\0';
                }
                else if (chr == 'x' || chr == 'X') {
                  // BLEH!
                }
                cur_string ~= chr;
                continue;
              }

              // Look for the delimiter that closes this kind of string.
              if (inStringType == StringType.DoubleQuote) {
                if (chr == '"') {
                  state = LexerState.Normal;
                  current.type = DToken.StringLiteral;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  if (cur_string !is null) {
                    current.value = cur_string;
                  }
                  _pos++;
                  return current;
                }
              }
              else if (inStringType == StringType.RawWhatYouSeeQuote) {
                if (chr == '"') {
                  state = LexerState.Normal;
                  current.type = DToken.StringLiteral;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  if (cur_string !is null) {
                    current.value = cur_string;
                  }
                  _pos++;
                  return current;
                }
              }
              else if (inStringType == StringType.WhatYouSeeQuote) {
                if (chr == '`') {
                  state = LexerState.Normal;
                  current.type = DToken.StringLiteral;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  if (cur_string !is null) {
                    current.value = cur_string;
                  }
                  _pos++;
                  return current;
                }
              }
              else { // StringType.Character
                if (chr == '\'') {
                  if (cur_string.length > 1) {
                    // error: reported by the default case of the state switch
                    goto default;
                  }
                  state = LexerState.Normal;
                  current.type = DToken.CharacterLiteral;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  if (cur_string !is null) {
                    current.value = cur_string;
                  }
                  _pos++;
                  return current;
                }
              }

              if ((inStringType == StringType.DoubleQuote || inStringType == StringType.Character) && (chr == '\\')) {
                // Escaped Characters
                inEscape = true;
              }
              else {
                cur_string ~= chr;
              }
              continue;

            case LexerState.Comment:
              break;

            case LexerState.Identifier:
              // check for valid succeeding character
              if ((chr < 'a' || chr > 'z') && (chr < 'A' || chr > 'Z') && chr != '_' && (chr < '0' || chr > '9')) {
                // The identifier has ended. A keyword at index i of
                // keywordList is given the token type keywordStart + i.
                static DToken keywordStart = DToken.Abstract;
                static const string[] keywordList = ["abstract", "alias", "align", "asm", "assert", "auto",
                  "body", "bool", "break", "byte", "case", "cast","catch","cdouble","cent","cfloat","char",
                  "class","const","continue","creal","dchar","debug","default","delegate","delete","deprecated",
                  "do","double","else","enum","export","extern","false","final","finally","float","for","foreach",
                  "foreach_reverse","function","goto","idouble","if","ifloat","import","in","inout","int","interface",
                  "invariant","ireal","is","lazy","long","macro","mixin","module","new","null","out","override",
                  "package","pragma","private","protected","public","real","ref","return","scope","short","static",
                  "struct","super","switch","synchronized","template","this","throw","true","try",
                  "typedef","typeid","typeof","ubyte","ucent","uint","ulong","union","unittest","ushort","version",
                  "void","volatile","wchar","while","with"
                ];
                current.type = DToken.Identifier;

                foreach(size_t i, keyword; keywordList) {
                  if (cur_string == keyword) {
                    current.type = keywordStart + i;
                    // Keywords carry no value.
                    cur_string = null;
                    break;
                  }
                }

                if (cur_string !is null) {
                  current.value = cur_string;
                }

                // The terminating character is not consumed here; the next
                // pop() sees it in the Normal state.
                state = LexerState.Normal;
                if (current.type != DToken.Invalid) {
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  return current;
                }
                goto case LexerState.Normal;
              }
              cur_string ~= chr;
              continue;

            case LexerState.Integer:
              // cur_base tracks what is known about the literal:
              //   0  nothing read yet
              //  -1  a leading '0' was read and the base is still undecided
              //   2, 8, 10, 16  the base is known

              // check for valid succeeding character
              // we may want to switch to floating point state
              if (chr == '.') {
                if (cur_base <= 0) {
                  cur_base = 10;
                }
                else if (cur_base == 2) {
                  _error("Cannot have binary floating point literals");
                  return Token.init;
                }
                else if (cur_base == 8) {
                  _error("Cannot have octal floating point literals");
                  return Token.init;
                }

                // Reset this just in case, it will get interpreted
                // in the Floating Point state
                inDecimal = false;
                inExponent = false;
                state = LexerState.FloatingPoint;
                goto case LexerState.FloatingPoint;
              }
              else if ((chr == 'p' || chr == 'P') && cur_base == 16) {
                // Reset this just in case, it will get interpreted
                // in the Floating Point state
                inDecimal = false;
                inExponent = false;
                state = LexerState.FloatingPoint;
                goto case LexerState.FloatingPoint;
              }
              else if (chr == '_') {
                // ignore
                if (cur_base == -1) {
                  // OCTAL
                  cur_base = 8;
                }
              }
              else if (cur_base == 0) {
                // this is the first value
                if (chr == '0') {
                  // octal or 0 or 0.0, etc
                  // use an invalid value so we can decide
                  cur_base = -1;
                  cur_integer = 0;
                }
                else if (chr >= '1' && chr <= '9') {
                  cur_base = 10;
                  cur_integer = (chr - '0');
                }
                // Cannot be any other value
                else {
                  _error("Integer literal expected.");
                  return Token.init;
                }
              }
              else if (cur_base == -1) {
                // this is the second value of an ambiguous base
                if (chr >= '0' && chr <= '7') {
                  // OCTAL
                  cur_base = 8;
                  cur_integer = (chr - '0');
                }
                else if (chr == 'x' || chr == 'X') {
                  // HEX
                  cur_base = 16;
                }
                else if (chr == 'b' || chr == 'B') {
                  // BINARY
                  cur_base = 2;
                }
                else {
                  // A lone 0. It carries its value like every other
                  // integer literal.
                  current.type = DToken.IntegerLiteral;
                  current.value = cur_integer;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
              }
              else if (cur_base == 16) {
                if ((chr < '0' || chr > '9') && (chr < 'a' || chr > 'f') && (chr < 'A' || chr > 'F')) {
                  current.type = DToken.IntegerLiteral;
                  current.value = cur_integer;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else {
                  cur_integer *= cur_base;
                  if (chr >= 'a' && chr <= 'f') {
                    cur_integer += 10 + (chr - 'a');
                  }
                  else if (chr >= 'A' && chr <= 'F') {
                    cur_integer += 10 + (chr - 'A');
                  }
                  else {
                    cur_integer += (chr - '0');
                  }
                }
              }
              else if (cur_base == 10) {
                if (chr < '0' || chr > '9') {
                  current.type = DToken.IntegerLiteral;
                  current.value = cur_integer;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else {
                  cur_integer *= cur_base;
                  cur_integer += (chr - '0');
                }
              }
              else if (cur_base == 8) {
                if (chr >= '8' && chr <= '9') {
                  _error("Digits higher than 7 in an octal integer literal are invalid.");
                  return Token.init;
                }
                else if (chr < '0' || chr > '7') {
                  current.type = DToken.IntegerLiteral;
                  current.value = cur_integer;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else {
                  cur_integer *= cur_base;
                  cur_integer += (chr - '0');
                }
              }
              else if (cur_base == 2) {
                if (chr < '0' || chr > '1') {
                  current.type = DToken.IntegerLiteral;
                  current.value = cur_integer;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else {
                  cur_integer *= cur_base;
                  cur_integer += (chr - '0');
                }
              }
              continue;

            case LexerState.FloatingPoint:
              // The literal is assembled from cur_integer (digits before the
              // point), cur_decimal / cur_denominator (digits after it) and
              // cur_exponent. cur_exponent == -1 marks an exponent whose
              // digits have not been seen yet.
              if (chr == '_') {
                continue;
              }
              else if (chr == '.' && (cur_base == 10 || cur_base == 16)) {
                // We are now parsing the decimal portion
                if (inDecimal) {
                  _error("Only one decimal point is allowed per floating point literal.");
                  return Token.init;
                }
                else if (inExponent) {
                  _error("Cannot put a decimal point after an exponent in a floating point literal.");
                  return Token.init;
                }
                inDecimal = true;
              }
              else if (cur_base == 16 && (chr == 'p' || chr == 'P')) {
                // We are now parsing the exponential portion
                inDecimal = false;
                inExponent = true;
                cur_exponent = -1;
              }
              else if (cur_base == 10 && (chr == 'e' || chr == 'E')) {
                // We are now parsing the exponential portion
                inDecimal = false;
                inExponent = true;
                cur_exponent = -1;
              }
              else if (cur_base == 10) {
                if (chr == 'p' || chr == 'P') {
                  _error("Cannot have a hexidecimal exponent in a non-hexidecimal floating point literal.");
                  return Token.init;
                }
                else if (chr < '0' || chr > '9') {
                  // End of the literal. A sign after the exponent marker
                  // also lands here and is reported as a missing exponent.
                  if (inExponent && cur_exponent == -1) {
                    _error("You need to specify a value for the exponent part of the floating point literal.");
                    return Token.init;
                  }
                  current.type = DToken.FloatingPointLiteral;
                  double value = cast(double)cur_integer + (cast(double)cur_decimal / cast(double)cur_denominator);
                  double exp = 1;
                  for(size_t i = 0; i < cur_exponent; i++) {
                    exp *= cur_base;
                  }
                  value *= exp;
                  current.value = value;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else if (inExponent) {
                  if (cur_exponent == -1) {
                    cur_exponent = 0;
                  }
                  cur_exponent *= cur_base;
                  cur_exponent += (chr - '0');
                }
                else {
                  cur_decimal *= cur_base;
                  cur_denominator *= cur_base;
                  cur_decimal += (chr - '0');
                }
              }
              else { // cur_base == 16
                bool isDecimalDigit = (chr >= '0' && chr <= '9');
                bool isHexDigit = isDecimalDigit || (chr >= 'a' && chr <= 'f') || (chr >= 'A' && chr <= 'F');

                // The exponent of a hexadecimal float is written in decimal
                // and is a power of two, so inside the exponent only decimal
                // digits belong to the literal.
                if (inExponent ? !isDecimalDigit : !isHexDigit) {
                  if (inDecimal && !inExponent) {
                    _error("You need to provide an exponent with the decimal portion of a hexidecimal floating point number. Ex: 0xff.3p2");
                    return Token.init;
                  }
                  if (inExponent && cur_exponent == -1) {
                    _error("You need to specify a value for the exponent part of the floating point literal.");
                    return Token.init;
                  }
                  current.type = DToken.FloatingPointLiteral;
                  double value = cast(double)cur_integer + (cast(double)cur_decimal / cast(double)cur_denominator);
                  double exp = 1;
                  for(size_t i = 0; i < cur_exponent; i++) {
                    exp *= 2;
                  }
                  value *= exp;
                  current.value = value;
                  current.columnEnd = _pos;
                  current.lineEnd = _lineNumber;
                  state = LexerState.Normal;
                  return current;
                }
                else if (inExponent) {
                  if (cur_exponent == -1) {
                    cur_exponent = 0;
                  }
                  cur_exponent *= 10;
                  cur_exponent += (chr - '0');
                }
                else {
                  cur_decimal *= cur_base;
                  cur_denominator *= cur_base;
                  if (chr >= 'A' && chr <= 'F') {
                    cur_decimal += 10 + (chr - 'A');
                  }
                  else if (chr >= 'a' && chr <= 'f') {
                    cur_decimal += 10 + (chr - 'a');
                  }
                  else {
                    cur_decimal += (chr - '0');
                  }
                }
              }
              continue;
          }
        }

        // The whole line has been consumed.
        if (current.type != DToken.Invalid) {
          current.columnEnd = _pos;
          current.lineEnd = _lineNumber;
          return current;
        }

        // current.line / current.column are advanced at the top of the loop
        // when the next line is read; advancing them here as well would
        // count every line break twice.
        if (state != LexerState.String) {
          state = LexerState.Normal;
        }
        else {
          // A string that is still open continues on the next line.
          if (inStringType == StringType.Character) {
            _error("Unmatched character literal.");
            return Token.init;
          }
          cur_string ~= '\n';
        }
      }

      return Token.init;
    }

    // foreach support: calls `loopbody` with every token until pop() yields
    // a token of type DToken.Invalid.
    //
    // Iteration stops as soon as the delegate returns a nonzero value, and
    // that value is returned unchanged so the compiler-generated foreach
    // code can tell break, return and goto apart. Returns 0 when the input
    // is exhausted.
    int opApply(int delegate(ref Token) loopbody) {
      int ret;
      Token foo;

      while ((foo = this.pop()).type != DToken.Invalid) {
        ret = loopbody(foo);
        if (ret != 0) {
          return ret;
        }
      }

      return ret;
    }

  private:

    // Prints a lexical error for the current position to the console.
    // _lineNumber is already the 1-based number of the line being lexed;
    // _pos is 0-based, hence the + 1 for the column.
    // NOTE(review): the console colour is left red afterwards and the file
    // name is hard-coded as "file.d".
    void _error(string msg) {
      Console.forecolor = Color.Red;
      Console.putln("Lexical Error: file.d @ ", _lineNumber, ":", _pos+1, " - ", msg);
      Console.putln();
    }

    // Describe the number lexer states
    enum LexerState : uint {
      Normal,
      String,
      Comment,
      Identifier,
      Integer,
      FloatingPoint
    }

    // State the lexer is in; it persists between calls to pop().
    LexerState state;

    // True when the previous character inside a string was a backslash.
    bool inEscape;

    // Describe the string lexer states
    enum StringType : uint {
      DoubleQuote,        // "..."
      WhatYouSeeQuote,    // `...`
      RawWhatYouSeeQuote, // r"..."
      Character,          // '.'
    }

    StringType inStringType;

    // Describe the comment lexer states
    enum CommentType : uint {
      BlockComment,
      LineComment,
      NestedComment
    }

    CommentType inCommentType;

    // Text collected so far for the string or identifier being lexed.
    string cur_string;

    Stream _stream;

    // Line currently being lexed, the number of lines read so far, and the
    // 0-based index of the next character to look at within _line.
    string _line;
    size_t _lineNumber;
    size_t _pos;

    // Maps a punctuation character to the token type it starts. The table
    // is indexed by character value and its largest key is '~'.
    static const DToken[] tokenMapping = [
      '!':DToken.Bang,
      ':':DToken.Colon,
      ';':DToken.Semicolon,
      '.':DToken.Dot,
      ',':DToken.Comma,
      '(':DToken.LeftParen,
      ')':DToken.RightParen,
      '{':DToken.LeftCurly,
      '}':DToken.RightCurly,
      '[':DToken.LeftBracket,
      ']':DToken.RightBracket,
      '<':DToken.LessThan,
      '>':DToken.GreaterThan,
      '=':DToken.Assign,
      '+':DToken.Add,
      '-':DToken.Sub,
      '~':DToken.Cat,
      '*':DToken.Mul,
      '/':DToken.Div,
      '^':DToken.Xor,
      '|':DToken.Or,
      '&':DToken.And,
      '%':DToken.Mod,
    ];

    // Working values for the numeric literal being lexed.
    int cur_base;
    ulong cur_integer;
    bool cur_integer_signed;
    ulong cur_decimal;
    ulong cur_exponent;
    ulong cur_denominator;
    bool inDecimal;
    bool inExponent;

    // Every line read from the stream so far, in order.
    string[] _lines;

    // Tokens handed back through push().
    Stack!(Token) _bank;
  }