/tools/dscribe/lexer.d

http://github.com/wilkie/djehuty · D · 691 lines · 652 code · 26 blank · 13 comment · 338 complexity · 4a0ac3461825a1fae5fe6d1b3b37d7a0 MD5 · raw file

  1. /*
  2. * lexer.d
  3. *
  4. * This module implements the D lexicon.
  5. *
  6. */
  7. module tools.dscribe.lexer;
  8. import parsing.lexer;
  9. import core.event;
  10. import core.string;
  11. import core.regex;
  12. import tools.dscribe.tokens;
  13. import console.main;
  // LexerD
  //
  // Lexer for D source text. The constructor registers the regular
  // expressions that recognise raw lexemes; raiseSignal() then refines each
  // raw match into a specific token (individual operators, delimiters,
  // keywords, decoded integer values) before handing it to the base Lexer.
  class LexerD : Lexer {

      // Registers every lexical rule.
      //
      // Rules added before newState() belong to the state that is active
      // when the lexer is constructed; rules added between newState() and
      // setState(normalState) belong to the nested-comment state.
      //
      // NOTE(review): normalState is never assigned in this class, so it
      // keeps its default value of 0. Presumably that is the identifier of
      // the base lexer's initial state -- confirm against parsing.lexer.
      this() {
          // D Lexicon
          addRule(Lex.Whitespace, `\s*`);
          addRule(Lex.Operator, `[/=+*^%\-<>!~&\|\.]+`);

          // Other Lexicon
          addRule(Lex.Delimiter, `[()\][{}?,;:$]`);

          // Wysiwyg String Literal
          addRule(Lex.WysiwygString, "`([^`]*)`");
          addRule(Lex.WysiwygString, `r"([^"]*)"`);

          // String Literal
          // Each element of the body is either an ordinary character or a
          // backslash escape, so strings that start with an escape or that
          // contain consecutive escapes ("\n", "\\\\") are matched as well.
          addRule(Lex.DoubleQuotedString, `"((?:[^\\"]|\\.)*)"`);

          // Comment Line
          addRule(Lex.CommentLine, `//([^\n\r]*)`);

          // Comment Blocks
          // NOTE(review): the capture group sits inside the repetition, so
          // with conventional regex semantics group 1 holds only the last
          // iteration rather than the whole comment -- confirm how
          // core.regex reports it.
          addRule(Lex.CommentBlock, `/\*([^\*](?:\*[^/])?)*\*/`);

          // Nested Comment Blocks
          addRule(Lex.CommentNestedStart, `/\+`);

          nestedCommentState = newState(); // For within comment blocks

          // /+ afsdfasdf /+ afsdasdf +/ asdfasdfsda +/
          addRule(Lex.CommentNestedStart, `/\+`);
          addRule(Lex.CommentNestedEnd, `\+/`);
          addRule(Lex.Comment, `([^\+/](?:\+[^/])?)*`);

          setState(normalState);

          // These rules slow down the lexer with their complexity
          addRule(Lex.DecimalFloat, `([0-9][_0-9]*|0)\.(?:[_0-9]+(?:[eE](?:\+|-)?[_0-9]+)?)?([fF]i?|Li?|i)?`);
          addRule(Lex.DecimalFloat, `\.[0-9][_0-9]*(?:[eE](?:\+|-)?[_0-9]+)?([fF]i?|Li?|i)?`);
          // The exponent sign is optional in D (1e10 is a valid float).
          addRule(Lex.DecimalFloat, `[0-9][_0-9]*[eE][+-]?[_0-9]+([fF]i?|Li?|i)?`);
          addRule(Lex.HexFloat, `0[xX](?:[0-9a-fA-F_]+)?\.?[0-9a-fA-F_]+[pP][+-]?[_0-9]+`);
          addRule(Lex.IntegerFloat, `([1-9][_0-9]*|0)([fF]|L)?i`);

          // Identifier
          addRule(Lex.Identifier, `[_a-zA-Z][_a-zA-Z0-9]*`);

          addRule(Lex.HexLiteral, `0[xX][0-9a-fA-F_]+([uUL]*)`);
          addRule(Lex.BinaryLiteral, `0[bB][01_]+([uUL]*)`);
          addRule(Lex.OctalLiteral, `0[_0-7]+([uUL]*)`);
          addRule(Lex.IntegerLiteral, `([1-9][_0-9]*|0)([uUL]*)`);

          // Special Tokens
          addRule(Lex.SpecialLine,
              `#line\s+(0x[0-9a-fA-F_]+|0b[01_]+|0[_0-7]+|(?:[1-9][_0-9]*|0))(?:\s+("[^"]*"))?`);
      }

      // Refines the raw lexeme identified by `signal` into a concrete token.
      //
      // For most signals the current `token` is replaced with a more
      // specific one, echoed to the console, and forwarded through
      // super.raiseSignal(). Whitespace, unrecognised delimiters/operators
      // and nested-comment bookkeeping return true immediately without
      // forwarding anything.
      // NOTE(review): returning true presumably tells the base Lexer that
      // the signal was consumed -- confirm against parsing.lexer.
      override bool raiseSignal(uint signal) {
          switch(signal) {
              case Lex.Delimiter:
                  switch(token.getString()[0]) {
                      case '(': token = new Token(Lex.LeftParen); break;
                      case ')': token = new Token(Lex.RightParen); break;
                      case '{': token = new Token(Lex.LeftCurly); break;
                      case '}': token = new Token(Lex.RightCurly); break;
                      case '[': token = new Token(Lex.LeftBrace); break;
                      case ']': token = new Token(Lex.RightBrace); break;
                      case '?': token = new Token(Lex.QuestionMark); break;
                      case ',': token = new Token(Lex.Comma); break;
                      case ';': token = new Token(Lex.Semicolon); break;
                      case ':': token = new Token(Lex.Colon); break;
                      case '$': token = new Token(Lex.Dollar); break;
                      default:
                          // unknown block delimiter
                          return true;
                  }
                  break;

              case Lex.Operator:
                  switch(token.getString().toString) {
                      case `==`: token = new Token(Lex.Equal); break;
                      case `=`: token = new Token(Lex.Assign); break;
                      case `>>`: token = new Token(Lex.RightShift); break;
                      case `>>=`: token = new Token(Lex.RightShiftAssign); break;
                      case `>>>`: token = new Token(Lex.RightShiftUnsigned); break;
                      case `>>>=`: token = new Token(Lex.RightShiftUnsignedAssign); break;
                      case `>`: token = new Token(Lex.GreaterThan); break;
                      case `>=`: token = new Token(Lex.GreaterOrEqual); break;
                      case `<<`: token = new Token(Lex.LeftShift); break;
                      case `<<=`: token = new Token(Lex.LeftShiftAssign); break;
                      case `<`: token = new Token(Lex.LessThan); break;
                      case `<=`: token = new Token(Lex.LessOrEqual); break;
                      case `<>`: token = new Token(Lex.NotEqual); break;
                      case `!=`: token = new Token(Lex.UnorderedNotEqual); break;
                      case `!<>`: token = new Token(Lex.UnorderedEqual); break;
                      case `!>=`: token = new Token(Lex.UnorderedLessThan); break;
                      case `!<=`: token = new Token(Lex.UnorderedGreaterThan); break;
                      case `!<`: token = new Token(Lex.UnorderedGreaterOrEqual); break;
                      case `!>`: token = new Token(Lex.UnorderedLessOrEqual); break;
                      case `<>=`: token = new Token(Lex.Tautology); break;
                      case `!<>=`: token = new Token(Lex.UnorderedContradiction); break;
                      case `.`: token = new Token(Lex.Dot); break;
                      case `..`: token = new Token(Lex.DotDot); break;
                      case `...`: token = new Token(Lex.DotDotDot); break;
                      case `&&`: token = new Token(Lex.AndAnd); break;
                      case `&=`: token = new Token(Lex.AndAssign); break;
                      case `&`: token = new Token(Lex.And); break;
                      case `||`: token = new Token(Lex.OrOr); break;
                      case `|=`: token = new Token(Lex.OrAssign); break;
                      case `|`: token = new Token(Lex.Or); break;
                      case `-=`: token = new Token(Lex.SubAssign); break;
                      case `-`: token = new Token(Lex.Sub); break;
                      case `+=`: token = new Token(Lex.AddAssign); break;
                      case `+`: token = new Token(Lex.Add); break;
                      case `*=`: token = new Token(Lex.MulAssign); break;
                      case `*`: token = new Token(Lex.Mul); break;
                      case `/=`: token = new Token(Lex.DivAssign); break;
                      case `/`: token = new Token(Lex.Div); break;
                      case `~=`: token = new Token(Lex.CatAssign); break;
                      case `~`: token = new Token(Lex.Cat); break;
                      case `!`: token = new Token(Lex.Bang); break;
                      default:
                          // unknown operator
                          return true;
                  }
                  break;

              case Lex.CommentLine:
                  token = new Token(Lex.Comment, _1);
                  break;

              case Lex.Whitespace:
                  return true;

              case Lex.DoubleQuotedString:
              case Lex.WysiwygString:
                  token = new Token(Lex.StringLiteral, _1);
                  break;

              case Lex.CommentBlock:
                  // The grouping is the actual comment data
                  token = new Token(Lex.Comment, _1);
                  break;

              case Lex.CommentNestedStart:
                  if (getState() == nestedCommentState) {
                      // Already inside a nested comment: go one level deeper
                      // and keep the opener as part of the comment text.
                      nestedCommentDepth++;
                      comment ~= token.getString();
                  }
                  else {
                      // Outermost opener: reset the accumulator and switch
                      // to the nested-comment rule set.
                      comment = new String("");
                      nestedCommentDepth = 0;
                      setState(nestedCommentState);
                  }
                  return true;

              case Lex.CommentNestedEnd:
                  if (nestedCommentDepth == 0) {
                      // Closing the outermost comment: resume normal lexing.
                      // NOTE(review): the text gathered in `comment` is not
                      // emitted as a token here -- confirm whether that is
                      // intended.
                      setState(normalState);
                  }
                  else {
                      comment ~= token.getString();
                      nestedCommentDepth--;
                  }
                  return true;

              case Lex.HexLiteral:
                  // Skip the "0x" prefix.
                  token = new Token(Lex.IntegerLiteral, decodeInteger(token.getString(), 2, 16));
                  break;

              case Lex.OctalLiteral:
                  // Skip the leading "0".
                  token = new Token(Lex.IntegerLiteral, decodeInteger(token.getString(), 1, 8));
                  break;

              case Lex.BinaryLiteral:
                  // Skip the "0b" prefix.
                  token = new Token(Lex.IntegerLiteral, decodeInteger(token.getString(), 2, 2));
                  break;

              case Lex.IntegerLiteral:
                  token = new Token(Lex.IntegerLiteral, decodeInteger(token.getString(), 0, 10));
                  break;

              case Lex.Identifier:
                  resolveIdentifier();
                  break;

              default:
                  break;
          }

          // Echo the resulting token to the console.
          if (token.getString is null) {
              Console.put(((token.getId())), " (", token.getInteger(), ") ");
          }
          else {
              Console.put(((token.getId())), " [", token.getString(), "] ");
          }

          return super.raiseSignal(token.getId());
      }

  protected:
      uint normalState;
      uint commentBlockState;
      uint nestedCommentState;
      uint nestedCommentDepth;
      uint stringLiteralState;
      uint wysiwygLiteralState;
      uint wysiwygRLiteralState;

      // Text accumulated while inside a nested (/+ +/) comment.
      String comment;

  private:
      // Decodes the digits of an integer literal.
      //
      // text:  the full lexeme, including any radix prefix and suffix
      // start: index of the first digit (skips "0x", "0b" or the octal "0")
      // radix: numeric base of the digits (2, 8, 10 or 16)
      //
      // Underscores are digit separators and are ignored. Decoding stops at
      // the first suffix character (u, U or L): the integer rules include
      // the suffix in the lexeme, and it must not be folded into the value.
      ulong decodeInteger(String text, uint start, uint radix) {
          ulong value = 0;

          foreach(chr; text[start..text.length]) {
              if (chr == '_') {
                  continue;
              }

              if (chr == 'u' || chr == 'U' || chr == 'L') {
                  break;
              }

              value *= radix;

              if (chr >= 'a' && chr <= 'f') {
                  value += 10 + (chr - 'a');
              }
              else if (chr >= 'A' && chr <= 'F') {
                  value += 10 + (chr - 'A');
              }
              else {
                  value += chr - '0';
              }
          }

          return value;
      }

      // Replaces the current identifier token with a keyword token, or with
      // a placeholder literal for the reserved __XXX__ identifiers. Any
      // other identifier is left untouched.
      //
      // The match is on the complete identifier text, so identifiers
      // shorter than two characters need no special handling.
      void resolveIdentifier() {
          switch(token.getString().toString) {
              case `abstract`: token = new Token(Lex.Abstract); break;
              case `alias`: token = new Token(Lex.Alias); break;
              case `align`: token = new Token(Lex.Align); break;
              case `asm`: token = new Token(Lex.Asm); break;
              case `assert`: token = new Token(Lex.Assert); break;
              case `auto`: token = new Token(Lex.Auto); break;
              case `body`: token = new Token(Lex.Body); break;
              case `bool`: token = new Token(Lex.Bool); break;
              case `break`: token = new Token(Lex.Break); break;
              case `byte`: token = new Token(Lex.Byte); break;
              case `case`: token = new Token(Lex.Case); break;
              case `cast`: token = new Token(Lex.Cast); break;
              case `catch`: token = new Token(Lex.Catch); break;
              case `cdouble`: token = new Token(Lex.Cdouble); break;
              case `cent`: token = new Token(Lex.Cent); break;
              case `cfloat`: token = new Token(Lex.Cfloat); break;
              case `char`: token = new Token(Lex.Char); break;
              case `class`: token = new Token(Lex.Class); break;
              case `const`: token = new Token(Lex.Const); break;
              case `continue`: token = new Token(Lex.Continue); break;
              case `creal`: token = new Token(Lex.Creal); break;
              case `dchar`: token = new Token(Lex.Dchar); break;
              case `debug`: token = new Token(Lex.Debug); break;
              case `default`: token = new Token(Lex.Default); break;
              case `delegate`: token = new Token(Lex.Delegate); break;
              case `delete`: token = new Token(Lex.Delete); break;
              case `deprecated`: token = new Token(Lex.Deprecated); break;
              case `do`: token = new Token(Lex.Do); break;
              case `double`: token = new Token(Lex.Double); break;
              case `else`: token = new Token(Lex.Else); break;
              case `enum`: token = new Token(Lex.Enum); break;
              // `export` has its own token; it must not be reported as `enum`.
              case `export`: token = new Token(Lex.Export); break;
              case `extern`: token = new Token(Lex.Extern); break;
              case `false`: token = new Token(Lex.False); break;
              case `final`: token = new Token(Lex.Final); break;
              case `finally`: token = new Token(Lex.Finally); break;
              case `float`: token = new Token(Lex.Float); break;
              case `for`: token = new Token(Lex.For); break;
              case `foreach`: token = new Token(Lex.Foreach); break;
              case `foreach_reverse`: token = new Token(Lex.Foreach_reverse); break;
              case `function`: token = new Token(Lex.Function); break;
              case `goto`: token = new Token(Lex.Goto); break;
              case `idouble`: token = new Token(Lex.Idouble); break;
              case `if`: token = new Token(Lex.If); break;
              case `ifloat`: token = new Token(Lex.Ifloat); break;
              case `import`: token = new Token(Lex.Import); break;
              case `in`: token = new Token(Lex.In); break;
              case `inout`: token = new Token(Lex.Inout); break;
              case `int`: token = new Token(Lex.Int); break;
              case `interface`: token = new Token(Lex.Interface); break;
              case `invariant`: token = new Token(Lex.Invariant); break;
              case `ireal`: token = new Token(Lex.Ireal); break;
              case `is`: token = new Token(Lex.Is); break;
              case `lazy`: token = new Token(Lex.Lazy); break;
              case `long`: token = new Token(Lex.Long); break;
              case `macro`: token = new Token(Lex.Macro); break;
              case `mixin`: token = new Token(Lex.Mixin); break;
              case `module`: token = new Token(Lex.Module); break;
              case `new`: token = new Token(Lex.New); break;
              case `null`: token = new Token(Lex.Null); break;
              case `out`: token = new Token(Lex.Out); break;
              case `override`: token = new Token(Lex.Override); break;
              case `package`: token = new Token(Lex.Package); break;
              case `pragma`: token = new Token(Lex.Pragma); break;
              case `private`: token = new Token(Lex.Private); break;
              case `protected`: token = new Token(Lex.Protected); break;
              case `public`: token = new Token(Lex.Public); break;
              case `real`: token = new Token(Lex.Real); break;
              case `ref`: token = new Token(Lex.Ref); break;
              case `return`: token = new Token(Lex.Return); break;
              case `scope`: token = new Token(Lex.Scope); break;
              case `short`: token = new Token(Lex.Short); break;
              case `static`: token = new Token(Lex.Static); break;
              case `struct`: token = new Token(Lex.Struct); break;
              case `super`: token = new Token(Lex.Super); break;
              case `switch`: token = new Token(Lex.Switch); break;
              case `synchronized`: token = new Token(Lex.Synchronized); break;
              case `template`: token = new Token(Lex.Template); break;
              case `this`: token = new Token(Lex.This); break;
              case `throw`: token = new Token(Lex.Throw); break;
              case `true`: token = new Token(Lex.True); break;
              case `try`: token = new Token(Lex.Try); break;
              case `typedef`: token = new Token(Lex.Typedef); break;
              case `typeid`: token = new Token(Lex.Typeid); break;
              case `typeof`: token = new Token(Lex.Typeof); break;
              case `ubyte`: token = new Token(Lex.Ubyte); break;
              case `ucent`: token = new Token(Lex.Ucent); break;
              case `uint`: token = new Token(Lex.Uint); break;
              case `ulong`: token = new Token(Lex.Ulong); break;
              case `union`: token = new Token(Lex.Union); break;
              case `unittest`: token = new Token(Lex.Unittest); break;
              case `ushort`: token = new Token(Lex.Ushort); break;
              case `version`: token = new Token(Lex.Version); break;
              case `void`: token = new Token(Lex.Void); break;
              case `volatile`: token = new Token(Lex.Volatile); break;
              case `wchar`: token = new Token(Lex.Wchar); break;
              case `while`: token = new Token(Lex.While); break;
              case `with`: token = new Token(Lex.With); break;

              // Reserved Identifiers (replaced with placeholder literals)
              case `__FILE__`:
                  token = new Token(Lex.StringLiteral, new String("file.d"));
                  break;
              case `__LINE__`:
                  token = new Token(Lex.IntegerLiteral, new String(0));
                  break;
              case `__DATE__`:
                  token = new Token(Lex.StringLiteral, new String("mmmm dd yyyy"));
                  break;
              case `__TIME__`:
                  token = new Token(Lex.StringLiteral, new String("hh:mm:ss"));
                  break;
              case `__TIMESTAMP__`:
                  token = new Token(Lex.StringLiteral, new String("www mmm dd hh:mm:ss yyyy"));
                  break;
              case `__VENDOR__`:
                  token = new Token(Lex.StringLiteral, new String(""));
                  break;
              case `__VERSION__`:
                  token = new Token(Lex.StringLiteral, new String(0));
                  break;

              default:
                  // Ordinary identifier: keep the token as lexed.
                  break;
          }
      }
  }