PageRenderTime 66ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/output/d/40007-Lexer.d

http://github.com/bengardner/uncrustify
D | 2413 lines | 2111 code | 135 blank | 167 comment | 279 complexity | 8e17c2a2568f7a38960f0145c06ff5c3 MD5 | raw file
Possible License(s): GPL-2.0
  1. /+
  2. * Copyright (c) 1999-2006 by Digital Mars
  3. * All Rights Reserved
  4. * written by Walter Bright www.digitalmars.com
  5. * License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.
  6. * See the included readme.txt for details.
  7. * D Language conversion by: J Duncan
  8. +/
  9. /**
  10. * D language lexer
  11. */
  12. module dparser.Lexer;
  13. import dparser.Root;
  14. import dparser.Tokens;
  15. import dparser.Token;
  16. import dparser.Keyword;
  17. import dparser.Types;
  18. import dparser.Module;
  19. import dparser.Identifier;
  20. import dparser.unialpha;
  21. import dparser.OutBuffer;
  22. //private import std.ctype;
  23. //private import std.string;
  24. //import dwf.core.debugapi;
  25. int errno = 0;
  26. //#if _WIN32 && __DMC__
  27. // from \dm\src\include\setlocal.h
  28. //extern "C" char * __cdecl __locale_decpoint;
  29. char* __locale_decpoint;
  30. //#endif
  31. //const uint LS = 0x2028; // UTF line separator
  32. //const uint PS = 0x2029; // UTF paragraph separator
  33. //extern int isUniAlpha(unsigned u);
  34. //extern int HtmlNamedEntity(unsigned char *p, int length);
  35. /**
  36. * Lexer object
  37. */
  38. class Lexer
  39. {
  40. static Identifier[char[]] stringtable;
  41. static OutBuffer stringbuffer;
  42. static Token * freelist;
  43. Token token; // current token
  44. Module mod; // current module
  45. Loc loc; // for error messages
  46. ubyte *base; // pointer to start of buffer
  47. ubyte *end; // past end of buffer
  48. ubyte *p; // current character
  49. int doDocComment; // collect doc comment information
  50. int anyToken; // !=0 means seen at least one token
  51. int commentToken; // !=0 means comments are TOKcomment's
  /**
   * Set up a lexer over the byte range [base + begoffset, base + endoffset).
   *
   * Params:
   *   mod          = module being lexed; stored and used to build loc
   *   base         = start of the source buffer
   *   begoffset    = offset from base at which lexing starts
   *   endoffset    = offset from base of the byte just past the text
   *   doDocComment = stored; scan() consults it before calling getDocComment
   *   commentToken = stored; when non-zero scan() returns comments as TOKcomment
   *
   * If the text begins with "#!" the whole first line is skipped and the
   * line counter starts at 2.
   */
  this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
  {
      // The string buffer is shared by every Lexer; create it on first use.
      if (stringbuffer is null) {
          stringbuffer = new OutBuffer;
      }

      this.mod          = mod;
      this.base         = base;
      this.p            = base + begoffset;
      this.end          = base + endoffset;
      this.doDocComment = doDocComment;
      this.commentToken = commentToken;
      loc = Loc(mod, 1);

      if (p[0] == '#' && p[1] == '!') {
          p += 2;

          int lineDone = 0;
          while (!lineDone) {
              ubyte ch = *p;

              if (ch == '\n') {
                  p++;
                  lineDone = 1;
              }
              else if (ch == '\r') {
                  // a CR, or a CR LF pair, ends the line
                  p++;
                  if (*p == '\n') {
                      p++;
                  }
                  lineDone = 1;
              }
              else if (ch == 0 || ch == 0x1A) {
                  // end of input: stay on the terminator
                  lineDone = 1;
              }
              else {
                  if (ch & 0x80) {
                      uint u = decodeUTF();
                      if (u == PS || u == LS) {
                          // Unicode line/paragraph separator; p is not advanced here
                          lineDone = 1;
                          continue;
                      }
                  }
                  p++;
              }
          }
          loc.linnum = 2;
      }
  }
  /**
   * Return the Identifier registered for str in the shared string table,
   * creating and registering a TOKidentifier entry if there is none yet.
   */
  static Identifier idPool(in char[] str) {
      Identifier* slot = str in stringtable;

      if (slot !is null) {
          return(*slot);
      }

      Identifier fresh = new Identifier(str, TOK.TOKidentifier);
      stringtable[str] = fresh;
      return(fresh);
  }
  /**
   * One-time static setup for the lexer. Runs, in this order:
   * cmtable_init(), dparser.Keyword.initKeywords() and
   * dparser.Token.createLexerTokens().
   */
  static void initKeywords()
  {
      cmtable_init();                        // character map

      dparser.Keyword.initKeywords();        // keyword tokens and identifiers

      dparser.Token.createLexerTokens();     // standard lexer tokens
  }
  /**
   * Join two documentation comments.
   *
   * Returns: c2 when c1 is empty, c1 when c2 is empty, otherwise c1 and c2
   * separated by a single "\n".
   */
  static char[] combineComments(char[] c1, char[] c2) {
      if (!c1.length) {
          return(c2);
      }
      if (!c2.length) {
          return(c1);
      }
      return(c1 ~ "\n" ~ c2);
  }
  /**
   * Decode the multi-byte UTF-8 sequence whose lead byte is at p.
   *
   * On return p points at the LAST byte of the sequence (not one past it),
   * so callers advance with their usual p++ afterwards.
   *
   * Malformed input (bad lead byte, missing continuation byte, overlong
   * encoding, surrogate, value above U+10FFFF) is reported through error()
   * and yields U+FFFD, the Unicode replacement character.
   *
   * Returns: the decoded code point; if *p is plain ASCII it is returned
   * unchanged and p is not moved.
   */
  uint decodeUTF() {
      ubyte* s    = p;
      ubyte  lead = *s;

      assert(lead & 0x80);
      if (!(lead & 0x80)) {
          return(lead);
      }

      uint len;      // total bytes in the sequence, lead byte included
      uint u;        // code point bits gathered so far
      uint lowest;   // smallest value that legitimately needs len bytes

      if ((lead & 0xE0) == 0xC0) {
          len = 2; u = lead & 0x1F; lowest = 0x80;
      }
      else if ((lead & 0xF0) == 0xE0) {
          len = 3; u = lead & 0x0F; lowest = 0x800;
      }
      else if ((lead & 0xF8) == 0xF0) {
          len = 4; u = lead & 0x07; lowest = 0x10000;
      }
      else {
          // stray continuation byte (10xxxxxx) or an obsolete 5/6-byte lead
          error("invalid UTF-8 lead byte 0x%02x", lead);
          return(0xFFFD);
      }

      for (uint i = 1; i < len; i++) {
          ubyte cont = s[i];
          if ((cont & 0xC0) != 0x80) {
              // The 0 and 0x1A end-of-input bytes also fail this test, so the
              // loop never reads beyond the first non-continuation byte.
              error("truncated UTF-8 sequence");
              p = s + (i - 1);       // last byte that did belong to the sequence
              return(0xFFFD);
          }
          u = (u << 6) | (cont & 0x3F);
      }

      p = s + (len - 1);

      if (u < lowest || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF)) {
          error("invalid UTF character \\U%08x", u);
          return(0xFFFD);
      }
      return(u);
  }
  /**
   * Report an error at the lexer's current location (loc).
   *
   * The variadic arguments are passed to formatLoc() together with loc and
   * the result is printed with writefln. Nothing is printed when there is no
   * module or when global.gag is set. global.errors is incremented on every
   * call that returns normally.
   *
   * Throws: Exception("too many errors") once global.errors has reached
   * global.max_errors (only checked when the message is printed).
   */
  void error(...) {
      if (mod is null || global.gag) {
          // silent mode: just count the error
          global.errors++;
          return;
      }

      writefln(formatLoc(loc, _arguments, _argptr));

      // stop a blizzard of cascading messages
      if (global.errors >= global.max_errors) {
          throw new Exception("too many errors");
      }
      global.errors++;
  }
  /**
   * Report an error at an explicit location.
   *
   * Same behaviour as error(), except that the message is formatted with the
   * loc argument instead of the lexer's current location. The error limit is
   * global.max_errors, the same limit error() uses (this method previously
   * hard-coded 20).
   *
   * Params:
   *   loc = location to report; the remaining arguments go to formatLoc()
   *
   * Throws: Exception("too many errors") once global.errors has reached
   * global.max_errors (only checked when the message is printed).
   */
  void errorLoc(Loc loc, ...) {
      if ((mod !is null) && !global.gag) {
          writefln(formatLoc(loc, _arguments, _argptr));

          // stop a blizzard of cascading messages
          if (global.errors >= global.max_errors) {
              throw new Exception("too many errors");
          }
      }
      global.errors++;
  }
  /**
   * Advance to the next token and return its kind.
   *
   * If a look-ahead token is already chained behind the current one (see
   * peek()), it is copied over the current token; otherwise a new token is
   * scanned from the input.
   */
  TOK nextToken() {
      Token* queued = token.next;

      if (queued is null) {
          scan(&token);
      }
      else {
          memcpy(&token, queued, Token.sizeof);
      }
      return(token.value);
  }
  /**
   * Return the token that follows ct without consuming it.
   *
   * When ct has no successor yet, one is allocated, filled by scan() and
   * linked in as ct.next, so repeated calls return the same token.
   */
  Token* peek(inout Token ct) {
      if (ct.next is null) {
          Token* ahead = new Token;
          scan(ahead);
          ahead.next = null;
          ct.next    = ahead;
      }
      return(ct.next);
  }
  219. // Turn next token in buffer into a token.
  220. void scan(Token* t) {
  // Fill *t with the next token read from p.
  // White space and line ends are skipped (line ends bump loc.linnum).
  // Comments are skipped too unless commentToken is set, in which case they
  // are returned as TOKcomment. The byte values 0 and 0x1A produce TOKeof.
  // t.ptr is set to the first byte of the token; t.value receives its kind.
  221. // debug writefln("scan token");
  // lastLine: line number on entry; compared with a comment's starting line
  // (linnum) when calling getDocComment.
  222. uint lastLine = loc.linnum;
  223. uint linnum;
  224. t.blockComment = null;
  225. t.lineComment = null;
  // Each pass handles one lexical element; `continue` restarts after skipped
  // input, `return` hands a finished token back.
  226. while (true) {
  227. t.ptr = p;
  228. // debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p );
  229. switch (*p) {
  230. case 0:
  231. case 0x1a:
  232. t.value = TOK.TOKeof; // end of file
  233. // debug writefln( " EOF" );
  234. return;
  235. case ' ':
  236. case '\t':
  237. case '\v':
  238. case '\f':
  239. p++;
  240. // debug writefln( " whitespace" );
  241. continue; // skip white space
  242. case '\r':
  243. // debug writefln( " cr" );
  244. p++;
  // For CR LF the line is counted when the LF is seen on the next pass.
  245. if (*p != '\n') { // if CR stands by itself
  246. loc.linnum++;
  247. }
  248. continue; // skip white space
  249. case '\n':
  250. // debug writefln( " nl" );
  251. p++;
  252. loc.linnum++;
  253. continue; // skip white space
  254. case '0':
  255. case '1':
  256. case '2':
  257. case '3':
  258. case '4':
  259. case '5':
  260. case '6':
  261. case '7':
  262. case '8':
  263. case '9':
  264. t.value = number(t);
  265. return;
  266. /*
  267. * #if CSTRINGS
  268. * case '\'':
  269. * t.value = charConstant(t, 0);
  270. * return;
  271. *
  272. * case '"':
  273. * t.value = stringConstant(t,0);
  274. * return;
  275. *
  276. * case 'l':
  277. * case 'L':
  278. * if( p[1] == '\'')
  279. * {
  280. * p++;
  281. * t.value = charConstant(t, 1);
  282. * return;
  283. * }
  284. * else if( p[1] == '"')
  285. * {
  286. * p++;
  287. * t.value = stringConstant(t, 1);
  288. * return;
  289. * }
  290. * #else
  291. */
  292. case '\'':
  293. // debug writefln( " char" );
  294. t.value = charConstant(t, 0);
  295. return;
  // 'r' starts a wysiwyg string only when directly followed by '"';
  // otherwise it is the first letter of an identifier.
  296. case 'r':
  297. // debug writefln( " wysiwyg" );
  298. if (p[1] != '"') {
  299. goto case_ident;
  300. }
  301. p++;
  // deliberate fall-through: p now sits on the opening '"'
  302. case '`':
  303. t.value = wysiwygStringConstant(t, *p);
  304. return;
  // 'x' starts a hex string only when directly followed by '"'.
  305. case 'x':
  306. // debug writefln( " hex string" );
  307. if (p[1] != '"') {
  308. goto case_ident;
  309. }
  310. p++;
  311. t.value = hexStringConstant(t);
  312. return;
  313. case '"':
  314. // debug writefln( " string" );
  315. t.value = escapeStringConstant(t, 0);
  316. // debug writefln( t.ustring );
  317. return;
  318. case '\\': // escaped string literal
  319. // debug writefln( " escaped string literal" );
  320. uint c;
  321. stringbuffer.offset = 0;
  // Consecutive escapes (\n\t...) are collected into one string token.
  322. do {
  323. p++;
  324. c = escapeSequence();
  325. stringbuffer.write(c);
  326. } while (*p == '\\');
  327. // t.len = stringbuffer.offset;
  328. // stringbuffer.write(cast(byte)0);
  329. t.ustring = stringbuffer.toString;
  330. // memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset );
  331. t.postfix = 0;
  332. t.value = TOK.TOKstring;
  333. return;
  334. case 'l':
  335. case 'L':
  336. // #endif
  337. case 'a':
  338. case 'b':
  339. case 'c':
  340. case 'd':
  341. case 'e':
  342. case 'f':
  343. case 'g':
  344. case 'h':
  345. case 'i':
  346. case 'j':
  347. case 'k':
  348. case 'm':
  349. case 'n':
  350. case 'o':
  351. case 'p':
  352. case 'q': /*case 'r':*/
  353. case 's':
  354. case 't':
  355. case 'u':
  356. case 'v':
  357. case 'w': /*case 'x':*/
  358. case 'y':
  359. case 'z':
  360. case 'A':
  361. case 'B':
  362. case 'C':
  363. case 'D':
  364. case 'E':
  365. case 'F':
  366. case 'G':
  367. case 'H':
  368. case 'I':
  369. case 'J':
  370. case 'K':
  371. case 'M':
  372. case 'N':
  373. case 'O':
  374. case 'P':
  375. case 'Q':
  376. case 'R':
  377. case 'S':
  378. case 'T':
  379. case 'U':
  380. case 'V':
  381. case 'W':
  382. case 'X':
  383. case 'Y':
  384. case 'Z':
  385. case '_':
  // Also reached by goto from the 'r', 'x' and default (Unicode letter) cases.
  386. case_ident:
  387. {
  388. // debug writefln( " identifier" );
  389. ubyte c;
  // Consume identifier characters; non-ASCII bytes are decoded and
  // accepted when isUniAlpha() says so.
  390. do {
  391. c = *++p;
  392. } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
  393. // sv = stringtable.update((char *)t.ptr, p - t.ptr);
  // Copy the identifier text out of the source buffer, then look it up
  // in (or add it to) the shared string table.
  394. char[] tmp;
  395. tmp.length = p - t.ptr;
  396. memcpy(tmp.ptr, t.ptr, p - t.ptr);
  397. Identifier id;
  398. Identifier * pid = tmp in stringtable;
  399. if (pid) {
  400. id = *pid;
  401. }
  402. if (id is null) {
  403. id = new Identifier(tmp, TOK.TOKidentifier);
  404. stringtable[tmp] = id;
  405. }
  406. t.ident = id;
  // The token kind is taken from the Identifier's own value field.
  407. t.value = cast(TOK)id.value;
  408. anyToken = 1;
  409. // if special identifier token
  410. if (*t.ptr == '_') {
  411. static char date[11 + 1];
  412. static char time[8 + 1];
  413. static char timestamp[24 + 1];
  // NOTE(review): the code that fills date/time/timestamp is commented
  // out below, so these buffers are never written in this function.
  414. if (!date[0]) { // lazy evaluation
  415. //!!
  416. /+
  417. * time_t t;
  418. * char *p;
  419. * .time(&t);
  420. * p = ctime(&t);
  421. * assert(p);
  422. * sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
  423. * sprintf(time.ptr, "%.8s", p + 11);
  424. * sprintf(timestamp.ptr, "%.24s", p);
  425. +/
  426. }
  427. if (mod && id is Id.FILE) {
  428. t.value = TOK.TOKstring;
  429. if (loc.filename.length) {
  430. t.ustring = loc.filename;
  431. }
  432. else {
  433. t.ustring = mod.ident.toChars();
  434. }
  435. goto Llen;
  436. }
  437. else if (mod && id == Id.LINE) {
  438. t.value = TOK.TOKint64v;
  439. t.uns64value = loc.linnum;
  440. }
  // For DATE/TIME/TIMESTAMP only the token kind and postfix are set;
  // the assignments to t.ustring are commented out.
  441. else if (id == Id.DATE) {
  442. t.value = TOK.TOKstring;
  443. //! t.ustring = date;
  444. goto Llen;
  445. }
  446. else if (id == Id.TIME) {
  447. t.value = TOK.TOKstring;
  448. //! t.ustring = time;
  449. goto Llen;
  450. }
  451. else if (id == Id.TIMESTAMP) {
  452. t.value = TOK.TOKstring;
  453. //! t.ustring = timestamp;
  454. Llen:
  455. t.postfix = 0;
  456. // t.len = strlen((char *)t.ustring);
  457. }
  458. }
  459. //printf("t.value = %d\n",t.value);
  460. return;
  461. }
  462. // comments
  463. case '/':
  464. p++;
  465. switch (*p) {
  466. case '=':
  467. p++;
  468. t.value = TOK.TOKdivass;
  469. return;
  470. case '*': // '/*'
  471. p++;
  472. linnum = loc.linnum;
  // Outer loop: repeat until a '/' preceded by '*' is found.
  // Inner loop: advance to the next '/', counting lines on the way.
  473. while (true) {
  474. while (true) {
  475. ubyte c = *p;
  476. switch (c) {
  477. case '/':
  478. break;
  479. case '\n':
  480. loc.linnum++;
  481. p++;
  482. continue;
  483. case '\r':
  484. p++;
  485. if (*p != '\n') {
  486. loc.linnum++;
  487. }
  488. continue;
  489. case 0:
  490. case 0x1A:
  491. error("unterminated /* */ comment");
  492. p = end;
  493. t.value = TOK.TOKeof;
  494. return;
  495. default:
  496. if (c & 0x80) {
  497. uint u = decodeUTF();
  498. if (u == PS || u == LS) {
  499. loc.linnum++;
  500. }
  501. }
  502. p++;
  503. continue;
  504. }
  505. break;
  506. }
  507. p++;
  // p is now one past the '/'. The comment ends if the byte before the
  // '/' is '*', unless that '*' is the opener's own (the text "/*/").
  508. if (p[-2] == '*' && p - 3 != t.ptr) {
  509. break;
  510. }
  511. }
  512. if (commentToken) {
  513. t.value = TOK.TOKcomment;
  514. return;
  515. }
  516. // if /** but not /**/
  517. else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) {
  518. getDocComment(t, lastLine == linnum); //! ?
  519. }
  520. continue;
  521. case '/': // do // style comments
  522. linnum = loc.linnum;
  // Advance to the end of the line; p is left on the line terminator
  // (on the LF for a CR LF pair).
  523. while (1) {
  524. ubyte c = *++p;
  525. switch (c) {
  526. case '\n':
  527. break;
  528. case '\r':
  529. if (p[1] == '\n') {
  530. p++;
  531. }
  532. break;
  533. case 0:
  534. case 0x1a:
  // Input ended inside the comment.
  535. if (commentToken) {
  536. p = end;
  537. t.value = TOK.TOKcomment;
  538. return;
  539. }
  540. if (doDocComment && t.ptr[2] == '/') {
  541. getDocComment(t, lastLine == linnum);
  542. }
  543. p = end;
  544. t.value = TOK.TOKeof;
  545. return;
  546. default:
  547. if (c & 0x80) {
  548. uint u = decodeUTF();
  549. if (u == PS || u == LS) {
  550. break;
  551. }
  552. }
  553. continue;
  554. }
  555. break;
  556. }
  557. if (commentToken) {
  558. p++;
  559. loc.linnum++;
  560. t.value = TOK.TOKcomment;
  561. return;
  562. }
  // A third '/' (i.e. "///") marks a documentation comment.
  563. if (doDocComment && t.ptr[2] == '/') {
  564. getDocComment(t, lastLine == linnum);
  565. }
  566. p++;
  567. loc.linnum++;
  568. continue;
  // Nesting comment: each "/+" raises the depth, each "+/" lowers it.
  569. case '+':
  570. {
  571. int nest;
  572. linnum = loc.linnum;
  573. p++;
  574. nest = 1;
  575. while (1) {
  576. ubyte c = *p;
  577. switch (c) {
  578. case '/':
  579. p++;
  580. if (*p == '+') {
  581. p++;
  582. nest++;
  583. }
  584. continue;
  585. case '+':
  586. p++;
  587. if (*p == '/') {
  588. p++;
  589. if (--nest == 0) {
  590. break;
  591. }
  592. }
  593. continue;
  594. case '\r':
  595. p++;
  596. if (*p != '\n') {
  597. loc.linnum++;
  598. }
  599. continue;
  600. case '\n':
  601. loc.linnum++;
  602. p++;
  603. continue;
  604. case 0:
  605. case 0x1A:
  606. error("unterminated /+ +/ comment");
  607. p = end;
  608. t.value = TOK.TOKeof;
  609. return;
  610. default:
  611. if (c & 0x80) {
  612. uint u = decodeUTF();
  613. if (u == PS || u == LS) {
  614. loc.linnum++;
  615. }
  616. }
  617. p++;
  618. continue;
  619. }
  620. break;
  621. }
  622. if (commentToken) {
  623. t.value = TOK.TOKcomment;
  624. return;
  625. }
  626. if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) {
  627. // if /++ but not /++/
  628. getDocComment(t, lastLine == linnum);
  629. }
  630. continue;
  631. }
  632. default:
  633. break;
  634. }
  // A '/' that starts neither "/=" nor a comment is the division operator.
  635. t.value = TOK.TOKdiv;
  636. return;
  // ., .., ... or a real literal starting with '.'
  637. case '.':
  638. p++;
  639. if (isdigit(*p)) {
  // back up so that inreal() starts on the '.'
  640. p--;
  641. t.value = inreal(t);
  642. }
  643. else if (p[0] == '.') {
  644. if (p[1] == '.') {
  645. p += 2;
  646. t.value = TOK.TOKdotdotdot;
  647. }
  648. else {
  649. p++;
  650. t.value = TOK.TOKslice;
  651. }
  652. }
  653. else {
  654. t.value = TOK.TOKdot;
  655. }
  656. return;
  // &, &=, &&
  657. case '&':
  658. p++;
  659. if (*p == '=') {
  660. p++;
  661. t.value = TOK.TOKandass;
  662. }
  663. else if (*p == '&') {
  664. p++;
  665. t.value = TOK.TOKandand;
  666. }
  667. else {
  668. t.value = TOK.TOKand;
  669. }
  670. return;
  671. // |, ||, |=
  672. case '|':
  673. p++;
  674. if (*p == '=') {
  675. p++;
  676. t.value = TOK.TOKorass;
  677. }
  678. else if (*p == '|') {
  679. p++;
  680. t.value = TOK.TOKoror;
  681. }
  682. else {
  683. t.value = TOK.TOKor;
  684. }
  685. return;
  // -, -=, --
  686. case '-':
  687. p++;
  688. if (*p == '=') {
  689. p++;
  690. t.value = TOK.TOKminass;
  691. }
  692. else if (*p == '-') {
  693. p++;
  694. t.value = TOK.TOKminusminus;
  695. }
  696. else {
  697. t.value = TOK.TOKmin;
  698. }
  699. return;
  700. // +, +=, ++
  701. case '+':
  702. p++;
  703. if (*p == '=') {
  704. p++;
  705. t.value = TOK.TOKaddass; // +=
  706. }
  707. else if (*p == '+') {
  708. p++;
  709. t.value = TOK.TOKplusplus; // ++
  710. }
  711. else {
  712. t.value = TOK.TOKadd; // +
  713. }
  714. return;
  715. // <, <=, <<=, <<, <>=, <>
  716. case '<':
  717. p++;
  718. if (*p == '=') {
  719. p++;
  720. t.value = TOK.TOKle; // <=
  721. }
  722. else if (*p == '<') {
  723. p++;
  724. if (*p == '=') {
  725. p++;
  726. t.value = TOK.TOKshlass; // <<=
  727. }
  728. else {
  729. t.value = TOK.TOKshl; // <<
  730. }
  731. }
  732. else if (*p == '>') {
  733. p++;
  734. if (*p == '=') {
  735. p++;
  736. t.value = TOK.TOKleg; // <>=
  737. }
  738. else {
  739. t.value = TOK.TOKlg; // <>
  740. }
  741. }
  742. else {
  743. t.value = TOK.TOKlt; // <
  744. }
  745. return;
  746. // >, >>, >>>, >=, >>=, >>>=
  747. case '>':
  748. p++;
  749. if (*p == '=') {
  750. p++;
  751. t.value = TOK.TOKge; // >=
  752. }
  753. else if (*p == '>') {
  754. p++;
  755. if (*p == '=') {
  756. p++;
  757. t.value = TOK.TOKshrass; // >>=
  758. }
  759. else if (*p == '>') {
  760. p++;
  761. if (*p == '=') {
  762. p++;
  763. t.value = TOK.TOKushrass; // >>>=
  764. }
  765. else {
  766. t.value = TOK.TOKushr; // >>>
  767. }
  768. }
  769. else {
  770. t.value = TOK.TOKshr; // >>
  771. }
  772. }
  773. else {
  774. t.value = TOK.TOKgt; // >
  775. }
  776. return;
  // !, !=, !==, !<>=, !<>, !<=, !<, !>=, !>
  777. case '!':
  778. p++;
  779. if (*p == '=') {
  780. p++;
  781. if (*p == '=') {
  782. p++;
  783. t.value = TOK.TOKnotidentity; // !==
  784. }
  785. else {
  786. t.value = TOK.TOKnotequal; // !=
  787. }
  788. }
  789. else if (*p == '<') {
  790. p++;
  791. if (*p == '>') {
  792. p++;
  793. if (*p == '=') {
  794. p++;
  795. t.value = TOK.TOKunord; // !<>=
  796. }
  797. else {
  798. t.value = TOK.TOKue; // !<>
  799. }
  800. }
  801. else if (*p == '=') {
  802. p++;
  803. t.value = TOK.TOKug; // !<=
  804. }
  805. else {
  806. t.value = TOK.TOKuge; // !<
  807. }
  808. }
  809. else if (*p == '>') {
  810. p++;
  811. if (*p == '=') {
  812. p++;
  813. t.value = TOK.TOKul; // !>=
  814. }
  815. else {
  816. t.value = TOK.TOKule; // !>
  817. }
  818. }
  819. else {
  820. t.value = TOK.TOKnot; // !
  821. }
  822. return;
  // =, ==, ===
  823. case '=':
  824. p++;
  825. if (*p == '=') {
  826. p++;
  827. if (*p == '=') {
  828. p++;
  829. t.value = TOK.TOKidentity; // ===
  830. }
  831. else {
  832. t.value = TOK.TOKequal; // ==
  833. }
  834. }
  835. else {
  836. t.value = TOK.TOKassign; // =
  837. }
  838. return;
  // ~, ~=
  839. case '~':
  840. p++;
  841. if (*p == '=') {
  842. p++;
  843. t.value = TOK.TOKcatass; // ~=
  844. }
  845. else {
  846. t.value = TOK.TOKtilde; // ~
  847. }
  848. return;
  849. // SINGLE
  850. case '(': p++; t.value = TOK.TOKlparen; return;
  851. case ')': p++; t.value = TOK.TOKrparen; return;
  852. case '[': p++; t.value = TOK.TOKlbracket; return;
  853. case ']': p++; t.value = TOK.TOKrbracket; return;
  854. case '{': p++; t.value = TOK.TOKlcurly; return;
  855. case '}': p++; t.value = TOK.TOKrcurly; return;
  856. case '?': p++; t.value = TOK.TOKquestion; return;
  857. case ',': p++; t.value = TOK.TOKcomma; return;
  858. case ';': p++; t.value = TOK.TOKsemicolon; return;
  859. case ':': p++; t.value = TOK.TOKcolon; return;
  860. case '$': p++; t.value = TOK.TOKdollar; return;
  861. // DOUBLE
  862. case '*': p++; if (*p == '=') {
  863. p++; t.value = TOK.TOKmulass;
  864. }
  865. else {
  866. t.value = TOK.TOKmul;
  867. } return;
  868. case '%': p++; if (*p == '=') {
  869. p++; t.value = TOK.TOKmodass;
  870. }
  871. else {
  872. t.value = TOK.TOKmod;
  873. } return;
  874. case '^': p++; if (*p == '=') {
  875. p++; t.value = TOK.TOKxorass;
  876. }
  877. else {
  878. t.value = TOK.TOKxor;
  879. } return;
  880. // removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return;
  // '#' is handed to Pragma(), after which scanning resumes.
  881. case '#':
  882. p++;
  883. Pragma();
  884. continue;
  885. default:
  886. {
  887. debug writefln(" default char");
  888. ubyte c = *p;
  889. if (c & 0x80) {
  890. uint u = decodeUTF();
  891. // Check for start of unicode identifier
  892. if (isUniAlpha(u)) {
  893. goto case_ident;
  894. }
  895. if (u == PS || u == LS) {
  896. loc.linnum++;
  897. p++;
  898. continue;
  899. }
  900. }
  // Anything else is reported and skipped one byte at a time.
  901. if (isprint(c)) {
  902. error("unsupported char '%s'", cast(char)c);
  903. }
  904. else {
  905. error("unsupported char 0x%02x", cast(ubyte)c);
  906. }
  907. p++;
  908. continue;
  909. }
  910. }
  911. }
  912. }
  913. // Parse escape sequence.
  914. uint escapeSequence() {
  // Decode one escape sequence. On entry p points at the character that
  // follows the backslash (the callers in this file step over the backslash
  // first). Returns the value of the escape; p is advanced over what was
  // consumed. Malformed sequences are reported through error().
  915. uint c;
  916. int n;
  917. int ndigits;
  918. c = *p;
  919. switch (c) {
  // Characters that stand for themselves: \' \" \? \\
  920. case '\'':
  921. case '"':
  922. case '?':
  923. case '\\':
  // Shared exit for all single-character escapes: step past the character.
  924. Lconsume:
  925. p++;
  926. break;
  927. case 'a': c = 7; goto Lconsume;
  928. case 'b': c = 8; goto Lconsume;
  929. case 'f': c = 12; goto Lconsume;
  930. case 'n': c = 10; goto Lconsume;
  931. case 'r': c = 13; goto Lconsume;
  932. case 't': c = 9; goto Lconsume;
  933. case 'v': c = 11; goto Lconsume;
  // \uXXXX, \UXXXXXXXX and \xXX differ only in the number of hex digits.
  934. case 'u':
  935. ndigits = 4;
  936. goto Lhex;
  937. case 'U':
  938. ndigits = 8;
  939. goto Lhex;
  940. case 'x':
  941. ndigits = 2;
  942. Lhex:
  943. p++;
  944. c = *p;
  945. if (ishex(c)) {
  946. uint v;
  947. n = 0;
  948. v = 0;
  // Accumulate up to ndigits hex digits into v; stop early, with an
  // error, at the first non-hex character.
  949. while (1) {
  950. if (isdigit(c)) {
  951. c -= '0';
  952. }
  953. else if (islower(c)) {
  954. c -= 'a' - 10;
  955. }
  956. else {
  957. c -= 'A' - 10;
  958. }
  959. v = v * 16 + c;
  960. c = *++p;
  961. if (++n == ndigits) {
  962. break;
  963. }
  964. if (!ishex(c)) {
  965. error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
  966. break;
  967. }
  968. }
  969. //! if( ndigits != 2 && !utf_isValidDchar(v))
  970. //! error("invalid UTF character \\U%08x", v);
  971. c = v;
  972. }
  973. else {
  974. error("undefined escape hex sequence \\%s\n", c);
  975. }
  976. break;
  977. case '&': // named character entity
  // NOTE(review): the entity lookup under `case ';'` is commented out, so
  // for well-formed input this branch returns c unchanged ('&') and leaves
  // p on the ';' instead of past it. Confirm whether that is intended.
  978. for (ubyte *idstart = ++p; 1; p++) {
  979. switch (*p) {
  980. case ';':
  981. //!!!
  982. /+
  983. * c = HtmlNamedEntity(idstart, p - idstart);
  984. * if( c == ~0 )
  985. * {
  986. * error("unnamed character entity &%.*s;", p - idstart, idstart);
  987. * c = ' ';
  988. * }
  989. *
  990. * p++;
  991. +/
  992. break;
  993. default:
  // Entity names are letters, with digits allowed after the first
  // character.
  994. if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) {
  995. continue;
  996. }
  997. error("unterminated named entity");
  998. break;
  999. }
  1000. break;
  1001. }
  1002. break;
  1003. case 0:
  1004. case 0x1a: // end of file
  // A backslash at end of input yields a literal backslash; p stays on
  // the terminator.
  1005. c = '\\';
  1006. break;
  1007. default:
  1008. if (isoctal(c)) {
  // Up to three octal digits. v is a ubyte, so values above 0xFF wrap.
  1009. ubyte v;
  1010. n = 0;
  1011. do {
  1012. v = v * 8 + (c - '0');
  1013. c = *++p;
  1014. } while (++n < 3 && isoctal(c));
  1015. c = v;
  1016. }
  1017. else {
  1018. error("undefined escape sequence \\%s\n", c);
  1019. }
  1020. break;
  1021. }
  1022. return(c);
  1023. }
  /**************************************
   * Lex a wysiwyg ("what you see is what you get") string: r"..." or `...`.
   * No escape processing is done; the text is taken literally except that
   * CR and CR LF line ends are stored as a single '\n'.
   *
   * On entry p points at the opening delimiter.
   *
   * Params:
   *   t  = token to fill (ustring, postfix)
   *   tc = the delimiter character that closes the string ('"' or '`')
   *
   * Returns: TOK.TOKstring, also for an unterminated literal (which is
   * reported through error() and yields an empty ustring).
   */
  TOK wysiwygStringConstant(Token *t, int tc) {
      uint c;
      Loc  start = loc;

      p++;
      stringbuffer.offset = 0;
      while (1) {
          c = *p++;
          switch (c) {
          case '\n':
              loc.linnum++;
              break;

          case '\r':
              if (*p == '\n') {
                  continue;   // ignore; the LF is handled on the next pass
              }
              c = '\n';       // treat EndOfLine as \n character
              loc.linnum++;
              break;

          case 0:
          case 0x1a:
              error("unterminated string constant starting at %s", start.toChars());
              t.ustring = "";
              t.postfix = 0;
              return(TOK.TOKstring);

          case '"':
          case '`':
              if (c == tc) {
                  // Copy the text out of the shared static buffer, as
                  // escapeStringConstant does: the buffer is rewound for the
                  // next literal. No terminating NUL is appended, because it
                  // would become part of the string value.
                  t.ustring = stringbuffer.toString().dup;
                  stringPostfix(t);
                  return(TOK.TOKstring);
              }
              break;   // the other quote character is ordinary text

          default:
              if (c & 0x80) {
                  p--;
                  uint u = decodeUTF();
                  p++;
                  if (u == PS || u == LS) {
                      loc.linnum++;
                  }
                  // NOTE(review): this stores the code point through the uint
                  // overload rather than as UTF-8 bytes; confirm what
                  // OutBuffer.write(uint) emits.
                  stringbuffer.write(u);
                  continue;
              }
              break;
          }
          // single source byte: write it as one char, like escapeStringConstant
          stringbuffer.write(cast(char)c);
      }
  }
  /**************************************
   * Lex hex strings:
   * x"0A ae 34FE BD"
   *
   * Every pair of hex digits produces one byte of the string; white space
   * and line ends between digits are ignored. On entry p points at the
   * opening '"'.
   *
   * Params:
   *   t = token to fill (ustring, postfix)
   *
   * Returns: TOK.TOKstring, also for an unterminated literal (which is
   * reported through error() and yields an empty ustring).
   */
  TOK hexStringConstant(Token *t) {
      uint c;
      Loc  start = loc;
      uint n = 0;     // number of hex digits seen so far
      uint v;         // byte being assembled from the current digit pair

      p++;
      stringbuffer.offset = 0;
      while (1) {
          c = *p++;
          switch (c) {
          case ' ':
          case '\t':
          case '\v':
          case '\f':
              continue;   // skip white space

          case '\r':
              if (*p == '\n') {
                  continue;   // ignore; the LF is handled on the next pass
              }
              // Treat isolated '\r' as if it were a '\n' (falls through)
          case '\n':
              loc.linnum++;
              continue;

          case 0:
          case 0x1a:
              error("unterminated string constant starting at %s", start.toChars());
              t.ustring = "";
              t.postfix = 0;
              return(TOK.TOKstring);

          case '"':
              if (n & 1) {
                  error("odd number (%d) of hex characters in hex string", n);
                  // keep the dangling half byte, as a single byte
                  stringbuffer.write(cast(char)v);
              }
              // Copy the bytes out of the shared static buffer, as
              // escapeStringConstant does: the buffer is rewound for the
              // next literal.
              t.ustring = stringbuffer.toString().dup;
              stringPostfix(t);
              return(TOK.TOKstring);

          default:
              if (c >= '0' && c <= '9') {
                  c -= '0';
              }
              else if (c >= 'a' && c <= 'f') {
                  c -= 'a' - 10;
              }
              else if (c >= 'A' && c <= 'F') {
                  c -= 'A' - 10;
              }
              else if (c & 0x80) {
                  p--;
                  uint u = decodeUTF();
                  p++;
                  if (u == PS || u == LS) {
                      loc.linnum++;
                  }
                  else {
                      error("non-hex character \\u%x", u);
                  }
              }
              else {
                  error("non-hex character '%s'", c);
              }
              // NOTE(review): a rejected character still falls through and
              // is counted as a digit below, as in the original code.
              if (n & 1) {
                  v = (v << 4) | c;
                  // second digit of the pair: emit exactly one byte
                  stringbuffer.write(cast(char)v);
              }
              else {
                  v = c;
              }
              n++;
              break;
          }
      }
  }
  /**************************************
   * Lex a double-quoted string in which backslash escapes are processed.
   * CR and CR LF line ends are stored as a single '\n'. On entry p points
   * at the opening '"'.
   *
   * Params:
   *   t    = token to fill (ustring, postfix)
   *   wide = not used by this function
   *
   * Returns: TOK.TOKstring, also for an unterminated literal (which is
   * reported through error() and yields an empty ustring).
   */
  TOK escapeStringConstant(Token *t, int wide) {
      Loc opened = loc;

      p++;
      stringbuffer.offset = 0;

      for (;;) {
          uint ch = *p++;

          if (ch == '\\') {
              // look at the escape selector before escapeSequence() consumes it
              ubyte selector = *p;
              ch = escapeSequence();
              if (selector == 'u' || selector == 'U' || selector == '&') {
                  // these escapes are written through the uint overload
                  stringbuffer.write(ch);
                  continue;
              }
          }
          else if (ch == '\n') {
              loc.linnum++;
          }
          else if (ch == '\r') {
              if (*p == '\n') {
                  continue;   // ignore; the LF is handled on the next pass
              }
              ch = '\n';      // a lone CR counts as a line end
              loc.linnum++;
          }
          else if (ch == '"') {
              t.ustring = stringbuffer.toString().dup;
              stringPostfix(t);
              return(TOK.TOKstring);
          }
          else if (ch == 0 || ch == 0x1a) {
              p--;            // stay on the end-of-input byte
              error("unterminated string constant starting at %s", opened.toChars());
              t.ustring = "";
              t.postfix = 0;
              return(TOK.TOKstring);
          }
          else if (ch & 0x80) {
              p--;
              ch = decodeUTF();
              if (ch == LS || ch == PS) {
                  ch = '\n';
                  loc.linnum++;
              }
              p++;
              stringbuffer.write(cast(char)ch);
              continue;
          }

          stringbuffer.write(cast(char)ch);
      }
  }
  /**************************************
   * Lex a character literal. On entry p points at the opening quote.
   * The character's value is stored in t.uns64value.
   *
   * Params:
   *   t    = token to fill
   *   wide = not used by this function
   *
   * Returns: TOKcharv, TOKwcharv or TOKdcharv depending on the character or
   * escape form. Malformed literals are reported through error() and the
   * kind determined so far is returned.
   */
  TOK charConstant(Token *t, int wide) {
      TOK kind = TOK.TOKcharv;

      p++;                    // step over the opening quote
      uint ch = *p++;

      if (ch == '\\') {
          // look at the escape selector before escapeSequence() consumes it
          ubyte selector = *p;
          t.uns64value = escapeSequence();
          if (selector == 'u') {
              kind = TOK.TOKwcharv;
          }
          else if (selector == 'U' || selector == '&') {
              kind = TOK.TOKdcharv;
          }
      }
      else if (ch == '\n' || ch == '\r' || ch == 0 || ch == 0x1a || ch == '\'') {
          // line end, end of input, or an empty '' literal
          if (ch == '\n') {
              loc.linnum++;
          }
          error("unterminated character constant");
          return(kind);
      }
      else {
          if (ch & 0x80) {
              p--;
              ch = decodeUTF();
              p++;
              if (ch == LS || ch == PS) {
                  // a Unicode line end inside the literal
                  loc.linnum++;
                  error("unterminated character constant");
                  return(kind);
              }
              if (ch < 0xd800 || (ch >= 0xe000 && ch < 0xfffe)) {
                  kind = TOK.TOKwcharv;
              }
              else {
                  kind = TOK.TOKdcharv;
              }
          }
          t.uns64value = ch;
      }

      if (*p != '\'') {
          error("unterminated character constant");
          return(kind);
      }
      p++;                    // step over the closing quote
      return(kind);
  }
  /**
   * Read the optional one-letter postfix that may follow a string literal.
   * If *p is 'c', 'w' or 'd' it is stored in t.postfix and consumed;
   * otherwise t.postfix is set to 0 and p is left alone.
   */
  void stringPostfix(Token *t) {
      ubyte suffix = *p;

      if (suffix == 'c' || suffix == 'w' || suffix == 'd') {
          t.postfix = suffix;
          p++;
      }
      else {
          t.postfix = 0;
      }
  }
  1300. /***************************************
  1301. * Read \u or \U unicode sequence
  1302. * Input:
  1303. * u 'u' or 'U'
  1304. */
  1305. /*
  1306. * uint Wchar(uint u)
  1307. * {
  1308. * uint value;
  1309. * uint n;
  1310. * ubyte c;
  1311. * uint nchars;
  1312. *
  1313. * nchars = (u == 'U') ? 8 : 4;
  1314. * value = 0;
  1315. * for (n = 0; 1; n++)
  1316. * {
  1317. * ++p;
  1318. * if( n == nchars)
  1319. * break;
  1320. * c = *p;
  1321. * if( !ishex(c))
  1322. * {
  1323. * error("\\%s sequence must be followed by %d hex characters", u, nchars);
  1324. * break;
  1325. * }
  1326. * if( isdigit(c))
  1327. * c -= '0';
  1328. * else if( islower(c))
  1329. * c -= 'a' - 10;
  1330. * else
  1331. * c -= 'A' - 10;
  1332. * value <<= 4;
  1333. * value |= c;
  1334. * }
  1335. * return value;
  1336. * }
  1337. */
  /**************************************
   * Scan an integer literal beginning at p.
   *
   * Decimal, octal (leading 0), hexadecimal (0x / 0X) and binary (0b / 0B)
   * literals are recognised. An embedded '_' is skipped and never stored.
   * After the digits, 'u'/'U' and 'l'/'L' suffixes are consumed in any order;
   * a repeated suffix is reported with error().
   *
   * When the text turns out to be a floating point literal (for example a
   * '.' that is not followed by a second '.', or an 'i' suffix; the exact set
   * of trigger characters depends on the radix) p is reset to the first
   * character and the literal is rescanned by inreal().
   *
   * Literals of the form 0FFh (the disabled ZEROH syntax) are not handled.
   *
   * Params:
   *	t = token that receives the value in t.uns64value
   * Returns:
   *	TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer literal,
   *	otherwise the result of inreal()
   */
  TOK number(Token *t) {
      //debug writefln("Lexer.number()");

      // The digits are collected with a small state machine
      enum STATE {
          STATE_initial,   // nothing read yet
          STATE_0,         // the literal starts with '0'
          STATE_decimal,   // reading decimal digits
          STATE_octal,     // reading octal digits
          STATE_octale,    // octal literal that contains a non-octal digit
          STATE_hex,       // reading hex digits
          STATE_binary,    // reading binary digits
          STATE_hex0,      // just after "0x", no digit seen yet
          STATE_binary0,   // just after "0b", no digit seen yet
          STATE_hexh,      // never entered by this function
          STATE_error      // error recovery: skip the remaining digits
      }
      enum FLAGS {
          FLAGS_decimal  = 1,   // decimal
          FLAGS_unsigned = 2,   // u or U suffix
          FLAGS_long     = 4,   // l or L suffix
      }

      FLAGS flags = FLAGS.FLAGS_decimal;
      TOK   result;

      stringbuffer.offset = 0;
      STATE  state = STATE.STATE_initial;
      ubyte *start = p;

      // It's a real number. Back up and rescan as a real.
      TOK rescanAsReal() {
          p = start;
          return(inreal(t));
      }

      while (true) {
          char c = cast(char)*p;

          switch (state) {
              case STATE.STATE_initial:   // opening state
                  if (c == '0') {
                      state = STATE.STATE_0;
                  }
                  else {
                      state = STATE.STATE_decimal;
                  }
                  break;

              case STATE.STATE_0:
                  // A leading 0 means the literal is not decimal
                  flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal);
                  switch (c) {
                      case 'X':
                      case 'x':
                          state = STATE.STATE_hex0;
                          break;

                      case '.':
                          if (p[1] == '.') {   // .. is a separate token
                              goto done;
                          }
                          return(rescanAsReal());

                      case 'i':
                      case 'f':
                      case 'F':
                          return(rescanAsReal());

                      case 'B':
                      case 'b':
                          state = STATE.STATE_binary0;
                          break;

                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                          state = STATE.STATE_octal;
                          break;

                      case '_':
                          // skipped, but it commits the literal to octal
                          state = STATE.STATE_octal;
                          p++;
                          continue;

                      default:
                          goto done;
                  }
                  break;

              case STATE.STATE_decimal:   // reading decimal number
                  // a non-digit either ends the literal or makes it a real
                  if (!isdigit(c)) {
                      if (c == '_') {     // ignore embedded _
                          p++;
                          continue;
                      }
                      // decimal point (but not the start of "..")
                      if (c == '.' && p[1] != '.') {
                          return(rescanAsReal());
                      }
                      // imaginary / float suffix or exponent
                      if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') {
                          return(rescanAsReal());
                      }
                      goto done;
                  }
                  break;

              case STATE.STATE_hex0:      // reading hex number
              case STATE.STATE_hex:
                  if (!ishex(c)) {
                      if (c == '_') {     // ignore embedded _
                          p++;
                          continue;
                      }
                      if (c == '.' && p[1] != '.') {
                          return(rescanAsReal());
                      }
                      if (c == 'P' || c == 'p' || c == 'i') {
                          return(rescanAsReal());
                      }
                      // "0x" with no digit behind it
                      if (state == STATE.STATE_hex0) {
                          error("Hex digit expected, not '%s'", c);
                      }
                      goto done;
                  }
                  state = STATE.STATE_hex;
                  break;

              case STATE.STATE_octal:     // reading octal number
              case STATE.STATE_octale:    // reading octal number with non-octal digits
                  if (!isoctal(c)) {
                      if (c == '_') {     // ignore embedded _
                          p++;
                          continue;
                      }
                      if (c == '.' && p[1] != '.') {
                          return(rescanAsReal());
                      }
                      if (c == 'i') {
                          return(rescanAsReal());
                      }
                      if (isdigit(c)) {
                          // 8 or 9: keep scanning, the error is reported at done
                          state = STATE.STATE_octale;
                      }
                      else {
                          goto done;
                      }
                  }
                  break;

              case STATE.STATE_binary0:   // starting binary number
              case STATE.STATE_binary:    // reading binary number
                  if (c != '0' && c != '1') {
                      if (c == '_') {     // ignore embedded _
                          p++;
                          continue;
                      }
                      if (state == STATE.STATE_binary0) {
                          error("binary digit expected");
                          state = STATE.STATE_error;
                          break;
                      }
                      else {
                          goto done;
                      }
                  }
                  state = STATE.STATE_binary;
                  break;

              case STATE.STATE_error:     // for error recovery
                  if (!isdigit(c)) {      // scan until non-digit
                      goto done;
                  }
                  break;

              default:
                  assert(0);
          }
          stringbuffer.write(cast(char)c);
          p++;
      }

    done:
      stringbuffer.write(cast(char)0);    // terminate string
      // debug writefln( "\tdigit complete( %s )", stringbuffer.toString );

      if (state == STATE.STATE_octale) {
          error("Octal digit expected");
      }

      uinteger_t n;                       // unsigned >=64 bit integer type

      if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) {
          // a single digit plus the terminator
          n = stringbuffer.data[0] - '0';
      }
      else {
          // Convert string to integer
          char *digits = cast(char *)stringbuffer.data.ptr;
          int   r      = 10;              // radix
          int   d;                        // value of the current digit

          if (*digits == '0') {
              if (digits[1] == 'x' || digits[1] == 'X') {
                  // "0x#"
                  digits += 2;
                  r       = 16;
              }
              else if (digits[1] == 'b' || digits[1] == 'B') {
                  // "0b#" - binary
                  digits += 2;
                  r       = 2;
              }
              else if (isdigit(digits[1])) {
                  digits += 1;
                  r       = 8;
              }
          }

          n = 0;
          while (true) {
              if (*digits >= '0' && *digits <= '9') {
                  d = *digits - '0';
              }
              else if (*digits >= 'a' && *digits <= 'z') {
                  d = *digits - 'a' + 10;
              }
              else if (*digits >= 'A' && *digits <= 'Z') {
                  d = *digits - 'A' + 10;
              }
              else {
                  break;
              }
              if (d >= r) {
                  break;
              }

              // Detect wrap-around in both the multiplication and the
              // addition; comparing the wrapped sum against n alone misses
              // products that wrap to a value still larger than n.
              uinteger_t scaled = n * r;
              if (scaled / r != n || scaled + d < scaled) {
                  error("integer overflow");
                  break;
              }
              n = scaled + d;
              digits++;
          }

          // if n needs more than 64 bits
          if (n.sizeof > 8 && n > 0xffffffffffffffffL) {
              error("integer overflow");
          }
      }

      // Parse trailing 'u', 'U', 'l' or 'L' in any combination
      while (true) {
          ubyte f;

          switch (*p) {
              case 'U':
              case 'u':
                  f = FLAGS.FLAGS_unsigned;
                  break;

              case 'L':
              case 'l':
                  f = FLAGS.FLAGS_long;
                  break;

              default:
                  f = 0;
                  break;
          }
          if (!f) {
              break;                      // not a suffix character
          }
          p++;
          if (flags & f) {                // same suffix given twice
              error("unrecognized token");
          }
          flags = cast(FLAGS)(flags | f);
      }

      // Pick the token type from the suffixes, the radix class and the value
      switch (flags) {
          case 0:
              /* Octal or Hexadecimal constant.
               * First that fits: int, uint, long, ulong
               */
              if (n & 0x8000000000000000L) {
                  result = TOK.TOKuns64v;
              }
              else if (n & 0xffffffff00000000L) {
                  result = TOK.TOKint64v;
              }
              else if (n & 0x80000000) {
                  result = TOK.TOKuns32v;
              }
              else {
                  result = TOK.TOKint32v;
              }
              break;

          case FLAGS.FLAGS_decimal:
              /* First that fits: int, long, long long
               */
              if (n & 0x8000000000000000L) {
                  error("signed integer overflow");
                  result = TOK.TOKuns64v;
              }
              else if (n & 0xffffffff80000000L) {
                  result = TOK.TOKint64v;
              }
              else {
                  result = TOK.TOKint32v;
              }
              break;

          case FLAGS.FLAGS_unsigned:
          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
              /* First that fits: uint, ulong
               */
              if (n & 0xffffffff00000000L) {
                  result = TOK.TOKuns64v;
              }
              else {
                  result = TOK.TOKuns32v;
              }
              break;

          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
              if (n & 0x8000000000000000L) {
                  error("signed integer overflow");
                  result = TOK.TOKuns64v;
              }
              else {
                  result = TOK.TOKint64v;
              }
              break;

          case FLAGS.FLAGS_long:
              if (n & 0x8000000000000000L) {
                  result = TOK.TOKuns64v;
              }
              else {
                  result = TOK.TOKint64v;
              }
              break;

          case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
              result = TOK.TOKuns64v;
              break;

          default:
              debug writefln("%x", flags);
              assert(0);
      }
      t.uns64value = n;
      return(result);
  }
  /**************************************
   * Read in characters, converting them to real.
   *
   * The literal text is copied into stringbuffer by a state machine and then
   * converted with strtold(); the result is stored in t.float80value.
   * An optional 'f'/'F' or 'l'/'L' suffix selects the token type, and a
   * following 'i'/'I' turns it into the matching imaginary token.
   *
   * Params:
   *	t = token that receives the value in t.float80value
   * Returns:
   *	TOKfloat32v, TOKfloat64v, TOKfloat80v or the corresponding
   *	TOKimaginary32v / TOKimaginary64v / TOKimaginary80v
   * Bugs:
   *	Exponent overflow not detected.
   *	Too much requested precision is not detected.
   */
  TOK inreal(Token *t) {
      int  dblstate;
      uint c;
      char hex;                           // is this a hexadecimal-floating-constant?
      TOK  result;

      //printf("Lexer.inreal()\n");
      stringbuffer.offset = 0;
      dblstate            = 0;
      hex                 = 0;

    Lnext:
      while (1) {
          // Get next char from input
          c = *p++;
          //printf("dblstate = %d, c = '%s'\n", dblstate, c);

          // The inner loop lets a state hand the same character to the
          // next state ("continue") without reading a new one.
          while (1) {
              switch (dblstate) {
                  case 0:                 // opening state
                      if (c == '0') {
                          dblstate = 9;
                      }
                      else if (c == '.') {
                          dblstate = 3;
                      }
                      else {
                          dblstate = 1;
                      }
                      break;

                  case 9:                 // character after a leading '0'
                      dblstate = 1;
                      if (c == 'X' || c == 'x') {
                          hex++;
                          break;
                      }
                      // not a hex prefix: fall through and treat c as a digit

                  case 1:                 // digits to left of .
                  case 3:                 // digits to right of .
                  case 7:                 // continuing exponent digits
                      if (!isdigit(c) && !(hex && isxdigit(c))) {
                          if (c == '_') {
                              goto Lnext; // ignore embedded '_'
                          }
                          // end of this run of digits: re-examine c in the next state
                          dblstate++;
                          continue;
                      }
                      break;

                  case 2:                 // no more digits to left of .
                      if (c == '.') {
                          dblstate++;
                          break;
                      }
                      // no '.': fall through and look for an exponent

                  case 4:                 // no more digits to right of .
                      if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) {
                          dblstate = 5;
                          hex      = 0;   // exponent is always decimal
                          break;
                      }
                      if (hex) {
                          error("binary-exponent-part required");
                      }
                      goto done;

                  case 5:                 // looking immediately to right of E
                      dblstate++;
                      if (c == '-' || c == '+') {
                          break;
                      }
                      // no sign: fall through, c must be the first exponent digit

                  case 6:                 // 1st exponent digit expected
                      if (!isdigit(c)) {
                          error("exponent expected");
                      }
                      dblstate++;
                      break;

                  case 8:                 // past end of exponent digits
                      goto done;
              }
              break;
          }
          // NOTE(review): c is a uint here while number() writes
          // cast(char)c; confirm OutBuffer.write(uint) appends one byte.
          stringbuffer.write(c);
      }

    done:
      p--;                                // un-read the character that ended the literal
      stringbuffer.write(cast(byte)0);

      // #if _WIN32 && __DMC__
      char *save = __locale_decpoint;
      __locale_decpoint = ".";
      // #endif

      // Clear errno before the first conversion so that a range error raised
      // by strtold() is still pending at the ERANGE check below. The 'L'
      // suffix branch performs no further conversion of its own.
      // NOTE(review): this module declares its own `int errno`; confirm that
      // it is the variable the C conversion routines actually set.
      errno          = 0;
      t.float80value = strtold(cast(char *)stringbuffer.data.ptr, null);

      switch (*p) {
          case 'F':
          case 'f':
              // converted again only to range-check against float
              strtof(cast(char *)stringbuffer.data.ptr, null);
              result = TOK.TOKfloat32v;
              p++;
              break;

          default:
              // converted again only to range-check against double
              strtod(cast(char *)stringbuffer.data.ptr, null);
              result = TOK.TOKfloat64v;
              break;

          case 'L':
          case 'l':
              result = TOK.TOKfloat80v;
              p++;
              break;
      }

      if (*p == 'i' || *p == 'I') {
          p++;
          switch (result) {
              case TOK.TOKfloat32v:
                  result = TOK.TOKimaginary32v;
                  break;

              case TOK.TOKfloat64v:
                  result = TOK.TOKimaginary64v;
                  break;

              case TOK.TOKfloat80v:
                  result = TOK.TOKimaginary80v;
                  break;
          }
      }

      // #if _WIN32 && __DMC__
      __locale_decpoint = save;
      // #endif

      if (errno == ERANGE) {
          error("number is not representable");
      }
      return(result);
  }
  /*********************************************
   * Do pragma.
   * Currently, the only pragma supported is:
   *	#line linnum [filespec]
   *
   * The next two tokens must be the identifier `line` and an integer
   * (TOKint32v or TOKint64v). The rest of the line may hold blanks, the
   * word __FILE__ and at most one double-quoted filespec. At the end of the
   * line this.loc.linnum is set to linnum - 1 and, if a filespec string was
   * given, this.loc.filename is set to it.
   *
   * Anything else is reported through errorLoc() at the location the
   * lexer had when Pragma() was entered.
   */
  void Pragma() {
      Token  tok;
      int    linnum;
      char[] filespec;
      Loc    loc = this.loc;              // location used for the error message

      scan(&tok);
      if (tok.value != TOK.TOKidentifier || tok.ident != Id.line) {
          goto Lerr;
      }

      scan(&tok);
      if (tok.value == TOK.TOKint32v || tok.value == TOK.TOKint64v) {
          linnum = tok.uns64value - 1;
      }
      else {
          goto Lerr;
      }

      while (1) {
          switch (*p) {
              case 0:
              case 0x1a:
              case '\n':
              Lnewline:
                  // end of the directive: commit the new location
                  this.loc.linnum = linnum;
                  if (filespec.length) {
                      this.loc.filename = filespec;
                  }
                  return;

              case '\r':
                  p++;
                  if (*p != '\n') {
                      // lone \r is a line end; step back so p rests on it
                      p--;
                      goto Lnewline;
                  }
                  continue;

              case ' ':
              case '\t':
              case '\v':
              case '\f':
                  p++;
                  continue;               // skip white space

              case '_':
                  if (mod && memcmp(p, cast(char*)"__FILE__", 8) == 0) {
                      // __FILE__ is accepted but currently has no effect:
                      // the filespec assignment below is disabled.
                      p += 8;
                      //! filespec = mem.strdup(loc.filename ? loc.filename : mod.ident.toChars());
                      continue;
                  }
                  // Any other text starting with '_' is malformed. Looping
                  // again without advancing p would never terminate.
                  goto Lerr;

              case '"':
                  if (filespec) {         // only one filespec allowed
                      goto Lerr;
                  }
                  stringbuffer.offset = 0;
                  p++;
                  while (1) {
                      uint c;

                      c = *p;
                      switch (c) {
                          case '\n':
                          case '\r':
                          case 0:
                          case 0x1a:
                              // string not closed before the end of the line
                              goto Lerr;

                          case '"':
                              stringbuffer.write(cast(byte)0);
                              // filespec = mem.strdup((char *)stringbuffer.data);
                              filespec = stringbuffer.toString.dup;
                              p++;
                              break;

                          default:
                              if (c & 0x80) {
                                  uint u = decodeUTF();
                                  if (u == PS || u == LS) {
                                      goto Lerr;
                                  }
                              }
                              stringbuffer.write(c);
                              p++;
                              continue;
                      }
                      break;
                  }
                  continue;

              default:
                  if (*p & 0x80) {
                      uint u = decodeUTF();
                      if (u == PS || u == LS) {
                          goto Lnewline;
                      }
                  }
                  goto Lerr;
          }
      }

    Lerr:
      errorLoc(loc, "#line integer [\"filespec\"]\\n expected");
  }
  /***************************************************
   * Extract the doc comment that lies between t.ptr and p and attach it
   * to the token.
   *
   * The comment kind is taken from the third character of the comment
   * ('*', '+' or '/'). While copying the text:
   *	- the opening run of comment characters is skipped, and for
   *	  '*' / '+' comments the closing run as well;
   *	- a comment character that leads a line is dropped together with
   *	  the blanks and tabs before it;
   *	- blanks and tabs at the end of a line are removed;
   *	- \r, \r\n and the UTF-8 encoded LS / PS become \n;
   *	- the text always ends with a newline.
   *
   * The result goes to t.lineComment when lineComment and anyToken are
   * both nonzero, otherwise to t.blockComment. If that field already holds
   * text the two are merged with combineComments().
   */
  void getDocComment(Token *t, uint lineComment) {
      auto OutBuffer text = new OutBuffer;
      ubyte  marker      = t.ptr[2];
      ubyte *cur         = t.ptr + 3;     // start of comment text
      ubyte *stop        = p;
      int    atLineStart = 0;

      // Block comments end with the two-character closing delimiter
      if (marker == '*' || marker == '+') {
          stop -= 2;
      }

      // Scan over initial row of ****'s or ++++'s or ////'s
      while (cur < stop && *cur == marker) {
          cur++;
      }

      // Remove trailing row of ****'s or ++++'s
      if (marker != '/') {
          while (cur < stop && stop[-1] == marker) {
              stop--;
          }
      }

      for (; cur < stop; cur++) {
          ubyte ch        = *cur;
          int   isNewline = 0;

          if (ch == '*' || ch == '+') {
              if (atLineStart && ch == marker) {
                  atLineStart = 0;
                  // Trim preceding whitespace up to preceding \n
                  while (text.offset && (text.data[text.offset - 1] == ' ' || text.data[text.offset - 1] == '\t')) {
                      text.offset--;
                  }
                  continue;               // the leading comment character is not copied
              }
          }
          else if (ch == ' ' || ch == '\t') {
              // copied as is; does not end the "start of line" condition
          }
          else if (ch == '\r') {
              if (cur[1] == '\n') {
                  continue;               // skip the \r
              }
              isNewline = 1;
          }
          else if (ch == '\n') {
              isNewline = 1;
          }
          else if (ch == 226 && cur[1] == 128 && (cur[2] == 168 || cur[2] == 169)) {
              // LS or PS
              cur      += 2;
              isNewline = 1;
          }
          else {
              atLineStart = 0;
          }

          if (isNewline) {
              ch          = '\n';         // replace all newlines with \n
              atLineStart = 1;
              // Trim trailing whitespace
              while (text.offset && (text.data[text.offset - 1] == ' ' || text.data[text.offset - 1] == '\t')) {
                  text.offset--;
              }
          }
          text.write(ch);
      }

      // Always end with a newline
      if (!text.offset || text.data[text.offset - 1] != '\n') {
          text.writenl();
      }

      // It's a line comment if the start of the doc comment comes
      // after other non-whitespace on the same line.
      int    asLineComment = (lineComment && anyToken) ? 1 : 0;
      char[] previous      = asLineComment ? t.lineComment : t.blockComment;
      char[] fresh         = text.toString().dup;

      // Combine with previous doc comment, if any
      char[] merged = previous.length ? combineComments(previous, fresh) : fresh;

      if (asLineComment) {
          t.lineComment = merged;
      }
      else {
          t.blockComment = merged;
      }
  }
  2062. }
  // Character maps.
  // cmtable holds one entry per byte value; each entry is a bitwise OR of
  // the CM* flags below and is filled in by cmtable_init().
  static ubyte[256] cmtable;

  const int CMoctal  = 1 << 0;    // octal digit
  const int CMhex    = 1 << 1;    // hexadecimal digit
  const int CMidchar = 1 << 2;    // alphanumeric character or '_'
  /// Returns the CMoctal bit of cmtable[c]: nonzero if it is set, else 0.
  ubyte isoctal(ubyte c) {
      return cast(ubyte)(cmtable[c] & CMoctal);
  }
  /// Returns the CMhex bit of cmtable[c]: nonzero if it is set, else 0.
  ubyte ishex(ubyte c) {
      return cast(ubyte)(cmtable[c] & CMhex);
  }
  /// Returns the CMidchar bit of cmtable[c]: nonzero if it is set, else 0.
  ubyte isidchar(ubyte c) {
      return cast(ubyte)(cmtable[c] & CMidchar);
  }
  /**
   * Fill cmtable: for every byte value, OR in CMoctal for '0'..'7',
   * CMhex for decimal digits and the letters a-f / A-F, and CMidchar for
   * alphanumeric characters and '_'. Bits already set are kept.
   */
  static void cmtable_init() {
      uint code = 0;

      while (code < cmtable.length) {
          int mask = 0;

          if (code >= '0' && code <= '7') {
              mask |= CMoctal;
          }
          if (isdigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F')) {
              mask |= CMhex;
          }
          if (isalnum(code) || code == '_') {
              mask |= CMidchar;
          }
          cmtable[code] |= mask;
          code++;
      }
  }
  2090. /+
  2091. * struct StringValue
  2092. * {
  2093. * union
  2094. * {
  2095. * int intvalue;
  2096. * void *ptrvalue;
  2097. * dchar *string;
  2098. * }
  2099. *
  2100. * char[] lstring;
  2101. * }
  2102. * #define CASE_BASIC_TYPES
  2103. * case TOKwchar: case TOKdchar:
  2104. * case TOKbit: case TOKbool: case TOKchar:
  2105. * case TOKint8: case TOKuns8:
  2106. * case TOKint16: case TOKuns16:
  2107. * case TOKint32: case TOKuns32:
  2108. * case TOKint64: case TOKuns64:
  2109. * case TOKfloat32: case TOKfloat64: case TOKfloat80:
  2110. * case TOKimaginary32: case TOKimaginary64: case TOKimaginary80:
  2111. * case TOKcomplex32: case TOKcomplex64: case TOKcomplex80:
  2112. * case TOKvoid:
  2113. *
  2114. * #define CASE_BASIC_TYPES_X(t) \
  2115. * case TOKvoid: t = Type::tvoid; goto LabelX; \
  2116. * case TOKint8: t = Type::tint8; goto LabelX; \
  2117. * case TOKuns8: t = Type::tuns8; goto LabelX; \
  2118. * case TOKint16: t = Type::tint16; goto LabelX; \
  2119. * case TOKuns16: t = Type::tuns16; goto LabelX; \
  2120. * case TOKint32: t = Type::tint32; goto LabelX; \
  2121. * case TOKuns32: t = Type::tuns32; goto LabelX; \
  2122. * case TOKint64: t = Type::tint64; goto LabelX; \
  2123. * case TOKuns64: t = Type::tuns64; goto LabelX; \
  2124. * case TOKfloat32: t = Type::tfloat32; goto LabelX; \
  2125. * case TOKfloat64: t = Type::tfloat64; goto LabelX; \
  2126. * case TOKfloat80: t = Type::tfloat80; goto LabelX; \
  2127. * case TOKimaginary32: t = Type::timaginary32; goto LabelX; \
  2128. * case TOKimaginary64: t = Type::timaginary64; goto LabelX; \
  2129. * case TOKimaginary80: t = Type::timaginary80; goto LabelX; \
  2130. * case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \
  2131. * case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \
  2132. * case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \
  2133. * case TOKbit: t = Type::tbit; goto LabelX; \
  2134. * case TOKchar: t = Type::tchar; goto LabelX; \
  2135. * case TOKwchar: t = Type::twchar; goto LabelX; \
  2136. * case TOKdchar: t = Type::tdchar; goto LabelX; \
  2137. * LabelX
  2138. +/