PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/tests/output/d/40007-Lexer.d

http://github.com/bengardner/uncrustify
D | 2413 lines | 2111 code | 135 blank | 167 comment | 279 complexity | 8e17c2a2568f7a38960f0145c06ff5c3 MD5 | raw file
Possible License(s): GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. /+
  2. * Copyright (c) 1999-2006 by Digital Mars
  3. * All Rights Reserved
  4. * written by Walter Bright www.digitalmars.com
  5. * License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.
  6. * See the included readme.txt for details.
  7. * D Language conversion by: J Duncan
  8. +/
  9. /**
  10. * d language lexer
  11. */
  12. module dparser.Lexer;
  13. import dparser.Root;
  14. import dparser.Tokens;
  15. import dparser.Token;
  16. import dparser.Keyword;
  17. import dparser.Types;
  18. import dparser.Module;
  19. import dparser.Identifier;
  20. import dparser.unialpha;
  21. import dparser.OutBuffer;
  22. //private import std.ctype;
  23. //private import std.string;
  24. //import dwf.core.debugapi;
  // Module-level variable named after the C runtime's errno, initialised to 0.
  // NOTE(review): presumably set/read by the numeric conversion code later in the file — confirm.
  25. int errno = 0;
  26. //#if _WIN32 && __DMC__
  27. // from \dm\src\include\setlocal.h
  28. //extern "C" char * __cdecl __locale_decpoint;
  // Plain D variable declared in place of the commented-out extern "C" symbol above.
  29. char* __locale_decpoint;
  30. //#endif
  31. //const uint LS = 0x2028; // UTF line separator
  32. //const uint PS = 0x2029; // UTF paragraph separator
  33. //extern int isUniAlpha(unsigned u);
  34. //extern int HtmlNamedEntity(unsigned char *p, int length);
  35. /**
  36. * Lexer object
  37. */
  38. class Lexer
  39. {
  // --- state shared by every Lexer instance (static) ---
  // Identifier table keyed by identifier text; consulted by idPool() and scan().
  40. static Identifier[char[]] stringtable;
  // Scratch buffer reused by the literal-scanning methods; created lazily by the constructor.
  41. static OutBuffer stringbuffer;
  // NOTE(review): only referenced from commented-out code in the visible part of the file — confirm it is still needed.
  42. static Token * freelist;
  // --- per-instance state ---
  43. Token token; // current token
  44. Module mod; // current module
  45. Loc loc; // for error messages
  46. ubyte *base; // pointer to start of buffer
  47. ubyte *end; // past end of buffer
  48. ubyte *p; // current character
  49. int doDocComment; // collect doc comment information
  50. int anyToken; // !=0 means seen at least one token
  51. int commentToken; // !=0 means comments are TOKcomment's
  /**
   * Construct a lexer over part of an in-memory source buffer.
   *
   * Params:
   *   mod          = module being lexed (stored in this.mod and used for loc)
   *   base         = pointer to the start of the source buffer
   *   begoffset    = offset from base of the first byte to lex
   *   endoffset    = offset from base one past the last byte to lex
   *   doDocComment = non-zero to collect doc comment information
   *   commentToken = non-zero to return comments as TOKcomment tokens
   *
   * The shared stringbuffer is created on first use. If the text at the
   * start position begins with "#!", that whole line is skipped and the
   * line counter is set to 2.
   */
  this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
  {
      if (stringbuffer is null) {
          stringbuffer = new OutBuffer;
      }
      loc = Loc(mod, 1);
      this.base = base;
      this.end = base + endoffset;
      this.p = base + begoffset;
      this.mod = mod;
      this.doDocComment = doDocComment;
      this.commentToken = commentToken;

      /*
       * If first line starts with '#!', ignore the line
       */
      if (p[0] == '#' && p[1] == '!') {
          p += 2;
          while (true) {
              ubyte c = *p;
              switch (c) {
                  case '\n':
                      p++;
                      break;

                  case '\r':
                      p++;
                      if (*p == '\n') {
                          p++;
                      }
                      break;

                  case 0:
                  case 0x1A:
                      // End of input: leave p on the terminator so scan() reports EOF.
                      break;

                  default:
                      if (c & 0x80) {
                          uint u = decodeUTF();
                          if (u == PS || u == LS) {
                              // decodeUTF() leaves p on the last byte of the
                              // sequence; step past it so the line separator is
                              // consumed like '\n' and '\r' are above.
                              p++;
                              break;
                          }
                      }
                      p++;
                      continue;
              }
              break;
          }
          loc.linnum = 2;
      }
  }
  99. // generate a unique identifier for this string
  /**
   * Return the Identifier registered for str, creating it on first use.
   *
   * Params:
   *   str = identifier text used as the key into stringtable
   *
   * Returns: the existing table entry, or a newly created
   * Identifier(str, TOK.TOKidentifier) that has been added to the table.
   */
  static Identifier idPool(in char[] str) {
      // One lookup for the common "already present" case: `in` yields a
      // pointer to the stored value, or null when the key is absent.
      Identifier* slot = str in stringtable;
      if (slot !is null) {
          return(*slot);
      }
      Identifier id = new Identifier(str, TOK.TOKidentifier);
      stringtable[str] = id;
      return(id);
  }
  /**
   * One-time lexer set-up. Runs three initialisers in order:
   * cmtable_init(), dparser.Keyword.initKeywords() and
   * dparser.Token.createLexerTokens().
   */
  static void initKeywords() {
      cmtable_init();                      // character map
      dparser.Keyword.initKeywords();      // keyword tokens and identifiers
      dparser.Token.createLexerTokens();   // standard lexer tokens
  }
  119. // Combine two document comments into one.
  /**
   * Join two doc comments.
   *
   * Returns: c2 when c1 is empty, c1 when c2 is empty, otherwise
   * c1 and c2 separated by a single "\n".
   */
  static char[] combineComments(char[] c1, char[] c2) {
      if (!c1.length) {
          return(c2);
      }
      if (!c2.length) {
          return(c1);
      }
      return(c1 ~ "\n" ~ c2);
  }
  130. // Decode UTF character. Issue error messages for invalid sequences. Return decoded character, advance p to last character in UTF sequence.
  131. //! fix
  /**
   * Decode the UTF-8 sequence that starts at p.
   *
   * On entry *p must be a byte with the high bit set. On success p is moved
   * to the LAST byte of the sequence (callers advance past it themselves)
   * and the decoded code point is returned.
   *
   * A structurally broken sequence (bad lead byte or missing continuation
   * byte) is reported through error(), p is left unchanged and U+FFFD is
   * returned. A well-formed sequence that encodes an overlong form, a
   * surrogate or a value above U+10FFFF is reported but still consumed.
   *
   * Continuation bytes are checked one at a time, so reading stops at the
   * first byte that is not 10xxxxxx, which includes the 0 / 0x1A bytes the
   * rest of the lexer treats as end of input.
   */
  uint decodeUTF() {
      ubyte* s    = p;
      ubyte  lead = *s;

      assert(lead & 0x80);
      if (!(lead & 0x80)) {
          return(lead);   // ASCII; only reachable when asserts are compiled out
      }

      uint len;       // total number of bytes in the sequence
      uint u;         // code point being assembled
      uint smallest;  // smallest code point that genuinely needs `len` bytes

      if ((lead & 0xE0) == 0xC0) {
          len = 2; u = lead & 0x1F; smallest = 0x80;
      }
      else if ((lead & 0xF0) == 0xE0) {
          len = 3; u = lead & 0x0F; smallest = 0x800;
      }
      else if ((lead & 0xF8) == 0xF0) {
          len = 4; u = lead & 0x07; smallest = 0x10000;
      }
      else {
          // Stray continuation byte (10xxxxxx) or an obsolete 5/6-byte lead.
          error("invalid UTF-8 lead byte 0x%02x", lead);
          return(0xFFFD);
      }

      for (uint i = 1; i < len; i++) {
          ubyte b = s[i];
          if ((b & 0xC0) != 0x80) {
              error("invalid UTF-8 sequence: expected continuation byte, found 0x%02x", b);
              return(0xFFFD);
          }
          u = (u << 6) | (b & 0x3F);
      }

      if (u < smallest || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF)) {
          error("invalid UTF-8 sequence encoding 0x%x", u);
      }

      p = s + (len - 1);   // leave p on the last byte of the sequence
      return(u);
  }
  /**
   * Report an error at the lexer's current location (loc).
   *
   * The arguments are passed to formatLoc() together with loc and the result
   * is printed with writefln, unless there is no module or global.gag is set.
   * global.errors is incremented in either case.
   *
   * Throws: Exception("too many errors") when global.errors has already
   * reached global.max_errors at the time a message is printed.
   */
  void error(...) {
      if ((mod is null) || global.gag) {
          // Silent mode: nothing is printed, but the error is still counted.
          global.errors++;
          return;
      }

      writefln(formatLoc(loc, _arguments, _argptr));

      // moderate blizzard of cascading messages
      if (global.errors >= global.max_errors) {
          throw new Exception("too many errors");
      }
      global.errors++;
  }
  /**
   * Report an error at an explicit location.
   *
   * Same behaviour as error(), except that the message is formatted with the
   * `loc` argument (which shadows the member of the same name).
   *
   * Params:
   *   loc = location to report
   *   ... = format string and arguments, forwarded to formatLoc()
   *
   * Throws: Exception("too many errors") when global.errors has already
   * reached global.max_errors at the time a message is printed.
   */
  void errorLoc(Loc loc, ...) {
      if ((mod !is null) && !global.gag) {
          writefln(formatLoc(loc, _arguments, _argptr));

          // Use the same configurable limit as error() instead of a separate
          // hard-coded 20, so both reporting paths stop at the same point.
          if (global.errors >= global.max_errors) { // moderate blizzard of cascading messages
              throw new Exception("too many errors");
          }
      }
      global.errors++;
  }
  /**
   * Advance to the next token and return its kind.
   *
   * If a token was already produced by peek() (token.next is set) it is
   * copied over the current token; otherwise a fresh token is scanned from
   * the buffer.
   *
   * Returns: token.value of the new current token.
   */
  TOK nextToken() {
      Token* queued = token.next;

      if (queued is null) {
          scan(&token);
      }
      else {
          // Bitwise copy, including the queued token's own `next` link.
          memcpy(&token, queued, Token.sizeof);
      }
      return(token.value);
  }
  /**
   * Look at the token that follows ct without consuming it.
   *
   * When ct has no successor yet, a new Token is allocated, filled by
   * scan(), terminated with a null `next` and linked in as ct.next.
   *
   * Returns: ct.next.
   */
  Token* peek(inout Token ct) {
      if (ct.next is null) {
          Token* fresh = new Token;
          scan(fresh);
          fresh.next = null;
          ct.next    = fresh;
      }
      return(ct.next);
  }
  219. // Turn next token in buffer into a token.
  // Scan one token starting at p and store the result in *t.
  //
  // t.blockComment and t.lineComment are cleared first. The main loop then
  // dispatches on the byte at p:
  //  - white space and line ends are skipped (line ends increment loc.linnum);
  //  - comments are returned as TOKcomment when commentToken is set, otherwise
  //    they are skipped, after being handed to getDocComment() when
  //    doDocComment is set and the comment opens with a doubled marker;
  //  - '#' calls Pragma() and scanning continues;
  //  - a 0 or 0x1a byte sets t.value to TOKeof and returns without moving p;
  //  - anything else sets t.value (plus any payload fields) and returns.
  // t.ptr is set to the position where the current attempt started.
  220. void scan(Token* t) {
  221. // debug writefln("scan token");
  // Line number on entry; compared with a comment's starting line when calling getDocComment().
  222. uint lastLine = loc.linnum;
  223. uint linnum;
  224. t.blockComment = null;
  225. t.lineComment = null;
  226. while (true) {
  227. t.ptr = p;
  228. // debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p );
  229. switch (*p) {
  230. case 0:
  231. case 0x1a:
  232. t.value = TOK.TOKeof; // end of file
  233. // debug writefln( " EOF" );
  234. return;
  235. case ' ':
  236. case '\t':
  237. case '\v':
  238. case '\f':
  239. p++;
  240. // debug writefln( " whitespace" );
  241. continue; // skip white space
  242. case '\r':
  243. // debug writefln( " cr" );
  244. p++;
  // For CR LF the line is counted when the LF is seen on the next iteration.
  245. if (*p != '\n') { // if CR stands by itself
  246. loc.linnum++;
  247. }
  248. continue; // skip white space
  249. case '\n':
  250. // debug writefln( " nl" );
  251. p++;
  252. loc.linnum++;
  253. continue; // skip white space
  254. case '0':
  255. case '1':
  256. case '2':
  257. case '3':
  258. case '4':
  259. case '5':
  260. case '6':
  261. case '7':
  262. case '8':
  263. case '9':
  264. t.value = number(t);
  265. return;
  266. /*
  267. * #if CSTRINGS
  268. * case '\'':
  269. * t.value = charConstant(t, 0);
  270. * return;
  271. *
  272. * case '"':
  273. * t.value = stringConstant(t,0);
  274. * return;
  275. *
  276. * case 'l':
  277. * case 'L':
  278. * if( p[1] == '\'')
  279. * {
  280. * p++;
  281. * t.value = charConstant(t, 1);
  282. * return;
  283. * }
  284. * else if( p[1] == '"')
  285. * {
  286. * p++;
  287. * t.value = stringConstant(t, 1);
  288. * return;
  289. * }
  290. * #else
  291. */
  292. case '\'':
  293. // debug writefln( " char" );
  294. t.value = charConstant(t, 0);
  295. return;
  // r"..." is a wysiwyg string; a plain 'r' starts an identifier.
  296. case 'r':
  297. // debug writefln( " wysiwyg" );
  298. if (p[1] != '"') {
  299. goto case_ident;
  300. }
  301. p++;
  // Deliberate fall-through: p is now on the opening quote, which is passed as the terminator.
  302. case '`':
  303. t.value = wysiwygStringConstant(t, *p);
  304. return;
  // x"..." is a hex string; a plain 'x' starts an identifier.
  305. case 'x':
  306. // debug writefln( " hex string" );
  307. if (p[1] != '"') {
  308. goto case_ident;
  309. }
  310. p++;
  311. t.value = hexStringConstant(t);
  312. return;
  313. case '"':
  314. // debug writefln( " string" );
  315. t.value = escapeStringConstant(t, 0);
  316. // debug writefln( t.ustring );
  317. return;
  // One or more adjacent backslash escapes are collected into a single string token.
  318. case '\\': // escaped string literal
  319. // debug writefln( " escaped string literal" );
  320. uint c;
  321. stringbuffer.offset = 0;
  322. do {
  323. p++;
  324. c = escapeSequence();
  325. stringbuffer.write(c);
  326. } while (*p == '\\');
  327. // t.len = stringbuffer.offset;
  328. // stringbuffer.write(cast(byte)0);
  // NOTE(review): no .dup here, unlike escapeStringConstant() — confirm whether toString aliases the shared buffer.
  329. t.ustring = stringbuffer.toString;
  330. // memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset );
  331. t.postfix = 0;
  332. t.value = TOK.TOKstring;
  333. return;
  334. case 'l':
  335. case 'L':
  336. // #endif
  337. case 'a':
  338. case 'b':
  339. case 'c':
  340. case 'd':
  341. case 'e':
  342. case 'f':
  343. case 'g':
  344. case 'h':
  345. case 'i':
  346. case 'j':
  347. case 'k':
  348. case 'm':
  349. case 'n':
  350. case 'o':
  351. case 'p':
  352. case 'q': /*case 'r':*/
  353. case 's':
  354. case 't':
  355. case 'u':
  356. case 'v':
  357. case 'w': /*case 'x':*/
  358. case 'y':
  359. case 'z':
  360. case 'A':
  361. case 'B':
  362. case 'C':
  363. case 'D':
  364. case 'E':
  365. case 'F':
  366. case 'G':
  367. case 'H':
  368. case 'I':
  369. case 'J':
  370. case 'K':
  371. case 'M':
  372. case 'N':
  373. case 'O':
  374. case 'P':
  375. case 'Q':
  376. case 'R':
  377. case 'S':
  378. case 'T':
  379. case 'U':
  380. case 'V':
  381. case 'W':
  382. case 'X':
  383. case 'Y':
  384. case 'Z':
  385. case '_':
  // Identifier or keyword. Also reached by goto from the 'r', 'x' and default cases.
  386. case_ident:
  387. {
  388. // debug writefln( " identifier" );
  389. ubyte c;
  // Advance while the next byte is an identifier character or starts a UTF sequence accepted by isUniAlpha().
  390. do {
  391. c = *++p;
  392. } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
  393. // sv = stringtable.update((char *)t.ptr, p - t.ptr);
  // Copy the identifier text out of the source buffer, then look it up in (or add it to) stringtable.
  394. char[] tmp;
  395. tmp.length = p - t.ptr;
  396. memcpy(tmp.ptr, t.ptr, p - t.ptr);
  397. Identifier id;
  398. Identifier * pid = tmp in stringtable;
  399. if (pid) {
  400. id = *pid;
  401. }
  402. if (id is null) {
  403. id = new Identifier(tmp, TOK.TOKidentifier);
  404. stringtable[tmp] = id;
  405. }
  406. t.ident = id;
  // The token kind comes from the table entry's value field.
  407. t.value = cast(TOK)id.value;
  408. anyToken = 1;
  409. // if special identifier token
  410. if (*t.ptr == '_') {
  // The code that would fill these three buffers is commented out below, so they are never
  // written here, and the Id.DATE / Id.TIME / Id.TIMESTAMP branches leave t.ustring unassigned.
  411. static char date[11 + 1];
  412. static char time[8 + 1];
  413. static char timestamp[24 + 1];
  414. if (!date[0]) { // lazy evaluation
  415. //!!
  416. /+
  417. * time_t t;
  418. * char *p;
  419. * .time(&t);
  420. * p = ctime(&t);
  421. * assert(p);
  422. * sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
  423. * sprintf(time.ptr, "%.8s", p + 11);
  424. * sprintf(timestamp.ptr, "%.24s", p);
  425. +/
  426. }
  427. if (mod && id is Id.FILE) {
  428. t.value = TOK.TOKstring;
  429. if (loc.filename.length) {
  430. t.ustring = loc.filename;
  431. }
  432. else {
  433. t.ustring = mod.ident.toChars();
  434. }
  435. goto Llen;
  436. }
  437. else if (mod && id == Id.LINE) {
  438. t.value = TOK.TOKint64v;
  439. t.uns64value = loc.linnum;
  440. }
  441. else if (id == Id.DATE) {
  442. t.value = TOK.TOKstring;
  443. //! t.ustring = date;
  444. goto Llen;
  445. }
  446. else if (id == Id.TIME) {
  447. t.value = TOK.TOKstring;
  448. //! t.ustring = time;
  449. goto Llen;
  450. }
  451. else if (id == Id.TIMESTAMP) {
  452. t.value = TOK.TOKstring;
  453. //! t.ustring = timestamp;
  // Shared tail for the string-valued special identifiers.
  454. Llen:
  455. t.postfix = 0;
  456. // t.len = strlen((char *)t.ustring);
  457. }
  458. }
  459. //printf("t.value = %d\n",t.value);
  460. return;
  461. }
  462. // comments
  // '/' starts a '/=' operator, one of three comment forms, or a plain division operator.
  463. case '/':
  464. p++;
  465. switch (*p) {
  466. case '=':
  467. p++;
  468. t.value = TOK.TOKdivass;
  469. return;
  470. case '*': // '/*'
  471. p++;
  472. linnum = loc.linnum;
  // Outer loop: repeat until a '/' that really closes the comment is found.
  473. while (true) {
  // Inner loop: advance to the next '/', counting lines on the way.
  474. while (true) {
  475. ubyte c = *p;
  476. switch (c) {
  477. case '/':
  478. break;
  479. case '\n':
  480. loc.linnum++;
  481. p++;
  482. continue;
  483. case '\r':
  484. p++;
  485. if (*p != '\n') {
  486. loc.linnum++;
  487. }
  488. continue;
  489. case 0:
  490. case 0x1A:
  491. error("unterminated /* */ comment");
  492. p = end;
  493. t.value = TOK.TOKeof;
  494. return;
  495. default:
  496. if (c & 0x80) {
  497. uint u = decodeUTF();
  498. if (u == PS || u == LS) {
  499. loc.linnum++;
  500. }
  501. }
  502. p++;
  503. continue;
  504. }
  505. break;
  506. }
  507. p++;
  // The '/' just consumed closes the comment only if it follows a '*' and that '*' is not the
  // one that opened the comment (p - 3 == t.ptr means the text so far is exactly "/*/").
  508. if (p[-2] == '*' && p - 3 != t.ptr) {
  509. break;
  510. }
  511. }
  512. if (commentToken) {
  513. t.value = TOK.TOKcomment;
  514. return;
  515. }
  516. // if /** but not /**/
  517. else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) {
  518. getDocComment(t, lastLine == linnum); //! ?
  519. }
  520. continue;
  521. case '/': // do // style comments
  522. linnum = loc.linnum;
  // Advance to the end of the line; p is left on the last byte of the line terminator.
  523. while (1) {
  524. ubyte c = *++p;
  525. switch (c) {
  526. case '\n':
  527. break;
  528. case '\r':
  529. if (p[1] == '\n') {
  530. p++;
  531. }
  532. break;
  533. case 0:
  534. case 0x1a:
  // Comment runs to end of input.
  535. if (commentToken) {
  536. p = end;
  537. t.value = TOK.TOKcomment;
  538. return;
  539. }
  540. if (doDocComment && t.ptr[2] == '/') {
  541. getDocComment(t, lastLine == linnum);
  542. }
  543. p = end;
  544. t.value = TOK.TOKeof;
  545. return;
  546. default:
  547. if (c & 0x80) {
  548. uint u = decodeUTF();
  549. if (u == PS || u == LS) {
  550. break;
  551. }
  552. }
  553. continue;
  554. }
  555. break;
  556. }
  557. if (commentToken) {
  558. p++;
  559. loc.linnum++;
  560. t.value = TOK.TOKcomment;
  561. return;
  562. }
  563. if (doDocComment && t.ptr[2] == '/') {
  564. getDocComment(t, lastLine == linnum);
  565. }
  566. p++;
  567. loc.linnum++;
  568. continue;
  // Nesting comment: `nest` counts open /+ markers and the comment ends when it returns to 0.
  569. case '+':
  570. {
  571. int nest;
  572. linnum = loc.linnum;
  573. p++;
  574. nest = 1;
  575. while (1) {
  576. ubyte c = *p;
  577. switch (c) {
  578. case '/':
  579. p++;
  580. if (*p == '+') {
  581. p++;
  582. nest++;
  583. }
  584. continue;
  585. case '+':
  586. p++;
  587. if (*p == '/') {
  588. p++;
  589. if (--nest == 0) {
  590. break;
  591. }
  592. }
  593. continue;
  594. case '\r':
  595. p++;
  596. if (*p != '\n') {
  597. loc.linnum++;
  598. }
  599. continue;
  600. case '\n':
  601. loc.linnum++;
  602. p++;
  603. continue;
  604. case 0:
  605. case 0x1A:
  606. error("unterminated /+ +/ comment");
  607. p = end;
  608. t.value = TOK.TOKeof;
  609. return;
  610. default:
  611. if (c & 0x80) {
  612. uint u = decodeUTF();
  613. if (u == PS || u == LS) {
  614. loc.linnum++;
  615. }
  616. }
  617. p++;
  618. continue;
  619. }
  620. break;
  621. }
  622. if (commentToken) {
  623. t.value = TOK.TOKcomment;
  624. return;
  625. }
  626. if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) {
  627. // if /++ but not /++/
  628. getDocComment(t, lastLine == linnum);
  629. }
  630. continue;
  631. }
  632. default:
  633. break;
  634. }
  635. t.value = TOK.TOKdiv;
  636. return;
  // '.' followed by a digit is a real literal (p is rewound so inreal() sees the '.'); otherwise ., .. or ...
  637. case '.':
  638. p++;
  639. if (isdigit(*p)) {
  640. p--;
  641. t.value = inreal(t);
  642. }
  643. else if (p[0] == '.') {
  644. if (p[1] == '.') {
  645. p += 2;
  646. t.value = TOK.TOKdotdotdot;
  647. }
  648. else {
  649. p++;
  650. t.value = TOK.TOKslice;
  651. }
  652. }
  653. else {
  654. t.value = TOK.TOKdot;
  655. }
  656. return;
  // &, &=, &&
  657. case '&':
  658. p++;
  659. if (*p == '=') {
  660. p++;
  661. t.value = TOK.TOKandass;
  662. }
  663. else if (*p == '&') {
  664. p++;
  665. t.value = TOK.TOKandand;
  666. }
  667. else {
  668. t.value = TOK.TOKand;
  669. }
  670. return;
  671. // |, ||, |=
  672. case '|':
  673. p++;
  674. if (*p == '=') {
  675. p++;
  676. t.value = TOK.TOKorass;
  677. }
  678. else if (*p == '|') {
  679. p++;
  680. t.value = TOK.TOKoror;
  681. }
  682. else {
  683. t.value = TOK.TOKor;
  684. }
  685. return;
  // -, -=, --
  686. case '-':
  687. p++;
  688. if (*p == '=') {
  689. p++;
  690. t.value = TOK.TOKminass;
  691. }
  692. else if (*p == '-') {
  693. p++;
  694. t.value = TOK.TOKminusminus;
  695. }
  696. else {
  697. t.value = TOK.TOKmin;
  698. }
  699. return;
  700. // +, +=, ++
  701. case '+':
  702. p++;
  703. if (*p == '=') {
  704. p++;
  705. t.value = TOK.TOKaddass; // +=
  706. }
  707. else if (*p == '+') {
  708. p++;
  709. t.value = TOK.TOKplusplus; // ++
  710. }
  711. else {
  712. t.value = TOK.TOKadd; // +
  713. }
  714. return;
  715. // <, <=, <<=, <<, <>=, <>
  716. case '<':
  717. p++;
  718. if (*p == '=') {
  719. p++;
  720. t.value = TOK.TOKle; // <=
  721. }
  722. else if (*p == '<') {
  723. p++;
  724. if (*p == '=') {
  725. p++;
  726. t.value = TOK.TOKshlass; // <<=
  727. }
  728. else {
  729. t.value = TOK.TOKshl; // <<
  730. }
  731. }
  732. else if (*p == '>') {
  733. p++;
  734. if (*p == '=') {
  735. p++;
  736. t.value = TOK.TOKleg; // <>=
  737. }
  738. else {
  739. t.value = TOK.TOKlg; // <>
  740. }
  741. }
  742. else {
  743. t.value = TOK.TOKlt; // <
  744. }
  745. return;
  746. // >, >>, >>>, >=, >>=, >>>=
  747. case '>':
  748. p++;
  749. if (*p == '=') {
  750. p++;
  751. t.value = TOK.TOKge; // >=
  752. }
  753. else if (*p == '>') {
  754. p++;
  755. if (*p == '=') {
  756. p++;
  757. t.value = TOK.TOKshrass; // >>=
  758. }
  759. else if (*p == '>') {
  760. p++;
  761. if (*p == '=') {
  762. p++;
  763. t.value = TOK.TOKushrass; // >>>=
  764. }
  765. else {
  766. t.value = TOK.TOKushr; // >>>
  767. }
  768. }
  769. else {
  770. t.value = TOK.TOKshr; // >>
  771. }
  772. }
  773. else {
  774. t.value = TOK.TOKgt; // >
  775. }
  776. return;
  // !, !=, !==, and the unordered comparison operators that start with '!'
  777. case '!':
  778. p++;
  779. if (*p == '=') {
  780. p++;
  781. if (*p == '=') {
  782. p++;
  783. t.value = TOK.TOKnotidentity; // !==
  784. }
  785. else {
  786. t.value = TOK.TOKnotequal; // !=
  787. }
  788. }
  789. else if (*p == '<') {
  790. p++;
  791. if (*p == '>') {
  792. p++;
  793. if (*p == '=') {
  794. p++;
  795. t.value = TOK.TOKunord; // !<>=
  796. }
  797. else {
  798. t.value = TOK.TOKue; // !<>
  799. }
  800. }
  801. else if (*p == '=') {
  802. p++;
  803. t.value = TOK.TOKug; // !<=
  804. }
  805. else {
  806. t.value = TOK.TOKuge; // !<
  807. }
  808. }
  809. else if (*p == '>') {
  810. p++;
  811. if (*p == '=') {
  812. p++;
  813. t.value = TOK.TOKul; // !>=
  814. }
  815. else {
  816. t.value = TOK.TOKule; // !>
  817. }
  818. }
  819. else {
  820. t.value = TOK.TOKnot; // !
  821. }
  822. return;
  // =, ==, ===
  823. case '=':
  824. p++;
  825. if (*p == '=') {
  826. p++;
  827. if (*p == '=') {
  828. p++;
  829. t.value = TOK.TOKidentity; // ===
  830. }
  831. else {
  832. t.value = TOK.TOKequal; // ==
  833. }
  834. }
  835. else {
  836. t.value = TOK.TOKassign; // =
  837. }
  838. return;
  // ~, ~=
  839. case '~':
  840. p++;
  841. if (*p == '=') {
  842. p++;
  843. t.value = TOK.TOKcatass; // ~=
  844. }
  845. else {
  846. t.value = TOK.TOKtilde; // ~
  847. }
  848. return;
  849. // SINGLE
  850. case '(': p++; t.value = TOK.TOKlparen; return;
  851. case ')': p++; t.value = TOK.TOKrparen; return;
  852. case '[': p++; t.value = TOK.TOKlbracket; return;
  853. case ']': p++; t.value = TOK.TOKrbracket; return;
  854. case '{': p++; t.value = TOK.TOKlcurly; return;
  855. case '}': p++; t.value = TOK.TOKrcurly; return;
  856. case '?': p++; t.value = TOK.TOKquestion; return;
  857. case ',': p++; t.value = TOK.TOKcomma; return;
  858. case ';': p++; t.value = TOK.TOKsemicolon; return;
  859. case ':': p++; t.value = TOK.TOKcolon; return;
  860. case '$': p++; t.value = TOK.TOKdollar; return;
  861. // DOUBLE
  862. case '*': p++; if (*p == '=') {
  863. p++; t.value = TOK.TOKmulass;
  864. }
  865. else {
  866. t.value = TOK.TOKmul;
  867. } return;
  868. case '%': p++; if (*p == '=') {
  869. p++; t.value = TOK.TOKmodass;
  870. }
  871. else {
  872. t.value = TOK.TOKmod;
  873. } return;
  874. case '^': p++; if (*p == '=') {
  875. p++; t.value = TOK.TOKxorass;
  876. }
  877. else {
  878. t.value = TOK.TOKxor;
  879. } return;
  880. // removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return;
  // '#' hands control to Pragma(), then scanning resumes.
  881. case '#':
  882. p++;
  883. Pragma();
  884. continue;
  // Any other byte: a non-ASCII identifier start, a Unicode line separator, or an unsupported character.
  885. default:
  886. {
  887. debug writefln(" default char");
  888. ubyte c = *p;
  889. if (c & 0x80) {
  890. uint u = decodeUTF();
  891. // Check for start of unicode identifier
  892. if (isUniAlpha(u)) {
  893. goto case_ident;
  894. }
  895. if (u == PS || u == LS) {
  896. loc.linnum++;
  897. p++;
  898. continue;
  899. }
  900. }
  // Report the offending byte, skip it and keep scanning.
  901. if (isprint(c)) {
  902. error("unsupported char '%s'", cast(char)c);
  903. }
  904. else {
  905. error("unsupported char 0x%02x", cast(ubyte)c);
  906. }
  907. p++;
  908. continue;
  909. }
  910. }
  911. }
  912. }
  913. // Parse escape sequence.
  /**
   * Parse one escape sequence. On entry p points at the character that
   * follows the backslash; on return p is past the sequence.
   *
   * Handles the simple escapes (\' \" \? \\ \a \b \f \n \r \t \v), hex
   * escapes (\x with 2 digits, \u with 4, \U with 8), octal escapes of one
   * to three digits, and the start of a named entity (\&name;).
   *
   * Returns: the character value of the escape. At end of input (0 or
   * 0x1a) a backslash is returned and p is not advanced. An unrecognised
   * escape is reported and the offending character is returned unchanged.
   *
   * NOTE(review): named-entity lookup is disabled (commented out), so
   * "\&name;" currently returns '&' and leaves p on the ';' — confirm the
   * intended behaviour before relying on it.
   */
  uint escapeSequence() {
      uint c;
      int  n;
      int  ndigits;

      c = *p;
      switch (c) {
          case '\'':
          case '"':
          case '?':
          case '\\':
          Lconsume:
              p++;
              break;

          case 'a': c = 7;  goto Lconsume;
          case 'b': c = 8;  goto Lconsume;
          case 'f': c = 12; goto Lconsume;
          case 'n': c = 10; goto Lconsume;
          case 'r': c = 13; goto Lconsume;
          case 't': c = 9;  goto Lconsume;
          case 'v': c = 11; goto Lconsume;

          case 'u':
              ndigits = 4;
              goto Lhex;

          case 'U':
              ndigits = 8;
              goto Lhex;

          case 'x':
              ndigits = 2;
          Lhex:
              p++;
              c = *p;
              if (ishex(c)) {
                  uint v = 0;
                  n = 0;
                  while (1) {
                      if (isdigit(c)) {
                          c -= '0';
                      }
                      else if (islower(c)) {
                          c -= 'a' - 10;
                      }
                      else {
                          c -= 'A' - 10;
                      }
                      v = v * 16 + c;
                      c = *++p;
                      if (++n == ndigits) {
                          break;
                      }
                      if (!ishex(c)) {
                          error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                          break;
                      }
                  }
                  //! if( ndigits != 2 && !utf_isValidDchar(v))
                  //! error("invalid UTF character \\U%08x", v);
                  c = v;
              }
              else {
                  // Pass a char so %s prints the character, not its numeric code.
                  error("undefined escape hex sequence \\%s\n", cast(char)c);
              }
              break;

          case '&': // named character entity
              for (ubyte *idstart = ++p; 1; p++) {
                  switch (*p) {
                      case ';':
                          //!!!
                          /+
                          * c = HtmlNamedEntity(idstart, p - idstart);
                          * if( c == ~0 )
                          * {
                          * error("unnamed character entity &%.*s;", p - idstart, idstart);
                          * c = ' ';
                          * }
                          *
                          * p++;
                          +/
                          break;

                      default:
                          if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) {
                              continue;
                          }
                          error("unterminated named entity");
                          break;
                  }
                  break;
              }
              break;

          case 0:
          case 0x1a: // end of file
              c = '\\';
              break;

          default:
              if (isoctal(c)) {
                  // Accumulate in a uint: three octal digits can reach 0777
                  // (511), which would silently wrap in a ubyte.
                  uint v = 0;
                  n = 0;
                  do {
                      v = v * 8 + (c - '0');
                      c = *++p;
                  } while (++n < 3 && isoctal(c));
                  if (v > 0xFF) {
                      error("escape octal sequence \\%03o is larger than \\377", v);
                  }
                  c = v;
              }
              else {
                  // Pass a char so %s prints the character, not its numeric code.
                  error("undefined escape sequence \\%s\n", cast(char)c);
              }
              break;
      }
      return(c);
  }
  1024. /**************************************
  1025. */
  /**
   * Lex a wysiwyg string (r"..." or `...`): no escape processing is done.
   *
   * On entry p points at the opening quote. Line ends inside the literal
   * are normalised to '\n' and counted in loc.linnum.
   *
   * Params:
   *   t  = token that receives ustring and postfix
   *   tc = the quote character that terminates the literal ('"' or '`')
   *
   * Returns: TOK.TOKstring, also for an unterminated literal, in which case
   * an error is reported, t.ustring is empty and p is left on the
   * end-of-input byte.
   */
  TOK wysiwygStringConstant(Token *t, int tc) {
      uint c;
      Loc  start = loc;

      p++;
      stringbuffer.offset = 0;
      while (1) {
          c = *p++;
          switch (c) {
              case '\n':
                  loc.linnum++;
                  break;

              case '\r':
                  if (*p == '\n') {
                      continue; // ignore
                  }
                  c = '\n'; // treat EndOfLine as \n character
                  loc.linnum++;
                  break;

              case 0:
              case 0x1a:
                  error("unterminated string constant starting at %s", start.toChars());
                  t.ustring = "";
                  t.postfix = 0;
                  // Rewind onto the end-of-input byte so the next scan() sees
                  // EOF instead of reading past the terminator.
                  p--;
                  return(TOK.TOKstring);

              case '"':
              case '`':
                  if (c == tc) {
                      // Copy out of the shared scratch buffer and do not append
                      // a NUL, matching escapeStringConstant().
                      t.ustring = stringbuffer.toString().dup;
                      stringPostfix(t);
                      return(TOK.TOKstring);
                  }
                  break;

              default:
                  if (c & 0x80) {
                      p--;
                      uint u = decodeUTF();
                      p++;
                      if (u == PS || u == LS) {
                          loc.linnum++;
                      }
                      stringbuffer.write(u);
                      continue;
                  }
                  break;
          }
          // One byte per source character, as escapeStringConstant() does.
          stringbuffer.write(cast(char)c);
      }
  }
  1078. /**************************************
  1079. * Lex hex strings:
  1080. * x"0A ae 34FE BD"
  1081. */
  /**
   * Lex a hex string such as x"0A ae 34FE BD".
   *
   * On entry p points at the opening quote. White space and line ends
   * between digits are ignored (line ends are counted in loc.linnum). Each
   * pair of hex digits produces one byte of the result. Characters that are
   * not hex digits are reported and skipped; they do not contribute to the
   * value or to the digit count.
   *
   * Params:
   *   t = token that receives ustring and postfix
   *
   * Returns: TOK.TOKstring, also for an unterminated literal, in which case
   * an error is reported, t.ustring is empty and p is left on the
   * end-of-input byte.
   */
  TOK hexStringConstant(Token *t) {
      uint c;
      Loc  start = loc;
      uint n = 0;   // number of hex digits seen so far
      uint v;       // byte under construction (high nibble first)

      p++;
      stringbuffer.offset = 0;
      while (1) {
          c = *p++;
          switch (c) {
              case ' ':
              case '\t':
              case '\v':
              case '\f':
                  continue; // skip white space

              case '\r':
                  if (*p == '\n') {
                      continue; // ignore
                  }
                  // Treat isolated '\r' as if it were a '\n'
                  loc.linnum++;
                  continue;

              case '\n':
                  loc.linnum++;
                  continue;

              case 0:
              case 0x1a:
                  error("unterminated string constant starting at %s", start.toChars());
                  t.ustring = "";
                  t.postfix = 0;
                  // Rewind onto the end-of-input byte so the next scan() sees
                  // EOF instead of reading past the terminator.
                  p--;
                  return(TOK.TOKstring);

              case '"':
                  if (n & 1) {
                      error("odd number (%d) of hex characters in hex string", n);
                      stringbuffer.write(cast(char)v);
                  }
                  // Copy out of the shared scratch buffer, as
                  // escapeStringConstant() does.
                  t.ustring = stringbuffer.toString().dup;
                  stringPostfix(t);
                  return(TOK.TOKstring);

              default:
                  if (c >= '0' && c <= '9') {
                      c -= '0';
                  }
                  else if (c >= 'a' && c <= 'f') {
                      c -= 'a' - 10;
                  }
                  else if (c >= 'A' && c <= 'F') {
                      c -= 'A' - 10;
                  }
                  else {
                      if (c & 0x80) {
                          p--;
                          uint u = decodeUTF();
                          p++;
                          if (u == PS || u == LS) {
                              loc.linnum++;
                          }
                          else {
                              error("non-hex character \\u%x", u);
                          }
                      }
                      else {
                          // Pass a char so %s prints the character itself.
                          error("non-hex character '%s'", cast(char)c);
                      }
                      // Not a digit: do not merge it into the value or count it.
                      continue;
                  }

                  if (n & 1) {
                      v = (v << 4) | c;
                      stringbuffer.write(cast(char)v);   // one byte per digit pair
                  }
                  else {
                      v = c;
                  }
                  n++;
                  break;
          }
      }
  }
  1159. /**************************************
  1160. */
  /**
   * Lex a double-quoted string literal with backslash escapes.
   *
   * On entry p points at the opening '"'. Characters are collected in the
   * shared stringbuffer; escapes are translated by escapeSequence(); line
   * ends are normalised to '\n' and counted in loc.linnum.
   *
   * Params:
   *   t    = token that receives ustring and postfix
   *   wide = not used by this function
   *
   * Returns: TOK.TOKstring, also for an unterminated literal, in which case
   * an error is reported, t.ustring is empty and p is left on the
   * end-of-input byte.
   */
  TOK escapeStringConstant(Token *t, int wide) {
      Loc  start = loc;
      uint ch;

      p++;
      stringbuffer.offset = 0;
      for (;;) {
          ch = *p++;

          if (ch == '\\') {
              // Remember which kind of escape this is before it is consumed.
              ubyte kind = *p;
              ch = escapeSequence();
              if (kind == 'u' || kind == 'U' || kind == '&') {
                  stringbuffer.write(ch);   // written as a full uint
                  continue;
              }
          }
          else if (ch == '\n') {
              loc.linnum++;
          }
          else if (ch == '\r') {
              if (*p == '\n') {
                  continue; // ignore
              }
              ch = '\n'; // treat EndOfLine as \n character
              loc.linnum++;
          }
          else if (ch == '"') {
              t.ustring = stringbuffer.toString().dup;
              stringPostfix(t);
              return(TOK.TOKstring);
          }
          else if (ch == 0 || ch == 0x1a) {
              p--;   // stay on the end-of-input byte
              error("unterminated string constant starting at %s", start.toChars());
              t.ustring = "";
              t.postfix = 0;
              return(TOK.TOKstring);
          }
          else if (ch & 0x80) {
              p--;
              ch = decodeUTF();
              if (ch == LS || ch == PS) {
                  ch = '\n';
                  loc.linnum++;
              }
              p++;
              stringbuffer.write(cast(char)ch);
              continue;
          }

          stringbuffer.write(cast(char)ch);
      }
  }
  1228. //**************************************
  /**
   * Lex a character literal. On entry p points at the opening quote.
   *
   * The character value is stored in t.uns64value. The token kind starts
   * as TOKcharv and is widened to TOKwcharv or TOKdcharv for \u, \U and
   * \& escapes and for non-ASCII source characters.
   *
   * Params:
   *   t    = token that receives the value
   *   wide = not used by this function
   *
   * Returns: the token kind. An unterminated literal is reported through
   * error() and the kind determined so far is returned. When the literal
   * runs into end of input, p is left on the end-of-input byte.
   */
  TOK charConstant(Token *t, int wide) {
      uint c;
      TOK  tk = TOK.TOKcharv;

      //printf("Lexer.charConstant\n");
      p++;
      c = *p++;
      switch (c) {
          case '\\':
              switch (*p) {
                  case 'u':
                      t.uns64value = escapeSequence();
                      tk = TOK.TOKwcharv;
                      break;

                  case 'U':
                  case '&':
                      t.uns64value = escapeSequence();
                      tk = TOK.TOKdcharv;
                      break;

                  default:
                      t.uns64value = escapeSequence();
                      break;
              }
              break;

          case 0:
          case 0x1a:
              // Rewind onto the end-of-input byte so the next scan() sees EOF
              // instead of reading past the terminator.
              p--;
              error("unterminated character constant");
              return(tk);

          case '\n':
              loc.linnum++;
              error("unterminated character constant");
              return(tk);

          case '\r':
          case '\'':
              error("unterminated character constant");
              return(tk);

          default:
              if (c & 0x80) {
                  p--;
                  c = decodeUTF();
                  p++;
                  if (c == LS || c == PS) {
                      // A Unicode line separator ends the line like '\n' does.
                      loc.linnum++;
                      error("unterminated character constant");
                      return(tk);
                  }
                  if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) {
                      tk = TOK.TOKwcharv;
                  }
                  else {
                      tk = TOK.TOKdcharv;
                  }
              }
              t.uns64value = c;
              break;
      }

      if (*p != '\'') {
          error("unterminated character constant");
          return(tk);
      }
      p++;
      return(tk);
  }
  1286. // Get postfix of string literal.
  /**
   * Read the optional one-character suffix that follows a string literal.
   *
   * If the byte at p is 'c', 'w' or 'd' it is stored in t.postfix and
   * consumed; otherwise t.postfix is set to 0 and p is left unchanged.
   */
  void stringPostfix(Token *t) {
      ubyte suffix = *p;

      if (suffix == 'c' || suffix == 'w' || suffix == 'd') {
          t.postfix = suffix;
          p++;
      }
      else {
          t.postfix = 0;
      }
  }
  1300. /***************************************
  1301. * Read \u or \U unicode sequence
  1302. * Input:
  1303. * u 'u' or 'U'
  1304. */
  1305. /*
  1306. * uint Wchar(uint u)
  1307. * {
  1308. * uint value;
  1309. * uint n;
  1310. * ubyte c;
  1311. * uint nchars;
  1312. *
  1313. * nchars = (u == 'U') ? 8 : 4;
  1314. * value = 0;
  1315. * for (n = 0; 1; n++)
  1316. * {
  1317. * ++p;
  1318. * if( n == nchars)
  1319. * break;
  1320. * c = *p;
  1321. * if( !ishex(c))
  1322. * {
  1323. * error("\\%s sequence must be followed by %d hex characters", u, nchars);
  1324. * break;
  1325. * }
  1326. * if( isdigit(c))
  1327. * c -= '0';
  1328. * else if( islower(c))
  1329. * c -= 'a' - 10;
  1330. * else
  1331. * c -= 'A' - 10;
  1332. * value <<= 4;
  1333. * value |= c;
  1334. * }
  1335. * return value;
  1336. * }
  1337. */
  1338. /**************************************
  1339. * Read in a number.
  1340. * If it's an integer, store it in tok.TKutok.Vlong.
  1341. * integers can be decimal, octal or hex
  1342. * Handle the suffixes U, UL, LU, L, etc.
  1343. * If it's double, store it in tok.TKutok.Vdouble.
  1344. * Returns:
  1345. * TKnum
  1346. * TKdouble,...
  1347. */
  1348. TOK number(Token *t) {
  1349. //debug writefln("Lexer.number()");
  1350. // We use a state machine to collect numbers
  1351. enum STATE {
  1352. STATE_initial,
  1353. STATE_0,
  1354. STATE_decimal,
  1355. STATE_octal,
  1356. STATE_octale,
  1357. STATE_hex,
  1358. STATE_binary,
  1359. STATE_hex0,
  1360. STATE_binary0,
  1361. STATE_hexh,
  1362. STATE_error
  1363. }
  1364. enum FLAGS {
  1365. FLAGS_decimal = 1, // decimal
  1366. FLAGS_unsigned = 2, // u or U suffix
  1367. FLAGS_long = 4, // l or L suffix
  1368. }
  1369. FLAGS flags = FLAGS.FLAGS_decimal;
  1370. int i;
  1371. TOK result;
  1372. int base;
  1373. stringbuffer.offset = 0;
  1374. // stringbuffer.data = null;
  1375. STATE state = STATE.STATE_initial;
  1376. ubyte * start = p;
  1377. TOK _isreal() {
  1378. p = start;
  1379. return(inreal(t));
  1380. }
  1381. while (true) {
  1382. char c = cast(char)*p;
  1383. switch (state) {
  1384. case STATE.STATE_initial: // opening state
  1385. if (c == '0') {
  1386. state = STATE.STATE_0;
  1387. }
  1388. else {
  1389. state = STATE.STATE_decimal;
  1390. }
  1391. break;
  1392. case STATE.STATE_0:
  1393. flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal);
  1394. switch (c) {
  1395. // #if ZEROH
  1396. // case 'H': // 0h
  1397. // case 'h':
  1398. // goto hexh;
  1399. // #endif
  1400. case 'X':
  1401. case 'x':
  1402. state = STATE.STATE_hex0;
  1403. break;
  1404. case '.':
  1405. if (p[1] == '.') { // .. is a separate token
  1406. goto done;
  1407. }
  1408. case 'i':
  1409. case 'f':
  1410. case 'F':
  1411. goto _Real;
  1412. // #if ZEROH
  1413. // case 'E':
  1414. // case 'e':
  1415. // goto case_hex;
  1416. // #endif
  1417. case 'B':
  1418. case 'b':
  1419. state = STATE.STATE_binary0;
  1420. break;
  1421. case '0':

Large files are truncated, but you can click here to view the full file