PageRenderTime 68ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/tests/input/d/Lexer.d

http://github.com/bengardner/uncrustify
D | 2335 lines | 2051 code | 117 blank | 167 comment | 238 complexity | 5d46d62f71385db3b041a44504c71bf1 MD5 | raw file
Possible License(s): GPL-2.0
  1. /+
  2. Copyright (c) 1999-2006 by Digital Mars
  3. All Rights Reserved
  4. written by Walter Bright www.digitalmars.com
  5. License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.
  6. See the included readme.txt for details.
  7. D Language conversion by: J Duncan
  8. +/
  9. /**
  10. d language lexer
  11. */
  12. module dparser.Lexer;
  13. import dparser.Root;
  14. import dparser.Tokens;
  15. import dparser.Token;
  16. import dparser.Keyword;
  17. import dparser.Types;
  18. import dparser.Module;
  19. import dparser.Identifier;
  20. import dparser.unialpha;
  21. import dparser.OutBuffer;
  22. //private import std.ctype;
  23. //private import std.string;
  24. //import dwf.core.debugapi;
  25. int errno = 0;
  26. //#if _WIN32 && __DMC__
  27. // from \dm\src\include\setlocal.h
  28. //extern "C" char * __cdecl __locale_decpoint;
  29. char* __locale_decpoint;
  30. //#endif
  31. //const uint LS = 0x2028; // UTF line separator
  32. //const uint PS = 0x2029; // UTF paragraph separator
  33. //extern int isUniAlpha(unsigned u);
  34. //extern int HtmlNamedEntity(unsigned char *p, int length);
  35. /**
  36. Lexer object
  37. */
  38. class Lexer
  39. {
  40. static Identifier[char[]] stringtable;
  41. static OutBuffer stringbuffer;
  42. static Token* freelist;
  43. Token token; // current token
  44. Module mod; // current module
  45. Loc loc; // for error messages
  46. ubyte *base; // pointer to start of buffer
  47. ubyte *end; // past end of buffer
  48. ubyte *p; // current character
  49. int doDocComment; // collect doc comment information
  50. int anyToken; // !=0 means seen at least one token
  51. int commentToken; // !=0 means comments are TOKcomment's
  /**
   * Set up a lexer over the source text held in a byte buffer.
   *
   * Params:
   *  mod          = module being lexed; also stored in loc for error messages
   *  base         = pointer to the start of the buffer
   *  begoffset    = offset from base at which lexing starts
   *  endoffset    = offset from base used to compute end
   *  doDocComment = stored as-is; non-zero makes scan() collect doc comments
   *  commentToken = stored as-is; non-zero makes scan() return comments as tokens
   *
   * When the text starts with "#!" the whole first line is skipped and the
   * line counter is set to 2.
   */
  this( Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken )
  {
      // the scratch buffer is shared by all lexers; create it on first use
      if( stringbuffer is null )
          stringbuffer = new OutBuffer;

      this.mod = mod;
      this.base = base;
      this.p = base + begoffset;
      this.end = base + endoffset;
      this.commentToken = commentToken;
      this.doDocComment = doDocComment;
      loc = Loc(mod, 1);

      // nothing more to do unless the first line is a "#!" line
      if( p[0] != '#' || p[1] != '!' )
          return;

      p += 2;
      for( bool lineDone = false; !lineDone; )
      {
          ubyte ch = *p;
          if( ch == '\n' )
          {
              p++;
              lineDone = true;
          }
          else if( ch == '\r' )
          {
              // swallow CR and an immediately following LF
              p++;
              if( *p == '\n' )
                  p++;
              lineDone = true;
          }
          else if( ch == 0 || ch == 0x1A )
          {
              // end of text: stop without consuming the terminator
              lineDone = true;
          }
          else
          {
              if( ch & 0x80 )
              {
                  uint u = decodeUTF();
                  if( u == PS || u == LS )
                  {
                      // Unicode line/paragraph separator ends the line; p is not advanced here
                      lineDone = true;
                      continue;
                  }
              }
              p++;
          }
      }
      loc.linnum = 2;
  }
  /**
   * Return the unique Identifier for a string, creating it on first use.
   *
   * Looks str up in the shared stringtable; when it is absent a new
   * Identifier with token value TOK.TOKidentifier is created and stored
   * under that key.
   *
   * Params:
   *  str = spelling of the identifier
   * Returns: the Identifier stored in stringtable for str
   */
  static Identifier idPool( in char[] str )
  {
      // one hash lookup: `in` yields a pointer to the slot, or null when absent
      Identifier* slot = str in stringtable;
      if( slot !is null )
          return *slot;

      Identifier id = new Identifier( str, TOK.TOKidentifier );
      stringtable[str] = id;
      return id;
  }
  /**
   * One-time static setup for the lexer. Runs, in this order:
   * the character-map initialiser, the keyword initialiser from
   * dparser.Keyword, and the lexer-token initialiser from dparser.Token.
   */
  static void initKeywords()
  {
      cmtable_init();                         // character classification map
      dparser.Keyword.initKeywords();         // keyword tokens and identifiers
      dparser.Token.createLexerTokens();      // standard lexer tokens
  }
  /**
   * Merge two document comments into one.
   *
   * Params:
   *  c1 = first comment (may be empty)
   *  c2 = second comment (may be empty)
   * Returns: c2 when c1 is empty, c1 when c2 is empty, otherwise a new
   *          array holding c1, a newline, then c2.
   */
  static char[] combineComments( char[] c1, char[] c2 )
  {
      if( !c1.length )
          return c2;
      if( !c2.length )
          return c1;
      return c1 ~ "\n" ~ c2;
  }
  /**
   * Decode the UTF-8 sequence that starts at p.
   *
   * On success p is advanced to the LAST byte of the sequence (callers then
   * step past it themselves) and the decoded code point is returned.
   *
   * On a malformed sequence (bad lead byte, missing continuation byte,
   * overlong encoding, surrogate, or value above 0x10FFFF) an error is
   * reported through error(), p is left on the lead byte, and the Unicode
   * replacement character 0xFFFD is returned.
   *
   * Precondition: *p has its high bit set.
   * Reading stops at the first byte that is not a continuation byte, so a
   * 0 byte ending the text is never stepped over.
   */
  uint decodeUTF()
  {
      ubyte* s = p;
      ubyte lead = *s;

      assert( lead & 0x80 );
      if( !(lead & 0x80) )
          return lead;        // plain ASCII; only reachable when asserts are compiled out

      // report the bad sequence and hand back the replacement character
      uint invalid()
      {
          error( "invalid UTF-8 sequence starting with byte 0x%02x", cast(ubyte) lead );
          return 0xFFFD;
      }

      uint len;       // total bytes in the sequence
      uint u;         // code point being assembled
      uint minimum;   // smallest code point that legitimately needs len bytes

      if( (lead & 0xE0) == 0xC0 )
      {
          len = 2;
          u = lead & 0x1F;
          minimum = 0x80;
      }
      else if( (lead & 0xF0) == 0xE0 )
      {
          len = 3;
          u = lead & 0x0F;
          minimum = 0x800;
      }
      else if( (lead & 0xF8) == 0xF0 )
      {
          len = 4;
          u = lead & 0x07;
          minimum = 0x10000;
      }
      else
      {
          // stray continuation byte (10xxxxxx) or a lead byte UTF-8 no longer allows
          return invalid();
      }

      for( uint i = 1; i < len; i++ )
      {
          ubyte cont = s[i];
          if( (cont & 0xC0) != 0x80 )
              return invalid();
          u = (u << 6) | (cont & 0x3F);
      }

      if( u < minimum || (u >= 0xD800 && u <= 0xDFFF) || u > 0x10FFFF )
          return invalid();

      p += len - 1;   // leave p on the final byte of the sequence
      return u;
  }
  /**
   * Report an error at the lexer's current location (loc).
   *
   * The variadic arguments are passed to formatLoc together with loc and the
   * result is printed with writefln. Nothing is printed when there is no
   * module or when global.gag is set. The global error counter is incremented
   * in every case that returns normally.
   *
   * Throws: Exception("too many errors") when, after printing, the error
   *         counter has already reached global.max_errors.
   */
  void error( ... )
  {
      bool silent = (mod is null) || global.gag;
      if( !silent )
      {
          writefln( formatLoc( loc, _arguments, _argptr ) );

          // moderate blizzard of cascading messages
          if( global.errors >= global.max_errors )
              throw new Exception( "too many errors" );
      }
      global.errors++;
  }
  /**
   * Report an error at an explicitly supplied location.
   *
   * Same behaviour as error(), except that the message is formatted with the
   * loc parameter instead of the lexer's current location. The cascade limit
   * is global.max_errors, the same limit error() uses (this function
   * previously hard-coded 20).
   *
   * Params:
   *  loc = location to report
   *  ... = format string and arguments handed to formatLoc
   *
   * Throws: Exception("too many errors") when, after printing, the error
   *         counter has already reached global.max_errors.
   */
  void errorLoc(Loc loc, ...)
  {
      if( (mod !is null) && !global.gag )
      {
          writefln( formatLoc( loc, _arguments, _argptr ) );

          // moderate blizzard of cascading messages
          if( global.errors >= global.max_errors )
              throw new Exception( "too many errors" );
      }
      global.errors++;
  }
  /**
   * Advance to the next token and return its kind.
   *
   * If a look-ahead token is already chained behind the current one
   * (token.next, as set up by peek()), it is copied over the current token;
   * otherwise a fresh token is scanned from the buffer.
   *
   * Returns: token.value of the new current token
   */
  TOK nextToken()
  {
      Token* queued = token.next;
      if( queued is null )
          scan( &token );
      else
          memcpy( &token, queued, Token.sizeof );   // the copied token is not recycled
      return token.value;
  }
  /**
   * Look at the token that follows ct without consuming it.
   *
   * If ct has no successor yet, one is allocated, filled in by scan() and
   * linked in as ct.next, so repeated calls return the same token.
   *
   * Params:
   *  ct = token whose successor is wanted
   * Returns: pointer to the token after ct
   */
  Token* peek( inout Token ct )
  {
      if( ct.next is null )
      {
          Token* ahead = new Token;
          scan( ahead );
          ahead.next = null;
          ct.next = ahead;
      }
      return ct.next;
  }
  /**
   * Turn the next token in the buffer into a token.
   *
   * Starting at p, skips white space, line ends and (unless commentToken is
   * set) comments, then fills in *t: t.ptr points at the first byte of the
   * token, t.value receives the token kind, and literal/identifier data is
   * stored by the helper that lexes it. p is left just past the token.
   * A 0 or 0x1A byte yields TOK.TOKeof.
   *
   * Params:
   *  t = token to fill in; its blockComment and lineComment are cleared first
   *
   * Changes relative to the earlier version, both confined to the
   * backslash-literal case:
   *  - t.ustring now receives a copy (.dup) of the shared static
   *    stringbuffer, as escapeStringConstant already does, so lexing a later
   *    token no longer overwrites the text of this one;
   *  - escape values are appended as one byte, or as UTF-8 for \u, \U and
   *    \&, instead of through the uint overload of write().
   */
  void scan( Token* t )
  {
      uint lastLine = loc.linnum;
      uint linnum;

      // Append code point u to stringbuffer as UTF-8, one byte per write.
      void putUTF8( uint u )
      {
          if( u < 0x80 )
          {
              stringbuffer.write( cast(char) u );
          }
          else if( u < 0x800 )
          {
              stringbuffer.write( cast(char)(0xC0 | (u >> 6)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else if( u < 0x10000 )
          {
              stringbuffer.write( cast(char)(0xE0 | (u >> 12)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else
          {
              stringbuffer.write( cast(char)(0xF0 | ((u >> 18) & 0x07)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 12) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
      }

      t.blockComment = null;
      t.lineComment = null;
      while( true )
      {
          t.ptr = p;
          switch( *p )
          {
              case 0:
              case 0x1a:
                  t.value = TOK.TOKeof;               // end of file
                  return;

              case ' ':
              case '\t':
              case '\v':
              case '\f':
                  p++;
                  continue;                           // skip white space

              case '\r':
                  p++;
                  if( *p != '\n' )                    // if CR stands by itself
                      loc.linnum++;
                  continue;                           // skip white space

              case '\n':
                  p++;
                  loc.linnum++;
                  continue;                           // skip white space

              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                  t.value = number(t);
                  return;

              case '\'':
                  t.value = charConstant(t,0);
                  return;

              case 'r':
                  // r"..." is a wysiwyg string; any other 'r' starts an identifier
                  if( p[1] != '"')
                      goto case_ident;
                  p++;
                  // deliberate fall-through with p on the opening quote
              case '`':
                  t.value = wysiwygStringConstant(t, *p);
                  return;

              case 'x':
                  // x"..." is a hex string; any other 'x' starts an identifier
                  if( p[1] != '"')
                      goto case_ident;
                  p++;
                  t.value = hexStringConstant(t);
                  return;

              case '"':
                  t.value = escapeStringConstant( t, 0 );
                  return;

              case '\\':                              // escaped string literal
                  uint c;
                  stringbuffer.offset = 0;
                  do
                  {
                      p++;
                      ubyte selector = *p;            // which escape follows the backslash
                      c = escapeSequence();
                      if( selector == 'u' || selector == 'U' || selector == '&' )
                          putUTF8( c );
                      else
                          stringbuffer.write( cast(char) c );
                  } while (*p == '\\');
                  // copy: stringbuffer is static and is reused for the next literal
                  t.ustring = stringbuffer.toString().dup;
                  t.postfix = 0;
                  t.value = TOK.TOKstring;
                  return;

              case 'l':
              case 'L':
              case 'a': case 'b': case 'c': case 'd': case 'e':
              case 'f': case 'g': case 'h': case 'i': case 'j':
              case 'k': case 'm': case 'n': case 'o':
              case 'p': case 'q': /*case 'r':*/ case 's': case 't':
              case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
              case 'z':
              case 'A': case 'B': case 'C': case 'D': case 'E':
              case 'F': case 'G': case 'H': case 'I': case 'J':
              case 'K': case 'M': case 'N': case 'O':
              case 'P': case 'Q': case 'R': case 'S': case 'T':
              case 'U': case 'V': case 'W': case 'X': case 'Y':
              case 'Z':
              case '_':
              case_ident:
              {
                  ubyte c;
                  do
                  {
                      c = *++p;
                  } while( isidchar(c) || (c & 0x80 && isUniAlpha( decodeUTF())) );

                  // private copy of the spelling, used as the table key
                  char[] tmp;
                  tmp.length = p - t.ptr;
                  memcpy( tmp.ptr, t.ptr, p - t.ptr );

                  Identifier id;
                  Identifier* pid = tmp in stringtable;
                  if( pid )
                  {
                      id = *pid;
                  }
                  if( id is null )
                  {
                      id = new Identifier( tmp, TOK.TOKidentifier );
                      stringtable[tmp] = id;
                  }
                  t.ident = id;
                  t.value = cast(TOK) id.value;
                  anyToken = 1;

                  // if special identifier token
                  if( *t.ptr == '_')
                  {
                      static char date[11+1];
                      static char time[8+1];
                      static char timestamp[24+1];
                      if( !date[0] )                  // lazy evaluation
                      {
                          //!!
                          /+
                          time_t t;
                          char *p;
                          .time(&t);
                          p = ctime(&t);
                          assert(p);
                          sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
                          sprintf(time.ptr, "%.8s", p + 11);
                          sprintf(timestamp.ptr, "%.24s", p);
                          +/
                      }
                      if( mod && id is Id.FILE )
                      {
                          t.value = TOK.TOKstring;
                          if( loc.filename.length )
                              t.ustring = loc.filename;
                          else
                              t.ustring = mod.ident.toChars();
                          goto Llen;
                      }
                      else if( mod && id == Id.LINE )
                      {
                          t.value = TOK.TOKint64v;
                          t.uns64value = loc.linnum;
                      }
                      else if( id == Id.DATE )
                      {
                          // NOTE(review): t.ustring is not assigned for DATE/TIME/TIMESTAMP
                          t.value = TOK.TOKstring;
                          //! t.ustring = date;
                          goto Llen;
                      }
                      else if( id == Id.TIME )
                      {
                          t.value = TOK.TOKstring;
                          //! t.ustring = time;
                          goto Llen;
                      }
                      else if( id == Id.TIMESTAMP )
                      {
                          t.value = TOK.TOKstring;
                          //! t.ustring = timestamp;
                      Llen:
                          t.postfix = 0;
                      }
                  }
                  return;
              }

              // comments
              case '/':
                  p++;
                  switch( *p )
                  {
                      case '=':
                          p++;
                          t.value = TOK.TOKdivass;
                          return;

                      case '*':                       // block comment
                          p++;
                          linnum = loc.linnum;
                          while( true )
                          {
                              // inner loop: advance to the next '/'
                              while( true )
                              {
                                  ubyte c = *p;
                                  switch( c )
                                  {
                                      case '/':
                                          break;

                                      case '\n':
                                          loc.linnum++;
                                          p++;
                                          continue;

                                      case '\r':
                                          p++;
                                          if( *p != '\n')
                                              loc.linnum++;
                                          continue;

                                      case 0:
                                      case 0x1A:
                                          error("unterminated /* */ comment");
                                          p = end;
                                          t.value = TOK.TOKeof;
                                          return;

                                      default:
                                          if( c & 0x80)
                                          {
                                              uint u = decodeUTF();
                                              if( u == PS || u == LS )
                                                  loc.linnum++;
                                          }
                                          p++;
                                          continue;
                                  }
                                  break;
                              }
                              p++;
                              // the '/' closes the comment only when preceded by a '*'
                              // that is not the '*' of the opening marker
                              if( p[-2] == '*' && p - 3 != t.ptr )
                                  break;
                          }
                          if( commentToken )
                          {
                              t.value = TOK.TOKcomment;
                              return;
                          }
                          // doc comment: opener followed by a second '*', but not the empty 4-byte form
                          else if( doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr )
                              getDocComment( t, lastLine == linnum );    //! ?
                          continue;

                      case '/':                       // line comment
                          linnum = loc.linnum;
                          while (1)
                          {
                              ubyte c = *++p;
                              switch (c)
                              {
                                  case '\n':
                                      break;

                                  case '\r':
                                      if( p[1] == '\n')
                                          p++;
                                      break;

                                  case 0:
                                  case 0x1a:
                                      if( commentToken )
                                      {
                                          p = end;
                                          t.value = TOK.TOKcomment;
                                          return;
                                      }
                                      if( doDocComment && t.ptr[2] == '/' )
                                          getDocComment( t, lastLine == linnum );
                                      p = end;
                                      t.value = TOK.TOKeof;
                                      return;

                                  default:
                                      if( c & 0x80)
                                      {
                                          uint u = decodeUTF();
                                          if( u == PS || u == LS)
                                              break;
                                      }
                                      continue;
                              }
                              break;
                          }
                          if( commentToken )
                          {
                              p++;
                              loc.linnum++;
                              t.value = TOK.TOKcomment;
                              return;
                          }
                          if( doDocComment && t.ptr[2] == '/' )
                              getDocComment( t, lastLine == linnum );
                          p++;
                          loc.linnum++;
                          continue;

                      case '+':                       // nesting comment
                      {
                          int nest;
                          linnum = loc.linnum;
                          p++;
                          nest = 1;
                          while (1)
                          {
                              ubyte c = *p;
                              switch (c)
                              {
                                  case '/':
                                      p++;
                                      if( *p == '+')
                                      {
                                          p++;
                                          nest++;
                                      }
                                      continue;

                                  case '+':
                                      p++;
                                      if( *p == '/')
                                      {
                                          p++;
                                          if( --nest == 0)
                                              break;
                                      }
                                      continue;

                                  case '\r':
                                      p++;
                                      if( *p != '\n')
                                          loc.linnum++;
                                      continue;

                                  case '\n':
                                      loc.linnum++;
                                      p++;
                                      continue;

                                  case 0:
                                  case 0x1A:
                                      error("unterminated /+ +/ comment");
                                      p = end;
                                      t.value = TOK.TOKeof;
                                      return;

                                  default:
                                      if( c & 0x80 )
                                      {
                                          uint u = decodeUTF();
                                          if( u == PS || u == LS)
                                              loc.linnum++;
                                      }
                                      p++;
                                      continue;
                              }
                              break;
                          }
                          if( commentToken )
                          {
                              t.value = TOK.TOKcomment;
                              return;
                          }
                          if( doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr )
                          {
                              // doc comment: opener followed by a second '+', but not the empty 4-byte form
                              getDocComment(t, lastLine == linnum);
                          }
                          continue;
                      }

                      default:
                          break;
                  }
                  t.value = TOK.TOKdiv;
                  return;

              case '.':
                  p++;
                  if( isdigit(*p))
                  {
                      // ".5" style literal: back up so inreal sees the dot
                      p--;
                      t.value = inreal(t);
                  }
                  else if( p[0] == '.')
                  {
                      if( p[1] == '.')
                      {
                          p += 2;
                          t.value = TOK.TOKdotdotdot;
                      }
                      else
                      {
                          p++;
                          t.value = TOK.TOKslice;
                      }
                  }
                  else
                      t.value = TOK.TOKdot;
                  return;

              // &, &&, &=
              case '&':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      t.value = TOK.TOKandass;
                  }
                  else if( *p == '&')
                  {
                      p++;
                      t.value = TOK.TOKandand;
                  }
                  else
                      t.value = TOK.TOKand;
                  return;

              // |, ||, |=
              case '|':
                  p++;
                  if( *p == '=' )
                  {
                      p++;
                      t.value = TOK.TOKorass;
                  }
                  else if( *p == '|')
                  {
                      p++;
                      t.value = TOK.TOKoror;
                  }
                  else
                      t.value = TOK.TOKor;
                  return;

              // -, -=, --
              case '-':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      t.value = TOK.TOKminass;
                  }
                  else if( *p == '-')
                  {
                      p++;
                      t.value = TOK.TOKminusminus;
                  }
                  else
                      t.value = TOK.TOKmin;
                  return;

              // +, +=, ++
              case '+':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      t.value = TOK.TOKaddass;        // +=
                  }
                  else if( *p == '+')
                  {
                      p++;
                      t.value = TOK.TOKplusplus;      // ++
                  }
                  else
                      t.value = TOK.TOKadd;           // +
                  return;

              // <, <=, <<=, <<, <>=, <>
              case '<':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      t.value = TOK.TOKle;            // <=
                  }
                  else if( *p == '<')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKshlass;    // <<=
                      }
                      else
                          t.value = TOK.TOKshl;       // <<
                  }
                  else if( *p == '>')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKleg;       // <>=
                      }
                      else
                          t.value = TOK.TOKlg;        // <>
                  }
                  else
                      t.value = TOK.TOKlt;            // <
                  return;

              // >, >>, >>>, >=, >>=, >>>=
              case '>':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      t.value = TOK.TOKge;            // >=
                  }
                  else if( *p == '>')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKshrass;    // >>=
                      }
                      else if( *p == '>')
                      {
                          p++;
                          if( *p == '=')
                          {
                              p++;
                              t.value = TOK.TOKushrass;   // >>>=
                          }
                          else
                              t.value = TOK.TOKushr;      // >>>
                      }
                      else
                          t.value = TOK.TOKshr;       // >>
                  }
                  else
                      t.value = TOK.TOKgt;            // >
                  return;

              case '!':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKnotidentity;   // !==
                      }
                      else
                          t.value = TOK.TOKnotequal;      // !=
                  }
                  else if( *p == '<')
                  {
                      p++;
                      if( *p == '>')
                      {
                          p++;
                          if( *p == '=')
                          {
                              p++;
                              t.value = TOK.TOKunord;     // !<>=
                          }
                          else
                              t.value = TOK.TOKue;        // !<>
                      }
                      else if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKug;            // !<=
                      }
                      else
                          t.value = TOK.TOKuge;           // !<
                  }
                  else if( *p == '>')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKul;            // !>=
                      }
                      else
                          t.value = TOK.TOKule;           // !>
                  }
                  else
                      t.value = TOK.TOKnot;               // !
                  return;

              case '=':
                  p++;
                  if( *p == '=')
                  {
                      p++;
                      if( *p == '=')
                      {
                          p++;
                          t.value = TOK.TOKidentity;      // ===
                      }
                      else
                          t.value = TOK.TOKequal;         // ==
                  }
                  else
                      t.value = TOK.TOKassign;            // =
                  return;

              case '~':
                  p++;
                  if( *p == '=' )
                  {
                      p++;
                      t.value = TOK.TOKcatass;            // ~=
                  }
                  else
                      t.value = TOK.TOKtilde;             // ~
                  return;

              // SINGLE
              case '(': p++; t.value = TOK.TOKlparen; return;
              case ')': p++; t.value = TOK.TOKrparen; return;
              case '[': p++; t.value = TOK.TOKlbracket; return;
              case ']': p++; t.value = TOK.TOKrbracket; return;
              case '{': p++; t.value = TOK.TOKlcurly; return;
              case '}': p++; t.value = TOK.TOKrcurly; return;
              case '?': p++; t.value = TOK.TOKquestion; return;
              case ',': p++; t.value = TOK.TOKcomma; return;
              case ';': p++; t.value = TOK.TOKsemicolon; return;
              case ':': p++; t.value = TOK.TOKcolon; return;
              case '$': p++; t.value = TOK.TOKdollar; return;

              // DOUBLE
              case '*':
                  p++;
                  if( *p == '=' )
                  {
                      p++;
                      t.value = TOK.TOKmulass;
                  }
                  else
                      t.value = TOK.TOKmul;
                  return;

              case '%':
                  p++;
                  if( *p == '=' )
                  {
                      p++;
                      t.value = TOK.TOKmodass;
                  }
                  else
                      t.value = TOK.TOKmod;
                  return;

              case '^':
                  p++;
                  if( *p == '=' )
                  {
                      p++;
                      t.value = TOK.TOKxorass;
                  }
                  else
                      t.value = TOK.TOKxor;
                  return;

              case '#':
                  p++;
                  Pragma();
                  continue;

              default:
              {
                  debug writefln( " default char" );
                  ubyte c = *p;
                  if( c & 0x80 )
                  {
                      uint u = decodeUTF();

                      // Check for start of unicode identifier
                      if( isUniAlpha(u) )
                          goto case_ident;

                      if( u == PS || u == LS )
                      {
                          loc.linnum++;
                          p++;
                          continue;
                      }
                  }
                  if( isprint(c))
                      error("unsupported char '%s'", cast(char)c);
                  else
                      error("unsupported char 0x%02x", cast(ubyte)c);
                  p++;
                  continue;
              }
          }
      }
  }
  /**
   * Parse one escape sequence.
   *
   * On entry p points at the character that follows the backslash. On exit p
   * has been moved past the characters that were consumed.
   *
   * Handles the single-character escapes, \x (2 hex digits), \u (4),
   * \U (8), up to three octal digits, and the \& named-entity form. At end
   * of text (0 or 0x1A) a backslash is returned and nothing is consumed.
   *
   * Returns: the value of the escape
   *
   * The "undefined escape" messages now pass the offending character as a
   * char (as scan() does for "unsupported char"); passing the uint made %s
   * print its numeric value.
   *
   * NOTE(review): entity lookup is disabled in this port, so \&name; returns
   * '&' and leaves p on the terminating ';'.
   */
  uint escapeSequence()
  {
      uint c;
      int n;
      int ndigits;

      c = *p;
      switch ( c )
      {
          case '\'':
          case '"':
          case '?':
          case '\\':
          Lconsume:
              p++;
              break;

          case 'a': c = 7; goto Lconsume;
          case 'b': c = 8; goto Lconsume;
          case 'f': c = 12; goto Lconsume;
          case 'n': c = 10; goto Lconsume;
          case 'r': c = 13; goto Lconsume;
          case 't': c = 9; goto Lconsume;
          case 'v': c = 11; goto Lconsume;

          case 'u':
              ndigits = 4;
              goto Lhex;

          case 'U':
              ndigits = 8;
              goto Lhex;

          case 'x':
              ndigits = 2;
          Lhex:
              p++;
              c = *p;
              if( ishex(c))
              {
                  uint v;
                  n = 0;
                  v = 0;
                  while (1)
                  {
                      // convert one hex digit to its value
                      if( isdigit(c))
                          c -= '0';
                      else if( islower(c))
                          c -= 'a' - 10;
                      else
                          c -= 'A' - 10;
                      v = v * 16 + c;
                      c = *++p;
                      if( ++n == ndigits)
                          break;
                      if( !ishex(c))
                      {
                          error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                          break;
                      }
                  }
                  //! if( ndigits != 2 && !utf_isValidDchar(v))
                  //!     error("invalid UTF character \\U%08x", v);
                  c = v;
              }
              else
                  error("undefined escape hex sequence \\%s\n", cast(char)c);
              break;

          case '&':                                   // named character entity
              for( ubyte *idstart = ++p; 1; p++ )
              {
                  switch( *p )
                  {
                      case ';':
                          //!!!
                          /+
                          c = HtmlNamedEntity(idstart, p - idstart);
                          if( c == ~0 )
                          {
                              error("unnamed character entity &%.*s;", p - idstart, idstart);
                              c = ' ';
                          }
                          p++;
                          +/
                          break;

                      default:
                          // letters anywhere; digits from the third character on
                          if( isalpha(*p) || (p != idstart + 1 && isdigit(*p)))
                              continue;
                          error("unterminated named entity");
                          break;
                  }
                  break;
              }
              break;

          case 0:
          case 0x1a:                                  // end of file
              c = '\\';
              break;

          default:
              if( isoctal(c) )
              {
                  ubyte v;                            // accumulates modulo 256
                  n = 0;
                  do
                  {
                      v = v * 8 + (c - '0');
                      c = *++p;
                  } while (++n < 3 && isoctal(c));
                  c = v;
              }
              else
                  error("undefined escape sequence \\%s\n", cast(char)c);
              break;
      }
      return c;
  }
  /**
   * Lex a wysiwyg (no-escape) string literal.
   *
   * On entry p points at the opening delimiter. Characters are copied
   * verbatim up to the matching delimiter; a lone CR becomes a newline and a
   * CR directly before an LF is dropped. Line ends inside the literal advance
   * loc.linnum.
   *
   * Params:
   *  t  = token that receives ustring and postfix
   *  tc = the delimiter character that terminates the literal
   * Returns: TOK.TOKstring (also after an "unterminated" error, in which case
   *          t.ustring is empty)
   *
   * Changes relative to the earlier version, bringing this function in line
   * with escapeStringConstant:
   *  - no NUL byte is appended before taking the string, so the length of
   *    t.ustring is the length of the text;
   *  - t.ustring is a copy (.dup) of the shared static stringbuffer;
   *  - characters are appended as single bytes, or as UTF-8 for decoded
   *    non-ASCII code points, instead of through the uint overload of write().
   */
  TOK wysiwygStringConstant( Token *t, int tc )
  {
      uint c;
      Loc start = loc;

      // Append code point u to stringbuffer as UTF-8, one byte per write.
      void putUTF8( uint u )
      {
          if( u < 0x80 )
          {
              stringbuffer.write( cast(char) u );
          }
          else if( u < 0x800 )
          {
              stringbuffer.write( cast(char)(0xC0 | (u >> 6)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else if( u < 0x10000 )
          {
              stringbuffer.write( cast(char)(0xE0 | (u >> 12)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else
          {
              stringbuffer.write( cast(char)(0xF0 | ((u >> 18) & 0x07)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 12) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
      }

      p++;                                            // step over the opening delimiter
      stringbuffer.offset = 0;
      while (1)
      {
          c = *p++;
          switch( c )
          {
              case '\n':
                  loc.linnum++;
                  break;

              case '\r':
                  if( *p == '\n')
                      continue;                       // ignore
                  c = '\n';                           // treat EndOfLine as \n character
                  loc.linnum++;
                  break;

              case 0:
              case 0x1a:
                  error("unterminated string constant starting at %s", start.toChars());
                  t.ustring = "";
                  t.postfix = 0;
                  return TOK.TOKstring;

              case '"':
              case '`':
                  if( c == tc)
                  {
                      // copy: stringbuffer is static and is reused for the next literal
                      t.ustring = stringbuffer.toString().dup;
                      stringPostfix(t);
                      return TOK.TOKstring;
                  }
                  break;

              default:
                  if( c & 0x80)
                  {
                      // decodeUTF expects p on the lead byte and leaves it on the last byte
                      p--;
                      uint u = decodeUTF();
                      p++;
                      if( u == PS || u == LS)
                          loc.linnum++;
                      putUTF8( u );
                      continue;
                  }
                  break;
          }
          stringbuffer.write( cast(char) c );
      }
  }
  /**
   * Lex hex strings:
   *      x"0A ae 34FE BD"
   *
   * On entry p points at the opening quote. Each pair of hex digits becomes
   * one byte of the result; white space and line ends between digits are
   * skipped (line ends advance loc.linnum). An odd number of digits is
   * reported and the dangling digit is stored as a byte of its own.
   *
   * Params:
   *  t = token that receives ustring and postfix
   * Returns: TOK.TOKstring (also after an "unterminated" error, in which case
   *          t.ustring is empty)
   *
   * Changes relative to the earlier version:
   *  - each value is appended as a single byte; v is a uint and was passed
   *    to write() as such;
   *  - t.ustring is a copy (.dup) of the shared static stringbuffer, as in
   *    escapeStringConstant;
   *  - the "non-hex character" message receives a char, so %s shows the
   *    character instead of its numeric code.
   */
  TOK hexStringConstant(Token *t)
  {
      uint c;
      Loc start = loc;
      uint n = 0;         // number of hex digits seen so far
      uint v;             // value being assembled from a digit pair

      p++;                                            // step over the opening quote
      stringbuffer.offset = 0;
      while (1)
      {
          c = *p++;
          switch (c)
          {
              case ' ':
              case '\t':
              case '\v':
              case '\f':
                  continue;                           // skip white space

              case '\r':
                  if( *p == '\n')
                      continue;                       // ignore
                  // Treat isolated '\r' as if it were a '\n'
              case '\n':
                  loc.linnum++;
                  continue;

              case 0:
              case 0x1a:
                  error("unterminated string constant starting at %s", start.toChars());
                  t.ustring = "";
                  t.postfix = 0;
                  return TOK.TOKstring;

              case '"':
                  if( n & 1 )
                  {
                      error("odd number (%d) of hex characters in hex string", n);
                      stringbuffer.write( cast(byte) v );
                  }
                  // copy: stringbuffer is static and is reused for the next literal
                  t.ustring = stringbuffer.toString().dup;
                  stringPostfix(t);
                  return TOK.TOKstring;

              default:
                  if( c >= '0' && c <= '9')
                      c -= '0';
                  else if( c >= 'a' && c <= 'f')
                      c -= 'a' - 10;
                  else if( c >= 'A' && c <= 'F')
                      c -= 'A' - 10;
                  else if( c & 0x80)
                  {
                      // decodeUTF expects p on the lead byte and leaves it on the last byte
                      p--;
                      uint u = decodeUTF();
                      p++;
                      if( u == PS || u == LS)
                          loc.linnum++;
                      else
                          error("non-hex character \\u%x", u);
                  }
                  else
                      error("non-hex character '%s'", cast(char)c);

                  // as before, a rejected character still takes part in the pairing below
                  if( n & 1)
                  {
                      v = (v << 4) | c;
                      stringbuffer.write( cast(byte) v );
                  }
                  else
                      v = c;
                  n++;
                  break;
          }
      }
  }
  /**
   * Lex a double-quoted string literal with escape sequences.
   *
   * On entry p points at the opening quote. Escape sequences are handed to
   * escapeSequence(); a lone CR becomes a newline, a CR directly before an LF
   * is dropped, and Unicode line/paragraph separators are stored as a
   * newline. Line ends inside the literal advance loc.linnum.
   *
   * Params:
   *  t    = token that receives ustring and postfix
   *  wide = not used by this function
   * Returns: TOK.TOKstring (also after an "unterminated" error, in which case
   *          t.ustring is empty and p is left on the terminating byte)
   *
   * Changes relative to the earlier version:
   *  - values of \u, \U and \& escapes are appended as UTF-8 instead of
   *    through the uint overload of write();
   *  - decoded non-ASCII code points are appended as UTF-8 instead of being
   *    truncated to their low byte by cast(char).
   */
  TOK escapeStringConstant(Token *t, int wide)
  {
      uint c;
      Loc start = loc;

      // Append code point u to stringbuffer as UTF-8, one byte per write.
      void putUTF8( uint u )
      {
          if( u < 0x80 )
          {
              stringbuffer.write( cast(char) u );
          }
          else if( u < 0x800 )
          {
              stringbuffer.write( cast(char)(0xC0 | (u >> 6)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else if( u < 0x10000 )
          {
              stringbuffer.write( cast(char)(0xE0 | (u >> 12)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
          else
          {
              stringbuffer.write( cast(char)(0xF0 | ((u >> 18) & 0x07)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 12) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | ((u >> 6) & 0x3F)) );
              stringbuffer.write( cast(char)(0x80 | (u & 0x3F)) );
          }
      }

      p++;                                            // step over the opening quote
      stringbuffer.offset = 0;
      while( 1 )
      {
          c = *p++;
          switch( c )
          {
              case '\\':
                  switch (*p)
                  {
                      case 'u':
                      case 'U':
                      case '&':
                          // these escapes denote a code point, not a raw byte
                          c = escapeSequence();
                          putUTF8( c );
                          continue;

                      default:
                          c = escapeSequence();
                          break;
                  }
                  break;

              case '\n':
                  loc.linnum++;
                  break;

              case '\r':
                  if( *p == '\n')
                      continue;                       // ignore
                  c = '\n';                           // treat EndOfLine as \n character
                  loc.linnum++;
                  break;

              case '"':
                  // copy: stringbuffer is static and is reused for the next literal
                  t.ustring = stringbuffer.toString().dup;
                  stringPostfix(t);
                  return TOK.TOKstring;

              case 0:
              case 0x1a:
                  p--;
                  error("unterminated string constant starting at %s", start.toChars());
                  t.ustring = "";
                  t.postfix = 0;
                  return TOK.TOKstring;

              default:
                  if( c & 0x80 )
                  {
                      // decodeUTF expects p on the lead byte and leaves it on the last byte
                      p--;
                      c = decodeUTF();
                      if( c == LS || c == PS )
                      {
                          c = '\n';
                          loc.linnum++;
                      }
                      p++;
                      putUTF8( c );
                      continue;
                  }
                  break;
          }
          stringbuffer.write(cast(char)c);
      }
  }
  /**
   * Lex a character literal.
   *
   * On entry p points at the opening single quote. The character value is
   * stored in t.uns64value. The returned kind starts as TOK.TOKcharv and
   * becomes TOK.TOKwcharv for a \u escape, TOK.TOKdcharv for \U and \&
   * escapes, and for a decoded non-ASCII character depends on its code
   * point: below 0xD800, or from 0xE000 up to but excluding 0xFFFE, gives
   * TOK.TOKwcharv, anything else TOK.TOKdcharv.
   *
   * A line end, end of text, an empty literal, or a missing closing quote is
   * reported as "unterminated character constant"; the kind determined so far
   * is still returned.
   *
   * Params:
   *  t    = token that receives the value
   *  wide = not used by this function
   */
  TOK charConstant(Token *t, int wide)
  {
      TOK kind = TOK.TOKcharv;

      p++;                                            // step over the opening quote
      uint ch = *p++;

      if( ch == '\\' )
      {
          // remember which escape this is before escapeSequence moves p
          ubyte selector = *p;
          t.uns64value = escapeSequence();
          if( selector == 'u' )
              kind = TOK.TOKwcharv;
          else if( selector == 'U' || selector == '&' )
              kind = TOK.TOKdcharv;
      }
      else if( ch == '\n' )
      {
          loc.linnum++;
          error("unterminated character constant");
          return kind;
      }
      else if( ch == '\r' || ch == 0 || ch == 0x1a || ch == '\'' )
      {
          error("unterminated character constant");
          return kind;
      }
      else
      {
          if( ch & 0x80 )
          {
              // decodeUTF expects p on the lead byte and leaves it on the last byte
              p--;
              ch = decodeUTF();
              p++;
              if( ch == LS || ch == PS )
              {
                  // Unicode line end: handled like '\n'
                  loc.linnum++;
                  error("unterminated character constant");
                  return kind;
              }
              if( ch < 0xd800 || (ch >= 0xe000 && ch < 0xfffe) )
                  kind = TOK.TOKwcharv;
              else
                  kind = TOK.TOKdcharv;
          }
          t.uns64value = ch;
      }

      if( *p != '\'' )
      {
          error("unterminated character constant");
          return kind;
      }
      p++;                                            // step over the closing quote
      return kind;
  }
  /**
   * Read the optional postfix character of a string literal.
   *
   * If the character at p is 'c', 'w' or 'd' it is stored in t.postfix and
   * consumed; otherwise t.postfix is set to 0 and p is left alone.
   */
  void stringPostfix(Token *t)
  {
      ubyte suffix = *p;
      if( suffix == 'c' || suffix == 'w' || suffix == 'd' )
      {
          t.postfix = suffix;
          p++;
      }
      else
      {
          t.postfix = 0;
      }
  }
  1269. /***************************************
  1270. * Read \u or \U unicode sequence
  1271. * Input:
  1272. * u 'u' or 'U'
  1273. */
  1274. /*
  1275. uint Wchar(uint u)
  1276. {
  1277. uint value;
  1278. uint n;
  1279. ubyte c;
  1280. uint nchars;
  1281. nchars = (u == 'U') ? 8 : 4;
  1282. value = 0;
  1283. for (n = 0; 1; n++)
  1284. {
  1285. ++p;
  1286. if( n == nchars)
  1287. break;
  1288. c = *p;
  1289. if( !ishex(c))
  1290. {
  1291. error("\\%s sequence must be followed by %d hex characters", u, nchars);
  1292. break;
  1293. }
  1294. if( isdigit(c))
  1295. c -= '0';
  1296. else if( islower(c))
  1297. c -= 'a' - 10;
  1298. else
  1299. c -= 'A' - 10;
  1300. value <<= 4;
  1301. value |= c;
  1302. }
  1303. return value;
  1304. }
  1305. */
  /**************************************
   * Read in an integer literal starting at p.
   *
   * Digits are collected with a small state machine that understands
   * decimal, octal (leading 0), hexadecimal (0x / 0X) and binary (0b / 0B)
   * notation; embedded '_' characters are skipped. Any combination of the
   * suffixes u/U and l/L may follow. The resulting value is stored in
   * t.uns64value.
   *
   * As soon as the text turns out to be a floating point literal
   * ('.', exponent, 'f', 'i', ...), p is reset to the first character of
   * the literal and the work is handed over to inreal().
   *
   * Params:
   *	t = token that receives the value
   * Returns:
   *	TOKint32v, TOKuns32v, TOKint64v or TOKuns64v for an integer,
   *	otherwise the result of inreal().
   */
  TOK number(Token *t)
  {
      // We use a state machine to collect numbers.
      // (The "0FFh" style literals of the ZEROH build variant are not supported.)
      enum STATE
      {
          STATE_initial,
          STATE_0,
          STATE_decimal,
          STATE_octal,
          STATE_octale,
          STATE_hex,
          STATE_binary,
          STATE_hex0,
          STATE_binary0,
          STATE_error
      }

      enum FLAGS
      {
          FLAGS_decimal  = 1,     // decimal
          FLAGS_unsigned = 2,     // u or U suffix
          FLAGS_long     = 4,     // l or L suffix
      }

      FLAGS flags = FLAGS.FLAGS_decimal;
      TOK result;

      stringbuffer.offset = 0;
      STATE state = STATE.STATE_initial;
      ubyte* start = p;

      // It's a real number. Back up and rescan as a real.
      TOK rescanAsReal()
      {
          p = start;
          return inreal(t);
      }

      while( true )
      {
          char c = cast(char)*p;

          switch( state )
          {
              case STATE.STATE_initial:       // opening state
                  if( c == '0' )
                      state = STATE.STATE_0;
                  else
                      state = STATE.STATE_decimal;
                  break;

              case STATE.STATE_0:             // seen a single leading '0'
                  flags = cast(FLAGS) (flags & ~FLAGS.FLAGS_decimal);
                  switch (c)
                  {
                      case 'X':
                      case 'x':
                          state = STATE.STATE_hex0;
                          break;

                      case '.':
                          if( p[1] == '.' )   // .. is a separate token
                              goto done;
                          return rescanAsReal();

                      case 'i':
                      case 'f':
                      case 'F':
                          return rescanAsReal();

                      case 'B':
                      case 'b':
                          state = STATE.STATE_binary0;
                          break;

                      case '0': case '1': case '2': case '3':
                      case '4': case '5': case '6': case '7':
                          state = STATE.STATE_octal;
                          break;

                      case '_':               // ignore embedded _
                          state = STATE.STATE_octal;
                          p++;
                          continue;

                      default:
                          goto done;
                  }
                  break;

              case STATE.STATE_decimal:       // reading decimal number
                  if( !isdigit( c ) )
                  {
                      if( c == '_' )          // ignore embedded _
                      {
                          p++;
                          continue;
                      }
                      // decimal point (but not the '..' token) makes it a real
                      if( c == '.' && p[1] != '.' )
                          return rescanAsReal();
                      // imaginary / float suffix or exponent makes it a real
                      if( c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E' )
                          return rescanAsReal();
                      goto done;
                  }
                  break;

              case STATE.STATE_hex0:          // just past "0x", no digit seen yet
              case STATE.STATE_hex:           // reading hex number
                  if( !ishex(c) )
                  {
                      if( c == '_' )          // ignore embedded _
                      {
                          p++;
                          continue;
                      }
                      if( c == '.' && p[1] != '.' )
                          return rescanAsReal();
                      if( c == 'P' || c == 'p' || c == 'i' )
                          return rescanAsReal();
                      if( state == STATE.STATE_hex0 )
                          error("Hex digit expected, not '%s'", c);
                      goto done;
                  }
                  state = STATE.STATE_hex;
                  break;

              case STATE.STATE_octal:         // reading octal number
              case STATE.STATE_octale:        // reading octal number with non-octal digits
                  if( !isoctal(c) )
                  {
                      if( c == '_' )          // ignore embedded _
                      {
                          p++;
                          continue;
                      }
                      if( c == '.' && p[1] != '.' )
                          return rescanAsReal();
                      if( c == 'i' )
                          return rescanAsReal();
                      if( isdigit(c) )
                          state = STATE.STATE_octale;     // 8 or 9: reported after the scan
                      else
                          goto done;
                  }
                  break;

              case STATE.STATE_binary0:       // starting binary number
              case STATE.STATE_binary:        // reading binary number
                  if( c != '0' && c != '1' )
                  {
                      if( c == '_' )          // ignore embedded _
                      {
                          p++;
                          continue;
                      }
                      if( state == STATE.STATE_binary0 )
                      {
                          error("binary digit expected");
                          state = STATE.STATE_error;
                          break;
                      }
                      else
                          goto done;
                  }
                  state = STATE.STATE_binary;
                  break;

              case STATE.STATE_error:         // for error recovery
                  if( !isdigit(c) )           // scan until non-digit
                      goto done;
                  break;

              default:
                  assert(0);
          }

          stringbuffer.write(cast(char)c);
          p++;
      }

  done:
      stringbuffer.write( cast(char)0 );      // terminate string

      if( state == STATE.STATE_octale )
          error("Octal digit expected");

      uinteger_t n;                           // unsigned >=64 bit integer type

      if( stringbuffer.offset == 2 && ( state == STATE.STATE_decimal || state == STATE.STATE_0 ) )
      {
          // Fast path: a single digit followed by the terminator
          n = stringbuffer.data[0] - '0';
      }
      else
      {
          // Convert string to integer
          char* s = cast(char*)stringbuffer.data.ptr;
          int r = 10;                         // radix
          int d;                              // value of the current digit

          if( *s == '0' )
          {
              if( s[1] == 'x' || s[1] == 'X' )
              {
                  // "0x#" - hexadecimal
                  s += 2;
                  r = 16;
              }
              else if( s[1] == 'b' || s[1] == 'B' )
              {
                  // "0b#" - binary
                  s += 2;
                  r = 2;
              }
              else if( isdigit(s[1]) )
              {
                  // "0#" - octal
                  s += 1;
                  r = 8;
              }
          }

          n = 0;
          while( true )
          {
              if( *s >= '0' && *s <= '9' )
                  d = *s - '0';
              else if( *s >= 'a' && *s <= 'z' )
                  d = *s - 'a' + 10;
              else if( *s >= 'A' && *s <= 'Z' )
                  d = *s - 'A' + 10;
              else
                  break;

              if( d >= r )
                  break;

              // Test before multiplying: comparing the wrapped result with n
              // misses overflows that happen to wrap to a value above n.
              if( n > (uinteger_t.max - d) / r )
              {
                  error( "integer overflow" );
                  break;
              }
              n = n * r + d;
              s++;
          }

          // if n needs more than 64 bits
          if( n.sizeof > 8 && n > 0xffffffffffffffffL )
              error("integer overflow");
      }

      // Parse trailing 'u', 'U', 'l' or 'L' in any combination
  Lsuffix:
      while( true )
      {
          ubyte f;

          switch( *p )
          {
              case 'U':
              case 'u':
                  f = FLAGS.FLAGS_unsigned;
                  break;

              case 'L':
              case 'l':
                  f = FLAGS.FLAGS_long;
                  break;

              default:
                  break Lsuffix;
          }

          p++;
          if( flags & f )                     // same suffix given twice
              error("unrecognized token");
          flags = cast(FLAGS) (flags | f);
      }

      // Pick the token type from the notation, the suffixes and the magnitude
      switch ( flags )
      {
          case 0:
              /* Octal or Hexadecimal constant.
               * First that fits: int, uint, long, ulong
               */
              if( n & 0x8000000000000000L )
                  result = TOK.TOKuns64v;
              else if( n & 0xffffffff00000000L )
                  result = TOK.TOKint64v;
              else if( n & 0x80000000 )
                  result = TOK.TOKuns32v;
              else
                  result = TOK.TOKint32v;
              break;

          case FLAGS.FLAGS_decimal:
              /* First that fits: int, long, long long
               */
              if( n & 0x8000000000000000L )
              {
                  error("signed integer overflow");
                  result = TOK.TOKuns64v;
              }
              else if( n & 0xffffffff80000000L )
                  result = TOK.TOKint64v;
              else
                  result = TOK.TOKint32v;
              break;

          case FLAGS.FLAGS_unsigned:
          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
              /* First that fits: uint, ulong
               */
              if( n & 0xffffffff00000000L )
                  result = TOK.TOKuns64v;
              else
                  result = TOK.TOKuns32v;
              break;

          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
              if( n & 0x8000000000000000L )
              {
                  error("signed integer overflow");
                  result = TOK.TOKuns64v;
              }
              else
                  result = TOK.TOKint64v;
              break;

          case FLAGS.FLAGS_long:
              if( n & 0x8000000000000000L )
                  result = TOK.TOKuns64v;
              else
                  result = TOK.TOKint64v;
              break;

          case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
          case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
              result = TOK.TOKuns64v;
              break;

          default:
              debug writefln("%x",flags);
              assert(0);
      }

      t.uns64value = n;
      return result;
  }
  /**************************************
   * Read in characters, converting them to real.
   *
   * The literal text is copied from p into stringbuffer (embedded '_' are
   * dropped), terminated with a 0 byte and converted with strtold(); the
   * value is stored in t.float80value. An optional f/F or l/L suffix
   * selects the 32 or 80 bit token, and a following i/I turns it into the
   * matching imaginary token.
   *
   * Params:
   *	t = token that receives the value
   * Returns:
   *	TOKfloat32v, TOKfloat64v, TOKfloat80v or the corresponding
   *	TOKimaginary*v
   * Bugs:
   *	Exponent overflow not detected.
   *	Too much requested precision is not detected.
   */
  TOK inreal(Token *t)
  {
      int dblstate;
      uint c;
      char hex;                   // is this a hexadecimal-floating-constant?
      TOK result;

      //printf("Lexer.inreal()\n");
      stringbuffer.offset = 0;
      dblstate = 0;
      hex = 0;

  Lnext:
      while (1)
      {
          // Get next char from input
          c = *p++;
          //printf("dblstate = %d, c = '%s'\n", dblstate, c);

          // The inner loop lets a state pass the same character on to the
          // following state ('continue') without reading a new one.
          while (1)
          {
              switch (dblstate)
              {
                  case 0:         // opening state
                      if( c == '0')
                          dblstate = 9;
                      else if( c == '.')
                          dblstate = 3;
                      else
                          dblstate = 1;
                      break;

                  case 9:         // character right after a leading '0'
                      dblstate = 1;
                      if( c == 'X' || c == 'x')
                      {
                          hex++;
                          break;
                      }
                      // not a hex prefix: treat as an ordinary digit position
                  case 1:         // digits to left of .
                  case 3:         // digits to right of .
                  case 7:         // continuing exponent digits
                      if( !isdigit(c) && !(hex && isxdigit(c)))
                      {
                          if( c == '_')
                              goto Lnext;     // ignore embedded '_'
                          dblstate++;
                          continue;
                      }
                      break;

                  case 2:         // no more digits to left of .
                      if( c == '.')
                      {
                          dblstate++;
                          break;
                      }
                      // no '.': look for an exponent
                  case 4:         // no more digits to right of .
                      if( (c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p'))
                      {
                          dblstate = 5;
                          hex = 0;            // exponent is always decimal
                          break;
                      }
                      if( hex)
                          error("binary-exponent-part required");
                      goto done;

                  case 5:         // looking immediately to right of E
                      dblstate++;
                      if( c == '-' || c == '+')
                          break;
                      // no sign: this must already be the first digit
                  case 6:         // 1st exponent digit expected
                      if( !isdigit(c))
                          error("exponent expected");
                      dblstate++;
                      break;

                  case 8:         // past end of exponent digits
                      goto done;
              }
              break;
          }

          // c always holds a single input byte. Store it as one char, as
          // number() does, so that an integer-width overload of write()
          // cannot insert extra bytes between the characters.
          stringbuffer.write(cast(char)c);
      }

  done:
      p--;                        // the terminating character was not consumed

      stringbuffer.write(cast(byte)0);

      // #if _WIN32 && __DMC__
      char *save = __locale_decpoint;
      __locale_decpoint = ".";
      // #endif

      t.float80value = strtold(cast(char *)stringbuffer.data.ptr, null);

      // The conversions below are repeated at the target precision only to
      // have them report a range error; their results are discarded.
      // NOTE(review): no statement in this function other than the reset
      // below writes this `errno`, and the file declares its own
      // module-level `int errno`. If that is the variable resolved here
      // (rather than the C runtime's errno) the ERANGE test at the end can
      // never fire -- confirm.
      errno = 0;
      switch( *p )
      {
          case 'F':
          case 'f':
              strtof(cast(char *)stringbuffer.data.ptr, null);
              result = TOK.TOKfloat32v;
              p++;
              break;

          default:
              strtod(cast(char *)stringbuffer.data.ptr, null);
              result = TOK.TOKfloat64v;
              break;

          case 'L':
          case 'l':
              result = TOK.TOKfloat80v;
              p++;
              break;
      }

      if( *p == 'i' || *p == 'I')
      {
          p++;
          switch( result )
          {
              case TOK.TOKfloat32v:
                  result = TOK.TOKimaginary32v;
                  break;
              case TOK.TOKfloat64v:
                  result = TOK.TOKimaginary64v;
                  break;
              case TOK.TOKfloat80v:
                  result = TOK.TOKimaginary80v;
                  break;
          }
      }

      // #if _WIN32 && __DMC__
      __locale_decpoint = save;
      // #endif

      if( errno == ERANGE)
          error("number is not representable");

      return result;
  }
  /*********************************************
   * Do pragma.
   * Currently, the only pragma supported is:
   *	#line linnum [filespec]
   *
   * On success this.loc.linnum is set to linnum - 1 and, when a quoted
   * filespec was given, this.loc.filename is set to it. Any other input
   * is reported through errorLoc() at the location where the pragma
   * started.
   */
  void Pragma()
  {
      Token tok;
      int linnum;
      char[] filespec;
      Loc loc = this.loc;         // location of the pragma, for the error message

      scan(&tok);
      if( tok.value != TOK.TOKidentifier || tok.ident != Id.line )
          goto Lerr;

      scan(&tok);
      if( tok.value == TOK.TOKint32v || tok.value == TOK.TOKint64v )
          linnum = tok.uns64value - 1;
      else
          goto Lerr;

      while (1)
      {
          switch (*p)
          {
              case 0:
              case 0x1a:
              case '\n':
              Lnewline:
                  // End of the directive: commit the new location
                  this.loc.linnum = linnum;
                  if( filespec.length )
                      this.loc.filename = filespec;
                  return;

              case '\r':
                  p++;
                  if( *p != '\n')
                  {   p--;                // lone \r is a line end by itself
                      goto Lnewline;
                  }
                  continue;               // \r\n: let the \n case finish

              case ' ':
              case '\t':
              case '\v':
              case '\f':
                  p++;
                  continue;               // skip white space

              case '_':
                  if( mod && memcmp(p, cast(char*)"__FILE__", 8) == 0)
                  {
                      p += 8;
                      // TODO: not ported yet --
                      // filespec = mem.strdup(loc.filename ? loc.filename : mod.ident.toChars());
                      continue;
                  }
                  // Anything else starting with '_' is a syntax error. This
                  // must not 'continue': p has not moved, so the loop would
                  // never terminate.
                  goto Lerr;

              case '"':
                  if( filespec )          // only one filespec allowed
                      goto Lerr;
                  stringbuffer.offset = 0;
                  p++;
                  while (1)
                  {
                      uint c;

                      c = *p;
                      switch (c)
                      {
                          case '\n':
                          case '\r':
                          case 0:
                          case 0x1a:
                              goto Lerr;  // unterminated string

                          case '"':
                              stringbuffer.write(cast(byte)0);
                              // filespec = mem.strdup((char *)stringbuffer.data);
                              filespec = stringbuffer.toString.dup;
                              p++;
                              break;

                          default:
                              if( c & 0x80 )
                              {
                                  uint u = decodeUTF();
                                  if( u == PS || u == LS )
                                      goto Lerr;
                              }
                              // NOTE(review): only the byte read before
                              // decodeUTF() is stored; whether the rest of a
                              // multi-byte sequence is kept depends on how
                              // decodeUTF() moves p -- confirm.
                              // Written as a single char, as number() does,
                              // so an integer-width overload of write()
                              // cannot add extra bytes.
                              stringbuffer.write(cast(char)c);
                              p++;
                              continue;
                      }
                      break;
                  }
                  continue;

              default:
                  if( *p & 0x80 )
                  {
                      uint u = decodeUTF();
                      if( u == PS || u == LS)
                          goto Lnewline;
                  }
                  goto Lerr;
          }
      }

  Lerr:
      errorLoc(loc, "#line integer [\"filespec\"]\\n expected");
  }
  /***************************************************
   * Extract the doc comment lying between t.ptr and p and attach it to t.
   *
   *  - the opening run of comment characters and, for block comments,
   *    the closing run are skipped
   *  - a leading comment character on each following line is removed
   *  - blanks and tabs in front of a line end are removed
   *  - every kind of line end (\r, \r\n, \n, LS, PS) becomes \n
   *  - the text always ends with a newline
   *
   * The result goes to t.lineComment when lineComment is set and a token
   * has already been seen (anyToken), otherwise to t.blockComment. If that
   * field already holds text, both are merged with combineComments().
   */
  void getDocComment( Token *t, uint lineComment )
  {
      auto OutBuffer text = new OutBuffer;
      ubyte marker = t.ptr[2];            // '/', '*' or '+', taken from the opener
      ubyte *cur = t.ptr + 3;             // first byte after the opener
      ubyte *stop = p;                    // one past the last byte to look at
      int atLineStart = 0;

      // Drop blanks and tabs from the end of what has been collected so far
      void trimBlanks()
      {
          while( text.offset && (text.data[text.offset - 1] == ' ' || text.data[text.offset - 1] == '\t') )
              text.offset--;
      }

      // Block comments end with a two character closer
      if( marker == '*' || marker == '+' )
          stop -= 2;

      // Skip the initial row of ****'s or ++++'s or ////'s
      while( cur < stop && *cur == marker )
          cur++;

      // Skip the trailing row of ****'s or ++++'s
      if( marker != '/' )
      {
          while( cur < stop && stop[-1] == marker )
              stop--;
      }

      for( ; cur < stop; cur++ )
      {
          ubyte ch = *cur;
          int newline = 0;

          if( ch == '*' || ch == '+' )
          {
              if( atLineStart && ch == marker )
              {
                  // Leading comment character of a line: drop it together
                  // with the indentation in front of it
                  atLineStart = 0;
                  trimBlanks();
                  continue;
              }
          }
          else if( ch == ' ' || ch == '\t' )
          {
              // copied as is; does not end the "start of line" condition
          }
          else if( ch == '\r' )
          {
              if( cur[1] == '\n' )
                  continue;               // skip the \r, the \n follows
              newline = 1;
          }
          else if( ch == '\n' )
          {
              newline = 1;
          }
          else if( ch == 226 && cur[1] == 128 && (cur[2] == 168 || cur[2] == 169) )
          {
              // UTF-8 encoded LS or PS
              cur += 2;
              newline = 1;
          }
          else
          {
              atLineStart = 0;
          }

          if( newline )
          {
              ch = '\n';                  // replace all newlines with \n
              atLineStart = 1;
              trimBlanks();
          }

          text.write(ch);
      }

      // Always end with a newline
      if( !text.offset || text.data[text.offset - 1] != '\n' )
          text.writenl();

      // It's a line comment if the start of the doc comment comes
      // after other non-whitespace on the same line.
      int isLine = lineComment && anyToken;
      char[] previous = isLine ? t.lineComment : t.blockComment;
      char[] fresh = text.toString().dup;

      // Combine with previous doc comment, if any
      char[] merged = previous.length ? combineComments( previous, fresh ) : fresh;

      if( isLine )
          t.lineComment = merged;
      else
          t.blockComment = merged;
  }
  2021. }
  // Character class table, indexed by byte value. Every entry is a
  // combination of the CM* bits below; the table is filled in by
  // cmtable_init().
  static ubyte[256] cmtable;

  const int CMoctal  = 0x1;       // '0' .. '7'
  const int CMhex    = 0x2;       // '0' .. '9', 'a' .. 'f', 'A' .. 'F'
  const int CMidchar = 0x4;       // letters, digits and '_'

  // Non-zero if c is an octal digit
  ubyte isoctal (ubyte c)
  {
      return cmtable[c] & CMoctal;
  }

  // Non-zero if c is a hexadecimal digit
  ubyte ishex (ubyte c)
  {
      return cmtable[c] & CMhex;
  }

  // Non-zero if c may appear inside an identifier
  ubyte isidchar(ubyte c)
  {
      return cmtable[c] & CMidchar;
  }

  // Set the class bits of every entry of cmtable
  static void cmtable_init()
  {
      for( uint code = 0; code < cmtable.length; code++ )
      {
          int digit  = ('0' <= code && code <= '9');
          int letter = ('a' <= code && code <= 'z') || ('A' <= code && code <= 'Z');
          int hexletter = ('a' <= code && code <= 'f') || ('A' <= code && code <= 'F');

          ubyte bits = 0;
          if( digit && code <= '7' )
              bits |= CMoctal;
          if( digit || hexletter )
              bits |= CMhex;
          if( digit || letter || code == '_' )
              bits |= CMidchar;

          cmtable[code] |= bits;
      }
  }
  2042. /+
  2043. struct StringValue
  2044. {
  2045. union
  2046. {
  2047. int intvalue;
  2048. void *ptrvalue;
  2049. dchar *string;
  2050. }
  2051. char[] lstring;
  2052. }
  2053. #define CASE_BASIC_TYPES
  2054. case TOKwchar: case TOKdchar:
  2055. case TOKbit: case TOKbool: case TOKchar:
  2056. case TOKint8: case TOKuns8:
  2057. case TOKint16: case TOKuns16:
  2058. case TOKint32: case TOKuns32:
  2059. case TOKint64: case TOKuns64:
  2060. case TOKfloat32: case TOKfloat64: case TOKfloat80:
  2061. case TOKimaginary32: case TOKimaginary64: case TOKimaginary80:
  2062. case TOKcomplex32: case TOKcomplex64: case TOKcomplex80:
  2063. case TOKvoid:
  2064. #define CASE_BASIC_TYPES_X(t) \
  2065. case TOKvoid: t = Type::tvoid; goto LabelX; \
  2066. case TOKint8: t = Type::tint8; goto LabelX; \
  2067. case TOKuns8: t = Type::tuns8; goto LabelX; \
  2068. case TOKint16: t = Type::tint16; goto LabelX; \
  2069. case TOKuns16: t = Type::tuns16; goto LabelX; \
  2070. case TOKint32: t = Type::tint32; goto LabelX; \
  2071. case TOKuns32: t = Type::tuns32; goto LabelX; \
  2072. case TOKint64: t = Type::tint64; goto LabelX; \
  2073. case TOKuns64: t = Type::tuns64; goto LabelX; \
  2074. case TOKfloat32: t = Type::tfloat32; goto LabelX; \
  2075. case TOKfloat64: t = Type::tfloat64; goto LabelX; \
  2076. case TOKfloat80: t = Type::tfloat80; goto LabelX; \
  2077. case TOKimaginary32: t = Type::timaginary32; goto LabelX; \
  2078. case TOKimaginary64: t = Type::timaginary64; goto LabelX; \
  2079. case TOKimaginary80: t = Type::timaginary80; goto LabelX; \
  2080. case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \
  2081. case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \
  2082. case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \
  2083. case TOKbit: t = Type::tbit; goto LabelX; \
  2084. case TOKchar: t = Type::tchar; goto LabelX; \
  2085. case TOKwchar: t = Type::twchar; goto LabelX; \
  2086. case TOKdchar: t = Type::tdchar; goto LabelX; \
  2087. LabelX
  2088. +/