/lexer.c

https://bitbucket.org/hyades/compiler · C · 1210 lines · 1141 code · 41 blank · 28 comment · 124 complexity · 83086817da85176061be2aa6c35e643a MD5 · raw file

  1. /*
  2. -=-=-=-=-=-=-=-=-=-=-=-=-=
  3. BATCH 26
  4. -=-=-=-=-=-=-=-=-=-=-=-=-=
  5. AAYUSH AHUJA 2010A7PS023P
  6. MAYANK GUPTA 2010A7PS022P
  7. -=-=-=-=-=-=-=-=-=-=-=-=-=
  8. lexer.c
  9. -=-=-=-=-=-=-=-=-=-=-=-=-=
  10. */
  11. #include<stdio.h>
  12. #include<ctype.h>
  13. #include<string.h>
  14. #include<stdlib.h>
  15. #include<fcntl.h>
  16. #include"lexerDef.h"
  17. #include"lexer.h"
  18. extern bool any_error;
  19. tokenInfo getNextToken(int fp ,keywordTable kt, bool *error, int *linenumber)//get next token
  20. {
  21. tokenInfo t;
  22. static int back = 0;
  23. int state=1,i=0;
  24. char c;
  25. char lexeme[100] = {}; //assuming max lexeme size as 100
  26. while(1)
  27. {
  28. switch(state)
  29. {
  30. case 1:
  31. c = getNextChar(fp,&back);
  32. //printf(" %c ",c);
  33. if((int)c==EOF)return NULL;
  34. else if(c=='-')
  35. {
  36. state = 2;
  37. lexeme[i++] = c;
  38. }
  39. else if(c=='!')
  40. {
  41. state = 3;
  42. lexeme[i++] = c;
  43. }
  44. else if(c=='#')
  45. {
  46. state = 5;
  47. lexeme[i++] = c;
  48. }
  49. else if(c=='%')
  50. {
  51. state = 7;
  52. lexeme[i++] = c;
  53. }
  54. else if(c=='&')
  55. {
  56. state = 8;
  57. lexeme[i++] = c;
  58. }
  59. else if(c=='(')
  60. {
  61. state = 11;
  62. lexeme[i++] = c;
  63. }
  64. else if(c==')')
  65. {
  66. state = 12;
  67. lexeme[i++] = c;
  68. }
  69. else if(c=='*')
  70. {
  71. state = 13;
  72. lexeme[i++] = c;
  73. }
  74. else if(c=='.')
  75. {
  76. state = 14;
  77. lexeme[i++] = c;
  78. }
  79. else if(c=='/')
  80. {
  81. state = 15;
  82. lexeme[i++] = c;
  83. }
  84. else if(c==':')
  85. {
  86. state = 16;
  87. lexeme[i++] = c;
  88. }
  89. else if(c==';')
  90. {
  91. state = 17;
  92. lexeme[i++] = c;
  93. }
  94. else if(c==',')
  95. {
  96. state = 47;
  97. lexeme[i++] = c;
  98. }
  99. else if(c=='@')
  100. {
  101. state = 18;
  102. lexeme[i++] = c;
  103. }
  104. else if(c=='[')
  105. {
  106. state = 21;
  107. lexeme[i++] = c;
  108. }
  109. else if(c==']')
  110. {
  111. state = 30;
  112. lexeme[i++] = c;
  113. }
  114. else if(c=='_')
  115. {
  116. state = 31;
  117. lexeme[i++] =c;
  118. }
  119. else if(c=='~')
  120. {
  121. state = 34;
  122. lexeme[i++] = c;
  123. }
  124. else if(c=='+')
  125. {
  126. state = 35;
  127. lexeme[i++] = c;
  128. }
  129. else if(c=='<')
  130. {
  131. state = 36;
  132. lexeme[i++] = c;
  133. }
  134. else if(c=='=')
  135. {
  136. state = 41;
  137. lexeme[i++] = c;
  138. }
  139. else if(c=='>')
  140. {
  141. state = 43;
  142. lexeme[i++] = c;
  143. }
  144. else if(isdigit(c))
  145. {
  146. state = 22;
  147. lexeme[i++] = c;
  148. }
  149. else if((c=='a')||(c>='e'&&c<='z'))
  150. {
  151. state = 26;
  152. lexeme[i++] =c;
  153. }
  154. else if(c>='b'&&c<='d')
  155. {
  156. state = 27;
  157. lexeme[i++] =c;
  158. }
  159. else if(isspace(c))
  160. {
  161. state = 45;
  162. if(c=='\n')
  163. (*linenumber)++;
  164. }
  165. else
  166. {
  167. *error = 1;
  168. t = (tokenInfo)malloc(sizeof(tokenInfo));
  169. t->s = TK_ERROR2;
  170. lexeme[i++] =c;
  171. lexeme[i] = '\0';
  172. strcpy(t->lexeme,lexeme);
  173. return t;
  174. }
  175. break;
  176. case 2:
  177. t = (tokenInfo)malloc(sizeof(tokenInfo));
  178. t->s = TK_MINUS;
  179. lexeme[i] = '\0';
  180. strcpy(t->lexeme,lexeme);
  181. return t;
  182. break;
  183. case 3:
  184. c = getNextChar(fp,&back);
  185. if(c=='=')
  186. {
  187. state = 4;
  188. lexeme[i++]=c;
  189. }
  190. else
  191. {
  192. *error = TRUE;
  193. t = (tokenInfo)malloc(sizeof(tokenInfo));
  194. t->s = TK_ERROR;
  195. lexeme[i] = '\0';
  196. strcpy(t->lexeme,lexeme);
  197. return t;
  198. }
  199. break;
  200. case 4:
  201. t = (tokenInfo)malloc(sizeof(tokenInfo));
  202. t->s = TK_NE;
  203. lexeme[i] = '\0';
  204. strcpy(t->lexeme,lexeme);
  205. return t;
  206. break;
  207. case 5:
  208. c = getNextChar(fp,&back);
  209. if(c>='a'&&c<='z')
  210. {
  211. state =6;
  212. lexeme[i++] =c;
  213. }
  214. else
  215. {
  216. *error = TRUE;
  217. t = (tokenInfo)malloc(sizeof(tokenInfo));
  218. t->s = TK_ERROR;
  219. lexeme[i] = '\0';
  220. strcpy(t->lexeme,lexeme);
  221. return t;
  222. }
  223. break;
  224. case 6:
  225. c = getNextChar(fp,&back);
  226. if(c>='a'&&c<='z')
  227. {
  228. state = 6;
  229. lexeme[i++] = c;
  230. }
  231. else
  232. {
  233. back = 1;
  234. t = (tokenInfo)malloc(sizeof(tokenInfo));
  235. t->s = TK_RECORDID;
  236. lexeme[i]='\0';
  237. strcpy(t->lexeme,lexeme);
  238. return t;
  239. }
  240. break;
  241. case 7:
  242. t = (tokenInfo)malloc(sizeof(tokenInfo));
  243. t->s = TK_COMMENT;
  244. state = 46; //FOR COMMENTS
  245. break;
  246. case 8:
  247. c = getNextChar(fp,&back);
  248. if(c=='&')
  249. {
  250. state = 9;
  251. lexeme[i++] = c;
  252. }
  253. else
  254. {
  255. *error = TRUE;
  256. t = (tokenInfo)malloc(sizeof(tokenInfo));
  257. t->s = TK_ERROR;
  258. lexeme[i] = '\0';
  259. strcpy(t->lexeme,lexeme);
  260. return t;
  261. }
  262. break;
  263. case 9:
  264. c = getNextChar(fp,&back);
  265. if(c=='&')
  266. {
  267. state = 10;
  268. lexeme[i++] =c;
  269. }
  270. else
  271. {
  272. *error = TRUE;
  273. t = (tokenInfo)malloc(sizeof(tokenInfo));
  274. t->s = TK_ERROR;
  275. lexeme[i] = '\0';
  276. strcpy(t->lexeme,lexeme);
  277. return t;
  278. }
  279. break;
  280. case 10:
  281. t = (tokenInfo)malloc(sizeof(tokenInfo));
  282. lexeme[i] = '\0';
  283. strcpy(t->lexeme,lexeme);
  284. t->s = TK_AND;
  285. return t;
  286. break;
  287. case 11:
  288. t = (tokenInfo)malloc(sizeof(tokenInfo));
  289. t->s = TK_OP;
  290. lexeme[i] = '\0';
  291. strcpy(t->lexeme,lexeme);
  292. return t;
  293. break;
  294. case 12:
  295. t = (tokenInfo)malloc(sizeof(tokenInfo));
  296. t->s = TK_CL;
  297. lexeme[i] = '\0';
  298. strcpy(t->lexeme,lexeme);
  299. return t;
  300. break;
  301. case 13:
  302. t = (tokenInfo)malloc(sizeof(tokenInfo));
  303. t->s = TK_MUL;
  304. lexeme[i] = '\0';
  305. strcpy(t->lexeme,lexeme);
  306. return t;
  307. break;
  308. case 14:
  309. t = (tokenInfo)malloc(sizeof(tokenInfo));
  310. t->s = TK_DOT;
  311. lexeme[i] = '\0';
  312. strcpy(t->lexeme,lexeme);
  313. return t;
  314. break;
  315. case 15:
  316. t = (tokenInfo)malloc(sizeof(tokenInfo));
  317. t->s = TK_DIV;
  318. lexeme[i] = '\0';
  319. strcpy(t->lexeme,lexeme);
  320. return t;
  321. break;
  322. case 16:
  323. t = (tokenInfo)malloc(sizeof(tokenInfo));
  324. t->s = TK_COLON;
  325. lexeme[i] = '\0';
  326. strcpy(t->lexeme,lexeme);
  327. return t;
  328. break;
  329. case 17:
  330. t = (tokenInfo)malloc(sizeof(tokenInfo));
  331. t->s = TK_SEM;
  332. lexeme[i] = '\0';
  333. strcpy(t->lexeme,lexeme);
  334. return t;
  335. break;
  336. case 18:
  337. c = getNextChar(fp,&back);
  338. if(c=='@')
  339. {
  340. lexeme[i++] = c;
  341. state = 19;
  342. }
  343. else
  344. {
  345. *error = TRUE;
  346. t = (tokenInfo)malloc(sizeof(tokenInfo));
  347. t->s = TK_ERROR;
  348. lexeme[i] = '\0';
  349. strcpy(t->lexeme,lexeme);
  350. return t;
  351. }
  352. break;
  353. case 19:
  354. c = getNextChar(fp,&back);
  355. if(c=='@')
  356. {
  357. lexeme[i++] = c;
  358. state = 20;
  359. }
  360. else
  361. {
  362. *error = TRUE;
  363. t = (tokenInfo)malloc(sizeof(tokenInfo));
  364. t->s = TK_ERROR;
  365. lexeme[i] = '\0';
  366. strcpy(t->lexeme,lexeme);
  367. return t;
  368. }
  369. break;
  370. case 20:
  371. t = (tokenInfo)malloc(sizeof(tokenInfo));
  372. t->s = TK_OR;
  373. lexeme[i] = '\0';
  374. strcpy(t->lexeme,lexeme);
  375. return t;
  376. break;
  377. case 21:
  378. t = (tokenInfo)malloc(sizeof(tokenInfo));
  379. t->s = TK_SQL;
  380. lexeme[i] = '\0';
  381. strcpy(t->lexeme,lexeme);
  382. return t;
  383. break;
  384. case 22:
  385. c = getNextChar(fp,&back);
  386. if(isdigit(c))
  387. {
  388. state = 22;
  389. lexeme[i++] = c;
  390. }
  391. else if(c=='.')
  392. {
  393. state = 23;
  394. lexeme[i++] = c;
  395. }
  396. else
  397. {
  398. back = 1;
  399. t = (tokenInfo)malloc(sizeof(tokenInfo));
  400. t->s = TK_NUM;
  401. lexeme[i]='\0';
  402. strcpy(t->lexeme,lexeme);
  403. return t;
  404. }
  405. break;
  406. case 23:
  407. c = getNextChar(fp,&back);
  408. if(isdigit(c))
  409. {
  410. state = 24;
  411. lexeme[i++] = c;
  412. }
  413. else
  414. {
  415. *error = TRUE;
  416. t = (tokenInfo)malloc(sizeof(tokenInfo));
  417. t->s = TK_ERROR;
  418. lexeme[i] = '\0';
  419. strcpy(t->lexeme,lexeme);
  420. return t;
  421. }
  422. break;
  423. case 24:
  424. c = getNextChar(fp,&back);
  425. if(isdigit(c))
  426. {
  427. lexeme[i++] = c;
  428. state = 25;
  429. }
  430. else
  431. {
  432. *error = TRUE;
  433. t = (tokenInfo)malloc(sizeof(tokenInfo));
  434. t->s = TK_ERROR;
  435. lexeme[i] = '\0';
  436. strcpy(t->lexeme,lexeme);
  437. return t;
  438. }
  439. break;
  440. case 25:
  441. t = (tokenInfo)malloc(sizeof(tokenInfo));
  442. t->s = TK_RNUM;
  443. lexeme[i]='\0';
  444. strcpy(t->lexeme,lexeme);
  445. return t;
  446. break;
  447. case 26:
  448. c = getNextChar(fp,&back);
  449. if(c>='a'&&c<='z')
  450. {
  451. lexeme[i++] = c;
  452. state = 26;
  453. }
  454. else
  455. {
  456. back = 1;
  457. //t= (tokenInfo)malloc(sizeof(tokenInfo));
  458. lexeme[i]='\0';
  459. t=keywordId(lexeme, kt);
  460. return t;
  461. }
  462. break;
  463. case 27:
  464. c = getNextChar(fp,&back);
  465. if(c>='a'&&c<='z')
  466. {
  467. lexeme[i++] = c;
  468. state = 26;
  469. }
  470. else if(c>='2'&&c<='7')
  471. {
  472. lexeme[i++] = c;
  473. state = 28;
  474. }
  475. else
  476. {
  477. back = 1;
  478. t = (tokenInfo)malloc(sizeof(tokenInfo));
  479. t->s = TK_FIELDID;
  480. lexeme[i]='\0';
  481. strcpy(t->lexeme,lexeme);
  482. return t;
  483. }
  484. break;
  485. case 28:
  486. c = getNextChar(fp,&back);
  487. if(c>='b'&&c<='d')
  488. {
  489. lexeme[i++] = c;
  490. state = 28;
  491. }
  492. else if (c>='2'&&c<='7')
  493. {
  494. lexeme[i++] = c;
  495. state = 29;
  496. }
  497. else
  498. {
  499. back = 1;
  500. t = (tokenInfo)malloc(sizeof(tokenInfo));
  501. t->s = TK_ID;
  502. lexeme[i]='\0';
  503. strcpy(t->lexeme,lexeme);
  504. return t;
  505. }
  506. break;
  507. case 29:
  508. c = getNextChar(fp,&back);
  509. if(c>='2'&&c<='7')
  510. {
  511. lexeme[i++] = c;
  512. state = 29;
  513. }
  514. else
  515. {
  516. back = 1;
  517. t = (tokenInfo)malloc(sizeof(tokenInfo));
  518. t->s = TK_ID;
  519. lexeme[i]='\0';
  520. strcpy(t->lexeme,lexeme);
  521. return t;
  522. }
  523. break;
  524. case 30:
  525. t = (tokenInfo)malloc(sizeof(tokenInfo));
  526. t->s = TK_SQR;
  527. lexeme[i]='\0';
  528. strcpy(t->lexeme,lexeme);
  529. return t;
  530. break;
  531. case 31:
  532. c = getNextChar(fp,&back);
  533. if(isalpha(c))
  534. {
  535. lexeme[i++] =c;
  536. state = 32;
  537. }
  538. else
  539. {
  540. *error = TRUE;
  541. t = (tokenInfo)malloc(sizeof(tokenInfo));
  542. t->s = TK_ERROR;
  543. lexeme[i] = '\0';
  544. strcpy(t->lexeme,lexeme);
  545. return t;
  546. }
  547. break;
  548. case 32:
  549. c = getNextChar(fp,&back);
  550. if(isalpha(c))
  551. {
  552. lexeme[i++] = c;
  553. state = 32;
  554. }
  555. else if(isdigit(c))
  556. {
  557. lexeme[i++] = c;
  558. state = 33;
  559. }
  560. else
  561. {
  562. back = 1;
  563. //t = (tokenInfo)malloc(sizeof(tokenInfo));
  564. lexeme[i]='\0';
  565. t=mainFun(lexeme, kt);
  566. return t;
  567. }
  568. break;
  569. case 33:
  570. c = getNextChar(fp,&back);
  571. if(isdigit(c))
  572. {
  573. lexeme[i++] = c;
  574. state = 33;
  575. }
  576. else
  577. {
  578. back = 1;
  579. t = (tokenInfo)malloc(sizeof(tokenInfo));
  580. t->s = TK_FUNID;
  581. lexeme[i]='\0';
  582. strcpy(t->lexeme,lexeme);
  583. return t;
  584. }
  585. break;
  586. case 34:
  587. t = (tokenInfo)malloc(sizeof(tokenInfo));
  588. t->s = TK_NOT;
  589. lexeme[i]='\0';
  590. strcpy(t->lexeme,lexeme);
  591. return t;
  592. break;
  593. case 35:
  594. t = (tokenInfo)malloc(sizeof(tokenInfo));
  595. t->s = TK_PLUS;
  596. lexeme[i]='\0';
  597. strcpy(t->lexeme,lexeme);
  598. return t;
  599. break;
  600. case 36:
  601. c = getNextChar(fp,&back);
  602. if(c=='-')
  603. {
  604. state = 37;
  605. lexeme[i++]=c;
  606. }
  607. else if(c=='=')
  608. {
  609. state = 40;
  610. lexeme[i++]=c;
  611. }
  612. else
  613. {
  614. t = (tokenInfo)malloc(sizeof(tokenInfo));
  615. t->s = TK_LT;
  616. lexeme[i]='\0';
  617. strcpy(t->lexeme,lexeme);
  618. return t;
  619. }
  620. break;
  621. case 37:
  622. c = getNextChar(fp,&back);
  623. if(c=='-')
  624. {
  625. state = 38;
  626. lexeme[i++] = c;
  627. }
  628. else
  629. {
  630. *error = TRUE;
  631. t = (tokenInfo)malloc(sizeof(tokenInfo));
  632. t->s = TK_ERROR;
  633. lexeme[i] = '\0';
  634. strcpy(t->lexeme,lexeme);
  635. return t;
  636. }
  637. break;
  638. case 38:
  639. c = getNextChar(fp,&back);
  640. if(c=='-')
  641. {
  642. state = 39;
  643. lexeme[i++]=c;
  644. }
  645. else
  646. {
  647. *error = TRUE;
  648. t = (tokenInfo)malloc(sizeof(tokenInfo));
  649. t->s = TK_ERROR;
  650. lexeme[i] = '\0';
  651. strcpy(t->lexeme,lexeme);
  652. return t;
  653. }
  654. break;
  655. case 39:
  656. t = (tokenInfo)malloc(sizeof(tokenInfo));
  657. t->s = TK_ASSIGNOP;
  658. lexeme[i]='\0';
  659. strcpy(t->lexeme,lexeme);
  660. return t;
  661. break;
  662. case 40:
  663. t = (tokenInfo)malloc(sizeof(tokenInfo));
  664. t->s = TK_LE;
  665. lexeme[i]='\0';
  666. strcpy(t->lexeme,lexeme);
  667. return t;
  668. break;
  669. case 41:
  670. c = getNextChar(fp,&back);
  671. if(c=='=')
  672. {
  673. state = 42;
  674. lexeme[i++]=c;
  675. }
  676. else
  677. {
  678. *error = TRUE;
  679. t = (tokenInfo)malloc(sizeof(tokenInfo));
  680. t->s = TK_ERROR;
  681. lexeme[i] = '\0';
  682. strcpy(t->lexeme,lexeme);
  683. return t;
  684. }
  685. break;
  686. case 42:
  687. t = (tokenInfo)malloc(sizeof(tokenInfo));
  688. t->s = TK_EQ;
  689. lexeme[i]='\0';
  690. strcpy(t->lexeme,lexeme);
  691. return t;
  692. break;
  693. case 43:
  694. c = getNextChar(fp,&back);
  695. if(c=='=')
  696. {
  697. state = 44;
  698. lexeme[i++]=c;
  699. }
  700. else
  701. {
  702. t = (tokenInfo)malloc(sizeof(tokenInfo));
  703. t->s = TK_GT;
  704. lexeme[i]='\0';
  705. strcpy(t->lexeme,lexeme);
  706. return t;
  707. }
  708. break;
  709. case 44:
  710. t = (tokenInfo)malloc(sizeof(tokenInfo));
  711. t->s = TK_GE;
  712. lexeme[i]='\0';
  713. strcpy(t->lexeme,lexeme);
  714. return t;
  715. break;
  716. case 45: //WHITESPACE STATE
  717. c = getNextChar(fp,&back);
  718. if(c=='\n')
  719. {
  720. //printf("newline\n");
  721. (*linenumber)++;
  722. state = 45;
  723. }
  724. else if(isspace(c))
  725. {
  726. state = 45;
  727. }
  728. else
  729. {
  730. back = 1;
  731. state = 1;
  732. }
  733. break;
  734. case 46: //COMMENT STATE
  735. c = getNextChar(fp,&back);
  736. if(c=='\n')
  737. {
  738. lexeme[i]='\0';
  739. strcpy(t->lexeme,lexeme);
  740. (*linenumber)++;
  741. return t;
  742. }
  743. else state = 46;
  744. break;
  745. case 47:
  746. t = (tokenInfo)malloc(sizeof(tokenInfo));
  747. t->s = TK_COMMA;
  748. lexeme[i]='\0';
  749. strcpy(t->lexeme,lexeme);
  750. return t;
  751. break;
  752. }
  753. //printf("State = %d c = %c line=%d\n", state,c,*linenumber);
  754. }
  755. }
  756. int getStream(int fp, buffer B, buffersize k)//reads k characters from source file into buffer B
  757. {
  758. /*
  759. int ch, i=0;
  760. for (ch = fgetc(fp); ch != EOF && ch != '\n' && i<k; ch = fgetc(fp))
  761. B[i++] = (char)ch;
  762. return fp;
  763. */
  764. int amtRead=0;
  765. amtRead = read(fp,B,k);
  766. return amtRead;
  767. }
  768. char getNextChar(int fp, int *back)//gets next character from source file at position x
  769. {
  770. static int x=0,k=2000,y;
  771. static char cb1[2000];
  772. static char cb2[2000];
  773. buffer b1=cb1;
  774. buffer b2=cb2;
  775. int flag=0;
  776. x%=k*2;
  777. //printf("x = %d\n",x);
  778. if(*back>=1)
  779. {
  780. x-=*back;
  781. x+=k*2;
  782. x%=k*2;
  783. *back=0;
  784. }
  785. else
  786. {
  787. if(x==0)
  788. {
  789. y=getStream(fp, b1, k);
  790. flag=0;
  791. }
  792. else if(x==k)
  793. {
  794. y=getStream(fp, b2, k);
  795. flag=1;
  796. }
  797. }
  798. if(x>=y+flag*k&&y<k)
  799. {
  800. x++;
  801. return (char)EOF;
  802. }
  803. if(x<k)
  804. return b1[x++];
  805. return b2[x++ - k];
  806. }
  807. void addKeyword(keywordTable kt, char *keyword, symbol s)//recursively called to add keyword to keywordTable
  808. {
  809. int hval,hashkey=48;//twice the no. of keywords
  810. hval=hash(keyword,hashkey);
  811. while(kt[hval].present==TRUE)
  812. hval=(++hval)%hashkey;
  813. strcpy(kt[hval].keyword, keyword);
  814. kt[hval].present=TRUE;
  815. kt[hval].s=s;
  816. }
  817. int hash(char *keyword, int hashkey)//hash function
  818. {
  819. int hash=0,mul=3,i=0;
  820. while(keyword[i]!='\0')
  821. hash=(hash*mul+keyword[i++])%hashkey;
  822. return hash;
  823. }
  824. void initkt(keywordTable kt)//initialize keywordTable with keywords
  825. {
  826. addKeyword(kt, "_main", TK_MAIN );
  827. addKeyword(kt, "call", TK_CALL);
  828. addKeyword(kt, "else", TK_ELSE);
  829. addKeyword(kt, "end", TK_END);
  830. addKeyword(kt, "endif", TK_ENDIF);
  831. addKeyword(kt, "endrecord", TK_ENDRECORD);
  832. addKeyword(kt, "endwhile", TK_ENDWHILE);
  833. addKeyword(kt, "global", TK_GLOBAL);
  834. addKeyword(kt, "if", TK_IF);
  835. addKeyword(kt, "input", TK_INPUT);
  836. addKeyword(kt, "int", TK_INT);
  837. addKeyword(kt, "list", TK_LIST);
  838. addKeyword(kt, "output", TK_OUTPUT);
  839. addKeyword(kt, "parameter", TK_PARAMETER);
  840. addKeyword(kt, "parameters", TK_PARAMETERS);
  841. addKeyword(kt, "read", TK_READ);
  842. addKeyword(kt, "real", TK_REAL);
  843. addKeyword(kt, "record", TK_RECORD);
  844. addKeyword(kt, "return", TK_RETURN);
  845. addKeyword(kt, "then", TK_THEN);
  846. addKeyword(kt, "type", TK_TYPE);
  847. addKeyword(kt, "while", TK_WHILE);
  848. addKeyword(kt, "with", TK_WITH);
  849. addKeyword(kt, "write", TK_WRITE);
  850. }
  851. tokenInfo keywordId(char *lexeme, keywordTable kt)//returns token for lexeme if it is a keyword or fieldname
  852. {
  853. int hval,hashkey=48;//twice the no. of keywords
  854. tokenInfo t = (tokenInfo)malloc(sizeof(tokenInfo));
  855. strcpy(t->lexeme,lexeme);
  856. hval=hash(lexeme,hashkey);
  857. //printf("%s\n", lexeme);
  858. while(1)
  859. {
  860. if(kt[hval].present==FALSE)
  861. {
  862. t->s=TK_FIELDID;
  863. return t;
  864. }
  865. else if(!strcmp(lexeme,kt[hval].keyword))
  866. {
  867. t->s=kt[hval].s;
  868. return t;
  869. }
  870. hval++;
  871. hval=hval%hashkey;
  872. }
  873. }
  874. tokenInfo mainFun(char *lexeme, keywordTable kt)//returns token for lexeme if it is main or function
  875. {
  876. tokenInfo t = (tokenInfo)malloc(sizeof(tokenInfo));
  877. strcpy(t->lexeme,lexeme);
  878. if(!strcmp(lexeme, "_main"))
  879. {
  880. t->s=TK_MAIN;
  881. return t;
  882. }
  883. t->s=TK_FUNID;
  884. return t;
  885. }
  886. char* toStr ( symbol s )
  887. {
  888. switch(s)
  889. {
  890. case TK_AND :
  891. return "TK_AND";
  892. case TK_ASSIGNOP :
  893. return "TK_ASSIGNOP";
  894. case TK_CALL :
  895. return "TK_CALL";
  896. case TK_CL :
  897. return "TK_CL";
  898. case TK_COLON :
  899. return "TK_COLON";
  900. case TK_COMMENT :
  901. return "TK_COMMENT";
  902. case TK_DIV :
  903. return "TK_DIV";
  904. case TK_DOT :
  905. return "TK_DOT";
  906. case TK_ELSE :
  907. return "TK_ELSE";
  908. case TK_END :
  909. return "TK_END";
  910. case TK_ENDIF :
  911. return "TK_ENDIF";
  912. case TK_ENDRECORD :
  913. return "TK_ENDRECORD";
  914. case TK_ENDWHILE :
  915. return "TK_ENDWHILE";
  916. case TK_EQ :
  917. return "TK_EQ";
  918. case TK_FIELDID :
  919. return "TK_FIELDID";
  920. case TK_FUNID :
  921. return "TK_FUNID";
  922. case TK_GE :
  923. return "TK_GE";
  924. case TK_GLOBAL :
  925. return "TK_GLOBAL";
  926. case TK_GT :
  927. return "TK_GT";
  928. case TK_ID :
  929. return "TK_ID";
  930. case TK_IF :
  931. return "TK_IF";
  932. case TK_INPUT :
  933. return "TK_INPUT";
  934. case TK_INT :
  935. return "TK_INT";
  936. case TK_LE :
  937. return "TK_LE";
  938. case TK_LIST :
  939. return "TK_LIST";
  940. case TK_LT :
  941. return "TK_LT";
  942. case TK_MAIN :
  943. return "TK_MAIN";
  944. case TK_MINUS :
  945. return "TK_MINUS";
  946. case TK_MUL :
  947. return "TK_MUL";
  948. case TK_NE :
  949. return "TK_NE";
  950. case TK_NOT :
  951. return "TK_NOT";
  952. case TK_NUM :
  953. return "TK_NUM";
  954. case TK_OP :
  955. return "TK_OP";
  956. case TK_OR :
  957. return "TK_OR";
  958. case TK_OUTPUT :
  959. return "TK_OUTPUT";
  960. case TK_PARAMETER :
  961. return "TK_PARAMETER";
  962. case TK_PARAMETERS :
  963. return "TK_PARAMETERS";
  964. case TK_PLUS :
  965. return "TK_PLUS";
  966. case TK_READ :
  967. return "TK_READ";
  968. case TK_REAL :
  969. return "TK_REAL";
  970. case TK_RECORD :
  971. return "TK_RECORD";
  972. case TK_RECORDID :
  973. return "TK_RECORDID";
  974. case TK_RETURN :
  975. return "TK_RETURN";
  976. case TK_RNUM :
  977. return "TK_RNUM";
  978. case TK_SEM :
  979. return "TK_SEM";
  980. case TK_SQL :
  981. return "TK_SQL";
  982. case TK_SQR :
  983. return "TK_SQR";
  984. case TK_THEN :
  985. return "TK_THEN";
  986. case TK_TYPE :
  987. return "TK_TYPE";
  988. case TK_WHILE :
  989. return "TK_WHILE";
  990. case TK_WITH :
  991. return "TK_WITH";
  992. case TK_WRITE :
  993. return "TK_WRITE";
  994. case TK_COMMA :
  995. return "TK_COMMA";
  996. case program :
  997. return "program";
  998. case mainfunction :
  999. return "mainfunction";
  1000. case otherfunctions :
  1001. return "otherfunctions";
  1002. case function :
  1003. return "function";
  1004. case input_par :
  1005. return "input_par";
  1006. case output_par :
  1007. return "output_par";
  1008. case constructeddatatype :
  1009. return "constructeddatatype";
  1010. case remaining_list :
  1011. return "remaining_list";
  1012. case parameter_list :
  1013. return "parameter_list";
  1014. case stmts :
  1015. return "stmts";
  1016. case typedefinitions :
  1017. return "typedefinitions";
  1018. case typedefinition :
  1019. return "typedefinition";
  1020. case fielddefinitions :
  1021. return "fielddefinitions";
  1022. case primitivedatatype :
  1023. return "primitivedatatype";
  1024. case fielddefinition :
  1025. return "fielddefinition";
  1026. case morefields :
  1027. return "morefields";
  1028. case declarations :
  1029. return "declarations";
  1030. case declaration :
  1031. return "declaration";
  1032. case datatype :
  1033. return "datatype";
  1034. case global_or_not :
  1035. return "global_or_not";
  1036. case assignmentstmt :
  1037. return "assignmentstmt";
  1038. case funcallstmt :
  1039. return "funcallstmt";
  1040. case outputparameters :
  1041. return "outputparameters";
  1042. case inputparameters :
  1043. return "inputparameters";
  1044. case iterativestmt :
  1045. return "iterativestmt";
  1046. case conditionalstmt :
  1047. return "conditionalstmt";
  1048. case elsepart :
  1049. return "elsepart";
  1050. case stmt :
  1051. return "stmt";
  1052. case otherstmts :
  1053. return "otherstmts";
  1054. case iostmt :
  1055. return "iostmt";
  1056. case singleorrecid :
  1057. return "singleorrecid";
  1058. case allvar :
  1059. return "allvar";
  1060. case expprime :
  1061. return "expprime";
  1062. case term :
  1063. return "term";
  1064. case termprime :
  1065. return "termprime";
  1066. case factor :
  1067. return "factor";
  1068. case arithmeticexpression :
  1069. return "arithmeticexpression";
  1070. case highprecedenceoperators :
  1071. return "highprecedenceoperators";
  1072. case lowprecedenceoperators :
  1073. return "lowprecedenceoperators";
  1074. case all :
  1075. return "all";
  1076. case temp :
  1077. return "temp";
  1078. case booleanexpression :
  1079. return "booleanexpression";
  1080. case var :
  1081. return "var";
  1082. case logicalop :
  1083. return "logicalop";
  1084. case relationalop :
  1085. return "relationalop";
  1086. case returnstmt :
  1087. return "returnstmt";
  1088. case optionalreturn :
  1089. return "optionalreturn";
  1090. case more_ids :
  1091. return "more_ids";
  1092. case newstate :
  1093. return "newstate";
  1094. case idlist :
  1095. return "idlist";
  1096. case TK_EPS :
  1097. return "eps";
  1098. default:
  1099. return "INVALID";
  1100. }
  1101. }
  1102. tokenList createTokenList(int fp, keywordTable kt)//create Token List
  1103. {
  1104. int linenumber = 1,q;
  1105. bool error = 0;
  1106. tokenInfo t;
  1107. tokenList list,curr=NULL;
  1108. while(1)
  1109. {
  1110. t = getNextToken(fp,kt,&error,&linenumber);
  1111. //printf("LEXEME:%s\n",t->lexeme);
  1112. if(t==NULL)
  1113. {
  1114. break;
  1115. }
  1116. tokenList temp=(tokenList)malloc(sizeof(tokenList));
  1117. if(curr==NULL)
  1118. {
  1119. curr=temp;
  1120. list=temp;
  1121. }
  1122. else
  1123. {
  1124. curr->next=temp;
  1125. curr=curr->next;
  1126. }
  1127. curr->t=t;
  1128. if(t->s==TK_COMMENT)curr->linenumber=linenumber-1;
  1129. else curr->linenumber=linenumber;
  1130. curr->next=NULL;
  1131. if(error)
  1132. break;
  1133. }
  1134. return list;
  1135. }
  1136. void printTokenList(keywordTable kt, tokenList list)//print Token List
  1137. {
  1138. int q;
  1139. while(list!=NULL)
  1140. {
  1141. //printf("LEXEME:%s\n",list->t->lexeme);
  1142. if(list->t->s==TK_ERROR)
  1143. {
  1144. printf("ERROR_3: Unknown pattern %s at line %d\n", list->t->lexeme, list->linenumber);
  1145. any_error=1;
  1146. break;
  1147. }
  1148. else if(list->t->s==TK_ERROR2)
  1149. {
  1150. printf("ERROR_2: Unknown Symbol %s at line %d\n", list->t->lexeme, list->linenumber);
  1151. any_error=1;
  1152. break;
  1153. }
  1154. else if((strlen(list->t->lexeme) > 30 && list->t->s==TK_FUNID) || (strlen(list->t->lexeme) > 20 && list->t->s!=TK_FUNID) || list->t->lexeme[strlen(list->t->lexeme)-1] =='!')
  1155. {
  1156. q=20;
  1157. //printf("sdgdg\n");
  1158. if(list->t->s==TK_FUNID)
  1159. q=30;
  1160. printf("ERROR_1 : Identifier at line %d is longer than the prescribed length of %d characters\n", list->linenumber,q);
  1161. any_error=1;
  1162. break;
  1163. }
  1164. //else
  1165. // printf("%s\n",list->t->lexeme);
  1166. printf("%-20s%-30s%-10d\n",toStr(list->t->s), list->t->lexeme, list->linenumber);
  1167. list=list->next;
  1168. }
  1169. }