/usr.bin/indent/lexi.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 608 lines · 498 code · 24 blank · 86 comment · 133 complexity · 1e1d54d7a176987fd818c04e271d489a MD5 · raw file

  1. /*
  2. * Copyright (c) 1985 Sun Microsystems, Inc.
  3. * Copyright (c) 1980, 1993
  4. * The Regents of the University of California. All rights reserved.
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. All advertising materials mentioning features or use of this software
  16. * must display the following acknowledgement:
  17. * This product includes software developed by the University of
  18. * California, Berkeley and its contributors.
  19. * 4. Neither the name of the University nor the names of its contributors
  20. * may be used to endorse or promote products derived from this software
  21. * without specific prior written permission.
  22. *
  23. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  24. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  29. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  30. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  32. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  33. * SUCH DAMAGE.
  34. */
  35. #if 0
  36. #ifndef lint
  37. static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
  38. #endif /* not lint */
  39. #endif
  40. #include <sys/cdefs.h>
  41. __FBSDID("$FreeBSD$");
  42. /*
  43. * Here we have the token scanner for indent. It scans off one token and puts
  44. * it in the global variable "token". It returns a code, indicating the type
  45. * of token scanned.
  46. */
  47. #include <err.h>
  48. #include <stdio.h>
  49. #include <ctype.h>
  50. #include <stdlib.h>
  51. #include <string.h>
  52. #include "indent_globs.h"
  53. #include "indent_codes.h"
  54. #include "indent.h"
  /* Character classes stored in chartype[] */
  #define alphanum 1
  #define opchar 3

  /*
   * One entry of the reserved-word table: the spelling of the word and the
   * class code that lexi() switches on when the word is recognized:
   *   0 - no special treatment (returned as a plain identifier)
   *   1 - switch
   *   2 - case / default
   *   3 - struct / union / enum
   *   4 - declaration keywords (types, storage classes, qualifiers)
   *   5 - if / while / for
   *   6 - else / do
   *   7 - sizeof
   */
  struct templ {
      const char *rwd;
      int rwcode;
  };

  /*
   * Reserved-word table.  The used part is terminated by an entry whose rwd
   * is a null pointer; the slots after it are spare room for words that
   * addkey() appends at run time.
   */
  struct templ specials[1000] =
  {
      {"switch", 1},
      {"case", 2},
      {"break", 0},
      {"struct", 3},
      {"union", 3},
      {"enum", 3},
      {"default", 2},
      {"int", 4},
      {"char", 4},
      {"float", 4},
      {"double", 4},
      {"long", 4},
      {"short", 4},
      {"typedef", 4},
      {"unsigned", 4},
      {"register", 4},
      {"static", 4},
      {"global", 4},
      {"extern", 4},
      {"void", 4},
      {"const", 4},
      {"volatile", 4},
      {"goto", 0},
      {"return", 0},
      {"if", 5},
      {"while", 5},
      {"for", 5},
      {"else", 6},
      {"do", 6},
      {"sizeof", 7},
      {0, 0}
  };
  /*
   * Character class lookup, indexed by 7-bit ASCII code:
   *   0 - neither of the classes below
   *   1 - may appear in an identifier or number (letters, digits, '_', '$')
   *   3 - operator character
   * Each row below covers sixteen consecutive codes.
   */
  char chartype[128] =
  {
      /* 0x00 - 0x0f: control characters */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x10 - 0x1f: control characters */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x20 - 0x2f: space and punctuation */
      0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
      /* 0x30 - 0x3f: digits, then punctuation */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
      /* 0x40 - 0x4f: '@', 'A' - 'O' */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      /* 0x50 - 0x5f: 'P' - 'Z', brackets, backslash, caret, underscore */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
      /* 0x60 - 0x6f: backquote, 'a' - 'o' */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      /* 0x70 - 0x7f: 'p' - 'z', braces, bar, tilde, DEL */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
  };
  116. int
  117. lexi(void)
  118. {
  119. int unary_delim; /* this is set to 1 if the current token
  120. * forces a following operator to be unary */
  121. static int last_code; /* the last token type returned */
  122. static int l_struct; /* set to 1 if the last token was 'struct' */
  123. int code; /* internal code to be returned */
  124. char qchar; /* the delimiter character for a string */
  125. e_token = s_token; /* point to start of place to save token */
  126. unary_delim = false;
  127. ps.col_1 = ps.last_nl; /* tell world that this token started in
  128. * column 1 iff the last thing scanned was nl */
  129. ps.last_nl = false;
  130. while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
  131. ps.col_1 = false; /* leading blanks imply token is not in column
  132. * 1 */
  133. if (++buf_ptr >= buf_end)
  134. fill_buffer();
  135. }
  136. /* Scan an alphanumeric token */
  137. if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
  138. /*
  139. * we have a character or number
  140. */
  141. const char *j; /* used for searching thru list of
  142. *
  143. * reserved words */
  144. struct templ *p;
  145. if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
  146. int seendot = 0,
  147. seenexp = 0,
  148. seensfx = 0;
  149. if (*buf_ptr == '0' &&
  150. (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
  151. *e_token++ = *buf_ptr++;
  152. *e_token++ = *buf_ptr++;
  153. while (isxdigit(*buf_ptr)) {
  154. CHECK_SIZE_TOKEN;
  155. *e_token++ = *buf_ptr++;
  156. }
  157. }
  158. else
  159. while (1) {
  160. if (*buf_ptr == '.') {
  161. if (seendot)
  162. break;
  163. else
  164. seendot++;
  165. }
  166. CHECK_SIZE_TOKEN;
  167. *e_token++ = *buf_ptr++;
  168. if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
  169. if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
  170. break;
  171. else {
  172. seenexp++;
  173. seendot++;
  174. CHECK_SIZE_TOKEN;
  175. *e_token++ = *buf_ptr++;
  176. if (*buf_ptr == '+' || *buf_ptr == '-')
  177. *e_token++ = *buf_ptr++;
  178. }
  179. }
  180. }
  181. while (1) {
  182. if (!(seensfx & 1) &&
  183. (*buf_ptr == 'U' || *buf_ptr == 'u')) {
  184. CHECK_SIZE_TOKEN;
  185. *e_token++ = *buf_ptr++;
  186. seensfx |= 1;
  187. continue;
  188. }
  189. if (!(seensfx & 2) &&
  190. (*buf_ptr == 'L' || *buf_ptr == 'l')) {
  191. CHECK_SIZE_TOKEN;
  192. if (buf_ptr[1] == buf_ptr[0])
  193. *e_token++ = *buf_ptr++;
  194. *e_token++ = *buf_ptr++;
  195. seensfx |= 2;
  196. continue;
  197. }
  198. break;
  199. }
  200. }
  201. else
  202. while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
  203. /* fill_buffer() terminates buffer with newline */
  204. if (*buf_ptr == BACKSLASH) {
  205. if (*(buf_ptr + 1) == '\n') {
  206. buf_ptr += 2;
  207. if (buf_ptr >= buf_end)
  208. fill_buffer();
  209. } else
  210. break;
  211. }
  212. CHECK_SIZE_TOKEN;
  213. /* copy it over */
  214. *e_token++ = *buf_ptr++;
  215. if (buf_ptr >= buf_end)
  216. fill_buffer();
  217. }
  218. *e_token++ = '\0';
  219. while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
  220. if (++buf_ptr >= buf_end)
  221. fill_buffer();
  222. }
  223. ps.its_a_keyword = false;
  224. ps.sizeof_keyword = false;
  225. if (l_struct && !ps.p_l_follow) {
  226. /* if last token was 'struct' and we're not
  227. * in parentheses, then this token
  228. * should be treated as a declaration */
  229. l_struct = false;
  230. last_code = ident;
  231. ps.last_u_d = true;
  232. return (decl);
  233. }
  234. ps.last_u_d = l_struct; /* Operator after identifier is binary
  235. * unless last token was 'struct' */
  236. l_struct = false;
  237. last_code = ident; /* Remember that this is the code we will
  238. * return */
  239. if (auto_typedefs) {
  240. const char *q = s_token;
  241. size_t q_len = strlen(q);
  242. /* Check if we have an "_t" in the end */
  243. if (q_len > 2 &&
  244. (strcmp(q + q_len - 2, "_t") == 0)) {
  245. ps.its_a_keyword = true;
  246. ps.last_u_d = true;
  247. goto found_auto_typedef;
  248. }
  249. }
  250. /*
  251. * This loop will check if the token is a keyword.
  252. */
  253. for (p = specials; (j = p->rwd) != 0; p++) {
  254. const char *q = s_token; /* point at scanned token */
  255. if (*j++ != *q++ || *j++ != *q++)
  256. continue; /* This test depends on the fact that
  257. * identifiers are always at least 1 character
  258. * long (ie. the first two bytes of the
  259. * identifier are always meaningful) */
  260. if (q[-1] == 0)
  261. break; /* If its a one-character identifier */
  262. while (*q++ == *j)
  263. if (*j++ == 0)
  264. goto found_keyword; /* I wish that C had a multi-level
  265. * break... */
  266. }
  267. if (p->rwd) { /* we have a keyword */
  268. found_keyword:
  269. ps.its_a_keyword = true;
  270. ps.last_u_d = true;
  271. switch (p->rwcode) {
  272. case 1: /* it is a switch */
  273. return (swstmt);
  274. case 2: /* a case or default */
  275. return (casestmt);
  276. case 3: /* a "struct" */
  277. /*
  278. * Next time around, we will want to know that we have had a
  279. * 'struct'
  280. */
  281. l_struct = true;
  282. /* FALLTHROUGH */
  283. case 4: /* one of the declaration keywords */
  284. found_auto_typedef:
  285. if (ps.p_l_follow) {
  286. ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
  287. break; /* inside parens: cast, param list or sizeof */
  288. }
  289. last_code = decl;
  290. return (decl);
  291. case 5: /* if, while, for */
  292. return (sp_paren);
  293. case 6: /* do, else */
  294. return (sp_nparen);
  295. case 7:
  296. ps.sizeof_keyword = true;
  297. default: /* all others are treated like any other
  298. * identifier */
  299. return (ident);
  300. } /* end of switch */
  301. } /* end of if (found_it) */
  302. if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
  303. char *tp = buf_ptr;
  304. while (tp < buf_end)
  305. if (*tp++ == ')' && (*tp == ';' || *tp == ','))
  306. goto not_proc;
  307. strncpy(ps.procname, token, sizeof ps.procname - 1);
  308. ps.in_parameter_declaration = 1;
  309. rparen_count = 1;
  310. not_proc:;
  311. }
  312. /*
  313. * The following hack attempts to guess whether or not the current
  314. * token is in fact a declaration keyword -- one that has been
  315. * typedefd
  316. */
  317. if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
  318. && !ps.p_l_follow
  319. && !ps.block_init
  320. && (ps.last_token == rparen || ps.last_token == semicolon ||
  321. ps.last_token == decl ||
  322. ps.last_token == lbrace || ps.last_token == rbrace)) {
  323. ps.its_a_keyword = true;
  324. ps.last_u_d = true;
  325. last_code = decl;
  326. return decl;
  327. }
  328. if (last_code == decl) /* if this is a declared variable, then
  329. * following sign is unary */
  330. ps.last_u_d = true; /* will make "int a -1" work */
  331. last_code = ident;
  332. return (ident); /* the ident is not in the list */
  333. } /* end of procesing for alpanum character */
  334. /* Scan a non-alphanumeric token */
  335. *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
  336. * moved here */
  337. *e_token = '\0';
  338. if (++buf_ptr >= buf_end)
  339. fill_buffer();
  340. switch (*token) {
  341. case '\n':
  342. unary_delim = ps.last_u_d;
  343. ps.last_nl = true; /* remember that we just had a newline */
  344. code = (had_eof ? 0 : newline);
  345. /*
  346. * if data has been exhausted, the newline is a dummy, and we should
  347. * return code to stop
  348. */
  349. break;
  350. case '\'': /* start of quoted character */
  351. case '"': /* start of string */
  352. qchar = *token;
  353. if (troff) {
  354. e_token[-1] = '`';
  355. if (qchar == '"')
  356. *e_token++ = '`';
  357. e_token = chfont(&bodyf, &stringf, e_token);
  358. }
  359. do { /* copy the string */
  360. while (1) { /* move one character or [/<char>]<char> */
  361. if (*buf_ptr == '\n') {
  362. diag2(1, "Unterminated literal");
  363. goto stop_lit;
  364. }
  365. CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
  366. * since CHECK_SIZE guarantees that there
  367. * are at least 5 entries left */
  368. *e_token = *buf_ptr++;
  369. if (buf_ptr >= buf_end)
  370. fill_buffer();
  371. if (*e_token == BACKSLASH) { /* if escape, copy extra char */
  372. if (*buf_ptr == '\n') /* check for escaped newline */
  373. ++line_no;
  374. if (troff) {
  375. *++e_token = BACKSLASH;
  376. if (*buf_ptr == BACKSLASH)
  377. *++e_token = BACKSLASH;
  378. }
  379. *++e_token = *buf_ptr++;
  380. ++e_token; /* we must increment this again because we
  381. * copied two chars */
  382. if (buf_ptr >= buf_end)
  383. fill_buffer();
  384. }
  385. else
  386. break; /* we copied one character */
  387. } /* end of while (1) */
  388. } while (*e_token++ != qchar);
  389. if (troff) {
  390. e_token = chfont(&stringf, &bodyf, e_token - 1);
  391. if (qchar == '"')
  392. *e_token++ = '\'';
  393. }
  394. stop_lit:
  395. code = ident;
  396. break;
  397. case ('('):
  398. case ('['):
  399. unary_delim = true;
  400. code = lparen;
  401. break;
  402. case (')'):
  403. case (']'):
  404. code = rparen;
  405. break;
  406. case '#':
  407. unary_delim = ps.last_u_d;
  408. code = preesc;
  409. break;
  410. case '?':
  411. unary_delim = true;
  412. code = question;
  413. break;
  414. case (':'):
  415. code = colon;
  416. unary_delim = true;
  417. break;
  418. case (';'):
  419. unary_delim = true;
  420. code = semicolon;
  421. break;
  422. case ('{'):
  423. unary_delim = true;
  424. /*
  425. * if (ps.in_or_st) ps.block_init = 1;
  426. */
  427. /* ? code = ps.block_init ? lparen : lbrace; */
  428. code = lbrace;
  429. break;
  430. case ('}'):
  431. unary_delim = true;
  432. /* ? code = ps.block_init ? rparen : rbrace; */
  433. code = rbrace;
  434. break;
  435. case 014: /* a form feed */
  436. unary_delim = ps.last_u_d;
  437. ps.last_nl = true; /* remember this so we can set 'ps.col_1'
  438. * right */
  439. code = form_feed;
  440. break;
  441. case (','):
  442. unary_delim = true;
  443. code = comma;
  444. break;
  445. case '.':
  446. unary_delim = false;
  447. code = period;
  448. break;
  449. case '-':
  450. case '+': /* check for -, +, --, ++ */
  451. code = (ps.last_u_d ? unary_op : binary_op);
  452. unary_delim = true;
  453. if (*buf_ptr == token[0]) {
  454. /* check for doubled character */
  455. *e_token++ = *buf_ptr++;
  456. /* buffer overflow will be checked at end of loop */
  457. if (last_code == ident || last_code == rparen) {
  458. code = (ps.last_u_d ? unary_op : postop);
  459. /* check for following ++ or -- */
  460. unary_delim = false;
  461. }
  462. }
  463. else if (*buf_ptr == '=')
  464. /* check for operator += */
  465. *e_token++ = *buf_ptr++;
  466. else if (*buf_ptr == '>') {
  467. /* check for operator -> */
  468. *e_token++ = *buf_ptr++;
  469. if (!pointer_as_binop) {
  470. unary_delim = false;
  471. code = unary_op;
  472. ps.want_blank = false;
  473. }
  474. }
  475. break; /* buffer overflow will be checked at end of
  476. * switch */
  477. case '=':
  478. if (ps.in_or_st)
  479. ps.block_init = 1;
  480. #ifdef undef
  481. if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
  482. e_token[-1] = *buf_ptr++;
  483. if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
  484. *e_token++ = *buf_ptr++;
  485. *e_token++ = '='; /* Flip =+ to += */
  486. *e_token = 0;
  487. }
  488. #else
  489. if (*buf_ptr == '=') {/* == */
  490. *e_token++ = '='; /* Flip =+ to += */
  491. buf_ptr++;
  492. *e_token = 0;
  493. }
  494. #endif
  495. code = binary_op;
  496. unary_delim = true;
  497. break;
  498. /* can drop thru!!! */
  499. case '>':
  500. case '<':
  501. case '!': /* ops like <, <<, <=, !=, etc */
  502. if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
  503. *e_token++ = *buf_ptr;
  504. if (++buf_ptr >= buf_end)
  505. fill_buffer();
  506. }
  507. if (*buf_ptr == '=')
  508. *e_token++ = *buf_ptr++;
  509. code = (ps.last_u_d ? unary_op : binary_op);
  510. unary_delim = true;
  511. break;
  512. default:
  513. if (token[0] == '/' && *buf_ptr == '*') {
  514. /* it is start of comment */
  515. *e_token++ = '*';
  516. if (++buf_ptr >= buf_end)
  517. fill_buffer();
  518. code = comment;
  519. unary_delim = ps.last_u_d;
  520. break;
  521. }
  522. while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
  523. /*
  524. * handle ||, &&, etc, and also things as in int *****i
  525. */
  526. *e_token++ = *buf_ptr;
  527. if (++buf_ptr >= buf_end)
  528. fill_buffer();
  529. }
  530. code = (ps.last_u_d ? unary_op : binary_op);
  531. unary_delim = true;
  532. } /* end of switch */
  533. if (code != newline) {
  534. l_struct = false;
  535. last_code = code;
  536. }
  537. if (buf_ptr >= buf_end) /* check for input buffer empty */
  538. fill_buffer();
  539. ps.last_u_d = unary_delim;
  540. *e_token = '\0'; /* null terminate the token */
  541. return (code);
  542. }
  543. /*
  544. * Add the given keyword to the keyword table, using val as the keyword type
  545. */
  546. void
  547. addkey(char *key, int val)
  548. {
  549. struct templ *p = specials;
  550. while (p->rwd)
  551. if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
  552. return;
  553. else
  554. p++;
  555. if (p >= specials + sizeof specials / sizeof specials[0])
  556. return; /* For now, table overflows are silently
  557. * ignored */
  558. p->rwd = key;
  559. p->rwcode = val;
  560. p[1].rwd = 0;
  561. p[1].rwcode = 0;
  562. }