PageRenderTime 43ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/graphviz-cmake/cmd/lefty/dot2l/dotlex.c

https://bitbucket.org/akristmann/custom_build
C | 360 lines | 299 code | 35 blank | 26 comment | 121 complexity | 7c67f75dfad7ad367b5c7681a9121a17 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, EPL-1.0, CPL-1.0, BSD-3-Clause, LGPL-2.1
  1. /* $Id: dotlex.c,v 1.8 2011/01/25 16:30:46 ellson Exp $ $Revision: 1.8 $ */
  2. /* vim:set shiftwidth=4 ts=8: */
  3. /*************************************************************************
  4. * Copyright (c) 2011 AT&T Intellectual Property
  5. * All rights reserved. This program and the accompanying materials
  6. * are made available under the terms of the Eclipse Public License v1.0
  7. * which accompanies this distribution, and is available at
  8. * http://www.eclipse.org/legal/epl-v10.html
  9. *
  10. * Contributors: See CVS logs. Details at http://www.graphviz.org/
  11. *************************************************************************/
  12. /* the graph lexer */
  13. typedef void *Tobj;
  14. #include "common.h"
  15. #include "dotparse.h"
  16. #include "dot2l.h"
  17. #include "io.h"
  18. #include "triefa.c"
  /* ---- lexer state shared by the routines in this file ---- */

  /* count of yyerror calls; only the first one is reported */
  static int syntax_errors;

  /* io descriptor handed to lex_begin and used for every IOreadline call */
  static int lexer_fd;

  /* initial size, in bytes, of the line buffer allocated by lex_begin */
  #define LEXBUFSIZ 10240

  /* line buffer: input text starts at lexbuf + 1, scanned tokens are
   * copied to the front of the same buffer */
  static char *lexbuf;

  /* current scan position inside lexbuf (NULL when a new line is needed) */
  static char *lexptr;

  /* number of bytes currently allocated for lexbuf */
  static int lexsiz;

  /* non-zero while inside a slash-star comment */
  static int in_comment;

  /* value of line_number when the currently open comment started */
  static int comment_start;

  /* current input line number (external linkage) */
  int line_number;

  /* forward declarations of the file-local helpers */
  static char *lex_gets (int curlen);
  static int lex_token (char *p);
  static void error_context (void);
  static char *skip_wscomments (char *p);
  static char *scan_token (char *p);
  static char *scan_num (char *p);
  static char *quoted_string (char *p);
  static char *html_string (char *p);
  35. int lex_begin (int ioi) {
  36. lexer_fd = ioi;
  37. lexptr = NULL;
  38. if (!(lexbuf = malloc (LEXBUFSIZ))) {
  39. fprintf (stderr, "cannot allocate buffer\n");
  40. return -1;
  41. }
  42. lexsiz = LEXBUFSIZ;
  43. return 0;
  44. }
  45. int myyylex (void) { /* for debugging */
  46. int rv = myyylex ();
  47. fprintf (stderr, "returning %d\n", rv);
  48. if (rv == T_id)
  49. fprintf (stderr, "string val is %s\n", yylval.s);
  50. return rv;
  51. }
  52. int yylex (void) {
  53. int token;
  54. char *p;
  55. /* if the parser has accepted a graph, reset and return EOF */
  56. if (yaccdone) {
  57. yaccdone = FALSE;
  58. return EOF;
  59. }
  60. /* get a nonempty lex buffer */
  61. do {
  62. if ((lexptr == NULL) || (lexptr[0] == '\0'))
  63. if ((lexptr = lex_gets (0)) == NULL) {
  64. if (in_comment)
  65. fprintf (
  66. stderr,
  67. "warning, nonterminated comment in line %d\n",
  68. comment_start
  69. );
  70. return EOF;
  71. }
  72. lexptr = skip_wscomments (lexptr);
  73. } while (lexptr[0] == '\0');
  74. /* scan quoted strings */
  75. if (lexptr[0] == '\"') {
  76. lexptr = quoted_string (lexptr);
  77. yylval.s = (char *) strdup (lexbuf);
  78. return T_id;
  79. }
  80. /* scan html strings */
  81. if (lexptr[0] == '<') {
  82. lexptr = html_string (lexptr);
  83. yylval.s = (char *) strdup (lexbuf);
  84. return T_id;
  85. }
  86. /* scan edge operator */
  87. if (etype && (strncmp (lexptr, etype, strlen (etype)) == 0)) {
  88. lexptr += strlen (etype);
  89. return T_edgeop;
  90. }
  91. /* scan numbers */
  92. if ((p = scan_num (lexptr))) {
  93. lexptr = p;
  94. yylval.s = strdup (lexbuf);
  95. return T_id;
  96. }
  97. else {
  98. if (ispunct (lexptr[0]) && (lexptr[0] != '_'))
  99. return *lexptr++;
  100. else
  101. lexptr = scan_token (lexptr);
  102. }
  103. /* scan other tokens */
  104. token = lex_token (lexbuf);
  105. if (token == -1) {
  106. yylval.s = strdup (lexbuf);
  107. token = T_id;
  108. }
  109. return token;
  110. }
  111. void
  112. yyerror (char *fmt, char *s) {
  113. if (syntax_errors++)
  114. return;
  115. fprintf (stderr, "graph parser: ");
  116. fprintf (stderr, fmt, s);
  117. fprintf (stderr, " near line %d\n", line_number);
  118. error_context ();
  119. }
  120. static char *lex_gets (int curlen) {
  121. char *clp;
  122. int len;
  123. do {
  124. /* off by one so we can back up in LineBuf */
  125. if (IOreadline (
  126. lexer_fd, lexbuf + curlen + 1, lexsiz - curlen - 1
  127. ) == -1)
  128. break;
  129. clp = lexbuf + curlen + 1;
  130. len = strlen (clp);
  131. clp[len++] = '\n';
  132. clp[len] = 0;
  133. if (clp == lexbuf + 1 && clp[0] == '#') {
  134. /* comment line or cpp line sync */
  135. if (sscanf (clp+1, "%d", &line_number) == 0)
  136. line_number++;
  137. len = 0;
  138. clp[len] = 0;
  139. continue;
  140. }
  141. line_number++;
  142. if ((len = strlen (clp)) > 1) {
  143. if (clp[len - 2] == '\\') {
  144. len = len - 2;
  145. clp[len] = '\0';
  146. }
  147. }
  148. curlen += len;
  149. if (lexsiz - curlen - 1 < 1024) {
  150. if (!(lexbuf = realloc (lexbuf, lexsiz * 2))) {
  151. fprintf (stderr, "cannot grow buffer\n");
  152. return NULL;
  153. }
  154. lexsiz *= 2;
  155. }
  156. } while (clp[len - 1] != '\n');
  157. if (curlen > 0)
  158. return lexbuf + 1;
  159. else
  160. return NULL;
  161. }
  /*
   * Run the NUL-terminated string `p` through the TFA_* recogniser
   * pulled in from triefa.c and return whatever TFA_Definition yields
   * for it (yylex treats -1 as "not a known word").
   */
  static int lex_token (char *p) {
      TFA_Init ();
      for (; *p != '\0'; p++)
          TFA_Advance (*p);
      return TFA_Definition ();
  }
  169. static void error_context (void) {
  170. char *p, *q;
  171. if (lexptr == NULL)
  172. return;
  173. fprintf (stderr, "context: ");
  174. for (p = lexptr - 1; (p > lexbuf) && (isspace (*p) == FALSE); p--)
  175. ;
  176. for (q = lexbuf; q < p; q++)
  177. fputc (*q, stderr);
  178. fputs (" >>> ", stderr);
  179. for (; q < lexptr; q++)
  180. fputc (*q, stderr);
  181. fputs (" <<< ", stderr);
  182. fputs (lexptr, stderr);
  183. }
  184. /* i wrote this and it still frightens me */
  185. /* skip white space and comments in p */
  186. static char *skip_wscomments (char *p) {
  187. do {
  188. while (isspace (*p))
  189. p++;
  190. while (in_comment && p[0]) {
  191. while (p[0] && (p[0] != '*'))
  192. p++;
  193. if (p[0]) {
  194. if (p[1] == '/') {
  195. in_comment = FALSE;
  196. p += 2;
  197. break;
  198. } else
  199. p++;
  200. }
  201. }
  202. if (p[0] == '/') {
  203. if (p[1] == '/')
  204. while (*p)
  205. p++; /* skip to end of line */
  206. else {
  207. if (p[1] == '*') {
  208. in_comment = TRUE;
  209. comment_start = line_number;
  210. p += 2;
  211. continue;
  212. }
  213. else
  214. break; /* return a slash */
  215. }
  216. } else {
  217. if (!isspace (*p))
  218. break;
  219. }
  220. } while (p[0]);
  221. return p;
  222. }
  223. /* scan an unquoted token and return the position after its terminator */
  224. static char *scan_token (char *p) {
  225. char *q;
  226. q = lexbuf;
  227. if (p == '\0')
  228. return NULL;
  229. while (isalnum (*p) || (*p == '_') || (!isascii (*p)))
  230. *q++ = *p++;
  231. *q = '\0';
  232. return p;
  233. }
  234. static char *scan_num (char *p) {
  235. char *q, *z;
  236. int saw_rp = FALSE;
  237. int saw_digit = FALSE;
  238. z = p;
  239. q = lexbuf;
  240. if (*z == '-')
  241. *q++ = *z++;
  242. if (*z == '.') {
  243. saw_rp = TRUE;
  244. *q++ = *z++;
  245. }
  246. while (isdigit (*z)) {
  247. saw_digit = TRUE;
  248. *q++ = *z++;
  249. }
  250. if ((*z == '.') && (saw_rp == FALSE)) {
  251. saw_rp = TRUE;
  252. *q++ = *z++;
  253. while (isdigit (*z)) {
  254. saw_digit = TRUE;
  255. *q++ = *z++;
  256. }
  257. }
  258. *q = '\0';
  259. if (saw_digit && *z && (isalpha (*z)))
  260. yyerror ("badly formed number %s", lexbuf);
  261. if (saw_digit == FALSE)
  262. z = NULL;
  263. return z;
  264. }
  265. /* scan a quoted string and return the position after its terminator */
  266. static char *quoted_string (char *p) {
  267. char quote, *q;
  268. quote = *p++;
  269. q = lexbuf;
  270. while ((*p) && (*p != quote)) {
  271. if (*p == '\\') {
  272. if (*(p+1) == quote)
  273. p++;
  274. else {
  275. if (*(p+1) == '\\')
  276. *q++ = *p++;
  277. }
  278. }
  279. *q++ = *p++;
  280. }
  281. if (*p == '\0')
  282. yyerror ("string ran past end of line", "");
  283. else
  284. p++;
  285. *q = 0;
  286. return p;
  287. }
  288. /* scan a html string and return the position after its terminator */
  289. static char *html_string (char *p) {
  290. char *q, *pbuf;
  291. int bal;
  292. p++;
  293. bal = 1;
  294. q = lexbuf;
  295. *q++ = '>';
  296. while (*p && *p != '<' && *p != '>')
  297. p++;
  298. for (;;) {
  299. while (*p) {
  300. if (*p == '<')
  301. bal++;
  302. else if (*p == '>') {
  303. bal--;
  304. if (bal == 0) {
  305. *q++ = '<';
  306. *q = 0;
  307. return p + 1;
  308. }
  309. }
  310. *q++ = *p++;
  311. }
  312. pbuf = lexbuf;
  313. if ((lexptr = lex_gets (p - lexbuf - 1)) == NULL) {
  314. fprintf (
  315. stderr,
  316. "warning, unterminated html label in line %d\n",
  317. line_number
  318. );
  319. return NULL;
  320. }
  321. q += (lexbuf - pbuf);
  322. p += (lexbuf - pbuf);
  323. }
  324. return NULL;
  325. }