dotlex.c | searchcode

/graphviz-cmake/cmd/lefty/dot2l/dotlex.c

https://bitbucket.org/akristmann/custom_build
C | 360 lines | 299 code | 35 blank | 26 comment | 121 complexity | 7c67f75dfad7ad367b5c7681a9121a17 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, EPL-1.0, CPL-1.0, BSD-3-Clause, LGPL-2.1

/* $Id: dotlex.c,v 1.8 2011/01/25 16:30:46 ellson Exp $ $Revision: 1.8 $ */
/* vim:set shiftwidth=4 ts=8: */

/*************************************************************************
 * Copyright (c) 2011 AT&T Intellectual Property 
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors: See CVS logs. Details at http://www.graphviz.org/
 *************************************************************************/

/* the graph lexer */

typedef void *Tobj;
#include "common.h"
#include "dotparse.h"
#include "dot2l.h"
#include "io.h"
#include "triefa.c"

/* number of times yyerror has been called; only the first call prints */
static int syntax_errors;
/* I/O channel index given to lex_begin and passed on to IOreadline */
static int lexer_fd;
/* initial size, in bytes, of the buffer allocated by lex_begin */
#define LEXBUFSIZ 10240
/*
 * lexbuf is the single work buffer of the lexer: input lines are stored
 * starting at lexbuf + 1, while the text of the token being scanned is
 * written starting at lexbuf[0].
 * lexptr is the current read position inside the stored input, or NULL
 * when a new line has to be fetched.
 */
static char *lexbuf, *lexptr;
/* current allocated size of lexbuf in bytes (doubled by lex_gets) */
static int lexsiz;
/* non-zero while the scanner is inside an unterminated block comment */
static int in_comment;
/* value of line_number at the point where the open block comment began */
static int comment_start;
/* current input line number; updated by lex_gets, printed by yyerror */
int line_number;

/* helpers private to this file; see the definitions below */
static char *lex_gets (int);
static int lex_token (char *);
static void error_context (void);
static char *skip_wscomments (char *);
static char *scan_token (char *);
static char *scan_num (char *);
static char *quoted_string (char *);
static char *html_string (char *);

/*
 * Prepare the lexer to read from I/O channel ioi.
 *
 * Allocates a fresh line buffer of LEXBUFSIZ bytes and forgets any
 * pending input position.  Returns 0 on success, or -1 (after printing
 * a message on stderr) when the buffer cannot be allocated.
 */
int lex_begin (int ioi) {
    lexer_fd = ioi;
    lexptr = NULL;
    /*
     * lexbuf is private to this file and is only ever set by malloc or
     * realloc, so a buffer left over from an earlier lex_begin call can
     * be released here instead of being leaked.  free (NULL) is a no-op
     * on the very first call.
     */
    free (lexbuf);
    if (!(lexbuf = malloc (LEXBUFSIZ))) {
        fprintf (stderr, "cannot allocate buffer\n");
        return -1;
    }
    lexsiz = LEXBUFSIZ;
    return 0;
}

/*
 * Debugging wrapper around yylex: fetches the next token, traces it on
 * stderr (including the string value for T_id tokens) and returns it
 * unchanged.
 */
int myyylex (void) {        /* for debugging */
    /* yylex is defined further down in this file */
    int yylex (void);
    /* must call the real lexer; calling myyylex here would recurse forever */
    int rv = yylex ();

    fprintf (stderr, "returning %d\n", rv);
    if (rv == T_id)
        fprintf (stderr, "string val is %s\n", yylval.s);
    return rv;
}

/*
 * Return the next token of the graph description.
 *
 * Returns EOF when the parser has flagged a finished graph (yaccdone)
 * or when no more input is available, T_edgeop for the current edge
 * operator (etype), T_id for strings, numbers and identifiers (with a
 * strdup'ed copy of the text in yylval.s), the keyword code found by
 * lex_token for reserved words, and the character itself for single
 * punctuation characters.
 */
int yylex (void) {
    int token;
    char *p;

    /* if the parser has accepted a graph, reset and return EOF */
    if (yaccdone) {
        yaccdone = FALSE;
        return EOF;
    }

    /* get a nonempty lex buffer */
    do {
        if ((lexptr == NULL) || (lexptr[0] == '\0'))
            if ((lexptr = lex_gets (0)) == NULL) {
                if (in_comment)
                    fprintf (
                        stderr,
                        "warning, nonterminated comment in line %d\n",
                        comment_start
                    );
                return EOF;
            }
        lexptr = skip_wscomments (lexptr);
    } while (lexptr[0] == '\0');

    /* scan quoted strings */
    if (lexptr[0] == '\"') {
        lexptr = quoted_string (lexptr);
        yylval.s = (char *) strdup (lexbuf);
        return T_id;
    }

    /* scan html strings */
    if (lexptr[0] == '<') {
        /* html_string may return NULL; the next call then refills the buffer */
        lexptr = html_string (lexptr);
        yylval.s = (char *) strdup (lexbuf);
        return T_id;
    }

    /* scan edge operator */
    if (etype && (strncmp (lexptr, etype, strlen (etype)) == 0)) {
        lexptr += strlen (etype);
        return T_edgeop;
    }

    /* scan numbers */
    if ((p = scan_num (lexptr))) {
        lexptr = p;
        yylval.s =  strdup (lexbuf);
        return T_id;
    }

    /*
     * single punctuation characters are their own token; the cast keeps
     * the <ctype.h> call defined for bytes with the high bit set
     */
    if (ispunct ((unsigned char) lexptr[0]) && (lexptr[0] != '_'))
        return (unsigned char) *lexptr++;

    p = scan_token (lexptr);
    /*
     * A byte that is neither space, punctuation nor part of an identifier
     * (e.g. an ASCII control character) makes scan_token consume nothing.
     * Hand it to the parser as a single-character token so the lexer
     * always advances instead of returning an empty T_id forever.
     */
    if (p == lexptr)
        return (unsigned char) *lexptr++;
    lexptr = p;

    /* scan other tokens */
    token = lex_token (lexbuf);
    if (token == -1) {
        yylval.s = strdup (lexbuf);
        token = T_id;
    }
    return token;
}

/*
 * Report a parse error on stderr.
 *
 * fmt is a printf-style format that may consume one string argument, s.
 * Every call is counted in syntax_errors, but only the call that finds
 * the counter at zero prints anything: the message, the current line
 * number and the surrounding input as shown by error_context.
 */
void
yyerror (char *fmt, char *s) {
    int first_report = (syntax_errors == 0);

    syntax_errors++;
    if (!first_report)
        return;

    fprintf (stderr, "graph parser: ");
    fprintf (stderr, fmt, s);
    fprintf (stderr, " near line %d\n", line_number);
    error_context ();
}

/*
 * Read one logical input line into lexbuf.
 *
 * curlen is the number of bytes of input already stored after lexbuf[0];
 * new text is appended behind them.  Physical lines ending in a backslash
 * are joined with the following line.  A line starting with '#' that
 * would become the first text in the buffer is dropped; a number right
 * after the '#' replaces line_number, otherwise line_number is bumped.
 * The buffer is doubled whenever fewer than 1024 bytes remain, so lexbuf
 * may move during the call.
 *
 * Returns lexbuf + 1 when at least one byte was appended, or NULL when
 * nothing could be read or the buffer could not be grown.
 */
static char *lex_gets (int curlen) {
    char *clp, *grown;
    int len, startlen, complete;

    startlen = curlen;
    for (;;) {
        /* off by one so we can back up in LineBuf */
        /*
         * One byte less than the free space is offered so that the '\n'
         * appended below always fits (assumes IOreadline, defined
         * elsewhere, stores at most the given number of bytes including
         * the terminator -- TODO confirm).
         */
        if (IOreadline (
            lexer_fd, lexbuf + curlen + 1, lexsiz - curlen - 2
        ) == -1)
            break;
        clp = lexbuf + curlen + 1;
        len = strlen (clp);
        clp[len++] = '\n';
        clp[len] = 0;

        if (clp == lexbuf + 1 && clp[0] == '#') {
            /* comment line or cpp line sync */
            /* sscanf yields EOF, not 0, when only white space follows '#' */
            if (sscanf (clp + 1, "%d", &line_number) != 1)
                line_number++;
            clp[0] = 0;
            continue;
        }

        line_number++;
        if ((len > 1) && (clp[len - 2] == '\\')) {
            /* drop the backslash-newline pair and keep reading */
            len -= 2;
            clp[len] = '\0';
        }
        /*
         * Decide now whether the logical line is finished: clp may dangle
         * once the buffer has been reallocated, and with len == 0 there is
         * no character of this line to look at.
         */
        complete = (len > 0) && (clp[len - 1] == '\n');
        curlen += len;
        if (lexsiz - curlen - 1 < 1024) {
            /* keep lexbuf valid if the buffer cannot be grown */
            if (!(grown = realloc (lexbuf, lexsiz * 2))) {
                fprintf (stderr, "cannot grow buffer\n");
                return NULL;
            }
            lexbuf = grown;
            lexsiz *= 2;
        }
        if (complete)
            break;
    }

    /*
     * Only report success when this call added something; otherwise a
     * caller that appends to existing text (curlen > 0) could never see
     * the end of input.
     */
    if (curlen > startlen)
        return lexbuf + 1;
    return NULL;
}

/*
 * Run the NUL-terminated string p through the TFA_* machinery pulled in
 * from triefa.c, one character at a time, and return the value produced
 * by TFA_Definition.  yylex treats a result of -1 as "not a reserved
 * word".
 */
static int lex_token (char *p) {
    int token;

    TFA_Init ();
    for (; *p != '\0'; ) {
        TFA_Advance (*p++);
    }
    token = TFA_Definition ();
    return token;
}

/*
 * Print the text around the current scan position on stderr.
 *
 * The output is "context: ", the buffer contents from lexbuf up to the
 * white space preceding the last word before lexptr, that word between
 * " >>> " and " <<< " markers, and finally the unread rest of the line.
 * Does nothing when no input line is active (lexptr is NULL).
 */
static void error_context (void) {
    char *p, *q;

    if (lexptr == NULL)
        return;
    fprintf (stderr, "context: ");
    /*
     * back up to the white space before the offending word; the cast
     * keeps isspace defined for bytes with the high bit set
     */
    for (p = lexptr - 1; (p > lexbuf) && !isspace ((unsigned char) *p); p--)
        ;
    for (q = lexbuf; q < p; q++)
        fputc (*q, stderr);
    fputs (" >>> ", stderr);
    for (; q < lexptr; q++)
        fputc (*q, stderr);
    fputs (" <<< ", stderr);
    fputs (lexptr, stderr);
}

/* i wrote this and it still frightens me */
/*
 * skip white space and comments in p
 *
 * Handles both comment forms: a "//" comment runs to the end of the
 * string, a block comment may stay open across calls.  The open state
 * is kept in in_comment, and comment_start records line_number at the
 * point where the block comment began.  Returns a pointer to the first
 * character that is neither white space nor part of a comment; this is
 * the terminating NUL when the rest of the string is skippable.  A '/'
 * that does not start a comment is returned as is.
 */
static char *skip_wscomments (char *p) {
    do {
        /* casts keep isspace defined for bytes with the high bit set */
        while (isspace ((unsigned char) *p))
            p++;
        /* inside a block comment: look for the closing star-slash */
        while (in_comment && p[0]) {
            while (p[0] && (p[0] != '*'))
                p++;
            if (p[0]) {
                if (p[1] == '/') {
                    in_comment = FALSE;
                    p += 2;
                    break;
                } else
                    p++;
            }
        }
        if (p[0] == '/') {
            if (p[1] == '/')
                while (*p)
                    p++;    /* skip to end of line */
            else {
                if (p[1] == '*') {
                    in_comment = TRUE;
                    comment_start = line_number;
                    p += 2;
                    continue;
                }
                else
                    break;    /* return a slash */
            }
        } else {
            if (!isspace ((unsigned char) *p))
                break;
        }
    } while (p[0]);
    return p;
}

/*
 * scan an unquoted token and return the position after its terminator
 *
 * Copies the longest run of letters, digits, underscores and non-ASCII
 * bytes starting at p into lexbuf and NUL-terminates it.  The returned
 * pointer addresses the first character that was not copied, so it
 * equals p when nothing matched.  Returns NULL only for a NULL p.
 */
static char *scan_token (char *p) {
    char *q;

    q = lexbuf;
    /* null-pointer guard, not an end-of-string test: "" gives an empty token */
    if (p == NULL)
        return NULL;
    /*
     * Non-ASCII bytes are accepted here, so the <ctype.h> arguments must
     * be converted to unsigned char first; a negative plain char would
     * be undefined behaviour.
     */
    while (
        isalnum ((unsigned char) *p) || (*p == '_') ||
        (!isascii ((unsigned char) *p))
    )
        *q++ = *p++;
    *q = '\0';
    return p;
}

/*
 * Scan a number of the form [-]digits[.digits] or [-].digits at p.
 *
 * The scanned characters are copied into lexbuf (NUL-terminated) whether
 * or not they form a number.  Returns the position after the number, or
 * NULL when no digit was seen.  A number immediately followed by a
 * letter is reported through yyerror but still returned.
 */
static char *scan_num (char *p) {
    char *q, *z;
    int saw_rp = FALSE;
    int saw_digit = FALSE;

    z = p;
    q = lexbuf;
    if (*z == '-')
        *q++ = *z++;
    if (*z == '.') {
        saw_rp = TRUE;
        *q++ = *z++;
    }
    /* casts keep the <ctype.h> calls defined for bytes with the high bit set */
    while (isdigit ((unsigned char) *z)) {
        saw_digit = TRUE;
        *q++ = *z++;
    }
    /* at most one radix point is accepted */
    if ((*z == '.') && (saw_rp == FALSE)) {
        saw_rp = TRUE;
        *q++ = *z++;
        while (isdigit ((unsigned char) *z)) {
            saw_digit = TRUE;
            *q++ = *z++;
        }
    }
    *q = '\0';
    if (saw_digit && *z && (isalpha ((unsigned char) *z)))
        yyerror ("badly formed number %s", lexbuf);

    if (saw_digit == FALSE)
        z = NULL;
    return z;
}

/*
 * scan a quoted string and return the position after its terminator
 *
 * p points at the opening quote character, which also serves as the
 * closing delimiter.  The string body is copied into lexbuf without the
 * delimiters: a backslash in front of the delimiter is dropped and the
 * delimiter kept, a pair of backslashes is copied unchanged, and any
 * other backslash is copied like an ordinary character.  If the string
 * is not closed before the end of the buffer, yyerror is called and the
 * returned pointer addresses the terminating NUL.
 */
static char *quoted_string (char *p) {
    char *out;
    char delim;

    delim = *p;
    out = lexbuf;
    for (p++; (*p != '\0') && (*p != delim); ) {
        if (p[0] == '\\') {
            if (p[1] == delim)
                p++;              /* skip the backslash, keep the quote */
            else if (p[1] == '\\')
                *out++ = *p++;    /* first backslash of an escaped pair */
        }
        *out++ = *p++;
    }
    if (*p != '\0')
        p++;                      /* step over the closing quote */
    else
        yyerror ("string ran past end of line", "");
    *out = 0;
    return p;
}

/*
 * scan a html string and return the position after its terminator
 *
 * p points at the opening '<'.  Text up to the next '<' or '>' is
 * skipped; from there on characters are copied into lexbuf until the
 * angle brackets balance.  The copy in lexbuf starts with '>' and ends
 * with '<' in place of the outer delimiters.  Further lines are pulled
 * in through lex_gets when the label spans several lines, which also
 * updates lexptr and may move lexbuf.
 *
 * Returns the position after the closing '>', or NULL (after a warning
 * on stderr) when lex_gets cannot supply more input.
 */
static char *html_string (char *p) {
    char *q;
    int bal, poff, qoff;

    p++;
    bal = 1;
    q = lexbuf;
    *q++ = '>';
    while (*p && *p != '<' && *p != '>')
        p++;
    for (;;) {
        while (*p) {
            if (*p == '<')
                bal++;
            else if (*p == '>') {
                bal--;
                if (bal == 0) {
                    *q++ = '<';
                    *q = 0;
                    return p + 1;
                }
            }
            *q++ = *p++;
        }
        /*
         * lex_gets may reallocate lexbuf.  Remember both positions as
         * offsets and rebuild the pointers afterwards, rather than doing
         * arithmetic with a pointer into the old, possibly freed buffer.
         */
        poff = (int) (p - lexbuf);
        qoff = (int) (q - lexbuf);
        if ((lexptr = lex_gets (poff - 1)) == NULL) {
            fprintf (
                stderr,
                "warning, unterminated html label in line %d\n",
                line_number
            );
            /* leave a properly terminated partial label for the caller */
            if (lexbuf != NULL)
                lexbuf[qoff] = '\0';
            return NULL;
        }
        p = lexbuf + poff;
        q = lexbuf + qoff;
    }
    return NULL;    /* not reached */
}