PageRenderTime 521ms CodeModel.GetById 151ms app.highlight 99ms RepoModel.GetById 115ms app.codeStats 0ms

/Parser/parser.c

http://unladen-swallow.googlecode.com/
C | 436 lines | 315 code | 54 blank | 67 comment | 98 complexity | 1da01b7fe4e60020e3d010c25c46a177 MD5 | raw file
  1
  2/* Parser implementation */
  3
  4/* For a description, see the comments at end of this file */
  5
  6/* XXX To do: error recovery */
  7
  8#include "Python.h"
  9#include "pgenheaders.h"
 10#include "token.h"
 11#include "grammar.h"
 12#include "node.h"
 13#include "parser.h"
 14#include "errcode.h"
 15
 16
 17#ifdef Py_DEBUG
 18extern int Py_DebugFlag;
 19#define D(x) if (!Py_DebugFlag); else x
 20#else
 21#define D(x)
 22#endif
 23
 24
 25/* STACK DATA TYPE */
 26
 27static void s_reset(stack *);
 28
 29static void
 30s_reset(stack *s)
 31{
 32	s->s_top = &s->s_base[MAXSTACK];
 33}
 34
 35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
 36
 37static int
 38s_push(register stack *s, dfa *d, node *parent)
 39{
 40	register stackentry *top;
 41	if (s->s_top == s->s_base) {
 42		fprintf(stderr, "s_push: parser stack overflow\n");
 43		return E_NOMEM;
 44	}
 45	top = --s->s_top;
 46	top->s_dfa = d;
 47	top->s_parent = parent;
 48	top->s_state = 0;
 49	return 0;
 50}
 51
 52#ifdef Py_DEBUG
 53
 54static void
 55s_pop(register stack *s)
 56{
 57	if (s_empty(s))
 58		Py_FatalError("s_pop: parser stack underflow -- FATAL");
 59	s->s_top++;
 60}
 61
 62#else /* !Py_DEBUG */
 63
 64#define s_pop(s) (s)->s_top++
 65
 66#endif
 67
 68
 69/* PARSER CREATION */
 70
 71parser_state *
 72PyParser_New(grammar *g, int start)
 73{
 74	parser_state *ps;
 75	
 76	if (!g->g_accel)
 77		PyGrammar_AddAccelerators(g);
 78	ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
 79	if (ps == NULL)
 80		return NULL;
 81	ps->p_grammar = g;
 82#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 83	ps->p_flags = 0;
 84#endif
 85	ps->p_tree = PyNode_New(start);
 86	if (ps->p_tree == NULL) {
 87		PyMem_FREE(ps);
 88		return NULL;
 89	}
 90	s_reset(&ps->p_stack);
 91	(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
 92	return ps;
 93}
 94
 95void
 96PyParser_Delete(parser_state *ps)
 97{
 98	/* NB If you want to save the parse tree,
 99	   you must set p_tree to NULL before calling delparser! */
100	PyNode_Free(ps->p_tree);
101	PyMem_FREE(ps);
102}
103
104
105/* PARSER STACK OPERATIONS */
106
107static int
108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109{
110	int err;
111	assert(!s_empty(s));
112	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113	if (err)
114		return err;
115	s->s_top->s_state = newstate;
116	return 0;
117}
118
119static int
120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121{
122	int err;
123	register node *n;
124	n = s->s_top->s_parent;
125	assert(!s_empty(s));
126	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127	if (err)
128		return err;
129	s->s_top->s_state = newstate;
130	return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
136static int
137classify(parser_state *ps, int type, char *str)
138{
139	grammar *g = ps->p_grammar;
140	register int n = g->g_ll.ll_nlabels;
141	
142	if (type == NAME) {
143		register char *s = str;
144		register label *l = g->g_ll.ll_label;
145		register int i;
146		for (i = n; i > 0; i--, l++) {
147			if (l->lb_type != NAME || l->lb_str == NULL ||
148			    l->lb_str[0] != s[0] ||
149			    strcmp(l->lb_str, s) != 0)
150				continue;
151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152                        if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153                            s[0] == 'p' && strcmp(s, "print") == 0) { 
154                                break; /* no longer a keyword */
155                        }
156#endif
157			D(printf("It's a keyword\n"));
158			return n - i;
159		}
160	}
161	
162	{
163		register label *l = g->g_ll.ll_label;
164		register int i;
165		for (i = n; i > 0; i--, l++) {
166			if (l->lb_type == type && l->lb_str == NULL) {
167				D(printf("It's a token we know\n"));
168				return n - i;
169			}
170		}
171	}
172	
173	D(printf("Illegal token\n"));
174	return -1;
175}
176
177#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
178static void
179future_hack(parser_state *ps)
180{
181	node *n = ps->p_stack.s_top->s_parent;
182	node *ch, *cch;
183	int i;
184
185	/* from __future__ import ..., must have at least 4 children */
186	n = CHILD(n, 0);
187	if (NCH(n) < 4)
188		return;
189	ch = CHILD(n, 0);
190	if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
191		return;
192	ch = CHILD(n, 1);
193	if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
194	    strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
195		return;
196	ch = CHILD(n, 3);
197	/* ch can be a star, a parenthesis or import_as_names */
198	if (TYPE(ch) == STAR)
199		return;
200	if (TYPE(ch) == LPAR)
201		ch = CHILD(n, 4);
202	
203	for (i = 0; i < NCH(ch); i += 2) {
204		cch = CHILD(ch, i);
205		if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206			char *str_ch = STR(CHILD(cch, 0));
207			if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208				ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209			} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
210				ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
211			} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
212				ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
213			}
214		}
215	}
216}
217#endif /* future keyword */
218
219int
220PyParser_AddToken(register parser_state *ps, register int type, char *str,
221	          int lineno, int col_offset, int *expected_ret)
222{
223	register int ilabel;
224	int err;
225	
226	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
227	
228	/* Find out which label this token is */
229	ilabel = classify(ps, type, str);
230	if (ilabel < 0)
231		return E_SYNTAX;
232	
233	/* Loop until the token is shifted or an error occurred */
234	for (;;) {
235		/* Fetch the current dfa and state */
236		register dfa *d = ps->p_stack.s_top->s_dfa;
237		register state *s = &d->d_state[ps->p_stack.s_top->s_state];
238		
239		D(printf(" DFA '%s', state %d:",
240			d->d_name, ps->p_stack.s_top->s_state));
241		
242		/* Check accelerator */
243		if (s->s_lower <= ilabel && ilabel < s->s_upper) {
244			register int x = s->s_accel[ilabel - s->s_lower];
245			if (x != -1) {
246				if (x & (1<<7)) {
247					/* Push non-terminal */
248					int nt = (x >> 8) + NT_OFFSET;
249					int arrow = x & ((1<<7)-1);
250					dfa *d1 = PyGrammar_FindDFA(
251						ps->p_grammar, nt);
252					if ((err = push(&ps->p_stack, nt, d1,
253						arrow, lineno, col_offset)) > 0) {
254						D(printf(" MemError: push\n"));
255						return err;
256					}
257					D(printf(" Push ...\n"));
258					continue;
259				}
260				
261				/* Shift the token */
262				if ((err = shift(&ps->p_stack, type, str,
263						x, lineno, col_offset)) > 0) {
264					D(printf(" MemError: shift.\n"));
265					return err;
266				}
267				D(printf(" Shift.\n"));
268				/* Pop while we are in an accept-only state */
269				while (s = &d->d_state
270						[ps->p_stack.s_top->s_state],
271					s->s_accept && s->s_narcs == 1) {
272					D(printf("  DFA '%s', state %d: "
273						 "Direct pop.\n",
274						 d->d_name,
275						 ps->p_stack.s_top->s_state));
276#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
277					if (d->d_name[0] == 'i' &&
278					    strcmp(d->d_name,
279						   "import_stmt") == 0)
280						future_hack(ps);
281#endif
282					s_pop(&ps->p_stack);
283					if (s_empty(&ps->p_stack)) {
284						D(printf("  ACCEPT.\n"));
285						return E_DONE;
286					}
287					d = ps->p_stack.s_top->s_dfa;
288				}
289				return E_OK;
290			}
291		}
292		
293		if (s->s_accept) {
294#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
295			if (d->d_name[0] == 'i' &&
296			    strcmp(d->d_name, "import_stmt") == 0)
297				future_hack(ps);
298#endif
299			/* Pop this dfa and try again */
300			s_pop(&ps->p_stack);
301			D(printf(" Pop ...\n"));
302			if (s_empty(&ps->p_stack)) {
303				D(printf(" Error: bottom of stack.\n"));
304				return E_SYNTAX;
305			}
306			continue;
307		}
308		
309		/* Stuck, report syntax error */
310		D(printf(" Error.\n"));
311		if (expected_ret) {
312			if (s->s_lower == s->s_upper - 1) {
313				/* Only one possible expected token */
314				*expected_ret = ps->p_grammar->
315				    g_ll.ll_label[s->s_lower].lb_type;
316			}
317			else 
318		        	*expected_ret = -1;
319		}
320		return E_SYNTAX;
321	}
322}
323
324
325#ifdef Py_DEBUG
326
327/* DEBUG OUTPUT */
328
329void
330dumptree(grammar *g, node *n)
331{
332	int i;
333	
334	if (n == NULL)
335		printf("NIL");
336	else {
337		label l;
338		l.lb_type = TYPE(n);
339		l.lb_str = STR(n);
340		printf("%s", PyGrammar_LabelRepr(&l));
341		if (ISNONTERMINAL(TYPE(n))) {
342			printf("(");
343			for (i = 0; i < NCH(n); i++) {
344				if (i > 0)
345					printf(",");
346				dumptree(g, CHILD(n, i));
347			}
348			printf(")");
349		}
350	}
351}
352
353void
354showtree(grammar *g, node *n)
355{
356	int i;
357	
358	if (n == NULL)
359		return;
360	if (ISNONTERMINAL(TYPE(n))) {
361		for (i = 0; i < NCH(n); i++)
362			showtree(g, CHILD(n, i));
363	}
364	else if (ISTERMINAL(TYPE(n))) {
365		printf("%s", _PyParser_TokenNames[TYPE(n)]);
366		if (TYPE(n) == NUMBER || TYPE(n) == NAME)
367			printf("(%s)", STR(n));
368		printf(" ");
369	}
370	else
371		printf("? ");
372}
373
374void
375printtree(parser_state *ps)
376{
377	if (Py_DebugFlag) {
378		printf("Parse tree:\n");
379		dumptree(ps->p_grammar, ps->p_tree);
380		printf("\n");
381		printf("Tokens:\n");
382		showtree(ps->p_grammar, ps->p_tree);
383		printf("\n");
384	}
385	printf("Listing:\n");
386	PyNode_ListTree(ps->p_tree);
387	printf("\n");
388}
389
390#endif /* Py_DEBUG */
391
392/*
393
394Description
395-----------
396
397The parser's interface is different than usual: the function addtoken()
398must be called for each token in the input.  This makes it possible to
399turn it into an incremental parsing system later.  The parsing system
400constructs a parse tree as it goes.
401
402A parsing rule is represented as a Deterministic Finite-state Automaton
403(DFA).  A node in a DFA represents a state of the parser; an arc represents
404a transition.  Transitions are either labeled with terminal symbols or
405with non-terminals.  When the parser decides to follow an arc labeled
406with a non-terminal, it is invoked recursively with the DFA representing
407the parsing rule for that as its initial state; when that DFA accepts,
408the parser that invoked it continues.  The parse tree constructed by the
409recursively called parser is inserted as a child in the current parse tree.
410
411The DFA's can be constructed automatically from a more conventional
412language description.  An extended LL(1) grammar (ELL(1)) is suitable.
413Certain restrictions make the parser's life easier: rules that can produce
414the empty string should be outlawed (there are other ways to put loops
415or optional parts in the language).  To avoid the need to construct
416FIRST sets, we can require that all but the last alternative of a rule
417(really: arc going out of a DFA's state) must begin with a terminal
418symbol.
419
420As an example, consider this grammar:
421
422expr:	term (OP term)*
423term:	CONSTANT | '(' expr ')'
424
425The DFA corresponding to the rule for expr is:
426
427------->.---term-->.------->
428	^          |
429	|          |
430	\----OP----/
431
432The parse tree generated for the input a+b is:
433
434(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
435
436*/