mal_parser.c | searchcode

/monetdb5/mal/mal_parser.c

Large files files are truncated, but you can click here to view the full file

/*
 * The contents of this file are subject to the MonetDB Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.monetdb.org/Legal/MonetDBLicense
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the MonetDB Database System.
 *
 * The Initial Developer of the Original Code is CWI.
 * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
 * Copyright August 2008-2014 MonetDB B.V.
 * All Rights Reserved.
 */

/* (c): M. L. Kersten
*/

#include "monetdb_config.h"
#include "mal_parser.h"
#include "mal_resolve.h"
#include "mal_linker.h"
#include "mal_atom.h"       /* for malAtomDefinition(), malAtomProperty() */
#include "mal_interpreter.h"    /* for showErrors() */
#include "mal_instruction.h"    /* for pushEndInstruction(), findVariableLength() */
#include "mal_namespace.h"
#include "mal_utils.h"
#include "mal_builder.h"
#include "mal_type.h"
#include "mal_private.h"

#define FATALINPUT MAXERRORS+1
#define NL(X) ((X)=='\n' || (X)=='\r')

static str idCopy(Client cntxt, int len);
static str strCopy(Client cntxt, int len);


/* Before a line is parsed we check for a request to echo it.
 * This command should be executed at the beginning of a parse
 * request and each time we encounter EOL.
*/
void echoInput(Client cntxt)
{
	if (cntxt->listing == 1) {
		char *c = CURRENT(cntxt);
		mnstr_printf(cntxt->fdout,"#");
		while (*c && !NL(*c)) {
			mnstr_printf(cntxt->fdout, "%c", *c++);
		}

		mnstr_printf(cntxt->fdout, "\n");
	}
}

static inline void
skipSpace(Client cntxt)
{
	char *s= &currChar(cntxt);
	for (;;) {
		switch (*s++) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			nextChar(cntxt);
			break;
		default:
			return;
		}
	}
}

static inline void
advance(Client cntxt, int length)
{
	cntxt->yycur += length;
	skipSpace(cntxt);
}

/*
 * The most recurring situation is to recognize identifiers.
 * This process is split into a few steps to simplify subsequent
 * construction and comparison.
 * IdLength searches the end of an identifier without changing
 * the cursor into the input pool.
 * IdCopy subsequently prepares a GDK string for inclusion in the
 * instruction datastructures.
*/

short opCharacter[256];
short idCharacter[256];
short idCharacter2[256];

void
initParser(void)
{
	int i;

	for (i = 0; i < 256; i++) {
		idCharacter2[i] = isalpha(i) || isdigit(i);
		idCharacter[i] = isalpha(i);
	}
	for (i = 0; i < 256; i++)
		switch (i) {
		case '-': case '!': case '\\': case '$': case '%':
		case '^': case '*': case '~': case '+': case '&':
		case '|': case '<': case '>': case '=': case '/':
		case ':':
			opCharacter[i] = 1;
		}

	idCharacter[TMPMARKER] = 1;
	idCharacter2[TMPMARKER] = 1;
	idCharacter2['@'] = 1;
}

#undef isdigit
#define isdigit(X)  ((X) >= '0' && (X) <= '9')

int
idLength(Client cntxt)
{
	str s, t;
	skipSpace(cntxt);
	s = CURRENT(cntxt);
	t = s;

	if (!idCharacter[(int) (*s)])
		return 0;
	/* avoid a clash with old temporaries */
	if (s[0] == TMPMARKER)
		s[0] = REFMARKER;
	/* prepare escape of temporary names */
	s++;
	while (idCharacter2[(int) (*s)])
		s++;
	return (int) (s - t);
}

/* Simple type identifiers can not be marked with a type variable. */
static int
typeidLength(Client cntxt)
{
	int l;
	str s;
	skipSpace(cntxt);
	s = CURRENT(cntxt);

	if (!idCharacter[(int) (*s)])
		return 0;
	l = 1;
	s++;
	idCharacter[TMPMARKER] = 0;
	while (idCharacter[(int) (*s)] || isdigit(*s)) {
		s++;
		l++;
	}
	idCharacter[TMPMARKER] = 1;
	return l;
}

static str
idCopy(Client cntxt, int length)
{
	str s = GDKmalloc(length + 1);
	if (s == NULL)
		return NULL;
	memcpy(s, CURRENT(cntxt), (size_t) length);
	s[length] = 0;
	/* avoid a clash with old temporaries */
	if (s[0] == TMPMARKER)
		s[0] = REFMARKER;
	advance(cntxt, length);
	return s;
}

int
MALkeyword(Client cntxt, str kw, int length)
{
	skipSpace(cntxt);
	if (MALlookahead(cntxt, kw, length)) {
		advance(cntxt, length);
		return 1;
	}
	return 0;
}

int
MALlookahead(Client cntxt, str kw, int length)
{
	int i;

	skipSpace(cntxt);
	/* avoid double test or use lowercase only. */
	if (currChar(cntxt) == *kw &&
		strncmp(CURRENT(cntxt), kw, length) == 0 &&
		!idCharacter[(int) (CURRENT(cntxt)[length])] &&
		!isdigit((int) (CURRENT(cntxt)[length]))) {
		return 1;
	}
	/* check for captialized versions */
	for (i = 0; i < length; i++)
		if (tolower(CURRENT(cntxt)[i]) != kw[i])
			return 0;
	if (!idCharacter[(int) (CURRENT(cntxt)[length])] &&
		!isdigit((int) (CURRENT(cntxt)[length]))) {
		return 1;
	}
	return 0;
}
/*
 * Keyphrase testing is limited to a few characters only
 * (check manually). To speed this up we use a pipelined and inline macros.
*/

static inline int
keyphrase1(Client cntxt, str kw)
{
	skipSpace(cntxt);
	if (currChar(cntxt) == *kw) {
		advance(cntxt, 1);
		return 1;
	}
	return 0;
}

static inline int
keyphrase2(Client cntxt, str kw)
{
	skipSpace(cntxt);
	if (CURRENT(cntxt)[0] == kw[0] && CURRENT(cntxt)[1] == kw[1]) {
		advance(cntxt, 2);
		return 1;
	}
	return 0;
}

/*
 * A similar approach is used for string literals.
 * Beware, string lengths returned include the
 * brackets and escapes. They are eaten away in strCopy.
 * We should provide the C-method to split strings and
 * concatenate them upon retrieval[todo]
*/
int
stringLength(Client cntxt)
{
	int l = 0;
	int quote = 0;
	str s;
	skipSpace(cntxt);
	s = CURRENT(cntxt);

	if (*s != '"')
		return 0;
	s++;
	while (*s) {
		if (quote) {
			l++;
			s++;
			quote = 0;
		} else {
			if (*s == '"')
				break;
			quote = *s == '\\';
			l++;
			s++;
		}
	}
	return l + 2;
}

/*Beware, the idcmp routine uses a short cast to compare multiple bytes
 * at once. This may cause problems when the net string length is zero.
*/

str
strCopy(Client cntxt, int length)
{
	str s;
	int i;

	i = length < 4 ? 4 : length;
	s = GDKzalloc(i);
	if (s == 0)
		return NULL;
	memcpy(s, CURRENT(cntxt) + 1, (size_t) (length - 2));
	mal_unquote(s);
	return s;
}

/*
 * And a similar approach is used for operator names.
 * A lookup table is considered, because it generally is
 * faster then a non-dense switch.
*/
int
operatorLength(Client cntxt)
{
	int l = 0;
	str s;

	skipSpace(cntxt);
	for (s = CURRENT(cntxt); *s; s++) {
		if (opCharacter[(int) (*s)])
			l++;
		else
			return l;
	}
	return l;
}

str
operatorCopy(Client cntxt, int length)
{
	return idCopy(cntxt,length);
}

/*
 * For error reporting we may have to find the start of the previous line,
 * which, ofcourse, is easy given the client buffer.
 * The remaining functions are self-explanatory.
*/
str
lastline(Client cntxt)
{
	str s = CURRENT(cntxt);
	if (NL(*s))
		s++;
	while (s && s > cntxt->fdin->buf && !NL(*s))
		s--;
	if (NL(*s))
		s++;
	return s;
}

ssize_t
position(Client cntxt)
{
	str s = lastline(cntxt);
	return (ssize_t) (CURRENT(cntxt) - s);
}

/*
 * Upon encountering an error we skip to the nearest semicolon,
 * or comment terminated by a new line
 */
static inline void
skipToEnd(Client cntxt)
{
	char c;
	while ((c = *CURRENT(cntxt)) != ';' && c && c != '\n')
		nextChar(cntxt);
	if (c && c != '\n')
		nextChar(cntxt);
}

/*
 * The lexical analyser for constants is a little more complex.
 * Aside from getting its length, we need an indication of its type.
 * The constant structure is initialized for later use.
 */
int
cstToken(Client cntxt, ValPtr cst)
{
	int i = 0;
	int hex = 0;
	str s = CURRENT(cntxt);

	cst->vtype = TYPE_int;
	cst->val.lval = 0;
	switch (*s) {
	case '{': case '[':
		/* JSON Literal */
		break;
	case '"':
		cst->vtype = TYPE_str;
		i = stringLength(cntxt);
		cst->val.sval = strCopy(cntxt, i);
		if (cst->val.sval)
			cst->len = (int) strlen(cst->val.sval);
		else
			cst->len = 0;
		return i;
	case '-':
		i++;
		s++;
	case '0':
		if ((s[1] == 'x' || s[1] == 'X')) {
			/* deal with hex */
			hex = TRUE;
			i += 2;
			s += 2;
		}
	case '1': case '2': case '3': case '4': case '5':
	case '6': case '7': case '8': case '9':
		if (hex)
			while (isdigit((int) *s) || isalpha((int) *s)) {
				if (!((tolower(*s) >= 'a' && tolower(*s) <= 'f')
					  || isdigit((int) *s)))
					break;
				i++;
				s++;
			}
		else
			while (isdigit((int) *s)) {
				i++;
				s++;
			}

		if (hex)
			goto handleInts;
	case '.':
		if (*s == '.' && isdigit(*(s + 1))) {
			i++;
			s++;
			while (isdigit(*s)) {
				i++;
				s++;
			}
			cst->vtype = TYPE_flt;
		}
		if (*s == 'e' || *s == 'E') {
			i++;
			s++;
			if (*s == '-' || *s == '+') {
				i++;
				s++;
			}
			cst->vtype = TYPE_dbl;
			while (isdigit(*s)) {
				i++;
				s++;
			}
		}
		if (cst->vtype == TYPE_flt) {
			int len = i;
			float *pval = 0;
			fltFromStr(CURRENT(cntxt), &len, &pval);
			if (pval) {
				cst->val.fval = *pval;
				GDKfree(pval);
			} else
				cst->val.fval = 0;
		}
		if (cst->vtype == TYPE_dbl) {
			int len = i;
			double *pval = 0;
			dblFromStr(CURRENT(cntxt), &len, &pval);
			if (pval) {
				cst->val.dval = *pval;
				GDKfree(pval);
			} else
				cst->val.dval = 0;

			if (cst->val.dval > FLT_MIN && cst->val.dval <= FLT_MAX) {
				cst->vtype = TYPE_flt;
				cst->val.fval = (flt) cst->val.dval;
			}
		}
		if (*s == '@') {
			int len = (int) sizeof(lng);
			lng l, *pval = &l;
			lngFromStr(CURRENT(cntxt), &len, &pval);
			if (l == lng_nil || l < 0
#if SIZEOF_OID < SIZEOF_LNG
				|| l > GDK_oid_max
#endif
				)
				cst->val.oval = oid_nil;
			else
				cst->val.oval = (oid) l;
			cst->vtype = TYPE_oid;
			i++;
			s++;
			while (isdigit(*s)) {
				i++;
				s++;
			}
			return i;
		}
		if (*s == 'L') {
			if (cst->vtype == TYPE_int)
				cst->vtype = TYPE_lng;
			if (cst->vtype == TYPE_flt)
				cst->vtype = TYPE_dbl;
			i++;
			s++;
			if (*s == 'L') {
				i++;
				s++;
			}
			if (cst->vtype == TYPE_dbl) {
				int len = i;
				double *pval = 0;
				dblFromStr(CURRENT(cntxt), &len, &pval);
				if (pval) {
					cst->val.dval = *pval;
					GDKfree(pval);
				} else
					cst->val.dval = 0;
			} else {
				int len = i;
				lng *pval = 0;
				lngFromStr(CURRENT(cntxt), &len, &pval);
				if (pval) {
					cst->val.lval = *pval;
					GDKfree(pval);
				} else
					cst->val.lval = 0;
			}
			return i;
		}
#ifdef HAVE_HGE
		if (*s == 'H' && cst->vtype == TYPE_int) {
			int len = i;
			hge *pval = 0;
			cst->vtype = TYPE_hge;
			i++;
			s++;
			if (*s == 'H') {
				i++;
				s++;
			}
			hgeFromStr(CURRENT(cntxt), &len, &pval);
			if (pval) {
				cst->val.hval = *pval;
				GDKfree(pval);
			} else
				cst->val.hval = 0;
			return i;
		}
#endif
handleInts:
		assert(cst->vtype != TYPE_lng);
#ifdef HAVE_HGE
		assert(cst->vtype != TYPE_hge);
#endif
		if (cst->vtype == TYPE_int) {
#ifdef HAVE_HGE
			int len = (int) sizeof(hge);
			hge l, *pval = &l;
			if (hgeFromStr(CURRENT(cntxt), &len, &pval) <= 0 || l == hge_nil)
				l = hge_nil;

			if ((hge) GDK_int_min < l && l <= (hge) GDK_int_max) {
				cst->vtype = TYPE_int;
				cst->val.ival = (int) l;
			} else
			if ((hge) GDK_lng_min < l && l <= (hge) GDK_lng_max) {
				cst->vtype = TYPE_lng;
				cst->val.lval = (lng) l;
			} else {
				cst->vtype = TYPE_hge;
				cst->val.hval = l;
				if (l == hge_nil)
					showException(cntxt->fdout, SYNTAX, "convertConstant", "integer parse error");
			}
#else
			int len = (int) sizeof(lng);
			lng l, *pval = &l;
			if (lngFromStr(CURRENT(cntxt), &len, &pval) <= 0 || l == lng_nil)
				l = lng_nil;

			if ((lng) GDK_int_min < l && l <= (lng) GDK_int_max) {
				cst->vtype = TYPE_int;
				cst->val.ival = (int) l;
			} else {
				cst->vtype = TYPE_lng;
				cst->val.lval = l;
				if (l == lng_nil)
					showException(cntxt->fdout, SYNTAX, "convertConstant", "integer parse error");
			}
#endif
		}
		return i;

	case 'f':
		if (strncmp(s, "false", 5) == 0 && !isalnum((int) *(s + 5)) &&
			*(s + 5) != '_') {
			cst->vtype = TYPE_bit;
			cst->val.btval = 0;
			cst->len = 1;
			return 5;
		}
		return 0;
	case 't':
		if (strncmp(s, "true", 4) == 0 && !isalnum((int) *(s + 4)) &&
			*(s + 4) != '_') {
			cst->vtype = TYPE_bit;
			cst->val.btval = 1;
			cst->len = 1;
			return 4;
		}
		return 0;
	case 'n':
		if (strncmp(s, "nil", 3) == 0 && !isalnum((int) *(s + 3)) &&
			*(s + 3) != '_') {
			cst->vtype = TYPE_void;
			cst->len = 0;
			cst->val.oval = oid_nil;
			return 3;
		}
	}
	return 0;
}

#define cstCopy(C,I)  idCopy(C,I)

/* Type qualifier
 * Types are recognized as identifiers preceded by a colon.
 * They may be extended with a property list
 * and 'any' types can be marked with an alias.
 * The type qualifier parser returns the encoded type
 * as a short 32-bit integer.
 * The syntax structure is
 *
 * @multitable @columnfractions 0.15 0.8
 * @item typeQualifier
 * @tab : typeName propQualifier
 * @item typeName
 * @tab : scalarType | collectionType | anyType
 * @item scalarType
 * @tab :  ':' @sc{ identifier}
 * @item collectionType
 * @tab :  ':' @sc{ bat} ['[' col ',' col ']']
 * @item anyType
 * @tab :  ':' @sc{ any} [typeAlias]
 * @item col
 * @tab :  scalarType | anyType
 * @item propQualifier
 * @tab :  ['@{' property '@}']
 * @end multitable
 *
 * The type ANY matches any type specifier.
 * Appending it with an alias turns it into a type variable.
 * The type alias is \$DIGIT (1-9) and can be used to relate types
 * by type equality.
 * The type variable are defined within the context of a function
 * scope.
 * Additional information, such as a repetition factor,
 * encoding tables, or type dependency should be modelled as properties.
 */
static int
typeAlias(Client cntxt, int tpe)
{
	int t;

	if (tpe != TYPE_any)
		return -1;
	if (currChar(cntxt) == TMPMARKER) {
		nextChar(cntxt);
		t = currChar(cntxt) - '0';
		if (t <= 0 || t > 9)
			parseError(cntxt, "[1-9] expected\n");
		else
			nextChar(cntxt);
		return t;
	}
	return -1;
}

/*
 * The simple type analysis currently assumes a proper type identifier.
 * We should change getMALtype to return a failure instead.
 */
static int
simpleTypeId(Client cntxt)
{
	int l, tpe;

	nextChar(cntxt);
	l = typeidLength(cntxt);
	if (l == 0) {
		parseError(cntxt, "Type identifier expected\n");
		cntxt->yycur--; /* keep it */
		return -1;
	}
	tpe = getTypeIndex(CURRENT(cntxt), l, -1);
	if (tpe < 0) {
		parseError(cntxt, "Type identifier expected\n");
		cntxt->yycur -= l; /* keep it */
		return TYPE_void;
	}
	advance(cntxt, l);
	return tpe;
}

static int
parseTypeId(Client cntxt, int defaultType)
{
	int i = TYPE_any, ht, tt, kh = 0, kt = 0;
	char *s = CURRENT(cntxt);

	if (strncmp(s, ":bat[", 5) == 0) {
		/* parse :bat[:type,:type] */
		advance(cntxt, 5);
		if (currChar(cntxt) == ':') {
			ht = simpleTypeId(cntxt);
			kh = typeAlias(cntxt, ht);
		} else
			ht = TYPE_any;

		if (currChar(cntxt) != ',') {
			parseError(cntxt, "',' expected\n");
			return i;
		}
		nextChar(cntxt); /* skip , */
		skipSpace(cntxt);
		if (currChar(cntxt) == ':') {
			tt = simpleTypeId(cntxt);
			kt = typeAlias(cntxt, tt);
		} else
			tt = TYPE_any;

		i = newBatType(ht, tt);
		if (kh > 0)
			setAnyHeadIndex(i, kh);
		if (kt > 0)
			setAnyColumnIndex(i, kt);

		if (currChar(cntxt) != ']')
			parseError(cntxt, "']' expected\n");
		nextChar(cntxt); /* skip ']' */
		skipSpace(cntxt);
		return i;
	}
	if ((strncmp(s, ":bat", 4) == 0 ||
		 strncmp(s, ":BAT", 4) == 0) && !idCharacter[(int) s[4]]) {
		advance(cntxt, 4);
		return TYPE_bat;
	}
	if (strncmp(s, ":col", 4) == 0 && !idCharacter[(int) s[4]]) {
		/* parse default for :col[:any] */
		advance(cntxt, 4);
		return newColumnType(TYPE_any);
	}
	if (currChar(cntxt) == ':') {
		ht = simpleTypeId(cntxt);
		kt = typeAlias(cntxt, ht);
		if (kt > 0)
			setAnyColumnIndex(ht, kt);
		return ht;
	}
	parseError(cntxt, "<type identifier> expected\n");
	return defaultType;
}

static inline int
typeElm(Client cntxt, int def)
{
	if (currChar(cntxt) != ':')
		return def;  /* no type qualifier */
	return parseTypeId(cntxt, def);
}

 /*
 * The Parser
 * The client is responsible to collect the
 * input for parsing in a single string before calling the parser.
 * Once the input is available parsing runs in a critial section for
 * a single client thread.
 *
 * The parser uses the rigid structure of the language to speedup
 * analysis. In particular, each input line is translated into
 * a MAL instruction record as quickly as possible. Its context is
 * manipulated during the parsing process, by keeping the  curPrg,
 * curBlk, and curInstr variables.
 *
 * The language statements of the parser are gradually introduced, with
 * the overall integration framework last.
 * The convention is to return a zero when an error has been
 * reported or when the structure can not be recognized.
 * Furthermore, we assume that blancs have been skipped before entering
 * recognition of a new token.
 *
 * Module statement.
 * The module and import commands have immediate effect.
 * The module statement switches the location for symbol table update
 * to a specific named area. The effect is that all definitions may become
 * globally known (?) and symbol table should be temporarilly locked
 * for updates by concurrent users.
 *
 * @multitable @columnfractions 0.15 0.8
 * @item moduleStmt
 * @tab :  @sc{atom} ident [':'ident]
 * @item
 * @tab | @sc{module} ident
 * @end multitable
 *
 * An atom statement does not introduce a new module.
*/
static void
helpInfo(Client cntxt, str *help)
{
	int l;

	if (MALkeyword(cntxt, "comment", 7)) {
		skipSpace(cntxt);
		if ((l = stringLength(cntxt))) {
			*help = strCopy(cntxt, l);
			if (*help)
				advance(cntxt, l - 1);
		} else {
			parseError(cntxt, "<string> expected\n");
		}
	} else if (currChar(cntxt) != ';')
		parseError(cntxt, "';' expected\n");
	skipToEnd(cntxt);
}

static void
propList(Client cntxt, int arg)
{
	MalBlkPtr curBlk = cntxt->curprg->def;
	int l;
	malType tpe;

	if (keyphrase1(cntxt, "{")) {
		do {
			str pname, opname;
			int i, lo;
			ValRecord cst;

			l = idLength(cntxt);
			if (l == 0)
				break;
			pname = idCopy(cntxt, l);
			/* localize value , simplified version */
			lo = operatorLength(cntxt);
			if (lo > 0)
				opname = operatorCopy(cntxt, lo);
			else
				opname = GDKstrdup("");
			if ((i = cstToken(cntxt, &cst))) {
				advance(cntxt, i);
				if (currChar(cntxt) == ':') {
					tpe = simpleTypeId(cntxt);
					if (tpe >=0 && tpe != TYPE_any){
						str msg =convertConstant(tpe, &cst);
						if( msg) GDKfree(msg);
					} else
						parseError(cntxt, "simple type expected\n");
				}
				varSetProperty(curBlk, arg, pname, opname, &cst);
			} else {
				varSetProperty(curBlk, arg, pname, NULL, NULL);
			}
			GDKfree(pname);
			GDKfree(opname);
		} while (keyphrase1(cntxt, ","));
		if (!keyphrase1(cntxt, "}"))
			/* return (MalBlkPtr) */
			parseError(cntxt, "'}' expected\n");
	}
}

static InstrPtr
binding(Client cntxt, MalBlkPtr curBlk, InstrPtr curInstr, int flag)
{
	int l, varid = -1;
	malType type;

	l = idLength(cntxt);
	if (l > 0) {
		varid = findVariableLength(curBlk, CURRENT(cntxt), l);
		if (varid < 0) {
			varid = newVariable(curBlk, idCopy(cntxt, l), TYPE_any);
			if ( varid < 0)
				return curInstr;
			type = typeElm(cntxt, TYPE_any);
			if (isPolymorphic(type))
				setPolymorphic(curInstr, type, TRUE);
			setVarType(curBlk, varid, type);
			propList(cntxt, varid);
		} else if (flag) {
			parseError(cntxt, "Argument defined twice\n");
			typeElm(cntxt, getVarType(curBlk, varid));
			propList(cntxt, varid);
		} else {
			advance(cntxt, l);
			type = typeElm(cntxt, getVarType(curBlk, varid));
			if( type != getVarType(curBlk,varid))
				parseError(cntxt, "Incompatible argument type\n");
			if (isPolymorphic(type))
				setPolymorphic(curInstr, type, TRUE);
			setVarType(curBlk, varid, type);
			propList(cntxt, varid);
		}
	} else if (currChar(cntxt) == ':') {
		type = typeElm(cntxt, TYPE_any);
		varid = newTmpVariable(curBlk, type);
		if ( varid < 0)
			return curInstr;
		if ( isPolymorphic(type))
			setPolymorphic(curInstr, type, TRUE);
		setVarType(curBlk, varid, type);
		propList(cntxt, varid);
	} else {
		parseError(cntxt, "argument expected\n");
		return curInstr;
	}
	if( varid >=0)
		curInstr = pushArgument(curBlk, curInstr, varid);
	return curInstr;
}

/*
 * At this stage the LHS part has been parsed and the destination
 * variables have been set. Next step is to parse the expression,
 * which starts with an operand.
 * This code is used in both positions of the expression
 */
static int
term(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr, int ret)
{
	int i, idx, flag, free = 1;
	ValRecord cst;
	str v = NULL;
	int cstidx = -1;
	malType tpe = TYPE_any;

	if ((i = cstToken(cntxt, &cst))) {
		cstidx = fndConstant(curBlk, &cst, MAL_VAR_WINDOW);
		if (cstidx >= 0) {
			advance(cntxt, i);
			if (currChar(cntxt) == ':') {
				tpe = typeElm(cntxt, getVarType(curBlk, cstidx));
				if (tpe < 0)
					return 3;
				if(tpe == getVarType(curBlk,cstidx) ){
					setVarUDFtype(curBlk, cstidx);
				} else {
					cstidx = defConstant(curBlk, tpe, &cst);
					setPolymorphic(*curInstr, tpe, FALSE);
					setVarUDFtype(curBlk, cstidx);
					free = 0;
				}
			} else if (cst.vtype != getVarType(curBlk, cstidx)) {
				cstidx = defConstant(curBlk, cst.vtype, &cst);
				setPolymorphic(*curInstr, cst.vtype, FALSE);
				free = 0;
			}
			/* protect against leaks coming from constant reuse */
			if (free && ATOMextern(cst.vtype) && cst.val.pval)
				VALclear(&cst);
			*curInstr = pushArgument(curBlk, *curInstr, cstidx);
			return ret;
		} else {
			/* add a new constant */
			advance(cntxt, i);
			flag = currChar(cntxt) == ':';
			tpe = typeElm(cntxt, cst.vtype);
			if (tpe < 0)
				return 3;
			cstidx = defConstant(curBlk, tpe, &cst);
			setPolymorphic(*curInstr, tpe, FALSE);
			if (flag)
				setVarUDFtype(curBlk, cstidx);
			*curInstr = pushArgument(curBlk, *curInstr, cstidx);
			return ret;
		}
	} else if ((i = idLength(cntxt))) {
		if ((idx = findVariableLength(curBlk, CURRENT(cntxt), i)) == -1) {
			v = idCopy(cntxt, i);
			idx = newVariable(curBlk, v, TYPE_any);
			if( idx <0)
				return 0;
			propList(cntxt, idx);
		} else {
			advance(cntxt, i);
			propList(cntxt, idx);
		}
		*curInstr = pushArgument(curBlk, *curInstr, idx);
	} else if (currChar(cntxt) == ':') {
		tpe = typeElm(cntxt, TYPE_any);
		if (tpe < 0)
			return 3;
		setPolymorphic(*curInstr, tpe, FALSE);
		idx = newTypeVariable(curBlk, tpe);
		propList(cntxt, idx);
		*curInstr = pushArgument(curBlk, *curInstr, idx);
		return ret;
	}
	return 0;
}

static str
parseAtom(Client cntxt)
{
	str modnme = 0;
	int l, tpe;
	char *nxt = CURRENT(cntxt);

	if ((l = idLength(cntxt)) <= 0)
		return parseError(cntxt, "atom name expected\n");

	/* parse: ATOM id:type */
	modnme = putName(nxt, l);
	advance(cntxt, l);
	if (currChar(cntxt) != ':')
		tpe = TYPE_void;  /* no type qualifier */
	else
		tpe = parseTypeId(cntxt, TYPE_int);
	malAtomDefinition(cntxt->fdout, modnme, tpe);
	cntxt->nspace = fixModule(cntxt->nspace, modnme);
	cntxt->nspace->isAtomModule = TRUE;
	skipSpace(cntxt);
	helpInfo(cntxt, &cntxt->nspace->help);
	return "";
}

static str
parseLibrary(Client cntxt)
{
	str libnme = 0, s;
	int l;
	char *nxt;
	ValRecord cst;

	nxt = CURRENT(cntxt);
	if ((l = idLength(cntxt)) <= 0) {
		if ((l = cstToken(cntxt, &cst)) && cst.vtype == TYPE_str) {
			advance(cntxt, l);
			libnme = putName(nxt + 1, l - 2);
		} else
			return parseError(cntxt, "<library name> or <library path> expected\n");
	} else
		libnme = putName(nxt, l);
	s = loadLibrary(libnme, TRUE);
	(void) putName(nxt, l);
	if (s){
		mnstr_printf(cntxt->fdout, "#WARNING: %s\n", s);
		GDKfree(s);
	}
	advance(cntxt, l);
	return "";
}

/*
 * It might be handy to clone a module.
 * It gets a copy of all functions known at the point of creation.
 */
static str parseModule(Client cntxt)
{
	str modnme = 0;
	int l;
	char *nxt;

	nxt = CURRENT(cntxt);
	if ((l = idLength(cntxt)) <= 0)
		return parseError(cntxt, "<module path> expected\n");
	modnme = putName(nxt, l);
	advance(cntxt, l);
	cntxt->nspace = fixModule(cntxt->nspace, modnme);
	skipSpace(cntxt);
	helpInfo(cntxt, &cntxt->nspace->help);
	return "";
}

/*
 * Include statement
 * An include statement is immediately taken into effect. This
 * calls for temporary switching the input for a particular client.
 * The administration for this is handled by malInclude.
 * No listing is produced, because module sources are assumed to
 * be debugged upfront already.
 * @multitable @columnfractions 0.15 0.8
 * @item includeStmt
 * @tab : @sc{include} identifier
 * @item
 * @tab | @sc{include} string_literal
 * @end multitable
 *
 * Include files should be handled in line with parsing. This way we
 * are ensured that any possible signature definition will be known
 * afterwards. The effect is that errors in the include sequence are
 * marked as warnings.
 */
static str
parseInclude(Client cntxt)
{
	str modnme = 0, s;
	int x;
	char *nxt;

	if (!MALkeyword(cntxt, "include", 7))
		return 0;
	nxt = CURRENT(cntxt);

	if ((x = idLength(cntxt)) > 0) {
		modnme = putName(nxt, x);
		advance(cntxt, x);
	} else if ((x = stringLength(cntxt)) > 0) {
		modnme = putName(nxt + 1, x - 1);
		advance(cntxt, x);
	} else
		return parseError(cntxt, "<module name> expected\n");

	if (currChar(cntxt) != ';') {
		parseError(cntxt, "';' expected\n");
		skipToEnd(cntxt);
		return 0;
	}
	skipToEnd(cntxt);

	s = loadLibrary(modnme, FALSE);
	if (s) {
		parseError(cntxt, s);
		GDKfree(s);
		return 0;
	}
	if ((s = malInclude(cntxt, modnme, 0))) {
		parseError(cntxt, s);
		GDKfree(s);
		return 0;
	}
	return "";
}

/*
 * Definition
 * The definition statements share a lot in common, which calls for factoring
 * out the code in a few text macros. Upon encountering a definition, we
 * initialize a MAL instruction container. We should also check for
 * non-terminated definitions.
 *
 * @multitable @columnfractions 0.15 0.8
 * @item program
 * @tab : ( definition [helpinfo] | statement ) *
 *
 * @item definition
 * @tab : moduleStmt | includeStmt
 * @item
 * @tab  |  commandStmt | patternStmt
 * @item
 * @tab  | functionStmt | factoryStmt
 * @item
 * @tab  | includeStmt
 * @end multitable
 *
 * Beware, a function signature f(a1..an):(b1..bn) is parsed in such a way that
 * the symbol table and stackframe contains the sequence
 * f,a1..an,b1..bn. This slightly complicates the implementation
 * of the return statement.
 *
 * Note, the function name could be mod.fcn, which calls for storing
 * the function definition in a particular module instead of the current one.
 */
static MalBlkPtr
fcnHeader(Client cntxt, int kind)
{
	int l;
	malType tpe;
	str fnme, modnme = NULL;
	char ch;
	Symbol curPrg;
	MalBlkPtr curBlk = 0;
	InstrPtr curInstr;

	l = operatorLength(cntxt);
	if (l == 0)
		l = idLength(cntxt);
	if (l == 0) {
		parseError(cntxt, "<identifier> | <operator> expected\n");
		skipToEnd(cntxt);
		return 0;
	}

	fnme = putName(((char *) CURRENT(cntxt)), l);
	advance(cntxt, l);

	if (currChar(cntxt) == '.') {
		nextChar(cntxt); /* skip '.' */
		modnme = fnme;
		l = operatorLength(cntxt);
		if (l == 0)
			l = idLength(cntxt);
		if (l == 0){
			parseError(cntxt, "<identifier> | <operator> expected\n");
			skipToEnd(cntxt);
			return 0;
		}
		fnme = putName(((char *) CURRENT(cntxt)), l);
		advance(cntxt, l);
	}

	/* temporary suspend capturing statements in main block */
	if (cntxt->backup){
		parseError(cntxt, "mal_parser: unexpected recursion\n");
		skipToEnd(cntxt);
		return 0;
	}
	cntxt->backup = cntxt->curprg;
	cntxt->curprg = newFunction(putName("user", 4), fnme, kind);
	curPrg = cntxt->curprg;
	curBlk = curPrg->def;
	curBlk->flowfixed = 0;
	curBlk->typefixed = 0;
	curInstr = getInstrPtr(curBlk, 0);
	propList(cntxt, curInstr->argv[0]);

	if (currChar(cntxt) != '('){
		parseError(cntxt, "function header '(' expected\n");
		skipToEnd(cntxt);
		return curBlk;
	}
	advance(cntxt, 1);

	setModuleId(curInstr, modnme ? putName(modnme, strlen(modnme)) :
			putName(cntxt->nspace->name, strlen(cntxt->nspace->name)));

	if (isModuleDefined(cntxt->nspace, getModuleId(curInstr)) == FALSE) {
		if (cntxt->backup) {
			cntxt->curprg = cntxt->backup;
			cntxt->backup = 0;
		}
		parseError(cntxt, "<module> not defined\n");
		return curBlk;
	}

	/* get calling parameters */
	ch = currChar(cntxt);
	while (ch != ')' && ch && !NL(ch)) {
		curInstr = binding(cntxt, curBlk, curInstr, 1);
		/* the last argument may be variable length */
		if (MALkeyword(cntxt, "...", 3)) {
			curInstr->varargs |= VARARGS;
			setPolymorphic(curInstr, TYPE_any, TRUE);
			break;
		}
		if ((ch = currChar(cntxt)) != ',') {
			if (ch == ')')
				break;
			if (cntxt->backup) {
				cntxt->curprg = cntxt->backup;
				cntxt->backup = 0;
			}
			parseError(cntxt, "',' expected\n");
			skipToEnd(cntxt);
			return curBlk;
		} else
			nextChar(cntxt);  /* skip ',' */
		skipSpace(cntxt);
		ch = currChar(cntxt);
	}
	if (currChar(cntxt) != ')') {
		freeInstruction(curInstr);
		parseError(cntxt, "')' expected\n");
		skipToEnd(cntxt);
		return curBlk;
	}
	advance(cntxt, 1); /* skip ')' */
/*
   The return type is either a single type or multiple return type structure.
   We simply keep track of the number of arguments added and
   during the final phase reshuffle the return values to the beginning (?)
 */
	if (currChar(cntxt) == ':') {
		tpe = typeElm(cntxt, TYPE_void);
		setPolymorphic(curInstr, tpe, TRUE);
		setVarType(curBlk, curInstr->argv[0], tpe);
		/* we may be confronted by a variable target type list */
		if (MALkeyword(cntxt, "...", 3)) {
			curInstr->varargs |= VARRETS;
			setPolymorphic(curInstr, TYPE_any, TRUE);
		}

		propList(cntxt, curInstr->argv[0]);
	} else if (keyphrase1(cntxt, "(")) { /* deal with compound return */
		int retc = curInstr->argc, i1, i2 = 0;
		int max;
		short *newarg;
		/* parse multi-target result */
		/* skipSpace(cntxt);*/
		ch = currChar(cntxt);
		while (ch != ')' && ch && !NL(ch)) {
			curInstr = binding(cntxt, curBlk, curInstr, 0);
			/* we may be confronted by a variable target type list */
			if (MALkeyword(cntxt, "...", 3)) {
				curInstr->varargs |= VARRETS;
				setPolymorphic(curInstr, TYPE_any, TRUE);
			}
			if ((ch = currChar(cntxt)) != ',') {
				if (ch == ')')
					break;
				parseError(cntxt, "',' expected\n");
				skipToEnd(cntxt);
				return curBlk;
			} else {
				nextChar(cntxt); /* skip ',' */
			}
			skipSpace(cntxt);
			ch = currChar(cntxt);
		}
		/* re-arrange the parameters, results first*/
		max = curInstr->maxarg;
		newarg = (short *) GDKmalloc(max * sizeof(curInstr->argv[0]));
		if (newarg == NULL){
			parseError(cntxt, MAL_MALLOC_FAIL);
			skipToEnd(cntxt);
			return curBlk;
		}
		for (i1 = retc; i1 < curInstr->argc; i1++)
			newarg[i2++] = curInstr->argv[i1];
		curInstr->retc = curInstr->argc - retc;
		for (i1 = 1; i1 < retc; i1++)
			newarg[i2++] = curInstr->argv[i1];
		curInstr->argc = i2;
		for (; i2 < max; i2++)
			newarg[i2] = 0;
		for (i1 = 0; i1 < max; i1++)
			curInstr->argv[i1] = newarg[i1];
		GDKfree(newarg);
		if (currChar(cntxt) != ')') {
			freeInstruction(curInstr);
			if (cntxt->backup) {
				cntxt->curprg = cntxt->backup;
				cntxt->backup = 0;
			}
			parseError(cntxt, "')' expected\n");
			skipToEnd(cntxt);
			return curBlk;
		}
		nextChar(cntxt); /* skip ')' */
	} else { /* default */
		setVarType(curBlk, 0, TYPE_void);
	}
	if (curInstr != getInstrPtr(curBlk, 0)) {
		freeInstruction(getInstrPtr(curBlk, 0));
		getInstrPtr(curBlk, 0) = curInstr;
	}
	return curBlk;
}

/*
 * The common theme in definitions is to parse the argument list.
 * @multitable @columnfractions .15 .8
 * @item header
 * @tab :  hdrName '(' params ')' result
 * @item result
 * @tab :  paramType | '(' params ')'
 * @item params
 * @tab :  binding [',' binding]*
 * @item binding
 * @tab :  identifier typeName [propQualifier]
 * @end multitable
*/
/*
 * MAL variables are statically/dynamically typed.
 * Function and procedure arguments should always be typed.
 * We do not permit polymorphism at this interpretation level.
 *
 * The type information maintained simplifies analysis of
 * column results. If the underlying type is not known, then it
 * may be replaced once during execution of a MAL instruction
 * typically as a side-effect of calling a bat-returning function.
 *
 * We should also allow for variable argument lists. However, they
 * may only appear in patterns, because the calling context is necessary
 * to resolve the actual argument list. Furthermore, we can not
 * assume much about its type structure.
 * Variables are extended with a property list to enable
 * optimizers to make decisions. (See the section on properties).
*/
/*
 * @-
 */
/*
 * Each procedure definition opens a structure in which the
 * information is gathered. The enclosing module is statically
 * determined.
 *
 * A proc-header translates into a single MAL instruction.
 * Since no recursive rules are included, we can stick to
 * using a single global variable to accummulate the
 * properties.
 *
 * The external commands and rules come with a short
 * help information.
*/

static MalBlkPtr
parseCommandPattern(Client cntxt, int kind)
{
	MalBlkPtr curBlk = 0;
	Symbol curPrg = 0;
	InstrPtr curInstr = 0;
	str modnme = NULL;

	curBlk = fcnHeader(cntxt, kind);
	if (curBlk == NULL) 
		return curBlk;
	getInstrPtr(curBlk, 0)->token = kind;
	curPrg = cntxt->curprg;
	curPrg->kind = kind;
	curInstr = getInstrPtr(curBlk, 0);

	modnme = getModuleId(getInstrPtr(curBlk, 0));
	if (modnme && isModuleDefined(cntxt->nspace, modnme) == FALSE)
		return (MalBlkPtr) parseError(cntxt, "<module> not defined\n");
	modnme = modnme ? modnme : cntxt->nspace->name;

	if (isModuleDefined(cntxt->nspace, putName(modnme, strlen(modnme))))
		insertSymbol(findModule(cntxt->nspace, putName(modnme, strlen(modnme))), curPrg);
	else
		return (MalBlkPtr) parseError(cntxt, "<module> not found\n");
	trimMalBlk(curBlk);
	chkProgram(cntxt->fdout, cntxt->nspace, curBlk);
	if (cntxt->backup) {
		cntxt->curprg = cntxt->backup;
		cntxt->backup = 0;
	}
/*
 * Short-cut function calls
 * Most functions are (dynamically) linked with the kernel as
 * commands or pattern definitions.  This enables for fast execution.
 *
 * In addition we allow functions to be bound to both
 * a linked C-function and a MAL specification block.
 * It the function address is not available, the interpreter
 * will use the MAL block instead.
 * This scheme is intended for just-in-time compilation.
 *
 * [note, command and patterns do not have a MAL block]
 */
	if (MALkeyword(cntxt, "address", 7)) {
		str nme;
		int i;
		i = idLength(cntxt);
		if (i == 0) {
			parseError(cntxt, "<identifier> expected\n");
			return 0;
		}
		cntxt->blkmode = 0;
		nme = idCopy(cntxt, i);
		if (getModuleId(curInstr))
			setModuleId(curInstr, NULL);
		setModuleScope(curInstr,
				findModule(cntxt->nspace, putName(modnme, strlen(modnme))));
		curInstr->fcn = getAddress(cntxt->fdout, cntxt->srcFile, modnme, nme, TRUE);
		curBlk->binding = nme;
		if (cntxt->nspace->isAtomModule) {
			if (curInstr->fcn == NULL) {
				parseError(cntxt, "<address> not found\n");
				return 0;
			}
			malAtomProperty(curBlk, curInstr);
		}
		skipSpace(cntxt);
	} else {
		parseError(cntxt, "'address' expected\n");
		return 0;
	}
	helpInfo(cntxt, &curBlk->help);
	showErrors(cntxt);
	if (curBlk && cntxt->listing > 1)
		printFunction(cntxt->fdout, curBlk, 0, cntxt->listing);
	return curBlk;
}

static MalBlkPtr
parseFunction(Client cntxt, int kind)
{
	MalBlkPtr curBlk = 0;

	curBlk = fcnHeader(cntxt, kind);
	if (curBlk == NULL) 
		return curBlk;
	if (MALkeyword(cntxt, "address", 7)) {
		str nme;
		int i;
		InstrPtr curInstr = getInstrPtr(curBlk, 0);
		i = idLength(cntxt);
		if (i == 0) {
			parseError(cntxt, "<identifier> expected\n");
			return 0;
		}
		nme = idCopy(cntxt, i);
		curInstr->fcn = getAddress(cntxt->fdout, cntxt->srcFile, cntxt->nspace->name, nme, TRUE);
		GDKfree(nme);
		if (curInstr->fcn == NULL) {
			parseError(cntxt, "<address> not found\n");
			return 0;
		}
		skipSpace(cntxt);
	}
	/* block is terminated at the END statement */
	helpInfo(cntxt, &curBlk->help);
	return curBlk;
}

/*
 * Functions and  factories end with a labeled end-statement.
 * The routine below checks for misalignment of the closing statements.
 * Any instruction parsed after the function block is considered an error.
 */
static int
parseEnd(Client cntxt)
{
	MalBlkPtr curBlk = 0;
	Symbol curPrg = 0;
	int l, showit = 0;

	if (MALkeyword(cntxt, "end", 3)) {
		curPrg = cntxt->curprg;
		curBlk = curPrg->def;
		l = idLength(cntxt);
		if (l == 0)
			l = operatorLength(cntxt);

		if (strncmp(CURRENT(cntxt), getModuleId(getSignature(curPrg)), l) == 0) {
			advance(cntxt, l);
			skipSpace(cntxt);
			if (currChar(cntxt) == '.')
				nextChar(cntxt);
			skipSpace(cntxt);
			l = idLength(cntxt);
			if (l == 0)
				l = operatorLength(cntxt);
		}
		/* parse fcn */
		if ((l == (int) strlen(curPrg->name) &&
			 strncmp(CURRENT(cntxt), curPrg->name, l) == 0) || l == 0) {} else {
			parseError(cntxt, "non matching end label\n");
		}
		pushEndInstruction(curBlk);
		insertSymbol(cntxt->nspace, cntxt->curprg);
		trimMalBlk(cntxt->curprg->def);
		cntxt->blkmode = 0;
		curBlk->typefixed = 0;
		chkProgram(cntxt->fdout, cntxt->nspace, cntxt->curprg->def);
		if (cntxt->backup) {
			cntxt->curprg = cntxt->backup;
			cntxt->backup = 0;
		}
		showit = TRUE;
		skipToEnd(cntxt);
		if (showit && cntxt->listing > 1)
			printFunction(cntxt->fdout, curBlk, 0, cntxt->listing);
		showErrors(cntxt);
		return 1;
	}
	return 0;
}
/*
 * Most instructions are simple assignments, possibly
 * modified with a barrier/catch tag.
 * @multitable @columnfractions .15 .8
 * @item statement
 * @tab :  tag varlist [':=' expr ] propQualifier
 * @item tag
 * @tab :  @sc{ return} | @sc{ barrier} | @sc{ catch}
 * @item
 * @tab |  @sc{ leave} | @sc{ redo} |
 * @item expr
 * @tab :  fcncall
 * @item
 * @tab : [factor  operator] factor
 * @item varlist
 * @tab :  variable
 * @item
 * @tab |  @verb{'{' variable {',' variable}* ')' }
 * @item variable
 * @tab :  identifier propQualifier
 * @item factor
 * @tab :  constant | var
 * @end multitable
 *
 * The basic types are also predefined as a variable.
 * This makes it easier to communicate types to MAL patterns.
 */

#define GETvariable	\
	if ((varid = findVariableLength(curBlk, CURRENT(cntxt), l)) == -1) { \
		arg = idCopy(cntxt, l);	 \
		varid = newVariable(curBlk, arg, TYPE_any);	\
		assert(varid >=  0);\
	} else \
		advance(cntxt, l);

/* The parameter of parseArguments is the return value of the enclosing function. */
static int
parseArguments(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr)
{
	while (currChar(cntxt) != ')') {
		switch (term(cntxt, curBlk, curInstr, 0)) {
		case 0:
			break;
		case 2: return 2;
		case 3: return 3;
		default:
			parseError(cntxt, "<factor> expected\n");
			pushInstruction(curBlk, *curInstr);
			skipToEnd(cntxt);
			return 1;
		}
		if (currChar(cntxt) == ',')
			advance(cntxt, 1);
		else if (currChar(cntxt) != ')') {
			parseError(cntxt, "',' expected\n");
			cntxt->yycur--; /* keep it */
			break;
		}
	}
	if (currChar(cntxt) == ')')
		advance(cntxt, 1);
	return 0;
}

static void
parseAssign(Client cntxt, int cntrl)
{
	InstrPtr curInstr;
	MalBlkPtr curBlk;
	Symbol curPrg;
	int i = 0, l, type = TYPE_any, varid = -1;
	str arg = 0;
	ValRecord cst;

	curPrg = cntxt->curprg;
	curBlk = curPrg->def;
	curInstr = newInstruction(curBlk, cntrl ? cntrl : ASSIGNsymbol);

	/* start the parsing by recognition of the lhs of an assignment */
	if (currChar(cntxt) == '(') {
		/* parsing multi-assignment */
		advance(cntxt, 1);
		curInstr->argc = 0; /*reset to handle pushArg correctly !! */
		curInstr->retc = 0;
		while (currChar(cntxt) != ')' && currChar(cntxt)) {
			l = idLength(cntxt);
			i = cstToken(cntxt, &cst);
			if (l == 0 || i) {
				parseError(cntxt, "<identifier> expected\n");
				skipToEnd(cntxt);
				freeInstruction(curInstr);
				return;
			}
			GETvariable;
			if (currChar(cntxt) == ':') {
				setVarUDFtype(curBlk, varid);
				type = typeElm(cntxt, getVarType(curBlk, varid));
				if (type < 0)
					goto part3;
				setPolymorphic(curInstr, type, FALSE);
				setVarType(curBlk, varid, type);
			}
			propList(cntxt, varid);
			curInstr = pushArgument(curBlk, curInstr, varid);
			curInstr->retc++;
			if (currChar(cntxt) == ')')
				break;
			if (currChar(cntxt) == ',')
				keyphrase1(cntxt, ",");
		}
		advance(cntxt, 1); /* skip ')' */
		if (curInstr->retc == 0) {
			/* add dummy variable */
			curInstr = pushArgument(curBlk, curInstr, newTmpVariable(curBlk, TYPE_any));
			curInstr->retc++;
		}
	} else {
		/* are we dealing with a simple assignment? */
		l = idLength(cntxt);
		i = cstToken(cntxt, &cst);
		if (l == 0 || i) {
			/* we haven't seen a target variable */
			/* flow of control statements may end here. */
			/* shouldn't allow for nameless controls todo*/
			if (i && cst.vtype == TYPE_str)
				GDKfree(cst.val.sval);
			if (cntrl == LEAVEsymbol || cntrl == REDOsymbol ||
				cntrl == RETURNsymbol || cntrl == EXITsymbol) {
				curInstr->argv[0] = getBarrierEnvelop(curBlk);
				pushInstruction(curBlk, curInstr);
				if (currChar(cntxt) != ';')
					parseError(cntxt, "<identifier> expected\n");
				skipToEnd(cntxt);
				return;
			}
			getArg(curInstr, 0) = newTmpVariable(curBlk, TYPE_any);
			pushInstruction(curBlk, curInstr);
			parseError(cntxt, "<identifier> expected\n");
			skipToEnd(cntxt);
			return;
		}
		/* Check if we are dealing with module.fcn call*/
		if (CURRENT(cntxt)[l] == '.' || CURRENT(cntxt)[l] == '(') {
			curInstr->argv[0] = newTmpVariable(curBlk, TYPE_any);
			goto FCNcallparse;
		}

		/* Get target variable details*/
		GETvariable;
		if (!(currChar(cntxt) == ':' && CURRENT(cntxt)[1] == '=')) {
			curInstr->argv[0] = varid;
			if (currChar(cntxt) == ':') {
				setVarUDFtype(curBlk, varid);
				type = typeElm(cntxt, getVarType(curBlk, varid));
				if (type < 0)
					goto part3;
				setPolymorphic(curInstr, type, FALSE);
				setVarType(curBlk, varid, type);
			}
		}
		propList(cntxt, varid);
		curInstr->argv[0] = varid;
	}
	/* look for assignment operator */
	if (!keyphrase2(cntxt, ":=")) {
		/* no assignment !! a control variable is allowed */
		/* for the case RETURN X, we normalize it to include the function arguments */
		if (cntrl == RETURNsymbol || cntrl == YIELDsymbol) {
			int e;
			InstrPtr sig = getInstrPtr(curBlk,0);
			curInstr->retc = 0;
			for (e = 0; e < sig->retc; e++) 
				curInstr = pushReturn(curBlk, curInstr, getArg(sig, e));
		}

		goto part3;
	}
	if (currChar(cntxt) == '(') {
		/* parse multi assignment */
		advance(cntxt, 1);
		switch (parseArguments(cntxt, curBlk, &curInstr)) {
		case 2: goto part2;
		default:
		case 3: goto part3;
		}
		/* unreachable */
	}
/*
 * We have so far the LHS part of an assignment. The remainder is
 * either a simple term expression, a multi assignent, or the start
 * of a function call.
 */
FCNcallparse:
	if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '(') {
		/*  parseError(cntxt,"<module> expected\n");*/
		setModuleId(curInstr, getModuleId(getInstrPtr(curBlk, 0)));
		i = l;
		goto FCNcallparse2;
	} else if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '.') {
		/* continue with parseing a function/operator call */
		arg = putName(CURRENT(cntxt), l);
		advance(cntxt, l + 1); /* skip '.' too */
		setModuleId(curInstr, arg);
		i = idLength(cntxt);
		if (i == 0)
			i = operatorLength(cntxt);
FCNcallparse2:
		if (i) {
			setFunctionId(curInstr, putName(((char *) CURRENT(cntxt)), i));
			advance(cntxt, i);
		} else {
			parseError(cntxt, "<functionname> expected\n");
			skipToEnd(cntxt);
			pushInstruction(curBlk, curInstr);
			return;
		}
		skipSpace(cntxt);
		if (currChar(cntxt) != '(') {
			parseError(cntxt, "'(' expected\n");
			skipToEnd(cntxt);
			pushInstruction(curBlk, curInstr);
			return;
		}
		advance(cntxt, 1);
		switch (parseArguments(cntxt, curBlk, &curInstr)) {
		case 2: goto part2;
		default:
		case 3: goto part3;
		}
		/* unreachable */
	}
	/* Handle the ordinary assignments and expressions */
	switch (term(cntxt, curBlk, &curInstr, 2)) {
	case 2: goto part2;
	case 3: goto part3;
	}
part2:  /* consume <operator><term> part of expression */
	if ((i = operatorLength(cntxt))) {
		/* simple arithmetic operator expression */
		setFunctionId(curInstr, putName(((char *) CURRENT(cntxt)), i));
		advance(cntxt, i);
		curInstr->modname = putName("calc", 4);
		if ((l = idLength(cntxt)) && !(l == 3 && strncmp(CURRENT(cntxt), "nil", 3) == 0)) {
			GETvariable;
			curInstr = pushArgument(curBlk, curInstr, varid);
			goto part3;
		}
		switch (term(cntxt, curBlk, &curInstr, 3)) {
		case 2: goto part2;
		case 3: goto part3;
		}
		parseError(cntxt, "<term> expected\n");
		skipToEnd(cntxt);
		pushInstruction(curBlk, curInstr);
		return;
	} else {
		skipSpace(cntxt);
		if (currChar(cntxt) == '(')
			parseError(cntxt, "module name missing\n");
		else if (currChar(cntxt) != ';' && currChar(cntxt) != '#') {
			parseError(cntxt, "operator expected\n");
			skipToEnd(cntxt);
		}
		pushInstruction(curBlk, curInstr);
		return;
	}
part3:
	skipSpace(cntxt);
	if (currChar(cntxt) != ';')
		parseError(cntxt, "';' expected\n");
	skipToEnd(cntxt);
	pushInstruction(curBlk, curInstr);
	if (cntrl == RETURNsymbol && !(curInstr->token == ASSIGNsymbol || getModuleId(curInstr) != 0))
		parseError(cntxt, "return assignment expected\n");
}

/*
 * A piggybacked way to ship tuples is to mimick
 * a copycommand through the language interface.
 * All tuples are stored in a temporary file
 * whose name can be picked up immediately afterwards.
 * The code should be made thread safe, by storing
 * the file name in the client record.
 */
static void
parseTuple(Client cntxt)
{
	InstrPtr curInstr;
	MalBlkPtr curBlk;
	Symbol curPrg;
	FILE *f = 0;
	char buf[MAXPATHLEN];
	int c;

	sprintf(buf, "input%d", (int) (cntxt - mal_clients));
	f = fopen(buf, "w");
	if (f == NULL)
		showException(cntxt->fdout, SYNTAX, "parser", "temp file not writeable");
	while ((c = currChar(cntxt)) == '[' && c) {
		do {
			if (f && c)
				fputc(c, f);
			nextChar(cntxt);
		} while ((c = currChar(cntxt)) != '\n' && c);
		if (f && c && fputc(c, f) == EOF) {
			(void) fclose(f);
			if (unlink(buf) < 0)
				showException(cntxt->fdout, SYNTAX, "parser", "out of temp file space and unable to unlink");
			showException(cntxt->fdout, SYNTAX, "parser", "out of temp file space");
			return;
		}
		nextChar(cntxt);
	}
	curPrg = cntxt->curprg;
	curBlk = curPrg->def;
	curInstr = newAssignment(curBlk);
	setModuleId(curInstr, putName("io", 2));
	setFunctionId(curInstr, putName("data", 4));
	c = newVariable(curBlk, GDKstrdup("tuples"), TYPE_any);
	getArg(curInstr, 0) = c;
	pushStr(curBlk, curInstr, buf);
	if (f != NULL)
		(void) fclose(f);
}

#define BRKONERR if (curPrg->def->errors >= MAXERRORS) \
		return curPrg->def->errors;
int
parseMAL(Client cntxt, Symbol curPrg)
{
	int cntrl = 0;
	/*Symbol curPrg= cntxt->curprg;*/
	char c;

	echoInput(cntxt);
	/* here the work takes place */
	while ((c = currChar(cntxt))) {
		switch (c) {
		case '\n': case '\r': case '\f':
			nextChar(cntxt);
			echoInput(cntxt);
			continue;
		case ';': case '\t': case ' ':
			nextChar(cntxt);
			continue;
		case '[':
			/* parse a tuple value. stops at end of line */
			parseTuple(cntxt);
			break;
		case '#':
		{ /* keep the full line comments unless it is a MX #line */
			char start[256], *e = start, c;
			MalBlkPtr curBlk = cntxt->curprg->def;
			InstrPtr curInstr;

			*e = 0;
			nextChar(cntxt);
			while ((c = currChar(cntxt))) {
				if (e < start + 256 - 1)
					*e++ = c;
				nextChar(cntxt);
				if (c == '\n' || c == '\r') {
					*e = 0;
					if (e > start)
						e--;
					/* prevChar(cntxt);*/
					break;
				}
			}
			if (e > start)
				*e = 0;
			if (e > start && curBlk->stop > 0 &&
				strncmp("line ", start, 5) != 0) {
				ValRecord cst;
/*
 * Comment lines produced by Mx, i.e. #line directives are not saved.
 * The deadcode optimizer removes all comment information.
 */
				curInstr = newInstruction(curBlk, REMsymbol);
				cst.vtype = TYPE_str;
				cst.len = (int) strlen(start);
				cst.val.sval = GDKstrdup(start);
				getArg(curInstr, 0) = defConstant(curBlk, TYPE_str, &cst);
				clrVarConstant(curBlk, getArg(curInstr, 0));
				setVarDisabled(curBlk, getArg(curInstr, 0));
				pushInstruction(curBlk, curInstr);
			}
			echoInput(cntxt);
		}
			continue;
		case 'A': case 'a':
			if (MALkeyword(cntxt, "atom", 4) &&
				parseAtom(cntxt) != 0)
				break;
			goto allLeft;
		case 'b': case 'B':
			if (MALkeyword(cntxt, "barrier", 7)) {
				cntxt->blkmode++;
				cntrl = BARRIERsymbol;
			}
			goto allLeft;
		case 'C': case 'c':
			if (MALkeyword(cntxt, "command", 7)) {
				parseCommandPattern(cntxt, COMMANDsymbol);
				continue;
			}
			if (MALkeyword(cntxt, "catch", 5)) {
				cntxt->blkmode++;
				cntrl = CATCHsymbol;
				goto allLeft;
			}
			goto allLeft;
		case 'E': case 'e':
			if (MALkeyword(cntxt, "exit", 4)) {
				if (cntxt->blkmode > 0)
					cntxt->blkmode--;
				cntrl = EXITsymbol;
			} else if (parseEnd(cntxt)) {
				break;
			}
			goto allLeft;
		case 'F': case 'f':
			if (MALkeyword(cntxt, "function", 8)) {
				cntxt->blkmode++;
				if (parseFunction(cntxt, FUNCTIONsymbol))
					break;
			} else if (MALkeyword(cntxt, "factory", 7)) {
				cntxt->blkmode++;
				parseFunction(cntxt, FACTORYsymbol);
				break;
			}
			goto allLeft;
		case 'H': case 'h':
			if (MALkeyword(cntxt, "handler", 5)) {
				skipToEnd(cntxt);
				cntxt->blkmode++;
				break;
			}
		case 'i': if (parseInclude(cntxt))
				continue;
			goto allLeft;
		case 'L': case 'l':
			if (MALkeyword(cntxt, "library", 7)) {
				parseLibrary(cntxt);
				continue;
			}
			if (MALkeyword(cntxt, "leave", 5))
				cntrl = LEAVEsymbol;
			goto allLeft;
		case 'M': case 'm':
			if (MALkeyword(cntxt, "module", 6) &&
				parseModule(cntxt) != 0)
				break;
			goto allLeft;
		case 'P': case 'p':
			if (MALkeyword(cntxt, "pattern", 7)) {
				parseCommandPatter…
Large files files are truncated, but you can click here to view the full file