PageRenderTime 61ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/monetdb5/mal/mal_parser.c

https://bitbucket.org/msaecker/monetdb-opencl
C | 2086 lines | 1975 code | 38 blank | 73 comment | 58 complexity | 2039f3529966f3ab149445149f57a7de MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * The contents of this file are subject to the MonetDB Public License
  3. * Version 1.1 (the "License"); you may not use this file except in
  4. * compliance with the License. You may obtain a copy of the License at
  5. * http://www.monetdb.org/Legal/MonetDBLicense
  6. *
  7. * Software distributed under the License is distributed on an "AS IS"
  8. * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
  9. * License for the specific language governing rights and limitations
  10. * under the License.
  11. *
  12. * The Original Code is the MonetDB Database System.
  13. *
  14. * The Initial Developer of the Original Code is CWI.
  15. * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
  16. * Copyright August 2008-2014 MonetDB B.V.
  17. * All Rights Reserved.
  18. */
  19. /* (c): M. L. Kersten
  20. */
  21. #include "monetdb_config.h"
  22. #include "mal_parser.h"
  23. #include "mal_resolve.h"
  24. #include "mal_linker.h"
  25. #include "mal_atom.h" /* for malAtomDefinition(), malAtomProperty() */
  26. #include "mal_interpreter.h" /* for showErrors() */
  27. #include "mal_instruction.h" /* for pushEndInstruction(), findVariableLength() */
  28. #include "mal_namespace.h"
  29. #include "mal_utils.h"
  30. #include "mal_builder.h"
  31. #include "mal_type.h"
  32. #include "mal_private.h"
  /* Error count one past MAXERRORS; parenthesized so the macro expands
   * safely inside larger expressions (e.g. FATALINPUT * 2). */
  #define FATALINPUT (MAXERRORS+1)
  /* True when X is a line terminator ('\n' or '\r'). X is evaluated twice. */
  #define NL(X) ((X)=='\n' || (X)=='\r')
  35. static str idCopy(Client cntxt, int len);
  36. static str strCopy(Client cntxt, int len);
  37. /* Before a line is parsed we check for a request to echo it.
  38. * This command should be executed at the beginning of a parse
  39. * request and each time we encounter EOL.
  40. */
  41. void echoInput(Client cntxt)
  42. {
  43. if (cntxt->listing == 1) {
  44. char *c = CURRENT(cntxt);
  45. mnstr_printf(cntxt->fdout,"#");
  46. while (*c && !NL(*c)) {
  47. mnstr_printf(cntxt->fdout, "%c", *c++);
  48. }
  49. mnstr_printf(cntxt->fdout, "\n");
  50. }
  51. }
  52. static inline void
  53. skipSpace(Client cntxt)
  54. {
  55. char *s= &currChar(cntxt);
  56. for (;;) {
  57. switch (*s++) {
  58. case ' ':
  59. case '\t':
  60. case '\n':
  61. case '\r':
  62. nextChar(cntxt);
  63. break;
  64. default:
  65. return;
  66. }
  67. }
  68. }
  69. static inline void
  70. advance(Client cntxt, int length)
  71. {
  72. cntxt->yycur += length;
  73. skipSpace(cntxt);
  74. }
  75. /*
  76. * The most recurring situation is to recognize identifiers.
  77. * This process is split into a few steps to simplify subsequent
  78. * construction and comparison.
  79. * IdLength searches the end of an identifier without changing
  80. * the cursor into the input pool.
  81. * IdCopy subsequently prepares a GDK string for inclusion in the
  82. * instruction datastructures.
  83. */
  84. short opCharacter[256];
  85. short idCharacter[256];
  86. short idCharacter2[256];
  87. void
  88. initParser(void)
  89. {
  90. int i;
  91. for (i = 0; i < 256; i++) {
  92. idCharacter2[i] = isalpha(i) || isdigit(i);
  93. idCharacter[i] = isalpha(i);
  94. }
  95. for (i = 0; i < 256; i++)
  96. switch (i) {
  97. case '-': case '!': case '\\': case '$': case '%':
  98. case '^': case '*': case '~': case '+': case '&':
  99. case '|': case '<': case '>': case '=': case '/':
  100. case ':':
  101. opCharacter[i] = 1;
  102. }
  103. idCharacter[TMPMARKER] = 1;
  104. idCharacter2[TMPMARKER] = 1;
  105. idCharacter2['@'] = 1;
  106. }
  /* From here on isdigit() is a plain ASCII range test on '0'..'9' that
   * replaces any earlier definition. Note that X is evaluated twice. */
  #undef isdigit
  #define isdigit(X) ((X) >= '0' && (X) <= '9')
  109. int
  110. idLength(Client cntxt)
  111. {
  112. str s, t;
  113. skipSpace(cntxt);
  114. s = CURRENT(cntxt);
  115. t = s;
  116. if (!idCharacter[(int) (*s)])
  117. return 0;
  118. /* avoid a clash with old temporaries */
  119. if (s[0] == TMPMARKER)
  120. s[0] = REFMARKER;
  121. /* prepare escape of temporary names */
  122. s++;
  123. while (idCharacter2[(int) (*s)])
  124. s++;
  125. return (int) (s - t);
  126. }
  127. /* Simple type identifiers can not be marked with a type variable. */
  128. static int
  129. typeidLength(Client cntxt)
  130. {
  131. int l;
  132. str s;
  133. skipSpace(cntxt);
  134. s = CURRENT(cntxt);
  135. if (!idCharacter[(int) (*s)])
  136. return 0;
  137. l = 1;
  138. s++;
  139. idCharacter[TMPMARKER] = 0;
  140. while (idCharacter[(int) (*s)] || isdigit(*s)) {
  141. s++;
  142. l++;
  143. }
  144. idCharacter[TMPMARKER] = 1;
  145. return l;
  146. }
  147. static str
  148. idCopy(Client cntxt, int length)
  149. {
  150. str s = GDKmalloc(length + 1);
  151. if (s == NULL)
  152. return NULL;
  153. memcpy(s, CURRENT(cntxt), (size_t) length);
  154. s[length] = 0;
  155. /* avoid a clash with old temporaries */
  156. if (s[0] == TMPMARKER)
  157. s[0] = REFMARKER;
  158. advance(cntxt, length);
  159. return s;
  160. }
  161. int
  162. MALkeyword(Client cntxt, str kw, int length)
  163. {
  164. skipSpace(cntxt);
  165. if (MALlookahead(cntxt, kw, length)) {
  166. advance(cntxt, length);
  167. return 1;
  168. }
  169. return 0;
  170. }
  171. int
  172. MALlookahead(Client cntxt, str kw, int length)
  173. {
  174. int i;
  175. skipSpace(cntxt);
  176. /* avoid double test or use lowercase only. */
  177. if (currChar(cntxt) == *kw &&
  178. strncmp(CURRENT(cntxt), kw, length) == 0 &&
  179. !idCharacter[(int) (CURRENT(cntxt)[length])] &&
  180. !isdigit((int) (CURRENT(cntxt)[length]))) {
  181. return 1;
  182. }
  183. /* check for captialized versions */
  184. for (i = 0; i < length; i++)
  185. if (tolower(CURRENT(cntxt)[i]) != kw[i])
  186. return 0;
  187. if (!idCharacter[(int) (CURRENT(cntxt)[length])] &&
  188. !isdigit((int) (CURRENT(cntxt)[length]))) {
  189. return 1;
  190. }
  191. return 0;
  192. }
  193. /*
  194. * Keyphrase testing is limited to a few characters only
  195. * (check manually). To speed this up we use pipelined, inline functions.
  196. */
  197. static inline int
  198. keyphrase1(Client cntxt, str kw)
  199. {
  200. skipSpace(cntxt);
  201. if (currChar(cntxt) == *kw) {
  202. advance(cntxt, 1);
  203. return 1;
  204. }
  205. return 0;
  206. }
  207. static inline int
  208. keyphrase2(Client cntxt, str kw)
  209. {
  210. skipSpace(cntxt);
  211. if (CURRENT(cntxt)[0] == kw[0] && CURRENT(cntxt)[1] == kw[1]) {
  212. advance(cntxt, 2);
  213. return 1;
  214. }
  215. return 0;
  216. }
  217. /*
  218. * A similar approach is used for string literals.
  219. * Beware, string lengths returned include the
  220. * brackets and escapes. They are eaten away in strCopy.
  221. * We should provide the C-method to split strings and
  222. * concatenate them upon retrieval[todo]
  223. */
  224. int
  225. stringLength(Client cntxt)
  226. {
  227. int l = 0;
  228. int quote = 0;
  229. str s;
  230. skipSpace(cntxt);
  231. s = CURRENT(cntxt);
  232. if (*s != '"')
  233. return 0;
  234. s++;
  235. while (*s) {
  236. if (quote) {
  237. l++;
  238. s++;
  239. quote = 0;
  240. } else {
  241. if (*s == '"')
  242. break;
  243. quote = *s == '\\';
  244. l++;
  245. s++;
  246. }
  247. }
  248. return l + 2;
  249. }
  250. /*Beware, the idcmp routine uses a short cast to compare multiple bytes
  251. * at once. This may cause problems when the net string length is zero.
  252. */
  253. str
  254. strCopy(Client cntxt, int length)
  255. {
  256. str s;
  257. int i;
  258. i = length < 4 ? 4 : length;
  259. s = GDKzalloc(i);
  260. if (s == 0)
  261. return NULL;
  262. memcpy(s, CURRENT(cntxt) + 1, (size_t) (length - 2));
  263. mal_unquote(s);
  264. return s;
  265. }
  266. /*
  267. * And a similar approach is used for operator names.
  268. * A lookup table is considered, because it generally is
  269. * faster than a non-dense switch.
  270. */
  271. int
  272. operatorLength(Client cntxt)
  273. {
  274. int l = 0;
  275. str s;
  276. skipSpace(cntxt);
  277. for (s = CURRENT(cntxt); *s; s++) {
  278. if (opCharacter[(int) (*s)])
  279. l++;
  280. else
  281. return l;
  282. }
  283. return l;
  284. }
  285. str
  286. operatorCopy(Client cntxt, int length)
  287. {
  288. return idCopy(cntxt,length);
  289. }
  290. /*
  291. * For error reporting we may have to find the start of the previous line,
  292. * which, of course, is easy given the client buffer.
  293. * The remaining functions are self-explanatory.
  294. */
  295. str
  296. lastline(Client cntxt)
  297. {
  298. str s = CURRENT(cntxt);
  299. if (NL(*s))
  300. s++;
  301. while (s && s > cntxt->fdin->buf && !NL(*s))
  302. s--;
  303. if (NL(*s))
  304. s++;
  305. return s;
  306. }
  307. ssize_t
  308. position(Client cntxt)
  309. {
  310. str s = lastline(cntxt);
  311. return (ssize_t) (CURRENT(cntxt) - s);
  312. }
  313. /*
  314. * Upon encountering an error we skip to the nearest semicolon,
  315. * or comment terminated by a new line
  316. */
  317. static inline void
  318. skipToEnd(Client cntxt)
  319. {
  320. char c;
  321. while ((c = *CURRENT(cntxt)) != ';' && c && c != '\n')
  322. nextChar(cntxt);
  323. if (c && c != '\n')
  324. nextChar(cntxt);
  325. }
  /*
   * The lexical analyser for constants is a little more complex.
   * Aside from getting its length, we need an indication of its type.
   * The constant structure is initialized for later use.
   *
   * cstToken inspects the input at the cursor and, when it starts with a
   * constant, fills *cst (vtype and val, and len for some types) and returns
   * the number of input characters the constant occupies. It returns 0 when
   * no constant is recognized. The function itself does not consume the
   * token; callers in this file call advance() with the returned length.
   *
   * Recognized forms, as implemented below:
   *  - string literals ("..."), copied with strCopy() into cst->val.sval;
   *  - numbers with optional leading '-', 0x/0X hex prefix, fraction,
   *    exponent, and the suffixes '@' (oid), 'L'/'LL' (lng or dbl) and,
   *    when HAVE_HGE is defined, 'H'/'HH' (hge);
   *  - the words false, true (TYPE_bit) and nil (TYPE_void).
   * The switch relies on deliberate case fallthrough for the numeric forms.
   */
  int
  cstToken(Client cntxt, ValPtr cst)
  {
      int i = 0;      /* number of characters recognized so far */
      int hex = 0;    /* set when a 0x/0X prefix was seen */
      str s = CURRENT(cntxt);
      cst->vtype = TYPE_int;
      cst->val.lval = 0;
      switch (*s) {
      case '{': case '[':
          /* JSON Literal */
          break;
      case '"':
          cst->vtype = TYPE_str;
          i = stringLength(cntxt);
          cst->val.sval = strCopy(cntxt, i);
          if (cst->val.sval)
              cst->len = (int) strlen(cst->val.sval);
          else
              cst->len = 0;
          return i;
      case '-':
          i++;
          s++;
          /* fallthrough: the sign is followed by the number proper */
      case '0':
          if ((s[1] == 'x' || s[1] == 'X')) {
              /* deal with hex */
              hex = TRUE;
              i += 2;
              s += 2;
          }
          /* fallthrough */
      case '1': case '2': case '3': case '4': case '5':
      case '6': case '7': case '8': case '9':
          if (hex)
              while (isdigit((int) *s) || isalpha((int) *s)) {
                  /* stop at the first character that is not a hex digit */
                  if (!((tolower(*s) >= 'a' && tolower(*s) <= 'f')
                      || isdigit((int) *s)))
                      break;
                  i++;
                  s++;
              }
          else
              while (isdigit((int) *s)) {
                  i++;
                  s++;
              }
          if (hex)
              goto handleInts;
          /* fallthrough: look for a fraction and/or exponent */
      case '.':
          if (*s == '.' && isdigit(*(s + 1))) {
              i++;
              s++;
              while (isdigit(*s)) {
                  i++;
                  s++;
              }
              cst->vtype = TYPE_flt;
          }
          if (*s == 'e' || *s == 'E') {
              i++;
              s++;
              if (*s == '-' || *s == '+') {
                  i++;
                  s++;
              }
              cst->vtype = TYPE_dbl;
              while (isdigit(*s)) {
                  i++;
                  s++;
              }
          }
          if (cst->vtype == TYPE_flt) {
              int len = i;
              float *pval = 0;
              fltFromStr(CURRENT(cntxt), &len, &pval);
              if (pval) {
                  cst->val.fval = *pval;
                  GDKfree(pval);
              } else
                  cst->val.fval = 0;
          }
          if (cst->vtype == TYPE_dbl) {
              int len = i;
              double *pval = 0;
              dblFromStr(CURRENT(cntxt), &len, &pval);
              if (pval) {
                  cst->val.dval = *pval;
                  GDKfree(pval);
              } else
                  cst->val.dval = 0;
              /* demote to flt when the value lies in (FLT_MIN, FLT_MAX] */
              if (cst->val.dval > FLT_MIN && cst->val.dval <= FLT_MAX) {
                  cst->vtype = TYPE_flt;
                  cst->val.fval = (flt) cst->val.dval;
              }
          }
          if (*s == '@') {
              /* oid constant: number '@' optional digits */
              int len = (int) sizeof(lng);
              lng l, *pval = &l;
              lngFromStr(CURRENT(cntxt), &len, &pval);
              if (l == lng_nil || l < 0
  #if SIZEOF_OID < SIZEOF_LNG
                  || l > GDK_oid_max
  #endif
                  )
                  cst->val.oval = oid_nil;
              else
                  cst->val.oval = (oid) l;
              cst->vtype = TYPE_oid;
              i++;
              s++;
              while (isdigit(*s)) {
                  i++;
                  s++;
              }
              return i;
          }
          if (*s == 'L') {
              /* 'L' or 'LL' suffix: int becomes lng, flt becomes dbl */
              if (cst->vtype == TYPE_int)
                  cst->vtype = TYPE_lng;
              if (cst->vtype == TYPE_flt)
                  cst->vtype = TYPE_dbl;
              i++;
              s++;
              if (*s == 'L') {
                  i++;
                  s++;
              }
              if (cst->vtype == TYPE_dbl) {
                  int len = i;
                  double *pval = 0;
                  dblFromStr(CURRENT(cntxt), &len, &pval);
                  if (pval) {
                      cst->val.dval = *pval;
                      GDKfree(pval);
                  } else
                      cst->val.dval = 0;
              } else {
                  int len = i;
                  lng *pval = 0;
                  lngFromStr(CURRENT(cntxt), &len, &pval);
                  if (pval) {
                      cst->val.lval = *pval;
                      GDKfree(pval);
                  } else
                      cst->val.lval = 0;
              }
              return i;
          }
  #ifdef HAVE_HGE
          if (*s == 'H' && cst->vtype == TYPE_int) {
              /* 'H' or 'HH' suffix: huge integer */
              int len = i;
              hge *pval = 0;
              cst->vtype = TYPE_hge;
              i++;
              s++;
              if (*s == 'H') {
                  i++;
                  s++;
              }
              hgeFromStr(CURRENT(cntxt), &len, &pval);
              if (pval) {
                  cst->val.hval = *pval;
                  GDKfree(pval);
              } else
                  cst->val.hval = 0;
              return i;
          }
  #endif
  handleInts:
          assert(cst->vtype != TYPE_lng);
  #ifdef HAVE_HGE
          assert(cst->vtype != TYPE_hge);
  #endif
          if (cst->vtype == TYPE_int) {
              /* pick the narrowest integer type that holds the value */
  #ifdef HAVE_HGE
              int len = (int) sizeof(hge);
              hge l, *pval = &l;
              if (hgeFromStr(CURRENT(cntxt), &len, &pval) <= 0 || l == hge_nil)
                  l = hge_nil;
              if ((hge) GDK_int_min < l && l <= (hge) GDK_int_max) {
                  cst->vtype = TYPE_int;
                  cst->val.ival = (int) l;
              } else
              if ((hge) GDK_lng_min < l && l <= (hge) GDK_lng_max) {
                  cst->vtype = TYPE_lng;
                  cst->val.lval = (lng) l;
              } else {
                  cst->vtype = TYPE_hge;
                  cst->val.hval = l;
                  if (l == hge_nil)
                      showException(cntxt->fdout, SYNTAX, "convertConstant", "integer parse error");
              }
  #else
              int len = (int) sizeof(lng);
              lng l, *pval = &l;
              if (lngFromStr(CURRENT(cntxt), &len, &pval) <= 0 || l == lng_nil)
                  l = lng_nil;
              if ((lng) GDK_int_min < l && l <= (lng) GDK_int_max) {
                  cst->vtype = TYPE_int;
                  cst->val.ival = (int) l;
              } else {
                  cst->vtype = TYPE_lng;
                  cst->val.lval = l;
                  if (l == lng_nil)
                      showException(cntxt->fdout, SYNTAX, "convertConstant", "integer parse error");
              }
  #endif
          }
          return i;
      case 'f':
          /* the word must not be followed by an alphanumeric or '_' */
          if (strncmp(s, "false", 5) == 0 && !isalnum((int) *(s + 5)) &&
              *(s + 5) != '_') {
              cst->vtype = TYPE_bit;
              cst->val.btval = 0;
              cst->len = 1;
              return 5;
          }
          return 0;
      case 't':
          if (strncmp(s, "true", 4) == 0 && !isalnum((int) *(s + 4)) &&
              *(s + 4) != '_') {
              cst->vtype = TYPE_bit;
              cst->val.btval = 1;
              cst->len = 1;
              return 4;
          }
          return 0;
      case 'n':
          if (strncmp(s, "nil", 3) == 0 && !isalnum((int) *(s + 3)) &&
              *(s + 3) != '_') {
              cst->vtype = TYPE_void;
              cst->len = 0;
              cst->val.oval = oid_nil;
              return 3;
          }
      }
      return 0;
  }
  /* Constants are copied out of the input exactly like identifiers. */
  #define cstCopy(C,I) idCopy(C,I)
  570. /* Type qualifier
  571. * Types are recognized as identifiers preceded by a colon.
  572. * They may be extended with a property list
  573. * and 'any' types can be marked with an alias.
  574. * The type qualifier parser returns the encoded type
  575. * as a short 32-bit integer.
  576. * The syntax structure is
  577. *
  578. * @multitable @columnfractions 0.15 0.8
  579. * @item typeQualifier
  580. * @tab : typeName propQualifier
  581. * @item typeName
  582. * @tab : scalarType | collectionType | anyType
  583. * @item scalarType
  584. * @tab : ':' @sc{ identifier}
  585. * @item collectionType
  586. * @tab : ':' @sc{ bat} ['[' col ',' col ']']
  587. * @item anyType
  588. * @tab : ':' @sc{ any} [typeAlias]
  589. * @item col
  590. * @tab : scalarType | anyType
  591. * @item propQualifier
  592. * @tab : ['@{' property '@}']
  593. * @end multitable
  594. *
  595. * The type ANY matches any type specifier.
  596. * Appending it with an alias turns it into a type variable.
  597. * The type alias is \$DIGIT (1-9) and can be used to relate types
  598. * by type equality.
  599. * The type variable are defined within the context of a function
  600. * scope.
  601. * Additional information, such as a repetition factor,
  602. * encoding tables, or type dependency should be modelled as properties.
  603. */
  604. static int
  605. typeAlias(Client cntxt, int tpe)
  606. {
  607. int t;
  608. if (tpe != TYPE_any)
  609. return -1;
  610. if (currChar(cntxt) == TMPMARKER) {
  611. nextChar(cntxt);
  612. t = currChar(cntxt) - '0';
  613. if (t <= 0 || t > 9)
  614. parseError(cntxt, "[1-9] expected\n");
  615. else
  616. nextChar(cntxt);
  617. return t;
  618. }
  619. return -1;
  620. }
  621. /*
  622. * The simple type analysis currently assumes a proper type identifier.
  623. * We should change getMALtype to return a failure instead.
  624. */
  625. static int
  626. simpleTypeId(Client cntxt)
  627. {
  628. int l, tpe;
  629. nextChar(cntxt);
  630. l = typeidLength(cntxt);
  631. if (l == 0) {
  632. parseError(cntxt, "Type identifier expected\n");
  633. cntxt->yycur--; /* keep it */
  634. return -1;
  635. }
  636. tpe = getTypeIndex(CURRENT(cntxt), l, -1);
  637. if (tpe < 0) {
  638. parseError(cntxt, "Type identifier expected\n");
  639. cntxt->yycur -= l; /* keep it */
  640. return TYPE_void;
  641. }
  642. advance(cntxt, l);
  643. return tpe;
  644. }
  645. static int
  646. parseTypeId(Client cntxt, int defaultType)
  647. {
  648. int i = TYPE_any, ht, tt, kh = 0, kt = 0;
  649. char *s = CURRENT(cntxt);
  650. if (strncmp(s, ":bat[", 5) == 0) {
  651. /* parse :bat[:type,:type] */
  652. advance(cntxt, 5);
  653. if (currChar(cntxt) == ':') {
  654. ht = simpleTypeId(cntxt);
  655. kh = typeAlias(cntxt, ht);
  656. } else
  657. ht = TYPE_any;
  658. if (currChar(cntxt) != ',') {
  659. parseError(cntxt, "',' expected\n");
  660. return i;
  661. }
  662. nextChar(cntxt); /* skip , */
  663. skipSpace(cntxt);
  664. if (currChar(cntxt) == ':') {
  665. tt = simpleTypeId(cntxt);
  666. kt = typeAlias(cntxt, tt);
  667. } else
  668. tt = TYPE_any;
  669. i = newBatType(ht, tt);
  670. if (kh > 0)
  671. setAnyHeadIndex(i, kh);
  672. if (kt > 0)
  673. setAnyColumnIndex(i, kt);
  674. if (currChar(cntxt) != ']')
  675. parseError(cntxt, "']' expected\n");
  676. nextChar(cntxt); /* skip ']' */
  677. skipSpace(cntxt);
  678. return i;
  679. }
  680. if ((strncmp(s, ":bat", 4) == 0 ||
  681. strncmp(s, ":BAT", 4) == 0) && !idCharacter[(int) s[4]]) {
  682. advance(cntxt, 4);
  683. return TYPE_bat;
  684. }
  685. if (strncmp(s, ":col", 4) == 0 && !idCharacter[(int) s[4]]) {
  686. /* parse default for :col[:any] */
  687. advance(cntxt, 4);
  688. return newColumnType(TYPE_any);
  689. }
  690. if (currChar(cntxt) == ':') {
  691. ht = simpleTypeId(cntxt);
  692. kt = typeAlias(cntxt, ht);
  693. if (kt > 0)
  694. setAnyColumnIndex(ht, kt);
  695. return ht;
  696. }
  697. parseError(cntxt, "<type identifier> expected\n");
  698. return defaultType;
  699. }
  700. static inline int
  701. typeElm(Client cntxt, int def)
  702. {
  703. if (currChar(cntxt) != ':')
  704. return def; /* no type qualifier */
  705. return parseTypeId(cntxt, def);
  706. }
  707. /*
  708. * The Parser
  709. * The client is responsible to collect the
  710. * input for parsing in a single string before calling the parser.
  711. * Once the input is available parsing runs in a critical section for
  712. * a single client thread.
  713. *
  714. * The parser uses the rigid structure of the language to speedup
  715. * analysis. In particular, each input line is translated into
  716. * a MAL instruction record as quickly as possible. Its context is
  717. * manipulated during the parsing process, by keeping the curPrg,
  718. * curBlk, and curInstr variables.
  719. *
  720. * The language statements of the parser are gradually introduced, with
  721. * the overall integration framework last.
  722. * The convention is to return a zero when an error has been
  723. * reported or when the structure can not be recognized.
  724. * Furthermore, we assume that blanks have been skipped before entering
  725. * recognition of a new token.
  726. *
  727. * Module statement.
  728. * The module and import commands have immediate effect.
  729. * The module statement switches the location for symbol table update
  730. * to a specific named area. The effect is that all definitions may become
  731. * globally known (?) and symbol table should be temporarily locked
  732. * for updates by concurrent users.
  733. *
  734. * @multitable @columnfractions 0.15 0.8
  735. * @item moduleStmt
  736. * @tab : @sc{atom} ident [':'ident]
  737. * @item
  738. * @tab | @sc{module} ident
  739. * @end multitable
  740. *
  741. * An atom statement does not introduce a new module.
  742. */
  743. static void
  744. helpInfo(Client cntxt, str *help)
  745. {
  746. int l;
  747. if (MALkeyword(cntxt, "comment", 7)) {
  748. skipSpace(cntxt);
  749. if ((l = stringLength(cntxt))) {
  750. *help = strCopy(cntxt, l);
  751. if (*help)
  752. advance(cntxt, l - 1);
  753. } else {
  754. parseError(cntxt, "<string> expected\n");
  755. }
  756. } else if (currChar(cntxt) != ';')
  757. parseError(cntxt, "';' expected\n");
  758. skipToEnd(cntxt);
  759. }
  760. static void
  761. propList(Client cntxt, int arg)
  762. {
  763. MalBlkPtr curBlk = cntxt->curprg->def;
  764. int l;
  765. malType tpe;
  766. if (keyphrase1(cntxt, "{")) {
  767. do {
  768. str pname, opname;
  769. int i, lo;
  770. ValRecord cst;
  771. l = idLength(cntxt);
  772. if (l == 0)
  773. break;
  774. pname = idCopy(cntxt, l);
  775. /* localize value , simplified version */
  776. lo = operatorLength(cntxt);
  777. if (lo > 0)
  778. opname = operatorCopy(cntxt, lo);
  779. else
  780. opname = GDKstrdup("");
  781. if ((i = cstToken(cntxt, &cst))) {
  782. advance(cntxt, i);
  783. if (currChar(cntxt) == ':') {
  784. tpe = simpleTypeId(cntxt);
  785. if (tpe >=0 && tpe != TYPE_any){
  786. str msg =convertConstant(tpe, &cst);
  787. if( msg) GDKfree(msg);
  788. } else
  789. parseError(cntxt, "simple type expected\n");
  790. }
  791. varSetProperty(curBlk, arg, pname, opname, &cst);
  792. } else {
  793. varSetProperty(curBlk, arg, pname, NULL, NULL);
  794. }
  795. GDKfree(pname);
  796. GDKfree(opname);
  797. } while (keyphrase1(cntxt, ","));
  798. if (!keyphrase1(cntxt, "}"))
  799. /* return (MalBlkPtr) */
  800. parseError(cntxt, "'}' expected\n");
  801. }
  802. }
  /*
   * Parse one argument binding "name[:type][{props}]" or ":type[{props}]"
   * and push the corresponding variable onto 'curInstr'.
   *
   * A name that is not yet known in 'curBlk' creates a new variable. For a
   * known name, a non-zero 'flag' makes this an error ("Argument defined
   * twice"); otherwise the optional type must equal the variable's current
   * type. A bare type qualifier creates a temporary variable.
   *
   * Returns the (possibly reallocated) instruction from pushArgument(), or
   * 'curInstr' unchanged when no variable could be created or no argument
   * was found.
   */
  static InstrPtr
  binding(Client cntxt, MalBlkPtr curBlk, InstrPtr curInstr, int flag)
  {
      int l, varid = -1;
      malType type;
      l = idLength(cntxt);
      if (l > 0) {
          varid = findVariableLength(curBlk, CURRENT(cntxt), l);
          if (varid < 0) {
              /* unknown name: declare it; idCopy() also consumes the name */
              varid = newVariable(curBlk, idCopy(cntxt, l), TYPE_any);
              if ( varid < 0)
                  return curInstr;
              type = typeElm(cntxt, TYPE_any);
              if (isPolymorphic(type))
                  setPolymorphic(curInstr, type, TRUE);
              setVarType(curBlk, varid, type);
              propList(cntxt, varid);
          } else if (flag) {
              parseError(cntxt, "Argument defined twice\n");
              typeElm(cntxt, getVarType(curBlk, varid));
              propList(cntxt, varid);
          } else {
              /* known name: an explicit type must match the existing one */
              advance(cntxt, l);
              type = typeElm(cntxt, getVarType(curBlk, varid));
              if( type != getVarType(curBlk,varid))
                  parseError(cntxt, "Incompatible argument type\n");
              if (isPolymorphic(type))
                  setPolymorphic(curInstr, type, TRUE);
              setVarType(curBlk, varid, type);
              propList(cntxt, varid);
          }
      } else if (currChar(cntxt) == ':') {
          /* anonymous argument: only a type is given */
          type = typeElm(cntxt, TYPE_any);
          varid = newTmpVariable(curBlk, type);
          if ( varid < 0)
              return curInstr;
          if ( isPolymorphic(type))
              setPolymorphic(curInstr, type, TRUE);
          setVarType(curBlk, varid, type);
          propList(cntxt, varid);
      } else {
          parseError(cntxt, "argument expected\n");
          return curInstr;
      }
      if( varid >=0)
          curInstr = pushArgument(curBlk, curInstr, varid);
      return curInstr;
  }
  /*
   * At this stage the LHS part has been parsed and the destination
   * variables have been set. Next step is to parse the expression,
   * which starts with an operand.
   * This code is used in both positions of the expression
   *
   * term parses one operand - a constant, a variable name or a bare type
   * qualifier - and pushes it as an argument onto *curInstr.
   *
   * Return value, as implemented below:
   *   ret  after a constant or a bare type qualifier was pushed;
   *   3    when a type qualifier yields a negative type;
   *   0    after a variable name was handled, when a new variable could not
   *        be created, or when no operand was recognized.
   */
  static int
  term(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr, int ret)
  {
      /* 'free' (a local flag, not the libc function) stays 1 while the
       * parsed constant was not handed to defConstant() */
      int i, idx, flag, free = 1;
      ValRecord cst;
      str v = NULL;
      int cstidx = -1;
      malType tpe = TYPE_any;
      if ((i = cstToken(cntxt, &cst))) {
          cstidx = fndConstant(curBlk, &cst, MAL_VAR_WINDOW);
          if (cstidx >= 0) {
              /* an equal constant is already known: try to reuse it */
              advance(cntxt, i);
              if (currChar(cntxt) == ':') {
                  tpe = typeElm(cntxt, getVarType(curBlk, cstidx));
                  if (tpe < 0)
                      return 3;
                  if(tpe == getVarType(curBlk,cstidx) ){
                      setVarUDFtype(curBlk, cstidx);
                  } else {
                      cstidx = defConstant(curBlk, tpe, &cst);
                      setPolymorphic(*curInstr, tpe, FALSE);
                      setVarUDFtype(curBlk, cstidx);
                      free = 0;
                  }
              } else if (cst.vtype != getVarType(curBlk, cstidx)) {
                  cstidx = defConstant(curBlk, cst.vtype, &cst);
                  setPolymorphic(*curInstr, cst.vtype, FALSE);
                  free = 0;
              }
              /* protect against leaks coming from constant reuse */
              if (free && ATOMextern(cst.vtype) && cst.val.pval)
                  VALclear(&cst);
              *curInstr = pushArgument(curBlk, *curInstr, cstidx);
              return ret;
          } else {
              /* add a new constant */
              advance(cntxt, i);
              flag = currChar(cntxt) == ':';
              tpe = typeElm(cntxt, cst.vtype);
              if (tpe < 0)
                  return 3;
              cstidx = defConstant(curBlk, tpe, &cst);
              setPolymorphic(*curInstr, tpe, FALSE);
              if (flag)
                  setVarUDFtype(curBlk, cstidx);
              *curInstr = pushArgument(curBlk, *curInstr, cstidx);
              return ret;
          }
      } else if ((i = idLength(cntxt))) {
          if ((idx = findVariableLength(curBlk, CURRENT(cntxt), i)) == -1) {
              /* first use of this name: declare it with type any */
              v = idCopy(cntxt, i);
              idx = newVariable(curBlk, v, TYPE_any);
              if( idx <0)
                  return 0;
              propList(cntxt, idx);
          } else {
              advance(cntxt, i);
              propList(cntxt, idx);
          }
          *curInstr = pushArgument(curBlk, *curInstr, idx);
      } else if (currChar(cntxt) == ':') {
          /* a bare type qualifier becomes a type variable */
          tpe = typeElm(cntxt, TYPE_any);
          if (tpe < 0)
              return 3;
          setPolymorphic(*curInstr, tpe, FALSE);
          idx = newTypeVariable(curBlk, tpe);
          propList(cntxt, idx);
          *curInstr = pushArgument(curBlk, *curInstr, idx);
          return ret;
      }
      return 0;
  }
  929. static str
  930. parseAtom(Client cntxt)
  931. {
  932. str modnme = 0;
  933. int l, tpe;
  934. char *nxt = CURRENT(cntxt);
  935. if ((l = idLength(cntxt)) <= 0)
  936. return parseError(cntxt, "atom name expected\n");
  937. /* parse: ATOM id:type */
  938. modnme = putName(nxt, l);
  939. advance(cntxt, l);
  940. if (currChar(cntxt) != ':')
  941. tpe = TYPE_void; /* no type qualifier */
  942. else
  943. tpe = parseTypeId(cntxt, TYPE_int);
  944. malAtomDefinition(cntxt->fdout, modnme, tpe);
  945. cntxt->nspace = fixModule(cntxt->nspace, modnme);
  946. cntxt->nspace->isAtomModule = TRUE;
  947. skipSpace(cntxt);
  948. helpInfo(cntxt, &cntxt->nspace->help);
  949. return "";
  950. }
  951. static str
  952. parseLibrary(Client cntxt)
  953. {
  954. str libnme = 0, s;
  955. int l;
  956. char *nxt;
  957. ValRecord cst;
  958. nxt = CURRENT(cntxt);
  959. if ((l = idLength(cntxt)) <= 0) {
  960. if ((l = cstToken(cntxt, &cst)) && cst.vtype == TYPE_str) {
  961. advance(cntxt, l);
  962. libnme = putName(nxt + 1, l - 2);
  963. } else
  964. return parseError(cntxt, "<library name> or <library path> expected\n");
  965. } else
  966. libnme = putName(nxt, l);
  967. s = loadLibrary(libnme, TRUE);
  968. (void) putName(nxt, l);
  969. if (s){
  970. mnstr_printf(cntxt->fdout, "#WARNING: %s\n", s);
  971. GDKfree(s);
  972. }
  973. advance(cntxt, l);
  974. return "";
  975. }
  976. /*
  977. * It might be handy to clone a module.
  978. * It gets a copy of all functions known at the point of creation.
  979. */
  980. static str parseModule(Client cntxt)
  981. {
  982. str modnme = 0;
  983. int l;
  984. char *nxt;
  985. nxt = CURRENT(cntxt);
  986. if ((l = idLength(cntxt)) <= 0)
  987. return parseError(cntxt, "<module path> expected\n");
  988. modnme = putName(nxt, l);
  989. advance(cntxt, l);
  990. cntxt->nspace = fixModule(cntxt->nspace, modnme);
  991. skipSpace(cntxt);
  992. helpInfo(cntxt, &cntxt->nspace->help);
  993. return "";
  994. }
  995. /*
  996. * Include statement
  997. * An include statement is immediately taken into effect. This
  998. * calls for temporary switching the input for a particular client.
  999. * The administration for this is handled by malInclude.
  1000. * No listing is produced, because module sources are assumed to
  1001. * be debugged upfront already.
  1002. * @multitable @columnfractions 0.15 0.8
  1003. * @item includeStmt
  1004. * @tab : @sc{include} identifier
  1005. * @item
  1006. * @tab | @sc{include} string_literal
  1007. * @end multitable
  1008. *
  1009. * Include files should be handled in line with parsing. This way we
  1010. * are ensured that any possible signature definition will be known
  1011. * afterwards. The effect is that errors in the include sequence are
  1012. * marked as warnings.
  1013. */
  1014. static str
  1015. parseInclude(Client cntxt)
  1016. {
  1017. str modnme = 0, s;
  1018. int x;
  1019. char *nxt;
  1020. if (!MALkeyword(cntxt, "include", 7))
  1021. return 0;
  1022. nxt = CURRENT(cntxt);
  1023. if ((x = idLength(cntxt)) > 0) {
  1024. modnme = putName(nxt, x);
  1025. advance(cntxt, x);
  1026. } else if ((x = stringLength(cntxt)) > 0) {
  1027. modnme = putName(nxt + 1, x - 1);
  1028. advance(cntxt, x);
  1029. } else
  1030. return parseError(cntxt, "<module name> expected\n");
  1031. if (currChar(cntxt) != ';') {
  1032. parseError(cntxt, "';' expected\n");
  1033. skipToEnd(cntxt);
  1034. return 0;
  1035. }
  1036. skipToEnd(cntxt);
  1037. s = loadLibrary(modnme, FALSE);
  1038. if (s) {
  1039. parseError(cntxt, s);
  1040. GDKfree(s);
  1041. return 0;
  1042. }
  1043. if ((s = malInclude(cntxt, modnme, 0))) {
  1044. parseError(cntxt, s);
  1045. GDKfree(s);
  1046. return 0;
  1047. }
  1048. return "";
  1049. }
  1050. /*
  1051. * Definition
  1052. * The definition statements share a lot in common, which calls for factoring
  1053. * out the code in a few text macros. Upon encountering a definition, we
  1054. * initialize a MAL instruction container. We should also check for
  1055. * non-terminated definitions.
  1056. *
  1057. * @multitable @columnfractions 0.15 0.8
  1058. * @item program
  1059. * @tab : ( definition [helpinfo] | statement ) *
  1060. *
  1061. * @item definition
  1062. * @tab : moduleStmt | includeStmt
  1063. * @item
  1064. * @tab | commandStmt | patternStmt
  1065. * @item
  1066. * @tab | functionStmt | factoryStmt
  1067. * @item
  1068. * @tab | includeStmt
  1069. * @end multitable
  1070. *
  1071. * Beware, a function signature f(a1..an):(b1..bn) is parsed in such a way that
  1072. * the symbol table and stackframe contains the sequence
  1073. * f,a1..an,b1..bn. This slightly complicates the implementation
  1074. * of the return statement.
  1075. *
  1076. * Note, the function name could be mod.fcn, which calls for storing
  1077. * the function definition in a particular module instead of the current one.
  1078. */
  1079. static MalBlkPtr
  1080. fcnHeader(Client cntxt, int kind)
  1081. {
  1082. int l;
  1083. malType tpe;
  1084. str fnme, modnme = NULL;
  1085. char ch;
  1086. Symbol curPrg;
  1087. MalBlkPtr curBlk = 0;
  1088. InstrPtr curInstr;
  1089. l = operatorLength(cntxt);
  1090. if (l == 0)
  1091. l = idLength(cntxt);
  1092. if (l == 0) {
  1093. parseError(cntxt, "<identifier> | <operator> expected\n");
  1094. skipToEnd(cntxt);
  1095. return 0;
  1096. }
  1097. fnme = putName(((char *) CURRENT(cntxt)), l);
  1098. advance(cntxt, l);
  1099. if (currChar(cntxt) == '.') {
  1100. nextChar(cntxt); /* skip '.' */
  1101. modnme = fnme;
  1102. l = operatorLength(cntxt);
  1103. if (l == 0)
  1104. l = idLength(cntxt);
  1105. if (l == 0){
  1106. parseError(cntxt, "<identifier> | <operator> expected\n");
  1107. skipToEnd(cntxt);
  1108. return 0;
  1109. }
  1110. fnme = putName(((char *) CURRENT(cntxt)), l);
  1111. advance(cntxt, l);
  1112. }
  1113. /* temporary suspend capturing statements in main block */
  1114. if (cntxt->backup){
  1115. parseError(cntxt, "mal_parser: unexpected recursion\n");
  1116. skipToEnd(cntxt);
  1117. return 0;
  1118. }
  1119. cntxt->backup = cntxt->curprg;
  1120. cntxt->curprg = newFunction(putName("user", 4), fnme, kind);
  1121. curPrg = cntxt->curprg;
  1122. curBlk = curPrg->def;
  1123. curBlk->flowfixed = 0;
  1124. curBlk->typefixed = 0;
  1125. curInstr = getInstrPtr(curBlk, 0);
  1126. propList(cntxt, curInstr->argv[0]);
  1127. if (currChar(cntxt) != '('){
  1128. parseError(cntxt, "function header '(' expected\n");
  1129. skipToEnd(cntxt);
  1130. return curBlk;
  1131. }
  1132. advance(cntxt, 1);
  1133. setModuleId(curInstr, modnme ? putName(modnme, strlen(modnme)) :
  1134. putName(cntxt->nspace->name, strlen(cntxt->nspace->name)));
  1135. if (isModuleDefined(cntxt->nspace, getModuleId(curInstr)) == FALSE) {
  1136. if (cntxt->backup) {
  1137. cntxt->curprg = cntxt->backup;
  1138. cntxt->backup = 0;
  1139. }
  1140. parseError(cntxt, "<module> not defined\n");
  1141. return curBlk;
  1142. }
  1143. /* get calling parameters */
  1144. ch = currChar(cntxt);
  1145. while (ch != ')' && ch && !NL(ch)) {
  1146. curInstr = binding(cntxt, curBlk, curInstr, 1);
  1147. /* the last argument may be variable length */
  1148. if (MALkeyword(cntxt, "...", 3)) {
  1149. curInstr->varargs |= VARARGS;
  1150. setPolymorphic(curInstr, TYPE_any, TRUE);
  1151. break;
  1152. }
  1153. if ((ch = currChar(cntxt)) != ',') {
  1154. if (ch == ')')
  1155. break;
  1156. if (cntxt->backup) {
  1157. cntxt->curprg = cntxt->backup;
  1158. cntxt->backup = 0;
  1159. }
  1160. parseError(cntxt, "',' expected\n");
  1161. skipToEnd(cntxt);
  1162. return curBlk;
  1163. } else
  1164. nextChar(cntxt); /* skip ',' */
  1165. skipSpace(cntxt);
  1166. ch = currChar(cntxt);
  1167. }
  1168. if (currChar(cntxt) != ')') {
  1169. freeInstruction(curInstr);
  1170. parseError(cntxt, "')' expected\n");
  1171. skipToEnd(cntxt);
  1172. return curBlk;
  1173. }
  1174. advance(cntxt, 1); /* skip ')' */
  1175. /*
  1176. The return type is either a single type or multiple return type structure.
  1177. We simply keep track of the number of arguments added and
  1178. during the final phase reshuffle the return values to the beginning (?)
  1179. */
  1180. if (currChar(cntxt) == ':') {
  1181. tpe = typeElm(cntxt, TYPE_void);
  1182. setPolymorphic(curInstr, tpe, TRUE);
  1183. setVarType(curBlk, curInstr->argv[0], tpe);
  1184. /* we may be confronted by a variable target type list */
  1185. if (MALkeyword(cntxt, "...", 3)) {
  1186. curInstr->varargs |= VARRETS;
  1187. setPolymorphic(curInstr, TYPE_any, TRUE);
  1188. }
  1189. propList(cntxt, curInstr->argv[0]);
  1190. } else if (keyphrase1(cntxt, "(")) { /* deal with compound return */
  1191. int retc = curInstr->argc, i1, i2 = 0;
  1192. int max;
  1193. short *newarg;
  1194. /* parse multi-target result */
  1195. /* skipSpace(cntxt);*/
  1196. ch = currChar(cntxt);
  1197. while (ch != ')' && ch && !NL(ch)) {
  1198. curInstr = binding(cntxt, curBlk, curInstr, 0);
  1199. /* we may be confronted by a variable target type list */
  1200. if (MALkeyword(cntxt, "...", 3)) {
  1201. curInstr->varargs |= VARRETS;
  1202. setPolymorphic(curInstr, TYPE_any, TRUE);
  1203. }
  1204. if ((ch = currChar(cntxt)) != ',') {
  1205. if (ch == ')')
  1206. break;
  1207. parseError(cntxt, "',' expected\n");
  1208. skipToEnd(cntxt);
  1209. return curBlk;
  1210. } else {
  1211. nextChar(cntxt); /* skip ',' */
  1212. }
  1213. skipSpace(cntxt);
  1214. ch = currChar(cntxt);
  1215. }
  1216. /* re-arrange the parameters, results first*/
  1217. max = curInstr->maxarg;
  1218. newarg = (short *) GDKmalloc(max * sizeof(curInstr->argv[0]));
  1219. if (newarg == NULL){
  1220. parseError(cntxt, MAL_MALLOC_FAIL);
  1221. skipToEnd(cntxt);
  1222. return curBlk;
  1223. }
  1224. for (i1 = retc; i1 < curInstr->argc; i1++)
  1225. newarg[i2++] = curInstr->argv[i1];
  1226. curInstr->retc = curInstr->argc - retc;
  1227. for (i1 = 1; i1 < retc; i1++)
  1228. newarg[i2++] = curInstr->argv[i1];
  1229. curInstr->argc = i2;
  1230. for (; i2 < max; i2++)
  1231. newarg[i2] = 0;
  1232. for (i1 = 0; i1 < max; i1++)
  1233. curInstr->argv[i1] = newarg[i1];
  1234. GDKfree(newarg);
  1235. if (currChar(cntxt) != ')') {
  1236. freeInstruction(curInstr);
  1237. if (cntxt->backup) {
  1238. cntxt->curprg = cntxt->backup;
  1239. cntxt->backup = 0;
  1240. }
  1241. parseError(cntxt, "')' expected\n");
  1242. skipToEnd(cntxt);
  1243. return curBlk;
  1244. }
  1245. nextChar(cntxt); /* skip ')' */
  1246. } else { /* default */
  1247. setVarType(curBlk, 0, TYPE_void);
  1248. }
  1249. if (curInstr != getInstrPtr(curBlk, 0)) {
  1250. freeInstruction(getInstrPtr(curBlk, 0));
  1251. getInstrPtr(curBlk, 0) = curInstr;
  1252. }
  1253. return curBlk;
  1254. }
  1255. /*
  1256. * The common theme in definitions is to parse the argument list.
  1257. * @multitable @columnfractions .15 .8
  1258. * @item header
  1259. * @tab : hdrName '(' params ')' result
  1260. * @item result
  1261. * @tab : paramType | '(' params ')'
  1262. * @item params
  1263. * @tab : binding [',' binding]*
  1264. * @item binding
  1265. * @tab : identifier typeName [propQualifier]
  1266. * @end multitable
  1267. */
  1268. /*
  1269. * MAL variables are statically/dynamically typed.
  1270. * Function and procedure arguments should always be typed.
  1271. * We do not permit polymorphism at this interpretation level.
  1272. *
  1273. * The type information maintained simplifies analysis of
  1274. * column results. If the underlying type is not known, then it
  1275. * may be replaced once during execution of a MAL instruction
  1276. * typically as a side-effect of calling a bat-returning function.
  1277. *
  1278. * We should also allow for variable argument lists. However, they
  1279. * may only appear in patterns, because the calling context is necessary
  1280. * to resolve the actual argument list. Furthermore, we can not
  1281. * assume much about its type structure.
  1282. * Variables are extended with a property list to enable
  1283. * optimizers to make decisions. (See the section on properties).
  1284. */
  1285. /*
  1286. * @-
  1287. */
  1288. /*
  1289. * Each procedure definition opens a structure in which the
  1290. * information is gathered. The enclosing module is statically
  1291. * determined.
  1292. *
  1293. * A proc-header translates into a single MAL instruction.
  1294. * Since no recursive rules are included, we can stick to
  1295. * using a single global variable to accumulate the
  1296. * properties.
  1297. *
  1298. * The external commands and rules come with a short
  1299. * help information.
  1300. */
  1301. static MalBlkPtr
  1302. parseCommandPattern(Client cntxt, int kind)
  1303. {
  1304. MalBlkPtr curBlk = 0;
  1305. Symbol curPrg = 0;
  1306. InstrPtr curInstr = 0;
  1307. str modnme = NULL;
  1308. curBlk = fcnHeader(cntxt, kind);
  1309. if (curBlk == NULL)
  1310. return curBlk;
  1311. getInstrPtr(curBlk, 0)->token = kind;
  1312. curPrg = cntxt->curprg;
  1313. curPrg->kind = kind;
  1314. curInstr = getInstrPtr(curBlk, 0);
  1315. modnme = getModuleId(getInstrPtr(curBlk, 0));
  1316. if (modnme && isModuleDefined(cntxt->nspace, modnme) == FALSE)
  1317. return (MalBlkPtr) parseError(cntxt, "<module> not defined\n");
  1318. modnme = modnme ? modnme : cntxt->nspace->name;
  1319. if (isModuleDefined(cntxt->nspace, putName(modnme, strlen(modnme))))
  1320. insertSymbol(findModule(cntxt->nspace, putName(modnme, strlen(modnme))), curPrg);
  1321. else
  1322. return (MalBlkPtr) parseError(cntxt, "<module> not found\n");
  1323. trimMalBlk(curBlk);
  1324. chkProgram(cntxt->fdout, cntxt->nspace, curBlk);
  1325. if (cntxt->backup) {
  1326. cntxt->curprg = cntxt->backup;
  1327. cntxt->backup = 0;
  1328. }
  1329. /*
  1330. * Short-cut function calls
  1331. * Most functions are (dynamically) linked with the kernel as
  1332. * commands or pattern definitions. This enables for fast execution.
  1333. *
  1334. * In addition we allow functions to be bound to both
  1335. * a linked C-function and a MAL specification block.
  1336. * It the function address is not available, the interpreter
  1337. * will use the MAL block instead.
  1338. * This scheme is intended for just-in-time compilation.
  1339. *
  1340. * [note, command and patterns do not have a MAL block]
  1341. */
  1342. if (MALkeyword(cntxt, "address", 7)) {
  1343. str nme;
  1344. int i;
  1345. i = idLength(cntxt);
  1346. if (i == 0) {
  1347. parseError(cntxt, "<identifier> expected\n");
  1348. return 0;
  1349. }
  1350. cntxt->blkmode = 0;
  1351. nme = idCopy(cntxt, i);
  1352. if (getModuleId(curInstr))
  1353. setModuleId(curInstr, NULL);
  1354. setModuleScope(curInstr,
  1355. findModule(cntxt->nspace, putName(modnme, strlen(modnme))));
  1356. curInstr->fcn = getAddress(cntxt->fdout, cntxt->srcFile, modnme, nme, TRUE);
  1357. curBlk->binding = nme;
  1358. if (cntxt->nspace->isAtomModule) {
  1359. if (curInstr->fcn == NULL) {
  1360. parseError(cntxt, "<address> not found\n");
  1361. return 0;
  1362. }
  1363. malAtomProperty(curBlk, curInstr);
  1364. }
  1365. skipSpace(cntxt);
  1366. } else {
  1367. parseError(cntxt, "'address' expected\n");
  1368. return 0;
  1369. }
  1370. helpInfo(cntxt, &curBlk->help);
  1371. showErrors(cntxt);
  1372. if (curBlk && cntxt->listing > 1)
  1373. printFunction(cntxt->fdout, curBlk, 0, cntxt->listing);
  1374. return curBlk;
  1375. }
  1376. static MalBlkPtr
  1377. parseFunction(Client cntxt, int kind)
  1378. {
  1379. MalBlkPtr curBlk = 0;
  1380. curBlk = fcnHeader(cntxt, kind);
  1381. if (curBlk == NULL)
  1382. return curBlk;
  1383. if (MALkeyword(cntxt, "address", 7)) {
  1384. str nme;
  1385. int i;
  1386. InstrPtr curInstr = getInstrPtr(curBlk, 0);
  1387. i = idLength(cntxt);
  1388. if (i == 0) {
  1389. parseError(cntxt, "<identifier> expected\n");
  1390. return 0;
  1391. }
  1392. nme = idCopy(cntxt, i);
  1393. curInstr->fcn = getAddress(cntxt->fdout, cntxt->srcFile, cntxt->nspace->name, nme, TRUE);
  1394. GDKfree(nme);
  1395. if (curInstr->fcn == NULL) {
  1396. parseError(cntxt, "<address> not found\n");
  1397. return 0;
  1398. }
  1399. skipSpace(cntxt);
  1400. }
  1401. /* block is terminated at the END statement */
  1402. helpInfo(cntxt, &curBlk->help);
  1403. return curBlk;
  1404. }
  1405. /*
  1406. * Functions and factories end with a labeled end-statement.
  1407. * The routine below checks for misalignment of the closing statements.
  1408. * Any instruction parsed after the function block is considered an error.
  1409. */
  1410. static int
  1411. parseEnd(Client cntxt)
  1412. {
  1413. MalBlkPtr curBlk = 0;
  1414. Symbol curPrg = 0;
  1415. int l, showit = 0;
  1416. if (MALkeyword(cntxt, "end", 3)) {
  1417. curPrg = cntxt->curprg;
  1418. curBlk = curPrg->def;
  1419. l = idLength(cntxt);
  1420. if (l == 0)
  1421. l = operatorLength(cntxt);
  1422. if (strncmp(CURRENT(cntxt), getModuleId(getSignature(curPrg)), l) == 0) {
  1423. advance(cntxt, l);
  1424. skipSpace(cntxt);
  1425. if (currChar(cntxt) == '.')
  1426. nextChar(cntxt);
  1427. skipSpace(cntxt);
  1428. l = idLength(cntxt);
  1429. if (l == 0)
  1430. l = operatorLength(cntxt);
  1431. }
  1432. /* parse fcn */
  1433. if ((l == (int) strlen(curPrg->name) &&
  1434. strncmp(CURRENT(cntxt), curPrg->name, l) == 0) || l == 0) {} else {
  1435. parseError(cntxt, "non matching end label\n");
  1436. }
  1437. pushEndInstruction(curBlk);
  1438. insertSymbol(cntxt->nspace, cntxt->curprg);
  1439. trimMalBlk(cntxt->curprg->def);
  1440. cntxt->blkmode = 0;
  1441. curBlk->typefixed = 0;
  1442. chkProgram(cntxt->fdout, cntxt->nspace, cntxt->curprg->def);
  1443. if (cntxt->backup) {
  1444. cntxt->curprg = cntxt->backup;
  1445. cntxt->backup = 0;
  1446. }
  1447. showit = TRUE;
  1448. skipToEnd(cntxt);
  1449. if (showit && cntxt->listing > 1)
  1450. printFunction(cntxt->fdout, curBlk, 0, cntxt->listing);
  1451. showErrors(cntxt);
  1452. return 1;
  1453. }
  1454. return 0;
  1455. }
  1456. /*
  1457. * Most instructions are simple assignments, possibly
  1458. * modified with a barrier/catch tag.
  1459. * @multitable @columnfractions .15 .8
  1460. * @item statement
  1461. * @tab : tag varlist [':=' expr ] propQualifier
  1462. * @item tag
  1463. * @tab : @sc{ return} | @sc{ barrier} | @sc{ catch}
  1464. * @item
  1465. * @tab | @sc{ leave} | @sc{ redo} |
  1466. * @item expr
  1467. * @tab : fcncall
  1468. * @item
  1469. * @tab : [factor operator] factor
  1470. * @item varlist
  1471. * @tab : variable
  1472. * @item
  1473. * @tab | @verb{'(' variable {',' variable}* ')' }
  1474. * @item variable
  1475. * @tab : identifier propQualifier
  1476. * @item factor
  1477. * @tab : constant | var
  1478. * @end multitable
  1479. *
  1480. * The basic types are also predefined as a variable.
  1481. * This makes it easier to communicate types to MAL patterns.
  1482. */
  /*
   * Resolve the identifier of length l at the current parse position to a
   * variable of curBlk, creating a new TYPE_any variable when it is not
   * known yet, and move past the identifier.  Expects cntxt, curBlk, l,
   * arg and varid in the enclosing scope; the result is left in varid.
   * Wrapped in do/while(0) so that "GETvariable;" is a single statement
   * and cannot capture a following else.
   */
  #define GETvariable \
      do { \
          if ((varid = findVariableLength(curBlk, CURRENT(cntxt), l)) == -1) { \
              arg = idCopy(cntxt, l); \
              varid = newVariable(curBlk, arg, TYPE_any); \
              assert(varid >= 0); \
          } else \
              advance(cntxt, l); \
      } while (0)
  1490. /* The parameter of parseArguments is the return value of the enclosing function. */
  1491. static int
  1492. parseArguments(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr)
  1493. {
  1494. while (currChar(cntxt) != ')') {
  1495. switch (term(cntxt, curBlk, curInstr, 0)) {
  1496. case 0:
  1497. break;
  1498. case 2: return 2;
  1499. case 3: return 3;
  1500. default:
  1501. parseError(cntxt, "<factor> expected\n");
  1502. pushInstruction(curBlk, *curInstr);
  1503. skipToEnd(cntxt);
  1504. return 1;
  1505. }
  1506. if (currChar(cntxt) == ',')
  1507. advance(cntxt, 1);
  1508. else if (currChar(cntxt) != ')') {
  1509. parseError(cntxt, "',' expected\n");
  1510. cntxt->yycur--; /* keep it */
  1511. break;
  1512. }
  1513. }
  1514. if (currChar(cntxt) == ')')
  1515. advance(cntxt, 1);
  1516. return 0;
  1517. }
  1518. static void
  1519. parseAssign(Client cntxt, int cntrl)
  1520. {
  1521. InstrPtr curInstr;
  1522. MalBlkPtr curBlk;
  1523. Symbol curPrg;
  1524. int i = 0, l, type = TYPE_any, varid = -1;
  1525. str arg = 0;
  1526. ValRecord cst;
  1527. curPrg = cntxt->curprg;
  1528. curBlk = curPrg->def;
  1529. curInstr = newInstruction(curBlk, cntrl ? cntrl : ASSIGNsymbol);
  1530. /* start the parsing by recognition of the lhs of an assignment */
  1531. if (currChar(cntxt) == '(') {
  1532. /* parsing multi-assignment */
  1533. advance(cntxt, 1);
  1534. curInstr->argc = 0; /*reset to handle pushArg correctly !! */
  1535. curInstr->retc = 0;
  1536. while (currChar(cntxt) != ')' && currChar(cntxt)) {
  1537. l = idLength(cntxt);
  1538. i = cstToken(cntxt, &cst);
  1539. if (l == 0 || i) {
  1540. parseError(cntxt, "<identifier> expected\n");
  1541. skipToEnd(cntxt);
  1542. freeInstruction(curInstr);
  1543. return;
  1544. }
  1545. GETvariable;
  1546. if (currChar(cntxt) == ':') {
  1547. setVarUDFtype(curBlk, varid);
  1548. type = typeElm(cntxt, getVarType(curBlk, varid));
  1549. if (type < 0)
  1550. goto part3;
  1551. setPolymorphic(curInstr, type, FALSE);
  1552. setVarType(curBlk, varid, type);
  1553. }
  1554. propList(cntxt, varid);
  1555. curInstr = pushArgument(curBlk, curInstr, varid);
  1556. curInstr->retc++;
  1557. if (currChar(cntxt) == ')')
  1558. break;
  1559. if (currChar(cntxt) == ',')
  1560. keyphrase1(cntxt, ",");
  1561. }
  1562. advance(cntxt, 1); /* skip ')' */
  1563. if (curInstr->retc == 0) {
  1564. /* add dummy variable */
  1565. curInstr = pushArgument(curBlk, curInstr, newTmpVariable(curBlk, TYPE_any));
  1566. curInstr->retc++;
  1567. }
  1568. } else {
  1569. /* are we dealing with a simple assignment? */
  1570. l = idLength(cntxt);
  1571. i = cstToken(cntxt, &cst);
  1572. if (l == 0 || i) {
  1573. /* we haven't seen a target variable */
  1574. /* flow of control statements may end here. */
  1575. /* shouldn't allow for nameless controls todo*/
  1576. if (i && cst.vtype == TYPE_str)
  1577. GDKfree(cst.val.sval);
  1578. if (cntrl == LEAVEsymbol || cntrl == REDOsymbol ||
  1579. cntrl == RETURNsymbol || cntrl == EXITsymbol) {
  1580. curInstr->argv[0] = getBarrierEnvelop(curBlk);
  1581. pushInstruction(curBlk, curInstr);
  1582. if (currChar(cntxt) != ';')
  1583. parseError(cntxt, "<identifier> expected\n");
  1584. skipToEnd(cntxt);
  1585. return;
  1586. }
  1587. getArg(curInstr, 0) = newTmpVariable(curBlk, TYPE_any);
  1588. pushInstruction(curBlk, curInstr);
  1589. parseError(cntxt, "<identifier> expected\n");
  1590. skipToEnd(cntxt);
  1591. return;
  1592. }
  1593. /* Check if we are dealing with module.fcn call*/
  1594. if (CURRENT(cntxt)[l] == '.' || CURRENT(cntxt)[l] == '(') {
  1595. curInstr->argv[0] = newTmpVariable(curBlk, TYPE_any);
  1596. goto FCNcallparse;
  1597. }
  1598. /* Get target variable details*/
  1599. GETvariable;
  1600. if (!(currChar(cntxt) == ':' && CURRENT(cntxt)[1] == '=')) {
  1601. curInstr->argv[0] = varid;
  1602. if (currChar(cntxt) == ':') {
  1603. setVarUDFtype(curBlk, varid);
  1604. type = typeElm(cntxt, getVarType(curBlk, varid));
  1605. if (type < 0)
  1606. goto part3;
  1607. setPolymorphic(curInstr, type, FALSE);
  1608. setVarType(curBlk, varid, type);
  1609. }
  1610. }
  1611. propList(cntxt, varid);
  1612. curInstr->argv[0] = varid;
  1613. }
  1614. /* look for assignment operator */
  1615. if (!keyphrase2(cntxt, ":=")) {
  1616. /* no assignment !! a control variable is allowed */
  1617. /* for the case RETURN X, we normalize it to include the function arguments */
  1618. if (cntrl == RETURNsymbol || cntrl == YIELDsymbol) {
  1619. int e;
  1620. InstrPtr sig = getInstrPtr(curBlk,0);
  1621. curInstr->retc = 0;
  1622. for (e = 0; e < sig->retc; e++)
  1623. curInstr = pushReturn(curBlk, curInstr, getArg(sig, e));
  1624. }
  1625. goto part3;
  1626. }
  1627. if (currChar(cntxt) == '(') {
  1628. /* parse multi assignment */
  1629. advance(cntxt, 1);
  1630. switch (parseArguments(cntxt, curBlk, &curInstr)) {
  1631. case 2: goto part2;
  1632. default:
  1633. case 3: goto part3;
  1634. }
  1635. /* unreachable */
  1636. }
  1637. /*
  1638. * We have so far the LHS part of an assignment. The remainder is
  1639. * either a simple term expression, a multi assignent, or the start
  1640. * of a function call.
  1641. */
  1642. FCNcallparse:
  1643. if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '(') {
  1644. /* parseError(cntxt,"<module> expected\n");*/
  1645. setModuleId(curInstr, getModuleId(getInstrPtr(curBlk, 0)));
  1646. i = l;
  1647. goto FCNcallparse2;
  1648. } else if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '.') {
  1649. /* continue with parseing a function/operator call */
  1650. arg = putName(CURRENT(cntxt), l);
  1651. advance(cntxt, l + 1); /* skip '.' too */
  1652. setModuleId(curInstr, arg);
  1653. i = idLength(cntxt);
  1654. if (i == 0)
  1655. i = operatorLength(cntxt);
  1656. FCNcallparse2:
  1657. if (i) {
  1658. setFunctionId(curInstr, putName(((char *) CURRENT(cntxt)), i));
  1659. advance(cntxt, i);
  1660. } else {
  1661. parseError(cntxt, "<functionname> expected\n");
  1662. skipToEnd(cntxt);
  1663. pushInstruction(curBlk, curInstr);
  1664. return;
  1665. }
  1666. skipSpace(cntxt);
  1667. if (currChar(cntxt) != '(') {
  1668. parseError(cntxt, "'(' expected\n");
  1669. skipToEnd(cntxt);
  1670. pushInstruction(curBlk, curInstr);
  1671. return;
  1672. }
  1673. advance(cntxt, 1);
  1674. switch (parseArguments(cntxt, curBlk, &curInstr)) {
  1675. case 2: goto part2;
  1676. default:
  1677. case 3: goto part3;
  1678. }
  1679. /* unreachable */
  1680. }
  1681. /* Handle the ordinary assignments and expressions */
  1682. switch (term(cntxt, curBlk, &curInstr, 2)) {
  1683. case 2: goto part2;
  1684. case 3: goto part3;
  1685. }
  1686. part2: /* consume <operator><term> part of expression */
  1687. if ((i = operatorLength(cntxt))) {
  1688. /* simple arithmetic operator expression */
  1689. setFunctionId(curInstr, putName(((char *) CURRENT(cntxt)), i));
  1690. advance(cntxt, i);
  1691. curInstr->modname = putName("calc", 4);
  1692. if ((l = idLength(cntxt)) && !(l == 3 && strncmp(CURRENT(cntxt), "nil", 3) == 0)) {
  1693. GETvariable;
  1694. curInstr = pushArgument(curBlk, curInstr, varid);
  1695. goto part3;
  1696. }
  1697. switch (term(cntxt, curBlk, &curInstr, 3)) {
  1698. case 2: goto part2;
  1699. case 3: goto part3;
  1700. }
  1701. parseError(cntxt, "<term> expected\n");
  1702. skipToEnd(cntxt);
  1703. pushInstruction(curBlk, curInstr);
  1704. return;
  1705. } else {
  1706. skipSpace(cntxt);
  1707. if (currChar(cntxt) == '(')
  1708. parseError(cntxt, "module name missing\n");
  1709. else if (currChar(cntxt) != ';' && currChar(cntxt) != '#') {
  1710. parseError(cntxt, "operator expected\n");
  1711. skipToEnd(cntxt);
  1712. }
  1713. pushInstruction(curBlk, curInstr);
  1714. return;
  1715. }
  1716. part3:
  1717. skipSpace(cntxt);
  1718. if (currChar(cntxt) != ';')
  1719. parseError(cntxt, "';' expected\n");
  1720. skipToEnd(cntxt);
  1721. pushInstruction(curBlk, curInstr);
  1722. if (cntrl == RETURNsymbol && !(curInstr->token == ASSIGNsymbol || getModuleId(curInstr) != 0))
  1723. parseError(cntxt, "return assignment expected\n");
  1724. }
  1725. /*
  1726. * A piggybacked way to ship tuples is to mimic
  1727. * a copycommand through the language interface.
  1728. * All tuples are stored in a temporary file
  1729. * whose name can be picked up immediately afterwards.
  1730. * The code should be made thread safe, by storing
  1731. * the file name in the client record.
  1732. */
  1733. static void
  1734. parseTuple(Client cntxt)
  1735. {
  1736. InstrPtr curInstr;
  1737. MalBlkPtr curBlk;
  1738. Symbol curPrg;
  1739. FILE *f = 0;
  1740. char buf[MAXPATHLEN];
  1741. int c;
  1742. sprintf(buf, "input%d", (int) (cntxt - mal_clients));
  1743. f = fopen(buf, "w");
  1744. if (f == NULL)
  1745. showException(cntxt->fdout, SYNTAX, "parser", "temp file not writeable");
  1746. while ((c = currChar(cntxt)) == '[' && c) {
  1747. do {
  1748. if (f && c)
  1749. fputc(c, f);
  1750. nextChar(cntxt);
  1751. } while ((c = currChar(cntxt)) != '\n' && c);
  1752. if (f && c && fputc(c, f) == EOF) {
  1753. (void) fclose(f);
  1754. if (unlink(buf) < 0)
  1755. showException(cntxt->fdout, SYNTAX, "parser", "out of temp file space and unable to unlink");
  1756. showException(cntxt->fdout, SYNTAX, "parser", "out of temp file space");
  1757. return;
  1758. }
  1759. nextChar(cntxt);
  1760. }
  1761. curPrg = cntxt->curprg;
  1762. curBlk = curPrg->def;
  1763. curInstr = newAssignment(curBlk);
  1764. setModuleId(curInstr, putName("io", 2));
  1765. setFunctionId(curInstr, putName("data", 4));
  1766. c = newVariable(curBlk, GDKstrdup("tuples"), TYPE_any);
  1767. getArg(curInstr, 0) = c;
  1768. pushStr(curBlk, curInstr, buf);
  1769. if (f != NULL)
  1770. (void) fclose(f);
  1771. }
  /*
   * Leave the enclosing function, returning the error count of curPrg,
   * once that count has reached MAXERRORS.  Expects curPrg in scope.
   */
  #define BRKONERR \
      if (curPrg->def->errors >= MAXERRORS) \
          return curPrg->def->errors;
  1774. int
  1775. parseMAL(Client cntxt, Symbol curPrg)
  1776. {
  1777. int cntrl = 0;
  1778. /*Symbol curPrg= cntxt->curprg;*/
  1779. char c;
  1780. echoInput(cntxt);
  1781. /* here the work takes place */
  1782. while ((c = currChar(cntxt))) {
  1783. switch (c) {
  1784. case '\n': case '\r': case '\f':
  1785. nextChar(cntxt);
  1786. echoInput(cntxt);
  1787. continue;
  1788. case ';': case '\t': case ' ':
  1789. nextChar(cntxt);
  1790. continue;
  1791. case '[':
  1792. /* parse a tuple value. stops at end of line */
  1793. parseTuple(cntxt);
  1794. break;
  1795. case '#':
  1796. { /* keep the full line comments unless it is a MX #line */
  1797. char start[256], *e = start, c;
  1798. MalBlkPtr curBlk = cntxt->curprg->def;
  1799. InstrPtr curInstr;
  1800. *e = 0;
  1801. nextChar(cntxt);
  1802. while ((c = currChar(cntxt))) {
  1803. if (e < start + 256 - 1)
  1804. *e++ = c;
  1805. nextChar(cntxt);
  1806. if (c == '\n' || c == '\r') {
  1807. *e = 0;
  1808. if (e > start)
  1809. e--;
  1810. /* prevChar(cntxt);*/
  1811. break;
  1812. }
  1813. }
  1814. if (e > start)
  1815. *e = 0;
  1816. if (e > start && curBlk->stop > 0 &&
  1817. strncmp("line ", start, 5) != 0) {
  1818. ValRecord cst;
  1819. /*
  1820. * Comment lines produced by Mx, i.e. #line directives are not saved.
  1821. * The deadcode optimizer removes all comment information.
  1822. */
  1823. curInstr = newInstruction(curBlk, REMsymbol);
  1824. cst.vtype = TYPE_str;
  1825. cst.len = (int) strlen(start);
  1826. cst.val.sval = GDKstrdup(start);
  1827. getArg(curInstr, 0) = defConstant(curBlk, TYPE_str, &cst);
  1828. clrVarConstant(curBlk, getArg(curInstr, 0));
  1829. setVarDisabled(curBlk, getArg(curInstr, 0));
  1830. pushInstruction(curBlk, curInstr);
  1831. }
  1832. echoInput(cntxt);
  1833. }
  1834. continue;
  1835. case 'A': case 'a':
  1836. if (MALkeyword(cntxt, "atom", 4) &&
  1837. parseAtom(cntxt) != 0)
  1838. break;
  1839. goto allLeft;
  1840. case 'b': case 'B':
  1841. if (MALkeyword(cntxt, "barrier", 7)) {
  1842. cntxt->blkmode++;
  1843. cntrl = BARRIERsymbol;
  1844. }
  1845. goto allLeft;
  1846. case 'C': case 'c':
  1847. if (MALkeyword(cntxt, "command", 7)) {
  1848. parseCommandPattern(cntxt, COMMANDsymbol);
  1849. continue;
  1850. }
  1851. if (MALkeyword(cntxt, "catch", 5)) {
  1852. cntxt->blkmode++;
  1853. cntrl = CATCHsymbol;
  1854. goto allLeft;
  1855. }
  1856. goto allLeft;
  1857. case 'E': case 'e':
  1858. if (MALkeyword(cntxt, "exit", 4)) {
  1859. if (cntxt->blkmode > 0)
  1860. cntxt->blkmode--;
  1861. cntrl = EXITsymbol;
  1862. } else if (parseEnd(cntxt)) {
  1863. break;
  1864. }
  1865. goto allLeft;
  1866. case 'F': case 'f':
  1867. if (MALkeyword(cntxt, "function", 8)) {
  1868. cntxt->blkmode++;
  1869. if (parseFunction(cntxt, FUNCTIONsymbol))
  1870. break;
  1871. } else if (MALkeyword(cntxt, "factory", 7)) {
  1872. cntxt->blkmode++;
  1873. parseFunction(cntxt, FACTORYsymbol);
  1874. break;
  1875. }
  1876. goto allLeft;
  1877. case 'H': case 'h':
  1878. if (MALkeyword(cntxt, "handler", 5)) {
  1879. skipToEnd(cntxt);
  1880. cntxt->blkmode++;
  1881. break;
  1882. }
  1883. case 'i': if (parseInclude(cntxt))
  1884. continue;
  1885. goto allLeft;
  1886. case 'L': case 'l':
  1887. if (MALkeyword(cntxt, "library", 7)) {
  1888. parseLibrary(cntxt);
  1889. continue;
  1890. }
  1891. if (MALkeyword(cntxt, "leave", 5))
  1892. cntrl = LEAVEsymbol;
  1893. goto allLeft;
  1894. case 'M': case 'm':
  1895. if (MALkeyword(cntxt, "module", 6) &&
  1896. parseModule(cntxt) != 0)
  1897. break;
  1898. goto allLeft;
  1899. case 'P': case 'p':
  1900. if (MALkeyword(cntxt, "pattern", 7)) {
  1901. parseCommandPatter

Large files files are truncated, but you can click here to view the full file