PageRenderTime 225ms CodeModel.GetById 8ms app.highlight 206ms RepoModel.GetById 2ms app.codeStats 0ms

/TeXmacs-1.0.7.11-src/src/System/Language/packrat_parser.cpp

#
C++ | 849 lines | 800 code | 20 blank | 29 comment | 90 complexity | 084ccf6226188647a76e89dec04e5fa3 MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, MPL-2.0-no-copyleft-exception
  1
  2/******************************************************************************
  3* MODULE     : packrat_parser.cpp
  4* DESCRIPTION: efficient packrat parsing
  5* COPYRIGHT  : (C) 2010  Joris van der Hoeven
  6*******************************************************************************
  7* This software falls under the GNU general public license version 3 or later.
  8* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
  9* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
 10******************************************************************************/
 11
 12#include "packrat_parser.hpp"
 13#include "analyze.hpp"
 14#include "drd_std.hpp"
 15
// Tree object that is only declared here; its definition lives outside this
// file and no code visible in this file reads or writes it.
extern tree the_et;
// File-level flag, initially false.  It is not read or written by any of the
// functions visible in this file.
bool packrat_invalid_colors= false;
 18
 19/******************************************************************************
 20* Constructor
 21******************************************************************************/
 22
 23packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
 24  grammar (gr->grammar),
 25  productions (gr->productions),
 26  properties (gr->properties),
 27  current_tree (packrat_uninit),
 28  current_string (""),
 29  current_start (-1),
 30  current_end (-1),
 31  current_path_pos (-1),
 32  current_pos_path (-1),
 33  current_cursor (-1),
 34  current_input (),
 35  current_cache (PACKRAT_UNDEFINED),
 36  current_production (packrat_uninit) {}
 37
 38packrat_parser
 39make_packrat_parser (string lan, tree in) {
 40  static string         last_lan   = "";
 41  static tree           last_in    = "";
 42  static packrat_parser last_par;
 43  if (lan != last_lan || in != last_in) {
 44    packrat_grammar gr= find_packrat_grammar (lan);
 45    last_lan   = lan;
 46    last_in    = copy (in);
 47    last_par   = packrat_parser (gr, in);
 48  }
 49  return last_par;
 50}
 51
 52packrat_parser
 53make_packrat_parser (string lan, tree in, path in_pos) {
 54  static string         last_lan   = "";
 55  static tree           last_in    = "";
 56  static path           last_in_pos= path ();
 57  static packrat_parser last_par;
 58  if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
 59    packrat_grammar gr= find_packrat_grammar (lan);
 60    last_lan   = lan;
 61    last_in    = copy (in);
 62    last_in_pos= copy (last_in_pos);
 63    last_par   = packrat_parser (gr, in, in_pos);
 64  }
 65  return last_par;
 66}
 67
 68/******************************************************************************
 69* Setting up the input
 70******************************************************************************/
 71
 72void
 73packrat_parser_rep::set_input (tree t) {
 74  current_string= "";
 75  current_tree  = t;
 76  serialize (t, path ());
 77  if (DEBUG_FLATTEN)
 78    cout << "Input " << current_string << "\n";
 79  current_input= encode_tokens (current_string);
 80}
 81
 82void
 83packrat_parser_rep::set_cursor (path p) {
 84  if (is_nil (p)) current_cursor= -1;
 85  else current_cursor= encode_tree_position (p);
 86  //cout << current_input << ", " << current_cursor << "\n";
 87}
 88
 89/******************************************************************************
 90* Encoding and decoding of cursor positions in the input
 91******************************************************************************/
 92
 93C
 94packrat_parser_rep::encode_string_position (int i) {
 95  if (i < 0) return PACKRAT_FAILED;
 96  int j=0;
 97  C k=0;
 98  while (j<i && j<N(current_string)) {
 99    tm_char_forwards (current_string, j);
100    k++;
101  }
102  return k;
103}
104
105int
106packrat_parser_rep::encode_path (tree t, path p, path pos) {
107  //cout << "Search " << pos << " in " << t << ", " << p << "\n";
108  //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
109  if (is_nil (pos) || !current_start->contains (p)) return -1;
110  else if (is_atomic (t)) {
111    if (current_path_pos->contains (p * pos))
112      return current_path_pos[p * pos];
113    else if (pos->item < 0 || pos->item > N(t->label)) return -1;
114    return current_start[p] + pos->item;
115  }
116  else {
117    if (pos == path (0)) return current_start[p];
118    if (pos == path (1)) return current_end[p];
119    if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
120    return encode_path (t[pos->item], p * pos->item, pos->next);
121  }
122}
123
124C
125packrat_parser_rep::encode_tree_position (path p) {
126  if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
127  int i= encode_path (current_tree, path (), p);
128  return encode_string_position (i);
129}
130
131int
132packrat_parser_rep::decode_string_position (C pos) {
133  //cout << "Decode " << pos << "\n";
134  if (pos == PACKRAT_FAILED) return -1;
135  int i=0;
136  C k=0;
137  while (i<N(current_string) && k<pos) {
138    tm_char_forwards (current_string, i);
139    k++;
140  }
141  return i;
142}
143
144path
145packrat_parser_rep::decode_path (tree t, path p, int pos) {
146  //cout << "Search " << pos << " in " << t << ", " << p << "\n";
147  //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
148  if (is_atomic (t)) {
149    if (current_pos_path->contains (pos))
150      return current_pos_path[pos];
151    else return p * (pos - current_start[p]);
152  }
153  else {
154    for (int i=0; i<N(t); i++)
155      if (pos >= current_start[p*i] && pos <= current_end[p*i])
156	return decode_path (t[i], p * i, pos);
157    if (pos <= current_start[p]) return p * 0;
158    if (pos >= current_end[p]) return p * 1;
159    return p * 0;
160  }
161}
162
163path
164packrat_parser_rep::decode_tree_position (C pos) {
165  int i= decode_string_position (pos);
166  if (i < 0) return path (i);
167  return decode_path (current_tree, path (), i);
168}
169
170/******************************************************************************
171* Packrat parsing
172******************************************************************************/
173
174bool
175starts (tree t, string s) {
176  return is_atomic (t) && starts (t->label, s);
177}
178
179C
180packrat_parser_rep::parse (C sym, C pos) {
181  D key= (((D) sym) << 32) + ((D) (sym^pos));
182  C im = current_cache [key];
183  if (im != PACKRAT_UNDEFINED) {
184    //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
185    return im;
186  }
187  current_cache (key)= PACKRAT_FAILED;
188  if (DEBUG_PACKRAT)
189    cout << "Parse " << packrat_decode[sym] << " at " << pos << INDENT << LF;
190  if (sym >= PACKRAT_TM_OPEN) {
191    array<C> inst= grammar [sym];
192    //cout << "Parse " << inst << " at " << pos << LF;
193    switch (inst[0]) {
194    case PACKRAT_OR:
195      im= PACKRAT_FAILED;
196      for (int i=1; i<N(inst); i++) {
197	im= parse (inst[i], pos);
198	if (im != PACKRAT_FAILED) break;
199      }
200      break;
201    case PACKRAT_CONCAT:
202      im= pos;
203      for (int i=1; i<N(inst); i++) {
204	im= parse (inst[i], im);
205	if (im == PACKRAT_FAILED) break;
206      }
207      break;
208    case PACKRAT_WHILE:
209      im= pos;
210      while (true) {
211	C next= parse (inst[1], im);
212	if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
213	im= next;
214      }
215      break;
216    case PACKRAT_REPEAT:
217      im= parse (inst[1], pos);
218      if (im != PACKRAT_FAILED)
219	while (true) {
220	  C next= parse (inst[1], im);
221	  if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
222	  im= next;
223	}
224      break;
225    case PACKRAT_RANGE:
226      if (pos < N (current_input) &&
227	  current_input [pos] >= inst[1] &&
228	  current_input [pos] <= inst[2])
229	im= pos + 1;
230      else im= PACKRAT_FAILED;
231      break;
232    case PACKRAT_NOT:
233      if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
234      else im= PACKRAT_FAILED;
235      break;
236    case PACKRAT_EXCEPT:
237      im= parse (inst[1], pos);
238      if (im != PACKRAT_FAILED)
239	if (parse (inst[2], pos) != PACKRAT_FAILED)
240	  im= PACKRAT_FAILED;
241      break;
242    case PACKRAT_TM_OPEN:
243      if (pos < N (current_input) &&
244	  starts (packrat_decode[current_input[pos]], "<\\"))
245	im= pos + 1;
246      else im= PACKRAT_FAILED;
247      break;
248    case PACKRAT_TM_ANY:
249      im= pos;
250      while (true) {
251	C old= im;
252	im= parse (PACKRAT_TM_OPEN, old);
253	if (im == PACKRAT_FAILED)
254	  im= parse (PACKRAT_TM_LEAF, old);
255	else {
256	  im= parse (PACKRAT_TM_ARGS, im);
257	  if (im != PACKRAT_FAILED)
258	    im= parse (encode_token ("</>"), im);
259	}
260	if (old == im) break;
261      }
262      break;
263    case PACKRAT_TM_ARGS:
264      im= parse (PACKRAT_TM_ANY, pos);
265      while (im < N (current_input))
266	if (current_input[im] != encode_token ("<|>")) break;
267	else im= parse (PACKRAT_TM_ANY, im + 1);
268      break;
269    case PACKRAT_TM_LEAF:
270      im= pos;
271      while (im < N (current_input)) {
272	tree t= packrat_decode[current_input[im]];
273	if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
274	else im++;
275      }
276      break;
277    case PACKRAT_TM_CHAR:
278      if (pos >= N (current_input)) im= PACKRAT_FAILED;
279      else {
280	tree t= packrat_decode[current_input[pos]];
281	if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
282	else im= pos + 1;
283      }
284      break;
285    case PACKRAT_TM_CURSOR:
286      if (pos == current_cursor) im= pos;
287      else im= PACKRAT_FAILED;
288      break;
289    case PACKRAT_TM_FAIL:
290      im= PACKRAT_FAILED;
291      break;
292    default:
293      im= parse (inst[0], pos);
294      break;
295    }
296  }
297  else {
298    if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
299    else im= PACKRAT_FAILED;
300  }
301  current_cache (key)= im;
302  if (DEBUG_PACKRAT)
303    cout << UNINDENT << "Parsed " << packrat_decode[sym]
304	 << " at " << pos << " -> " << im << LF;
305  return im;
306}
307
308/******************************************************************************
309* Inspecting the parse tree
310******************************************************************************/
311
312void
313packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
314  syms= array<C> ();
315  poss= array<C> ();
316  C next= parse (sym, pos);
317  if (next == PACKRAT_FAILED) return;
318  if (sym >= PACKRAT_TM_OPEN) {
319    array<C> inst= grammar [sym];
320    //cout << "Parse " << inst << " at " << pos << LF;
321    switch (inst[0]) {
322    case PACKRAT_OR:
323      for (int i=1; i<N(inst); i++)
324	if (parse (inst[i], pos) != PACKRAT_FAILED) {
325	  inspect (inst[i], pos, syms, poss);
326	  break;
327	}
328      break;
329    case PACKRAT_CONCAT:
330      for (int i=1; i<N(inst); i++) {
331	next= parse (inst[i], pos);
332	if (next == PACKRAT_FAILED) break;
333        syms << inst[i];
334        poss << pos;
335	pos= next;
336      }
337      break;
338    case PACKRAT_WHILE:
339    case PACKRAT_REPEAT:
340      while (true) {
341        C next= parse (inst[1], pos);
342        if (next == PACKRAT_FAILED) break;
343        syms << inst[1];
344        poss << pos;
345        pos= next;
346      }
347      break;
348    case PACKRAT_RANGE:
349    case PACKRAT_NOT:
350      break;
351    case PACKRAT_EXCEPT:
352      inspect (inst[1], pos, syms, poss);
353      break;
354    case PACKRAT_TM_OPEN:
355    case PACKRAT_TM_ANY:
356    case PACKRAT_TM_ARGS:
357    case PACKRAT_TM_LEAF:
358    case PACKRAT_TM_CHAR:
359    case PACKRAT_TM_CURSOR:
360    case PACKRAT_TM_FAIL:
361      break;
362    default:
363      inspect (inst[0], pos, syms, poss);
364      break;
365    }
366  }
367}
368
369bool
370packrat_parser_rep::is_left_recursive (C sym) {
371  if (sym < PACKRAT_TM_OPEN) return false;
372  array<C> inst= grammar [sym];
373  if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
374  if (inst[1] < PACKRAT_TM_OPEN) return false;
375  tree t= packrat_decode[inst[1]];
376  return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
377}
378
379bool
380packrat_parser_rep::is_associative (C sym) {
381  static C prop= encode_symbol (compound ("property", "associativity"));
382  D key = (((D) prop) << 32) + ((D) (sym ^ prop));
383  if (!properties->contains (key)) return false;
384  return properties[key] == "associative";
385}
386
387bool
388packrat_parser_rep::is_anti_associative (C sym) {
389  static C prop= encode_symbol (compound ("property", "associativity"));
390  D key = (((D) prop) << 32) + ((D) (sym ^ prop));
391  if (!properties->contains (key)) return false;
392  return properties[key] == "anti-associative";
393}
394
395bool
396packrat_parser_rep::is_list_like (C sym) {
397  (void) sym;
398  return false;
399}
400
401bool
402packrat_parser_rep::is_selectable (C sym) {
403  tree t= packrat_decode[sym];
404  if (is_compound (t, "partial", 1)) return true;
405  if (!is_compound (t, "symbol", 1)) return false;
406  string s= t[0]->label;
407  return !ends (s, "-head") && !ends (s, "-tail");
408}
409
410/******************************************************************************
411* Finding all enclosing structures at a given position
412******************************************************************************/
413
414void
415packrat_parser_rep::context
416  (C sym, C pos, C w1, C w2, int mode,
417   array<C>& kind, array<C>& begin, array<C>& end)
418{
419  C next= parse (sym, pos);
420  if (next < 0 || pos > w1 || next < w2) return;
421
422  if (mode == 2 && (pos == w1 || next == w2)) {
423    static C prop= encode_symbol (compound ("property", "operator"));
424    D key = (((D) prop) << 32) + ((D) (sym ^ prop));
425    if (properties->contains (key)) return;
426  }
427
428  if (true) {
429    static C sel_prop= encode_symbol (compound ("property", "selectable"));
430    static C foc_prop= encode_symbol (compound ("property", "focus"));
431    D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
432    D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
433    if (properties->contains (sel_key) &&
434        properties[sel_key] == "inside");
435    else if (properties->contains (foc_key) &&
436             properties[foc_key] == "disallow" &&
437             mode == 2);
438    else {
439      int n= N(kind);
440      if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
441        if (is_selectable (sym) || !is_selectable (kind[n-1]))
442          kind[n-1]= sym;
443      }
444      else {
445        kind  << sym;
446        begin << pos;
447        end   << next;
448      }
449    }
450  }
451
452  if (mode >= 0) {
453    static C prop= encode_symbol (compound ("property", "atomic"));
454    D key = (((D) prop) << 32) + ((D) (sym ^ prop));
455    if (properties->contains (key)) return;
456  }
457
458  if (is_left_recursive (sym) && mode == 0) {
459    array<C> inst= grammar [sym];
460    C before= pos;
461    C middle= parse (inst[1], before);
462    if (middle == PACKRAT_FAILED) return;
463    C after = parse (inst[2], middle);
464    if (after == PACKRAT_FAILED) return;
465    array<C> csym;
466    array<C> cpos;
467    inspect (inst[2], middle, csym, cpos);
468    csym= append (inst[1], csym);
469    cpos= append (before, cpos);
470    cpos << after;
471    int i1, i2;
472    for (i1=0; i1<N(csym); i1++)
473      if (cpos[i1+1] > w1) break;
474    for (i2=i1; i2<N(csym); i2++)
475      if (cpos[i2+1] >= w2) break;
476    if (i1 == i2) {
477      int i, n= N(kind);
478      context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
479      for (i=n; i<N(kind); i++)
480        if (is_selectable (kind[i]))
481          return;
482      kind  -> resize (n);
483      begin -> resize (n);
484      end   -> resize (n);
485    }
486    C alt_start= -1;
487    while (i1 > 0) {
488      array<C> ccsym;
489      array<C> ccpos;
490      inspect (csym[i1], cpos[i1], ccsym, ccpos);
491      if (N(ccsym)>1 && is_associative (ccsym[0])) {
492        if (w1 >= ccpos[1]) alt_start= ccpos[1];
493        break;
494      }
495      if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
496      i1--;
497    }
498    tree sel= compound ("partial", packrat_decode[sym]);
499    kind  << encode_symbol (sel);
500    begin << (alt_start<0? cpos[i1]: alt_start);
501    end   << cpos[i2+1];
502    return;
503  }
504
505  if (sym >= PACKRAT_TM_OPEN) {
506    array<C> inst= grammar [sym];
507    //cout << "Context " << inst << " at " << pos << LF;
508    switch (inst[0]) {
509    case PACKRAT_OR:
510      for (int i=1; i<N(inst); i++)
511	if (parse (inst[i], pos) != PACKRAT_FAILED) {
512	  context (inst[i], pos, w1, w2, mode, kind, begin, end);
513	  break;
514	}
515      break;
516    case PACKRAT_CONCAT:
517      for (int i=1; i<N(inst); i++) {
518	next= parse (inst[i], pos);
519	if (next == PACKRAT_FAILED) break;
520	if (pos <= w1 && w2 <= next)
521	  context (inst[i], pos, w1, w2, mode, kind, begin, end);
522	if (next > w2) break;
523	pos= next;
524      }
525      break;
526    case PACKRAT_WHILE:
527    case PACKRAT_REPEAT:
528      while (true) {
529	C next= parse (inst[1], pos);
530	if (next == PACKRAT_FAILED) break;
531	if (pos <= w1 && w2 <= next)
532	  context (inst[1], pos, w1, w2, mode, kind, begin, end);
533	if (next > w2) break;
534	pos= next;
535      }
536      break;
537    case PACKRAT_RANGE:
538    case PACKRAT_NOT:
539      break;
540    case PACKRAT_EXCEPT:
541      context (inst[1], pos, w1, w2, mode, kind, begin, end);
542      break;
543    case PACKRAT_TM_OPEN:
544    case PACKRAT_TM_ANY:
545    case PACKRAT_TM_ARGS:
546    case PACKRAT_TM_LEAF:
547    case PACKRAT_TM_CHAR:
548    case PACKRAT_TM_CURSOR:
549    case PACKRAT_TM_FAIL:
550      break;
551    default:
552      context (inst[0], pos, w1, w2, mode, kind, begin, end);
553      break;
554    }
555  }
556}
557
558void
559packrat_parser_rep::compress
560  (array<C>& kind, array<C>& begin, array<C>& end)
561{
562  array<C> new_kind, new_begin, new_end;
563  for (int i=0; i<N(kind); i++) {
564    int n= N(new_kind);
565    if (is_selectable (kind[i]))
566      if (N(new_kind) == 0 ||
567	  new_kind [n-1] != kind[i] ||
568	  (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
569	new_kind  << kind[i];
570	new_begin << begin[i];
571	new_end   << end[i];
572      }
573  }
574  kind = new_kind;
575  begin= new_begin;
576  end  = new_end;
577}
578
579/******************************************************************************
580* Syntax highlighting
581******************************************************************************/
582
583void
584packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
585  if (p1 == p2);
586  else if (is_atomic (t)) {
587    string s= t->label;
588    ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
589    ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
590	    "invalid selection");
591    attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
592  }
593  else if (N(t) == 0);
594  else {
595    ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
596	    "invalid selection");
597    if (p1 == path (0)) p1= path (0, 0);
598    if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
599    for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
600      path q1= (i == p1->item? p1->next: path (0));
601      path q2= (i == p2->item? p2->next: path (right_index (t[i])));
602      highlight (t[i], tp * i, q1, q2, col);
603    }
604  }
605}
606
607void
608packrat_parser_rep::highlight (C sym, C pos) {
609  C next= parse (sym, pos);
610  if (next < 0) return;
611  if (sym >= PACKRAT_SYMBOLS) {
612    static C prop= encode_symbol (compound ("property", "highlight"));
613    D key = (((D) prop) << 32) + ((D) (sym ^ prop));
614    if (properties->contains (key)) {
615      int  col  = encode_color (properties [key]);
616      path start= decode_tree_position (pos);
617      path end  = decode_tree_position (next);
618      highlight (current_tree, path (), start, end, col);
619      static C prop= encode_symbol (compound ("property", "transparent"));
620      D key = (((D) prop) << 32) + ((D) (sym ^ prop));
621      if (!properties->contains (key)) return;
622    }
623  }
624
625  if (sym >= PACKRAT_TM_OPEN) {
626    array<C> inst= grammar [sym];
627    //cout << "Parse " << inst << " at " << pos << LF;
628    switch (inst[0]) {
629    case PACKRAT_OR:
630      for (int i=1; i<N(inst); i++)
631	if (parse (inst[i], pos) != PACKRAT_FAILED) {
632	  highlight (inst[i], pos);
633	  break;
634	}
635      break;
636    case PACKRAT_CONCAT:
637      for (int i=1; i<N(inst); i++) {
638	next= parse (inst[i], pos);
639	highlight (inst[i], pos);
640	pos= next;
641      }
642      break;
643    case PACKRAT_WHILE:
644    case PACKRAT_REPEAT:
645      while (true) {
646	C next= parse (inst[1], pos);
647	if (next == PACKRAT_FAILED) break;
648	highlight (inst[1], pos);
649	if (next == pos) break;
650	pos= next;
651      }
652      break;
653    case PACKRAT_RANGE:
654    case PACKRAT_NOT:
655      break;
656    case PACKRAT_EXCEPT:
657      highlight (inst[1], pos);
658      break;      
659    case PACKRAT_TM_OPEN:
660    case PACKRAT_TM_ANY:
661    case PACKRAT_TM_ARGS:
662    case PACKRAT_TM_LEAF:
663    case PACKRAT_TM_CHAR:
664    case PACKRAT_TM_CURSOR:
665    case PACKRAT_TM_FAIL:
666      break;
667    default:
668      highlight (inst[0], pos);
669      break;
670    }
671  }
672}
673
674/******************************************************************************
675* Memoized and accelerated highlighting
676******************************************************************************/
677
678static bool
679empty_line (tree t) {
680  if (!is_atomic (t)) return false;
681  string s= t->label;
682  for (int i=0; i<N(s); i++)
683    if (s[i] != ' ') return false;
684  return true;
685}
686
687static bool
688consistent_portion (tree t, int begin, int end) {
689  int level= 0;
690  for (int i=begin; i<end; i++)
691    if (is_atomic (t[i])) {
692      string s= t[i]->label;
693      for (int j=0; j<N(s); j++)
694	switch (s[j]) {
695	case '(': level++; break;
696	case ')': if (level <= 0) return false; level--; break;
697	case '[': level++; break;
698	case ']': if (level <= 0) return false; level--; break;
699	case '{': level++; break;
700	case '}': if (level <= 0) return false; level--; break;
701	default : break;
702	}
703    }
704  return level == 0;
705}
706
707static void
708consistent_enlargement (tree t, int& begin, int& end) {
709  while (begin > 0 || end < N(t)) {
710    while (begin > 0    && !empty_line (t[begin-1])) begin--;
711    while (end   < N(t) && !empty_line (t[end    ])) end++;
712    if (consistent_portion (t, begin, end)) return;
713    //cout << "Inconsistent " << begin << " -- " << end << "\n";
714    begin= max (0   , begin - max (end - begin, 1));
715    end  = min (N(t), end   + max (end - begin, 1));
716    //cout << "  Try " << begin << " -- " << end << "\n";
717  }
718}
719
720/******************************************************************************
721* User interface
722******************************************************************************/
723
724path
725packrat_parse (string lan, string sym, tree in) {
726  packrat_parser par= make_packrat_parser (lan, in);
727  C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
728  return par->decode_tree_position (pos);
729}
730
731bool
732packrat_correct (string lan, string sym, tree in) {
733  packrat_parser par= make_packrat_parser (lan, in);
734  C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
735  return pos == N(par->current_input);
736}
737
738bool
739packrat_available_path (string lan, tree in, path in_p) {
740  packrat_parser par= make_packrat_parser (lan, in);
741  return par->current_start->contains (in_p);
742}
743
744object
745packrat_context (string lan, string s, tree in, path in_pos) {
746  //cout << "Context " << in << " at " << in_pos
747  //     << " (" << lan << ", " << s << ")" << LF;
748  packrat_parser par= make_packrat_parser (lan, in);
749  C sym= encode_symbol (compound ("symbol", s));
750  if (par->parse (sym, 0) != N(par->current_input))
751    par= make_packrat_parser (lan, in, in_pos);
752  C pos= par->encode_tree_position (in_pos);
753  if (pos == PACKRAT_FAILED) return object (false);
754  array<C> kind, begin, end;
755  par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
756  par->compress (kind, begin, end);
757  object ret= null_object ();
758  for (int i=0; i<N(kind); i++) {
759    object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
760    object x2 (par->decode_tree_position (begin[i]));
761    object x3 (par->decode_tree_position (end[i]));
762    ret= cons (list_object (x1, x2, x3), ret);
763  }
764  return ret;
765}
766
767bool
768packrat_select (string lan, string s, tree in, path in_pos,
769		path& p1, path& p2, int mode)
770{
771  // mode= 0: genuine semantic selection
772  // mode= 1: strictly larger selection for select_enlarge
773  // mode= 2: determine environment rectangles
774  if (path_less (p2, p1))
775    return packrat_select (lan, s, in, in_pos, p2, p1, mode);
776  //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
777  //<< " (" << lan << ", " << s << ")" << LF;
778  packrat_parser par= make_packrat_parser (lan, in);
779  C sym = encode_symbol (compound ("symbol", s));
780  if (par->parse (sym, 0) != N(par->current_input))
781    par= make_packrat_parser (lan, in, in_pos);
782  C pos1= par->encode_tree_position (p1);
783  C pos2= par->encode_tree_position (p2);
784  //cout << "Encoded " << pos1 << " -- " << pos2
785  //     << " in " << par->current_string << LF;
786  if (par->parse (sym, 0) != N(par->current_input)) return false;
787  if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
788  array<C> kind, begin, end;
789  C pos0= pos1;
790  if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
791  par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
792  //for (int i=0; i<N(kind); i++)
793  //  cout << i << ":\t"
794  //       << par->decode_tree_position (begin[i]) << "\t"
795  //       << par->decode_tree_position (end[i]) << "\t"
796  //       << packrat_decode[kind[i]] << LF;
797  par->compress (kind, begin, end);
798  int n= N(kind);
799  if (n == 0) return false;
800  if (mode == 1) {
801    if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
802    if (n == 0) return false;
803  }
804  p1= par->decode_tree_position (begin[n-1]);
805  p2= par->decode_tree_position (end[n-1]);
806  //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
807  return true;
808}
809
810void
811packrat_highlight_subtree (string lan, string s, tree in) {
812  //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
813  int hl_lan= packrat_abbreviation (lan, s);
814  if (hl_lan == 0) return;
815  packrat_parser par= make_packrat_parser (lan, in);
816  C sym = encode_symbol (compound ("symbol", s));
817  if (par->parse (sym, 0) == N(par->current_input)) {
818    par->current_hl_lan= hl_lan;
819    par->highlight (sym, 0);
820  }
821}
822
823void
824packrat_highlight (string lan, string s, tree in) {
825  int hl_lan= packrat_abbreviation (lan, s);
826  if (hl_lan == 0) return;
827  //cout << "Highlight " << in << "\n";
828  if (is_func (in, DOCUMENT)) {
829    int i, begin, end;
830    for (begin=0; begin<N(in); begin++)
831      if (!has_highlight (in[begin], hl_lan))
832	break;
833    for (end=N(in)-1; end>begin; end--)
834      if (!has_highlight (in[end-1], hl_lan))
835	break;
836    consistent_enlargement (in, begin, end);    
837    for (i=begin; i<end; i++)
838      detach_highlight (in[i], hl_lan);
839    attach_highlight (in, hl_lan);
840    packrat_highlight_subtree (lan, s, in (begin, end));
841  }
842  else {
843    if (is_compound (in))
844      for (int i=0; i<N(in); i++)
845	detach_highlight (in[i], hl_lan);
846    attach_highlight (in, hl_lan);
847    packrat_highlight_subtree (lan, s, in);
848  }
849}