/TeXmacs-1.0.7.11-src/src/System/Language/packrat_parser.cpp
C++ | 849 lines | 800 code | 20 blank | 29 comment | 90 complexity | 084ccf6226188647a76e89dec04e5fa3 MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, MPL-2.0-no-copyleft-exception
1
2/******************************************************************************
3* MODULE : packrat_parser.cpp
4* DESCRIPTION: efficient packrat parsing
5* COPYRIGHT : (C) 2010 Joris van der Hoeven
6*******************************************************************************
7* This software falls under the GNU general public license version 3 or later.
8* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10******************************************************************************/
11
12#include "packrat_parser.hpp"
13#include "analyze.hpp"
14#include "drd_std.hpp"
15
// Tree object defined in another translation unit; it is not referenced by
// the code visible in this file.
extern tree the_et;
// Global flag, initially false.  Nothing in the visible part of this file
// reads or writes it -- presumably it is toggled elsewhere (TODO confirm).
bool packrat_invalid_colors= false;
18
19/******************************************************************************
20* Constructor
21******************************************************************************/
22
// Construct a parser for the grammar gr.  The grammar, production and
// property tables are initialized from gr; all per-input state starts out
// unset: no tree, an empty string, position tables whose default value is
// -1, no cursor, an empty token array and a memoization cache whose default
// entry is PACKRAT_UNDEFINED (the value parse () treats as "not computed").
packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
  grammar (gr->grammar),
  productions (gr->productions),
  properties (gr->properties),
  current_tree (packrat_uninit),
  current_string (""),
  current_start (-1),
  current_end (-1),
  current_path_pos (-1),
  current_pos_path (-1),
  current_cursor (-1),
  current_input (),
  current_cache (PACKRAT_UNDEFINED),
  current_production (packrat_uninit) {}
37
38packrat_parser
39make_packrat_parser (string lan, tree in) {
40 static string last_lan = "";
41 static tree last_in = "";
42 static packrat_parser last_par;
43 if (lan != last_lan || in != last_in) {
44 packrat_grammar gr= find_packrat_grammar (lan);
45 last_lan = lan;
46 last_in = copy (in);
47 last_par = packrat_parser (gr, in);
48 }
49 return last_par;
50}
51
52packrat_parser
53make_packrat_parser (string lan, tree in, path in_pos) {
54 static string last_lan = "";
55 static tree last_in = "";
56 static path last_in_pos= path ();
57 static packrat_parser last_par;
58 if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
59 packrat_grammar gr= find_packrat_grammar (lan);
60 last_lan = lan;
61 last_in = copy (in);
62 last_in_pos= copy (last_in_pos);
63 last_par = packrat_parser (gr, in, in_pos);
64 }
65 return last_par;
66}
67
68/******************************************************************************
69* Setting up the input
70******************************************************************************/
71
72void
73packrat_parser_rep::set_input (tree t) {
74 current_string= "";
75 current_tree = t;
76 serialize (t, path ());
77 if (DEBUG_FLATTEN)
78 cout << "Input " << current_string << "\n";
79 current_input= encode_tokens (current_string);
80}
81
82void
83packrat_parser_rep::set_cursor (path p) {
84 if (is_nil (p)) current_cursor= -1;
85 else current_cursor= encode_tree_position (p);
86 //cout << current_input << ", " << current_cursor << "\n";
87}
88
89/******************************************************************************
90* Encoding and decoding of cursor positions in the input
91******************************************************************************/
92
93C
94packrat_parser_rep::encode_string_position (int i) {
95 if (i < 0) return PACKRAT_FAILED;
96 int j=0;
97 C k=0;
98 while (j<i && j<N(current_string)) {
99 tm_char_forwards (current_string, j);
100 k++;
101 }
102 return k;
103}
104
105int
106packrat_parser_rep::encode_path (tree t, path p, path pos) {
107 //cout << "Search " << pos << " in " << t << ", " << p << "\n";
108 //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
109 if (is_nil (pos) || !current_start->contains (p)) return -1;
110 else if (is_atomic (t)) {
111 if (current_path_pos->contains (p * pos))
112 return current_path_pos[p * pos];
113 else if (pos->item < 0 || pos->item > N(t->label)) return -1;
114 return current_start[p] + pos->item;
115 }
116 else {
117 if (pos == path (0)) return current_start[p];
118 if (pos == path (1)) return current_end[p];
119 if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
120 return encode_path (t[pos->item], p * pos->item, pos->next);
121 }
122}
123
124C
125packrat_parser_rep::encode_tree_position (path p) {
126 if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
127 int i= encode_path (current_tree, path (), p);
128 return encode_string_position (i);
129}
130
131int
132packrat_parser_rep::decode_string_position (C pos) {
133 //cout << "Decode " << pos << "\n";
134 if (pos == PACKRAT_FAILED) return -1;
135 int i=0;
136 C k=0;
137 while (i<N(current_string) && k<pos) {
138 tm_char_forwards (current_string, i);
139 k++;
140 }
141 return i;
142}
143
144path
145packrat_parser_rep::decode_path (tree t, path p, int pos) {
146 //cout << "Search " << pos << " in " << t << ", " << p << "\n";
147 //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
148 if (is_atomic (t)) {
149 if (current_pos_path->contains (pos))
150 return current_pos_path[pos];
151 else return p * (pos - current_start[p]);
152 }
153 else {
154 for (int i=0; i<N(t); i++)
155 if (pos >= current_start[p*i] && pos <= current_end[p*i])
156 return decode_path (t[i], p * i, pos);
157 if (pos <= current_start[p]) return p * 0;
158 if (pos >= current_end[p]) return p * 1;
159 return p * 0;
160 }
161}
162
163path
164packrat_parser_rep::decode_tree_position (C pos) {
165 int i= decode_string_position (pos);
166 if (i < 0) return path (i);
167 return decode_path (current_tree, path (), i);
168}
169
170/******************************************************************************
171* Packrat parsing
172******************************************************************************/
173
174bool
175starts (tree t, string s) {
176 return is_atomic (t) && starts (t->label, s);
177}
178
// Memoized recursive-descent parse of the symbol sym at token position pos.
// Returns the token position reached after a successful match, or
// PACKRAT_FAILED when sym does not match at pos.  Results are stored in
// current_cache, so each (sym, pos) pair is computed at most once.
C
packrat_parser_rep::parse (C sym, C pos) {
  // Cache key: sym in the upper 32 bits, sym^pos in the lower part.
  D key= (((D) sym) << 32) + ((D) (sym^pos));
  C im = current_cache [key];
  if (im != PACKRAT_UNDEFINED) {
    //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
    return im;
  }
  // Provisionally record a failure before descending: a re-entrant call for
  // the same (sym, pos) then finds this entry in the cache and returns it
  // instead of recursing again.
  current_cache (key)= PACKRAT_FAILED;
  if (DEBUG_PACKRAT)
    cout << "Parse " << packrat_decode[sym] << " at " << pos << INDENT << LF;
  if (sym >= PACKRAT_TM_OPEN) {
    // Symbols from PACKRAT_TM_OPEN upwards are looked up in the grammar;
    // inst[0] selects the rule kind and the remaining items are its operands.
    array<C> inst= grammar [sym];
    //cout << "Parse " << inst << " at " << pos << LF;
    switch (inst[0]) {
    case PACKRAT_OR:
      // Ordered choice: the first alternative that matches wins.
      im= PACKRAT_FAILED;
      for (int i=1; i<N(inst); i++) {
        im= parse (inst[i], pos);
        if (im != PACKRAT_FAILED) break;
      }
      break;
    case PACKRAT_CONCAT:
      // Sequence: every item must match, each starting where the previous
      // one ended.
      im= pos;
      for (int i=1; i<N(inst); i++) {
        im= parse (inst[i], im);
        if (im == PACKRAT_FAILED) break;
      }
      break;
    case PACKRAT_WHILE:
      // Zero or more repetitions; stops on failure or when an iteration
      // makes no forward progress.
      im= pos;
      while (true) {
        C next= parse (inst[1], im);
        if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
        im= next;
      }
      break;
    case PACKRAT_REPEAT:
      // One or more repetitions: the first match is mandatory, further
      // ones are taken as in the PACKRAT_WHILE case.
      im= parse (inst[1], pos);
      if (im != PACKRAT_FAILED)
        while (true) {
          C next= parse (inst[1], im);
          if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
          im= next;
        }
      break;
    case PACKRAT_RANGE:
      // Single token whose code lies in the inclusive range inst[1]..inst[2].
      if (pos < N (current_input) &&
          current_input [pos] >= inst[1] &&
          current_input [pos] <= inst[2])
        im= pos + 1;
      else im= PACKRAT_FAILED;
      break;
    case PACKRAT_NOT:
      // Negative lookahead: succeeds without consuming input when inst[1]
      // does not match.
      if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
      else im= PACKRAT_FAILED;
      break;
    case PACKRAT_EXCEPT:
      // inst[1] must match and inst[2] must not match at the same position.
      im= parse (inst[1], pos);
      if (im != PACKRAT_FAILED)
        if (parse (inst[2], pos) != PACKRAT_FAILED)
          im= PACKRAT_FAILED;
      break;
    case PACKRAT_TM_OPEN:
      // Single token whose decoded form is an atomic tree starting with "<\".
      if (pos < N (current_input) &&
          starts (packrat_decode[current_input[pos]], "<\\"))
        im= pos + 1;
      else im= PACKRAT_FAILED;
      break;
    case PACKRAT_TM_ANY:
      // Repeatedly consume either an opening token followed by arguments
      // and a "</>" token, or a leaf run, until the position stops changing.
      // NOTE(review): if the "</>" parse fails, im becomes PACKRAT_FAILED
      // and the next iteration parses at that position -- confirm this is
      // the intended behaviour.
      im= pos;
      while (true) {
        C old= im;
        im= parse (PACKRAT_TM_OPEN, old);
        if (im == PACKRAT_FAILED)
          im= parse (PACKRAT_TM_LEAF, old);
        else {
          im= parse (PACKRAT_TM_ARGS, im);
          if (im != PACKRAT_FAILED)
            im= parse (encode_token ("</>"), im);
        }
        if (old == im) break;
      }
      break;
    case PACKRAT_TM_ARGS:
      // PACKRAT_TM_ANY items separated by "<|>" tokens.
      im= parse (PACKRAT_TM_ANY, pos);
      while (im < N (current_input))
        if (current_input[im] != encode_token ("<|>")) break;
        else im= parse (PACKRAT_TM_ANY, im + 1);
      break;
    case PACKRAT_TM_LEAF:
      // Possibly empty run of tokens up to (not including) the next
      // opening token, "<|>" or "</>"; never fails.
      im= pos;
      while (im < N (current_input)) {
        tree t= packrat_decode[current_input[im]];
        if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
        else im++;
      }
      break;
    case PACKRAT_TM_CHAR:
      // Exactly one token that is neither an opening token, "<|>" nor "</>".
      if (pos >= N (current_input)) im= PACKRAT_FAILED;
      else {
        tree t= packrat_decode[current_input[pos]];
        if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
        else im= pos + 1;
      }
      break;
    case PACKRAT_TM_CURSOR:
      // Zero-width match that succeeds only at the recorded cursor position.
      if (pos == current_cursor) im= pos;
      else im= PACKRAT_FAILED;
      break;
    case PACKRAT_TM_FAIL:
      // Never matches.
      im= PACKRAT_FAILED;
      break;
    default:
      // Any other first item is itself a symbol: parse it in place of sym.
      im= parse (inst[0], pos);
      break;
    }
  }
  else {
    // Symbols below PACKRAT_TM_OPEN are matched directly against the token
    // at pos.
    if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
    else im= PACKRAT_FAILED;
  }
  // Replace the provisional failure by the definitive result.
  current_cache (key)= im;
  if (DEBUG_PACKRAT)
    cout << UNINDENT << "Parsed " << packrat_decode[sym]
         << " at " << pos << " -> " << im << LF;
  return im;
}
307
308/******************************************************************************
309* Inspecting the parse tree
310******************************************************************************/
311
312void
313packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
314 syms= array<C> ();
315 poss= array<C> ();
316 C next= parse (sym, pos);
317 if (next == PACKRAT_FAILED) return;
318 if (sym >= PACKRAT_TM_OPEN) {
319 array<C> inst= grammar [sym];
320 //cout << "Parse " << inst << " at " << pos << LF;
321 switch (inst[0]) {
322 case PACKRAT_OR:
323 for (int i=1; i<N(inst); i++)
324 if (parse (inst[i], pos) != PACKRAT_FAILED) {
325 inspect (inst[i], pos, syms, poss);
326 break;
327 }
328 break;
329 case PACKRAT_CONCAT:
330 for (int i=1; i<N(inst); i++) {
331 next= parse (inst[i], pos);
332 if (next == PACKRAT_FAILED) break;
333 syms << inst[i];
334 poss << pos;
335 pos= next;
336 }
337 break;
338 case PACKRAT_WHILE:
339 case PACKRAT_REPEAT:
340 while (true) {
341 C next= parse (inst[1], pos);
342 if (next == PACKRAT_FAILED) break;
343 syms << inst[1];
344 poss << pos;
345 pos= next;
346 }
347 break;
348 case PACKRAT_RANGE:
349 case PACKRAT_NOT:
350 break;
351 case PACKRAT_EXCEPT:
352 inspect (inst[1], pos, syms, poss);
353 break;
354 case PACKRAT_TM_OPEN:
355 case PACKRAT_TM_ANY:
356 case PACKRAT_TM_ARGS:
357 case PACKRAT_TM_LEAF:
358 case PACKRAT_TM_CHAR:
359 case PACKRAT_TM_CURSOR:
360 case PACKRAT_TM_FAIL:
361 break;
362 default:
363 inspect (inst[0], pos, syms, poss);
364 break;
365 }
366 }
367}
368
369bool
370packrat_parser_rep::is_left_recursive (C sym) {
371 if (sym < PACKRAT_TM_OPEN) return false;
372 array<C> inst= grammar [sym];
373 if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
374 if (inst[1] < PACKRAT_TM_OPEN) return false;
375 tree t= packrat_decode[inst[1]];
376 return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
377}
378
379bool
380packrat_parser_rep::is_associative (C sym) {
381 static C prop= encode_symbol (compound ("property", "associativity"));
382 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
383 if (!properties->contains (key)) return false;
384 return properties[key] == "associative";
385}
386
387bool
388packrat_parser_rep::is_anti_associative (C sym) {
389 static C prop= encode_symbol (compound ("property", "associativity"));
390 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
391 if (!properties->contains (key)) return false;
392 return properties[key] == "anti-associative";
393}
394
// Stub: no symbol is currently considered list-like, so this always
// returns false.
bool
packrat_parser_rep::is_list_like (C sym) {
  (void) sym; // parameter intentionally unused
  return false;
}
400
401bool
402packrat_parser_rep::is_selectable (C sym) {
403 tree t= packrat_decode[sym];
404 if (is_compound (t, "partial", 1)) return true;
405 if (!is_compound (t, "symbol", 1)) return false;
406 string s= t[0]->label;
407 return !ends (s, "-head") && !ends (s, "-tail");
408}
409
410/******************************************************************************
411* Finding all enclosing structures at a given position
412******************************************************************************/
413
414void
415packrat_parser_rep::context
416 (C sym, C pos, C w1, C w2, int mode,
417 array<C>& kind, array<C>& begin, array<C>& end)
418{
419 C next= parse (sym, pos);
420 if (next < 0 || pos > w1 || next < w2) return;
421
422 if (mode == 2 && (pos == w1 || next == w2)) {
423 static C prop= encode_symbol (compound ("property", "operator"));
424 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
425 if (properties->contains (key)) return;
426 }
427
428 if (true) {
429 static C sel_prop= encode_symbol (compound ("property", "selectable"));
430 static C foc_prop= encode_symbol (compound ("property", "focus"));
431 D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
432 D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
433 if (properties->contains (sel_key) &&
434 properties[sel_key] == "inside");
435 else if (properties->contains (foc_key) &&
436 properties[foc_key] == "disallow" &&
437 mode == 2);
438 else {
439 int n= N(kind);
440 if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
441 if (is_selectable (sym) || !is_selectable (kind[n-1]))
442 kind[n-1]= sym;
443 }
444 else {
445 kind << sym;
446 begin << pos;
447 end << next;
448 }
449 }
450 }
451
452 if (mode >= 0) {
453 static C prop= encode_symbol (compound ("property", "atomic"));
454 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
455 if (properties->contains (key)) return;
456 }
457
458 if (is_left_recursive (sym) && mode == 0) {
459 array<C> inst= grammar [sym];
460 C before= pos;
461 C middle= parse (inst[1], before);
462 if (middle == PACKRAT_FAILED) return;
463 C after = parse (inst[2], middle);
464 if (after == PACKRAT_FAILED) return;
465 array<C> csym;
466 array<C> cpos;
467 inspect (inst[2], middle, csym, cpos);
468 csym= append (inst[1], csym);
469 cpos= append (before, cpos);
470 cpos << after;
471 int i1, i2;
472 for (i1=0; i1<N(csym); i1++)
473 if (cpos[i1+1] > w1) break;
474 for (i2=i1; i2<N(csym); i2++)
475 if (cpos[i2+1] >= w2) break;
476 if (i1 == i2) {
477 int i, n= N(kind);
478 context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
479 for (i=n; i<N(kind); i++)
480 if (is_selectable (kind[i]))
481 return;
482 kind -> resize (n);
483 begin -> resize (n);
484 end -> resize (n);
485 }
486 C alt_start= -1;
487 while (i1 > 0) {
488 array<C> ccsym;
489 array<C> ccpos;
490 inspect (csym[i1], cpos[i1], ccsym, ccpos);
491 if (N(ccsym)>1 && is_associative (ccsym[0])) {
492 if (w1 >= ccpos[1]) alt_start= ccpos[1];
493 break;
494 }
495 if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
496 i1--;
497 }
498 tree sel= compound ("partial", packrat_decode[sym]);
499 kind << encode_symbol (sel);
500 begin << (alt_start<0? cpos[i1]: alt_start);
501 end << cpos[i2+1];
502 return;
503 }
504
505 if (sym >= PACKRAT_TM_OPEN) {
506 array<C> inst= grammar [sym];
507 //cout << "Context " << inst << " at " << pos << LF;
508 switch (inst[0]) {
509 case PACKRAT_OR:
510 for (int i=1; i<N(inst); i++)
511 if (parse (inst[i], pos) != PACKRAT_FAILED) {
512 context (inst[i], pos, w1, w2, mode, kind, begin, end);
513 break;
514 }
515 break;
516 case PACKRAT_CONCAT:
517 for (int i=1; i<N(inst); i++) {
518 next= parse (inst[i], pos);
519 if (next == PACKRAT_FAILED) break;
520 if (pos <= w1 && w2 <= next)
521 context (inst[i], pos, w1, w2, mode, kind, begin, end);
522 if (next > w2) break;
523 pos= next;
524 }
525 break;
526 case PACKRAT_WHILE:
527 case PACKRAT_REPEAT:
528 while (true) {
529 C next= parse (inst[1], pos);
530 if (next == PACKRAT_FAILED) break;
531 if (pos <= w1 && w2 <= next)
532 context (inst[1], pos, w1, w2, mode, kind, begin, end);
533 if (next > w2) break;
534 pos= next;
535 }
536 break;
537 case PACKRAT_RANGE:
538 case PACKRAT_NOT:
539 break;
540 case PACKRAT_EXCEPT:
541 context (inst[1], pos, w1, w2, mode, kind, begin, end);
542 break;
543 case PACKRAT_TM_OPEN:
544 case PACKRAT_TM_ANY:
545 case PACKRAT_TM_ARGS:
546 case PACKRAT_TM_LEAF:
547 case PACKRAT_TM_CHAR:
548 case PACKRAT_TM_CURSOR:
549 case PACKRAT_TM_FAIL:
550 break;
551 default:
552 context (inst[0], pos, w1, w2, mode, kind, begin, end);
553 break;
554 }
555 }
556}
557
558void
559packrat_parser_rep::compress
560 (array<C>& kind, array<C>& begin, array<C>& end)
561{
562 array<C> new_kind, new_begin, new_end;
563 for (int i=0; i<N(kind); i++) {
564 int n= N(new_kind);
565 if (is_selectable (kind[i]))
566 if (N(new_kind) == 0 ||
567 new_kind [n-1] != kind[i] ||
568 (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
569 new_kind << kind[i];
570 new_begin << begin[i];
571 new_end << end[i];
572 }
573 }
574 kind = new_kind;
575 begin= new_begin;
576 end = new_end;
577}
578
579/******************************************************************************
580* Syntax highlighting
581******************************************************************************/
582
583void
584packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
585 if (p1 == p2);
586 else if (is_atomic (t)) {
587 string s= t->label;
588 ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
589 ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
590 "invalid selection");
591 attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
592 }
593 else if (N(t) == 0);
594 else {
595 ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
596 "invalid selection");
597 if (p1 == path (0)) p1= path (0, 0);
598 if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
599 for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
600 path q1= (i == p1->item? p1->next: path (0));
601 path q2= (i == p2->item? p2->next: path (right_index (t[i])));
602 highlight (t[i], tp * i, q1, q2, col);
603 }
604 }
605}
606
607void
608packrat_parser_rep::highlight (C sym, C pos) {
609 C next= parse (sym, pos);
610 if (next < 0) return;
611 if (sym >= PACKRAT_SYMBOLS) {
612 static C prop= encode_symbol (compound ("property", "highlight"));
613 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
614 if (properties->contains (key)) {
615 int col = encode_color (properties [key]);
616 path start= decode_tree_position (pos);
617 path end = decode_tree_position (next);
618 highlight (current_tree, path (), start, end, col);
619 static C prop= encode_symbol (compound ("property", "transparent"));
620 D key = (((D) prop) << 32) + ((D) (sym ^ prop));
621 if (!properties->contains (key)) return;
622 }
623 }
624
625 if (sym >= PACKRAT_TM_OPEN) {
626 array<C> inst= grammar [sym];
627 //cout << "Parse " << inst << " at " << pos << LF;
628 switch (inst[0]) {
629 case PACKRAT_OR:
630 for (int i=1; i<N(inst); i++)
631 if (parse (inst[i], pos) != PACKRAT_FAILED) {
632 highlight (inst[i], pos);
633 break;
634 }
635 break;
636 case PACKRAT_CONCAT:
637 for (int i=1; i<N(inst); i++) {
638 next= parse (inst[i], pos);
639 highlight (inst[i], pos);
640 pos= next;
641 }
642 break;
643 case PACKRAT_WHILE:
644 case PACKRAT_REPEAT:
645 while (true) {
646 C next= parse (inst[1], pos);
647 if (next == PACKRAT_FAILED) break;
648 highlight (inst[1], pos);
649 if (next == pos) break;
650 pos= next;
651 }
652 break;
653 case PACKRAT_RANGE:
654 case PACKRAT_NOT:
655 break;
656 case PACKRAT_EXCEPT:
657 highlight (inst[1], pos);
658 break;
659 case PACKRAT_TM_OPEN:
660 case PACKRAT_TM_ANY:
661 case PACKRAT_TM_ARGS:
662 case PACKRAT_TM_LEAF:
663 case PACKRAT_TM_CHAR:
664 case PACKRAT_TM_CURSOR:
665 case PACKRAT_TM_FAIL:
666 break;
667 default:
668 highlight (inst[0], pos);
669 break;
670 }
671 }
672}
673
674/******************************************************************************
675* Memoized and accelerated highlighting
676******************************************************************************/
677
678static bool
679empty_line (tree t) {
680 if (!is_atomic (t)) return false;
681 string s= t->label;
682 for (int i=0; i<N(s); i++)
683 if (s[i] != ' ') return false;
684 return true;
685}
686
687static bool
688consistent_portion (tree t, int begin, int end) {
689 int level= 0;
690 for (int i=begin; i<end; i++)
691 if (is_atomic (t[i])) {
692 string s= t[i]->label;
693 for (int j=0; j<N(s); j++)
694 switch (s[j]) {
695 case '(': level++; break;
696 case ')': if (level <= 0) return false; level--; break;
697 case '[': level++; break;
698 case ']': if (level <= 0) return false; level--; break;
699 case '{': level++; break;
700 case '}': if (level <= 0) return false; level--; break;
701 default : break;
702 }
703 }
704 return level == 0;
705}
706
707static void
708consistent_enlargement (tree t, int& begin, int& end) {
709 while (begin > 0 || end < N(t)) {
710 while (begin > 0 && !empty_line (t[begin-1])) begin--;
711 while (end < N(t) && !empty_line (t[end ])) end++;
712 if (consistent_portion (t, begin, end)) return;
713 //cout << "Inconsistent " << begin << " -- " << end << "\n";
714 begin= max (0 , begin - max (end - begin, 1));
715 end = min (N(t), end + max (end - begin, 1));
716 //cout << " Try " << begin << " -- " << end << "\n";
717 }
718}
719
720/******************************************************************************
721* User interface
722******************************************************************************/
723
724path
725packrat_parse (string lan, string sym, tree in) {
726 packrat_parser par= make_packrat_parser (lan, in);
727 C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
728 return par->decode_tree_position (pos);
729}
730
731bool
732packrat_correct (string lan, string sym, tree in) {
733 packrat_parser par= make_packrat_parser (lan, in);
734 C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
735 return pos == N(par->current_input);
736}
737
738bool
739packrat_available_path (string lan, tree in, path in_p) {
740 packrat_parser par= make_packrat_parser (lan, in);
741 return par->current_start->contains (in_p);
742}
743
744object
745packrat_context (string lan, string s, tree in, path in_pos) {
746 //cout << "Context " << in << " at " << in_pos
747 // << " (" << lan << ", " << s << ")" << LF;
748 packrat_parser par= make_packrat_parser (lan, in);
749 C sym= encode_symbol (compound ("symbol", s));
750 if (par->parse (sym, 0) != N(par->current_input))
751 par= make_packrat_parser (lan, in, in_pos);
752 C pos= par->encode_tree_position (in_pos);
753 if (pos == PACKRAT_FAILED) return object (false);
754 array<C> kind, begin, end;
755 par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
756 par->compress (kind, begin, end);
757 object ret= null_object ();
758 for (int i=0; i<N(kind); i++) {
759 object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
760 object x2 (par->decode_tree_position (begin[i]));
761 object x3 (par->decode_tree_position (end[i]));
762 ret= cons (list_object (x1, x2, x3), ret);
763 }
764 return ret;
765}
766
767bool
768packrat_select (string lan, string s, tree in, path in_pos,
769 path& p1, path& p2, int mode)
770{
771 // mode= 0: genuine semantic selection
772 // mode= 1: strictly larger selection for select_enlarge
773 // mode= 2: determine environment rectangles
774 if (path_less (p2, p1))
775 return packrat_select (lan, s, in, in_pos, p2, p1, mode);
776 //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
777 //<< " (" << lan << ", " << s << ")" << LF;
778 packrat_parser par= make_packrat_parser (lan, in);
779 C sym = encode_symbol (compound ("symbol", s));
780 if (par->parse (sym, 0) != N(par->current_input))
781 par= make_packrat_parser (lan, in, in_pos);
782 C pos1= par->encode_tree_position (p1);
783 C pos2= par->encode_tree_position (p2);
784 //cout << "Encoded " << pos1 << " -- " << pos2
785 // << " in " << par->current_string << LF;
786 if (par->parse (sym, 0) != N(par->current_input)) return false;
787 if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
788 array<C> kind, begin, end;
789 C pos0= pos1;
790 if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
791 par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
792 //for (int i=0; i<N(kind); i++)
793 // cout << i << ":\t"
794 // << par->decode_tree_position (begin[i]) << "\t"
795 // << par->decode_tree_position (end[i]) << "\t"
796 // << packrat_decode[kind[i]] << LF;
797 par->compress (kind, begin, end);
798 int n= N(kind);
799 if (n == 0) return false;
800 if (mode == 1) {
801 if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
802 if (n == 0) return false;
803 }
804 p1= par->decode_tree_position (begin[n-1]);
805 p2= par->decode_tree_position (end[n-1]);
806 //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
807 return true;
808}
809
810void
811packrat_highlight_subtree (string lan, string s, tree in) {
812 //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
813 int hl_lan= packrat_abbreviation (lan, s);
814 if (hl_lan == 0) return;
815 packrat_parser par= make_packrat_parser (lan, in);
816 C sym = encode_symbol (compound ("symbol", s));
817 if (par->parse (sym, 0) == N(par->current_input)) {
818 par->current_hl_lan= hl_lan;
819 par->highlight (sym, 0);
820 }
821}
822
823void
824packrat_highlight (string lan, string s, tree in) {
825 int hl_lan= packrat_abbreviation (lan, s);
826 if (hl_lan == 0) return;
827 //cout << "Highlight " << in << "\n";
828 if (is_func (in, DOCUMENT)) {
829 int i, begin, end;
830 for (begin=0; begin<N(in); begin++)
831 if (!has_highlight (in[begin], hl_lan))
832 break;
833 for (end=N(in)-1; end>begin; end--)
834 if (!has_highlight (in[end-1], hl_lan))
835 break;
836 consistent_enlargement (in, begin, end);
837 for (i=begin; i<end; i++)
838 detach_highlight (in[i], hl_lan);
839 attach_highlight (in, hl_lan);
840 packrat_highlight_subtree (lan, s, in (begin, end));
841 }
842 else {
843 if (is_compound (in))
844 for (int i=0; i<N(in); i++)
845 detach_highlight (in[i], hl_lan);
846 attach_highlight (in, hl_lan);
847 packrat_highlight_subtree (lan, s, in);
848 }
849}