PageRenderTime 55ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/TeXmacs-1.0.7.11-src/src/System/Language/packrat_parser.cpp

#
C++ | 849 lines | 800 code | 20 blank | 29 comment | 90 complexity | 084ccf6226188647a76e89dec04e5fa3 MD5 | raw file
Possible License(s): GPL-3.0, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. /******************************************************************************
  2. * MODULE : packrat_parser.cpp
  3. * DESCRIPTION: efficient packrat parsing
  4. * COPYRIGHT : (C) 2010 Joris van der Hoeven
  5. *******************************************************************************
  6. * This software falls under the GNU general public license version 3 or later.
  7. * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
  8. * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
  9. ******************************************************************************/
  10. #include "packrat_parser.hpp"
  11. #include "analyze.hpp"
  12. #include "drd_std.hpp"
  13. extern tree the_et;
  14. bool packrat_invalid_colors= false;
  15. /******************************************************************************
  16. * Constructor
  17. ******************************************************************************/
  18. packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
  19. grammar (gr->grammar),
  20. productions (gr->productions),
  21. properties (gr->properties),
  22. current_tree (packrat_uninit),
  23. current_string (""),
  24. current_start (-1),
  25. current_end (-1),
  26. current_path_pos (-1),
  27. current_pos_path (-1),
  28. current_cursor (-1),
  29. current_input (),
  30. current_cache (PACKRAT_UNDEFINED),
  31. current_production (packrat_uninit) {}
  32. packrat_parser
  33. make_packrat_parser (string lan, tree in) {
  34. static string last_lan = "";
  35. static tree last_in = "";
  36. static packrat_parser last_par;
  37. if (lan != last_lan || in != last_in) {
  38. packrat_grammar gr= find_packrat_grammar (lan);
  39. last_lan = lan;
  40. last_in = copy (in);
  41. last_par = packrat_parser (gr, in);
  42. }
  43. return last_par;
  44. }
  45. packrat_parser
  46. make_packrat_parser (string lan, tree in, path in_pos) {
  47. static string last_lan = "";
  48. static tree last_in = "";
  49. static path last_in_pos= path ();
  50. static packrat_parser last_par;
  51. if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
  52. packrat_grammar gr= find_packrat_grammar (lan);
  53. last_lan = lan;
  54. last_in = copy (in);
  55. last_in_pos= copy (last_in_pos);
  56. last_par = packrat_parser (gr, in, in_pos);
  57. }
  58. return last_par;
  59. }
  60. /******************************************************************************
  61. * Setting up the input
  62. ******************************************************************************/
  63. void
  64. packrat_parser_rep::set_input (tree t) {
  65. current_string= "";
  66. current_tree = t;
  67. serialize (t, path ());
  68. if (DEBUG_FLATTEN)
  69. cout << "Input " << current_string << "\n";
  70. current_input= encode_tokens (current_string);
  71. }
  72. void
  73. packrat_parser_rep::set_cursor (path p) {
  74. if (is_nil (p)) current_cursor= -1;
  75. else current_cursor= encode_tree_position (p);
  76. //cout << current_input << ", " << current_cursor << "\n";
  77. }
  78. /******************************************************************************
  79. * Encoding and decoding of cursor positions in the input
  80. ******************************************************************************/
  81. C
  82. packrat_parser_rep::encode_string_position (int i) {
  83. if (i < 0) return PACKRAT_FAILED;
  84. int j=0;
  85. C k=0;
  86. while (j<i && j<N(current_string)) {
  87. tm_char_forwards (current_string, j);
  88. k++;
  89. }
  90. return k;
  91. }
  92. int
  93. packrat_parser_rep::encode_path (tree t, path p, path pos) {
  94. //cout << "Search " << pos << " in " << t << ", " << p << "\n";
  95. //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
  96. if (is_nil (pos) || !current_start->contains (p)) return -1;
  97. else if (is_atomic (t)) {
  98. if (current_path_pos->contains (p * pos))
  99. return current_path_pos[p * pos];
  100. else if (pos->item < 0 || pos->item > N(t->label)) return -1;
  101. return current_start[p] + pos->item;
  102. }
  103. else {
  104. if (pos == path (0)) return current_start[p];
  105. if (pos == path (1)) return current_end[p];
  106. if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
  107. return encode_path (t[pos->item], p * pos->item, pos->next);
  108. }
  109. }
  110. C
  111. packrat_parser_rep::encode_tree_position (path p) {
  112. if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
  113. int i= encode_path (current_tree, path (), p);
  114. return encode_string_position (i);
  115. }
  116. int
  117. packrat_parser_rep::decode_string_position (C pos) {
  118. //cout << "Decode " << pos << "\n";
  119. if (pos == PACKRAT_FAILED) return -1;
  120. int i=0;
  121. C k=0;
  122. while (i<N(current_string) && k<pos) {
  123. tm_char_forwards (current_string, i);
  124. k++;
  125. }
  126. return i;
  127. }
  128. path
  129. packrat_parser_rep::decode_path (tree t, path p, int pos) {
  130. //cout << "Search " << pos << " in " << t << ", " << p << "\n";
  131. //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
  132. if (is_atomic (t)) {
  133. if (current_pos_path->contains (pos))
  134. return current_pos_path[pos];
  135. else return p * (pos - current_start[p]);
  136. }
  137. else {
  138. for (int i=0; i<N(t); i++)
  139. if (pos >= current_start[p*i] && pos <= current_end[p*i])
  140. return decode_path (t[i], p * i, pos);
  141. if (pos <= current_start[p]) return p * 0;
  142. if (pos >= current_end[p]) return p * 1;
  143. return p * 0;
  144. }
  145. }
  146. path
  147. packrat_parser_rep::decode_tree_position (C pos) {
  148. int i= decode_string_position (pos);
  149. if (i < 0) return path (i);
  150. return decode_path (current_tree, path (), i);
  151. }
  152. /******************************************************************************
  153. * Packrat parsing
  154. ******************************************************************************/
  155. bool
  156. starts (tree t, string s) {
  157. return is_atomic (t) && starts (t->label, s);
  158. }
  159. C
  160. packrat_parser_rep::parse (C sym, C pos) {
  161. D key= (((D) sym) << 32) + ((D) (sym^pos));
  162. C im = current_cache [key];
  163. if (im != PACKRAT_UNDEFINED) {
  164. //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
  165. return im;
  166. }
  167. current_cache (key)= PACKRAT_FAILED;
  168. if (DEBUG_PACKRAT)
  169. cout << "Parse " << packrat_decode[sym] << " at " << pos << INDENT << LF;
  170. if (sym >= PACKRAT_TM_OPEN) {
  171. array<C> inst= grammar [sym];
  172. //cout << "Parse " << inst << " at " << pos << LF;
  173. switch (inst[0]) {
  174. case PACKRAT_OR:
  175. im= PACKRAT_FAILED;
  176. for (int i=1; i<N(inst); i++) {
  177. im= parse (inst[i], pos);
  178. if (im != PACKRAT_FAILED) break;
  179. }
  180. break;
  181. case PACKRAT_CONCAT:
  182. im= pos;
  183. for (int i=1; i<N(inst); i++) {
  184. im= parse (inst[i], im);
  185. if (im == PACKRAT_FAILED) break;
  186. }
  187. break;
  188. case PACKRAT_WHILE:
  189. im= pos;
  190. while (true) {
  191. C next= parse (inst[1], im);
  192. if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
  193. im= next;
  194. }
  195. break;
  196. case PACKRAT_REPEAT:
  197. im= parse (inst[1], pos);
  198. if (im != PACKRAT_FAILED)
  199. while (true) {
  200. C next= parse (inst[1], im);
  201. if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
  202. im= next;
  203. }
  204. break;
  205. case PACKRAT_RANGE:
  206. if (pos < N (current_input) &&
  207. current_input [pos] >= inst[1] &&
  208. current_input [pos] <= inst[2])
  209. im= pos + 1;
  210. else im= PACKRAT_FAILED;
  211. break;
  212. case PACKRAT_NOT:
  213. if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
  214. else im= PACKRAT_FAILED;
  215. break;
  216. case PACKRAT_EXCEPT:
  217. im= parse (inst[1], pos);
  218. if (im != PACKRAT_FAILED)
  219. if (parse (inst[2], pos) != PACKRAT_FAILED)
  220. im= PACKRAT_FAILED;
  221. break;
  222. case PACKRAT_TM_OPEN:
  223. if (pos < N (current_input) &&
  224. starts (packrat_decode[current_input[pos]], "<\\"))
  225. im= pos + 1;
  226. else im= PACKRAT_FAILED;
  227. break;
  228. case PACKRAT_TM_ANY:
  229. im= pos;
  230. while (true) {
  231. C old= im;
  232. im= parse (PACKRAT_TM_OPEN, old);
  233. if (im == PACKRAT_FAILED)
  234. im= parse (PACKRAT_TM_LEAF, old);
  235. else {
  236. im= parse (PACKRAT_TM_ARGS, im);
  237. if (im != PACKRAT_FAILED)
  238. im= parse (encode_token ("</>"), im);
  239. }
  240. if (old == im) break;
  241. }
  242. break;
  243. case PACKRAT_TM_ARGS:
  244. im= parse (PACKRAT_TM_ANY, pos);
  245. while (im < N (current_input))
  246. if (current_input[im] != encode_token ("<|>")) break;
  247. else im= parse (PACKRAT_TM_ANY, im + 1);
  248. break;
  249. case PACKRAT_TM_LEAF:
  250. im= pos;
  251. while (im < N (current_input)) {
  252. tree t= packrat_decode[current_input[im]];
  253. if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
  254. else im++;
  255. }
  256. break;
  257. case PACKRAT_TM_CHAR:
  258. if (pos >= N (current_input)) im= PACKRAT_FAILED;
  259. else {
  260. tree t= packrat_decode[current_input[pos]];
  261. if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
  262. else im= pos + 1;
  263. }
  264. break;
  265. case PACKRAT_TM_CURSOR:
  266. if (pos == current_cursor) im= pos;
  267. else im= PACKRAT_FAILED;
  268. break;
  269. case PACKRAT_TM_FAIL:
  270. im= PACKRAT_FAILED;
  271. break;
  272. default:
  273. im= parse (inst[0], pos);
  274. break;
  275. }
  276. }
  277. else {
  278. if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
  279. else im= PACKRAT_FAILED;
  280. }
  281. current_cache (key)= im;
  282. if (DEBUG_PACKRAT)
  283. cout << UNINDENT << "Parsed " << packrat_decode[sym]
  284. << " at " << pos << " -> " << im << LF;
  285. return im;
  286. }
  287. /******************************************************************************
  288. * Inspecting the parse tree
  289. ******************************************************************************/
  290. void
  291. packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
  292. syms= array<C> ();
  293. poss= array<C> ();
  294. C next= parse (sym, pos);
  295. if (next == PACKRAT_FAILED) return;
  296. if (sym >= PACKRAT_TM_OPEN) {
  297. array<C> inst= grammar [sym];
  298. //cout << "Parse " << inst << " at " << pos << LF;
  299. switch (inst[0]) {
  300. case PACKRAT_OR:
  301. for (int i=1; i<N(inst); i++)
  302. if (parse (inst[i], pos) != PACKRAT_FAILED) {
  303. inspect (inst[i], pos, syms, poss);
  304. break;
  305. }
  306. break;
  307. case PACKRAT_CONCAT:
  308. for (int i=1; i<N(inst); i++) {
  309. next= parse (inst[i], pos);
  310. if (next == PACKRAT_FAILED) break;
  311. syms << inst[i];
  312. poss << pos;
  313. pos= next;
  314. }
  315. break;
  316. case PACKRAT_WHILE:
  317. case PACKRAT_REPEAT:
  318. while (true) {
  319. C next= parse (inst[1], pos);
  320. if (next == PACKRAT_FAILED) break;
  321. syms << inst[1];
  322. poss << pos;
  323. pos= next;
  324. }
  325. break;
  326. case PACKRAT_RANGE:
  327. case PACKRAT_NOT:
  328. break;
  329. case PACKRAT_EXCEPT:
  330. inspect (inst[1], pos, syms, poss);
  331. break;
  332. case PACKRAT_TM_OPEN:
  333. case PACKRAT_TM_ANY:
  334. case PACKRAT_TM_ARGS:
  335. case PACKRAT_TM_LEAF:
  336. case PACKRAT_TM_CHAR:
  337. case PACKRAT_TM_CURSOR:
  338. case PACKRAT_TM_FAIL:
  339. break;
  340. default:
  341. inspect (inst[0], pos, syms, poss);
  342. break;
  343. }
  344. }
  345. }
  346. bool
  347. packrat_parser_rep::is_left_recursive (C sym) {
  348. if (sym < PACKRAT_TM_OPEN) return false;
  349. array<C> inst= grammar [sym];
  350. if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
  351. if (inst[1] < PACKRAT_TM_OPEN) return false;
  352. tree t= packrat_decode[inst[1]];
  353. return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
  354. }
  355. bool
  356. packrat_parser_rep::is_associative (C sym) {
  357. static C prop= encode_symbol (compound ("property", "associativity"));
  358. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  359. if (!properties->contains (key)) return false;
  360. return properties[key] == "associative";
  361. }
  362. bool
  363. packrat_parser_rep::is_anti_associative (C sym) {
  364. static C prop= encode_symbol (compound ("property", "associativity"));
  365. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  366. if (!properties->contains (key)) return false;
  367. return properties[key] == "anti-associative";
  368. }
  369. bool
  370. packrat_parser_rep::is_list_like (C sym) {
  371. (void) sym;
  372. return false;
  373. }
  374. bool
  375. packrat_parser_rep::is_selectable (C sym) {
  376. tree t= packrat_decode[sym];
  377. if (is_compound (t, "partial", 1)) return true;
  378. if (!is_compound (t, "symbol", 1)) return false;
  379. string s= t[0]->label;
  380. return !ends (s, "-head") && !ends (s, "-tail");
  381. }
  382. /******************************************************************************
  383. * Finding all enclosing structures at a given position
  384. ******************************************************************************/
  385. void
  386. packrat_parser_rep::context
  387. (C sym, C pos, C w1, C w2, int mode,
  388. array<C>& kind, array<C>& begin, array<C>& end)
  389. {
  390. C next= parse (sym, pos);
  391. if (next < 0 || pos > w1 || next < w2) return;
  392. if (mode == 2 && (pos == w1 || next == w2)) {
  393. static C prop= encode_symbol (compound ("property", "operator"));
  394. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  395. if (properties->contains (key)) return;
  396. }
  397. if (true) {
  398. static C sel_prop= encode_symbol (compound ("property", "selectable"));
  399. static C foc_prop= encode_symbol (compound ("property", "focus"));
  400. D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
  401. D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
  402. if (properties->contains (sel_key) &&
  403. properties[sel_key] == "inside");
  404. else if (properties->contains (foc_key) &&
  405. properties[foc_key] == "disallow" &&
  406. mode == 2);
  407. else {
  408. int n= N(kind);
  409. if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
  410. if (is_selectable (sym) || !is_selectable (kind[n-1]))
  411. kind[n-1]= sym;
  412. }
  413. else {
  414. kind << sym;
  415. begin << pos;
  416. end << next;
  417. }
  418. }
  419. }
  420. if (mode >= 0) {
  421. static C prop= encode_symbol (compound ("property", "atomic"));
  422. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  423. if (properties->contains (key)) return;
  424. }
  425. if (is_left_recursive (sym) && mode == 0) {
  426. array<C> inst= grammar [sym];
  427. C before= pos;
  428. C middle= parse (inst[1], before);
  429. if (middle == PACKRAT_FAILED) return;
  430. C after = parse (inst[2], middle);
  431. if (after == PACKRAT_FAILED) return;
  432. array<C> csym;
  433. array<C> cpos;
  434. inspect (inst[2], middle, csym, cpos);
  435. csym= append (inst[1], csym);
  436. cpos= append (before, cpos);
  437. cpos << after;
  438. int i1, i2;
  439. for (i1=0; i1<N(csym); i1++)
  440. if (cpos[i1+1] > w1) break;
  441. for (i2=i1; i2<N(csym); i2++)
  442. if (cpos[i2+1] >= w2) break;
  443. if (i1 == i2) {
  444. int i, n= N(kind);
  445. context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
  446. for (i=n; i<N(kind); i++)
  447. if (is_selectable (kind[i]))
  448. return;
  449. kind -> resize (n);
  450. begin -> resize (n);
  451. end -> resize (n);
  452. }
  453. C alt_start= -1;
  454. while (i1 > 0) {
  455. array<C> ccsym;
  456. array<C> ccpos;
  457. inspect (csym[i1], cpos[i1], ccsym, ccpos);
  458. if (N(ccsym)>1 && is_associative (ccsym[0])) {
  459. if (w1 >= ccpos[1]) alt_start= ccpos[1];
  460. break;
  461. }
  462. if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
  463. i1--;
  464. }
  465. tree sel= compound ("partial", packrat_decode[sym]);
  466. kind << encode_symbol (sel);
  467. begin << (alt_start<0? cpos[i1]: alt_start);
  468. end << cpos[i2+1];
  469. return;
  470. }
  471. if (sym >= PACKRAT_TM_OPEN) {
  472. array<C> inst= grammar [sym];
  473. //cout << "Context " << inst << " at " << pos << LF;
  474. switch (inst[0]) {
  475. case PACKRAT_OR:
  476. for (int i=1; i<N(inst); i++)
  477. if (parse (inst[i], pos) != PACKRAT_FAILED) {
  478. context (inst[i], pos, w1, w2, mode, kind, begin, end);
  479. break;
  480. }
  481. break;
  482. case PACKRAT_CONCAT:
  483. for (int i=1; i<N(inst); i++) {
  484. next= parse (inst[i], pos);
  485. if (next == PACKRAT_FAILED) break;
  486. if (pos <= w1 && w2 <= next)
  487. context (inst[i], pos, w1, w2, mode, kind, begin, end);
  488. if (next > w2) break;
  489. pos= next;
  490. }
  491. break;
  492. case PACKRAT_WHILE:
  493. case PACKRAT_REPEAT:
  494. while (true) {
  495. C next= parse (inst[1], pos);
  496. if (next == PACKRAT_FAILED) break;
  497. if (pos <= w1 && w2 <= next)
  498. context (inst[1], pos, w1, w2, mode, kind, begin, end);
  499. if (next > w2) break;
  500. pos= next;
  501. }
  502. break;
  503. case PACKRAT_RANGE:
  504. case PACKRAT_NOT:
  505. break;
  506. case PACKRAT_EXCEPT:
  507. context (inst[1], pos, w1, w2, mode, kind, begin, end);
  508. break;
  509. case PACKRAT_TM_OPEN:
  510. case PACKRAT_TM_ANY:
  511. case PACKRAT_TM_ARGS:
  512. case PACKRAT_TM_LEAF:
  513. case PACKRAT_TM_CHAR:
  514. case PACKRAT_TM_CURSOR:
  515. case PACKRAT_TM_FAIL:
  516. break;
  517. default:
  518. context (inst[0], pos, w1, w2, mode, kind, begin, end);
  519. break;
  520. }
  521. }
  522. }
  523. void
  524. packrat_parser_rep::compress
  525. (array<C>& kind, array<C>& begin, array<C>& end)
  526. {
  527. array<C> new_kind, new_begin, new_end;
  528. for (int i=0; i<N(kind); i++) {
  529. int n= N(new_kind);
  530. if (is_selectable (kind[i]))
  531. if (N(new_kind) == 0 ||
  532. new_kind [n-1] != kind[i] ||
  533. (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
  534. new_kind << kind[i];
  535. new_begin << begin[i];
  536. new_end << end[i];
  537. }
  538. }
  539. kind = new_kind;
  540. begin= new_begin;
  541. end = new_end;
  542. }
  543. /******************************************************************************
  544. * Syntax highlighting
  545. ******************************************************************************/
  546. void
  547. packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
  548. if (p1 == p2);
  549. else if (is_atomic (t)) {
  550. string s= t->label;
  551. ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
  552. ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
  553. "invalid selection");
  554. attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
  555. }
  556. else if (N(t) == 0);
  557. else {
  558. ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
  559. "invalid selection");
  560. if (p1 == path (0)) p1= path (0, 0);
  561. if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
  562. for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
  563. path q1= (i == p1->item? p1->next: path (0));
  564. path q2= (i == p2->item? p2->next: path (right_index (t[i])));
  565. highlight (t[i], tp * i, q1, q2, col);
  566. }
  567. }
  568. }
  569. void
  570. packrat_parser_rep::highlight (C sym, C pos) {
  571. C next= parse (sym, pos);
  572. if (next < 0) return;
  573. if (sym >= PACKRAT_SYMBOLS) {
  574. static C prop= encode_symbol (compound ("property", "highlight"));
  575. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  576. if (properties->contains (key)) {
  577. int col = encode_color (properties [key]);
  578. path start= decode_tree_position (pos);
  579. path end = decode_tree_position (next);
  580. highlight (current_tree, path (), start, end, col);
  581. static C prop= encode_symbol (compound ("property", "transparent"));
  582. D key = (((D) prop) << 32) + ((D) (sym ^ prop));
  583. if (!properties->contains (key)) return;
  584. }
  585. }
  586. if (sym >= PACKRAT_TM_OPEN) {
  587. array<C> inst= grammar [sym];
  588. //cout << "Parse " << inst << " at " << pos << LF;
  589. switch (inst[0]) {
  590. case PACKRAT_OR:
  591. for (int i=1; i<N(inst); i++)
  592. if (parse (inst[i], pos) != PACKRAT_FAILED) {
  593. highlight (inst[i], pos);
  594. break;
  595. }
  596. break;
  597. case PACKRAT_CONCAT:
  598. for (int i=1; i<N(inst); i++) {
  599. next= parse (inst[i], pos);
  600. highlight (inst[i], pos);
  601. pos= next;
  602. }
  603. break;
  604. case PACKRAT_WHILE:
  605. case PACKRAT_REPEAT:
  606. while (true) {
  607. C next= parse (inst[1], pos);
  608. if (next == PACKRAT_FAILED) break;
  609. highlight (inst[1], pos);
  610. if (next == pos) break;
  611. pos= next;
  612. }
  613. break;
  614. case PACKRAT_RANGE:
  615. case PACKRAT_NOT:
  616. break;
  617. case PACKRAT_EXCEPT:
  618. highlight (inst[1], pos);
  619. break;
  620. case PACKRAT_TM_OPEN:
  621. case PACKRAT_TM_ANY:
  622. case PACKRAT_TM_ARGS:
  623. case PACKRAT_TM_LEAF:
  624. case PACKRAT_TM_CHAR:
  625. case PACKRAT_TM_CURSOR:
  626. case PACKRAT_TM_FAIL:
  627. break;
  628. default:
  629. highlight (inst[0], pos);
  630. break;
  631. }
  632. }
  633. }
  634. /******************************************************************************
  635. * Memoized and accelerated highlighting
  636. ******************************************************************************/
  637. static bool
  638. empty_line (tree t) {
  639. if (!is_atomic (t)) return false;
  640. string s= t->label;
  641. for (int i=0; i<N(s); i++)
  642. if (s[i] != ' ') return false;
  643. return true;
  644. }
  645. static bool
  646. consistent_portion (tree t, int begin, int end) {
  647. int level= 0;
  648. for (int i=begin; i<end; i++)
  649. if (is_atomic (t[i])) {
  650. string s= t[i]->label;
  651. for (int j=0; j<N(s); j++)
  652. switch (s[j]) {
  653. case '(': level++; break;
  654. case ')': if (level <= 0) return false; level--; break;
  655. case '[': level++; break;
  656. case ']': if (level <= 0) return false; level--; break;
  657. case '{': level++; break;
  658. case '}': if (level <= 0) return false; level--; break;
  659. default : break;
  660. }
  661. }
  662. return level == 0;
  663. }
  664. static void
  665. consistent_enlargement (tree t, int& begin, int& end) {
  666. while (begin > 0 || end < N(t)) {
  667. while (begin > 0 && !empty_line (t[begin-1])) begin--;
  668. while (end < N(t) && !empty_line (t[end ])) end++;
  669. if (consistent_portion (t, begin, end)) return;
  670. //cout << "Inconsistent " << begin << " -- " << end << "\n";
  671. begin= max (0 , begin - max (end - begin, 1));
  672. end = min (N(t), end + max (end - begin, 1));
  673. //cout << " Try " << begin << " -- " << end << "\n";
  674. }
  675. }
  676. /******************************************************************************
  677. * User interface
  678. ******************************************************************************/
  679. path
  680. packrat_parse (string lan, string sym, tree in) {
  681. packrat_parser par= make_packrat_parser (lan, in);
  682. C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
  683. return par->decode_tree_position (pos);
  684. }
  685. bool
  686. packrat_correct (string lan, string sym, tree in) {
  687. packrat_parser par= make_packrat_parser (lan, in);
  688. C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
  689. return pos == N(par->current_input);
  690. }
  691. bool
  692. packrat_available_path (string lan, tree in, path in_p) {
  693. packrat_parser par= make_packrat_parser (lan, in);
  694. return par->current_start->contains (in_p);
  695. }
  696. object
  697. packrat_context (string lan, string s, tree in, path in_pos) {
  698. //cout << "Context " << in << " at " << in_pos
  699. // << " (" << lan << ", " << s << ")" << LF;
  700. packrat_parser par= make_packrat_parser (lan, in);
  701. C sym= encode_symbol (compound ("symbol", s));
  702. if (par->parse (sym, 0) != N(par->current_input))
  703. par= make_packrat_parser (lan, in, in_pos);
  704. C pos= par->encode_tree_position (in_pos);
  705. if (pos == PACKRAT_FAILED) return object (false);
  706. array<C> kind, begin, end;
  707. par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
  708. par->compress (kind, begin, end);
  709. object ret= null_object ();
  710. for (int i=0; i<N(kind); i++) {
  711. object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
  712. object x2 (par->decode_tree_position (begin[i]));
  713. object x3 (par->decode_tree_position (end[i]));
  714. ret= cons (list_object (x1, x2, x3), ret);
  715. }
  716. return ret;
  717. }
  718. bool
  719. packrat_select (string lan, string s, tree in, path in_pos,
  720. path& p1, path& p2, int mode)
  721. {
  722. // mode= 0: genuine semantic selection
  723. // mode= 1: strictly larger selection for select_enlarge
  724. // mode= 2: determine environment rectangles
  725. if (path_less (p2, p1))
  726. return packrat_select (lan, s, in, in_pos, p2, p1, mode);
  727. //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
  728. //<< " (" << lan << ", " << s << ")" << LF;
  729. packrat_parser par= make_packrat_parser (lan, in);
  730. C sym = encode_symbol (compound ("symbol", s));
  731. if (par->parse (sym, 0) != N(par->current_input))
  732. par= make_packrat_parser (lan, in, in_pos);
  733. C pos1= par->encode_tree_position (p1);
  734. C pos2= par->encode_tree_position (p2);
  735. //cout << "Encoded " << pos1 << " -- " << pos2
  736. // << " in " << par->current_string << LF;
  737. if (par->parse (sym, 0) != N(par->current_input)) return false;
  738. if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
  739. array<C> kind, begin, end;
  740. C pos0= pos1;
  741. if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
  742. par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
  743. //for (int i=0; i<N(kind); i++)
  744. // cout << i << ":\t"
  745. // << par->decode_tree_position (begin[i]) << "\t"
  746. // << par->decode_tree_position (end[i]) << "\t"
  747. // << packrat_decode[kind[i]] << LF;
  748. par->compress (kind, begin, end);
  749. int n= N(kind);
  750. if (n == 0) return false;
  751. if (mode == 1) {
  752. if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
  753. if (n == 0) return false;
  754. }
  755. p1= par->decode_tree_position (begin[n-1]);
  756. p2= par->decode_tree_position (end[n-1]);
  757. //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
  758. return true;
  759. }
  760. void
  761. packrat_highlight_subtree (string lan, string s, tree in) {
  762. //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
  763. int hl_lan= packrat_abbreviation (lan, s);
  764. if (hl_lan == 0) return;
  765. packrat_parser par= make_packrat_parser (lan, in);
  766. C sym = encode_symbol (compound ("symbol", s));
  767. if (par->parse (sym, 0) == N(par->current_input)) {
  768. par->current_hl_lan= hl_lan;
  769. par->highlight (sym, 0);
  770. }
  771. }
  772. void
  773. packrat_highlight (string lan, string s, tree in) {
  774. int hl_lan= packrat_abbreviation (lan, s);
  775. if (hl_lan == 0) return;
  776. //cout << "Highlight " << in << "\n";
  777. if (is_func (in, DOCUMENT)) {
  778. int i, begin, end;
  779. for (begin=0; begin<N(in); begin++)
  780. if (!has_highlight (in[begin], hl_lan))
  781. break;
  782. for (end=N(in)-1; end>begin; end--)
  783. if (!has_highlight (in[end-1], hl_lan))
  784. break;
  785. consistent_enlargement (in, begin, end);
  786. for (i=begin; i<end; i++)
  787. detach_highlight (in[i], hl_lan);
  788. attach_highlight (in, hl_lan);
  789. packrat_highlight_subtree (lan, s, in (begin, end));
  790. }
  791. else {
  792. if (is_compound (in))
  793. for (int i=0; i<N(in); i++)
  794. detach_highlight (in[i], hl_lan);
  795. attach_highlight (in, hl_lan);
  796. packrat_highlight_subtree (lan, s, in);
  797. }
  798. }