PageRenderTime 60ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/bundles/plugins-trunk/XML/sidekick/html/parser/html/HtmlParser.jj

#
Unknown | 669 lines | 607 code | 62 blank | 0 comment | 0 complexity | ddf1bd7dc765250076a03078b02b5da1 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * HtmlParser.jj -- JavaCC grammar for HTML.
  3. * Copyright (C) 1999 Quiotix Corporation.
  4. * Copyright (C) 2011 Eric Le Lay
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License (http://www.gnu.org/copyleft/gpl.txt)
  14. * for more details.
  15. */
  16. /*
  17. * JavaCC grammar file for HTML.
  18. *
  19. * Author: Brian Goetz, Quiotix
  20. * Version: 1.03
  21. * Revision: $Id: HtmlParser.jj 19677 2011-07-17 15:14:39Z kerik-sf $
  22. *
  23. * This grammar parses an HTML document and produces a (flat) parse "tree"
  24. * representing the document. It preserves almost all information in the
  25. * source document, including carriage control and spacing (except inside
  26. * of tags.) See the HtmlDocument and HtmlDocument.* classes for a
  27. * description of the parse tree. The parse tree supports traversal using
  28. * the commonly used "Visitor" pattern. The HtmlDumper class is a visitor
  29. * which dumps out the tree to an output stream.
  30. *
  31. * It does not require begin tags to be matched with end tags, or validate
  32. * the names or contents of the tags (this can easily be done post-parsing;
  33. * see the HtmlCollector class (which matches begin tags with end tags)
  34. * for an example.)
  35. *
  36. * Notable edge cases include:
  37. * - Quoted string processing. Quoted strings are matched inside of comments, and
  38. * as tag attribute values. Quoted strings are matched in normal text only
  39. * to the extent that they do not span line breaks.
  40. *
  41. * Please direct comments, questions, gripes or praise to
  42. * html-parser@quiotix.com. If you like it/hate it/use it, please let us know!
  43. *
  44. * danson: Modified for HtmlSideKick plugin for jEdit, added ability to handle
  45. * jsp as well as html, added locations for tags, etc. This parser works well
  46. * for most xml-based markup also.
  47. */
  48. options { IGNORE_CASE = true; STATIC = false; }
  49. PARSER_BEGIN(HtmlParser)
  50. package sidekick.html.parser.html;
  51. import java.text.MessageFormat;
  52. import java.util.*;
  53. import java.util.regex.*;
  54. import sidekick.util.*;
  55. import java.io.Reader;
  56. public class HtmlParser {
  57. static String NL = System.getProperty("line.separator");
  58. private List<ParseError> parseErrors = new ArrayList<ParseError>();
  59. public void setLineSeparator(String ls) {
  60. NL = ls;
  61. }
  62. private static String getTokenText(Token first, Token cur) {
  63. Token t;
  64. StringBuffer sb = new StringBuffer();
  65. for (t=first; t != cur.next; t = t.next) {
  66. if (t.specialToken != null) {
  67. Token tt=t.specialToken;
  68. while (tt.specialToken != null)
  69. tt = tt.specialToken;
  70. for (; tt != null; tt = tt.next)
  71. sb.append(tt.image);
  72. };
  73. sb.append(t.image);
  74. };
  75. return sb.toString();
  76. }
  77. /**
  78. * The line offset is used when the HTML to be parsed is only part of a file,
  79. * @param lineOffset The line number of the first line of the fragment.
  80. * @param columnOffset The column number of the first character of the fragment.
  81. */
  82. public HtmlParser(Reader in, int lineOffset, int columnOffset){
  83. this(in);
  84. jj_input_stream.ReInit(in,lineOffset,columnOffset);
  85. }
  86. public static void main(String[] args) throws ParseException {
  87. HtmlParser parser = new HtmlParser(System.in);
  88. HtmlDocument doc = parser.HtmlDocument();
  89. doc.accept(new HtmlDumper(System.out));
  90. System.exit(0);
  91. }
  92. public void setTabSize(int size) {
  93. jj_input_stream.setTabSize(size);
  94. }
  95. public int getTabSize() {
  96. return jj_input_stream.getTabSize(0);
  97. }
  98. private void addException(ParseException pe) {
  99. Range range = getExceptionLocation( pe );
  100. parseErrors.add(new ParseError(pe.getMessage(), range));
  101. pe.printStackTrace();
  102. }
  103. public List<ParseError> getParseErrors() {
  104. System.out.println("getParserErrors, there are " + parseErrors.size() + " errors");
  105. return parseErrors;
  106. }
  107. // regex to extract line and colun from a ParseException message
  108. // ParseException message look like: "Parse error at line 116, column 5. Encountered: }"
  109. private Pattern pePattern = Pattern.compile( "(.*?)(\\d+)(.*?)(\\d+)(.*?)" );
  110. /**
  111. * @return attempts to return a Location indicating the location of a parser
  112. * exception. If the ParseException contains a Token reference, all is well,
  113. * otherwise, this method attempts to parse the message string for the
  114. * exception.
  115. */
  116. private Range getExceptionLocation( ParseException pe ) {
  117. Token t = pe.currentToken;
  118. if ( t != null ) {
  119. return new Range( new Location( t.next.beginLine - 1, t.next.beginColumn ), new Location( t.next.endLine - 1, t.next.endColumn ) );
  120. }
  121. // ParseException message look like: "Parse error at line 116, column 5. Encountered: }"
  122. try {
  123. Matcher m = pePattern.matcher( pe.getMessage() );
  124. if ( m.matches() ) {
  125. String ln = m.group( 2 );
  126. String cn = m.group( 4 );
  127. int line_number = -1;
  128. int column_number = 0;
  129. if ( ln != null )
  130. line_number = Integer.parseInt( ln );
  131. if ( cn != null )
  132. column_number = Integer.parseInt( cn );
  133. return line_number > -1 ? new Range( new Location( line_number - 1, column_number - 1 ), new Location( line_number - 1, column_number ) ) : null;
  134. }
  135. return new Range();
  136. }
  137. catch ( Exception e ) {
  138. //e.printStackTrace();
  139. return new Range();
  140. }
  141. }
  142. // regex pattern for a valid non-quoted attribute.
  143. // Attributes can be single or double quoted, or consist solely of
  144. // letters in the range A-Z and a-z, digits (0-9), hyphens ("-"),
  145. // and periods (".")
  146. private Pattern attributePattern = Pattern.compile( "([a-zA-Z0-9.-])*" );
  147. private boolean isProperAttribute(String s) {
  148. // could have double quotes
  149. if (s.startsWith("\"") && s.endsWith("\"")) {
  150. return true;
  151. }
  152. // or single quotes
  153. else if (s.startsWith("'") && s.endsWith("'")) {
  154. return true;
  155. }
  156. // or might be jsp
  157. else if (s.startsWith("<%") && (s.endsWith("%>") || s.endsWith("%")) ) {
  158. return true;
  159. }
  160. boolean rtn = attributePattern.matcher(s).matches();
  161. if (rtn == false) {
  162. System.out.println("bad attribute: " + s);
  163. }
  164. return rtn;
  165. }
  166. }
  167. PARSER_END(HtmlParser)
  // JSP scriptlets and expressions ("<% ... %>").
  // "<%" seen in the DEFAULT state opens one. MORE emits no token of its own:
  // the matched text becomes the prefix of the token that is eventually produced.
  168. MORE:
  169. {
  170. "<%" : IN_JSP_EXP
  171. }
  // "%>" completes the scriptlet as a SPECIAL_TOKEN and returns to DEFAULT.
  // Special tokens are not handed to the grammar productions; JavaCC attaches
  // them to the following regular token through Token.specialToken.
  172. <IN_JSP_EXP>
  173. SPECIAL_TOKEN :
  174. {
  175. <JSP_EXP_END: "%>" > : DEFAULT
  176. }
  // Every other character inside the scriptlet is accumulated one at a time.
  177. <IN_JSP_EXP>
  178. MORE :
  179. {
  180. < ~[] >
  181. }
  // Building blocks shared by all lexical states (<*>). The leading '#' makes
  // each of these a private regular expression: it never becomes a token by
  // itself and can only be referenced from other token definitions.
  182. <*> TOKEN :
  183. {
  // Characters allowed to start an identifier: '$' (u0024), A-Z, '_' (u005f),
  // a-z, followed by the non-ASCII letter ranges listed below.
  184. <#ALPHA_CHAR: [
  185. "\u0024",
  186. "\u0041"-"\u005a",
  187. "\u005f",
  188. "\u0061"-"\u007a",
  189. "\u00c0"-"\u00d6", // Latin with diacritics
  190. "\u00d8"-"\u00f6", // Latin with diacritics
  191. "\u00f8"-"\u00ff", // Latin with diacritics
  192. "\u0100"-"\u1fff", // Latin Extended-A through Greek Extended
  193. "\u3040"-"\u318f", // Hiragana through Hangul Compatibility Jamo
  194. "\u3300"-"\u337f", // CJK Compatibility
  195. "\u3400"-"\u3d2d", // CJK Unified Ideographs Extension A
  196. "\u4e00"-"\u9fff", // CJK Unified Ideographs
  197. "\uf900"-"\ufaff" ] > // CJK Compatibility Ideographs
  198. | <#NUM_CHAR: ["0"-"9"] >
  199. | <#ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
  // Characters allowed after the first one in an identifier. Note that ':' is
  // included, so a namespace prefix can be part of an identifier.
  200. | <#IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_", "-", ".", ":" ] >
  201. | <#IDENTIFIER: <ALPHA_CHAR> (<IDENTIFIER_CHAR>)* >
  202. | <#STYLE_IDENTIFIER: (<ALPHA_CHAR>)+ >
  // Quoted string that must not span a line break ("NB" presumably stands for
  // "no break"); single or double quoted.
  203. | <#QUOTED_STRING_NB: ( "'" ( ~["'", "\r", "\n"] )* "'" )
  204. | ( "\"" ( ~["\"", "\r", "\n"] )* "\"" ) >
  // Quoted string that may contain line breaks.
  205. | <#QUOTED_STRING: ( "'" ( ~["'"] )* "'" ) | ( "\"" ( ~["\""] )* "\"" ) >
  206. | <#WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
  207. | <#NEWLINE: ( "\r\n" | "\r" | "\n" ) >
  208. | <#QUOTE: ( "'" | "\"" ) >
  209. }
  // Tokens recognised in ordinary document text. Each markup opener switches
  // the lexer into the state that knows how to tokenise what follows.
  210. <DEFAULT> TOKEN :
  211. {
  // A line break together with any blanks or tabs immediately before it.
  212. <EOL: ( " " | "\t" )* <NEWLINE> >
  // HTML comment or JSP comment opener.
  213. | <COMMENT_START: "<!--" | "<%--" > : LexComment
  // "</" optionally followed by a namespace prefix and ':'.
  214. | <ENDTAG_START: "</" | "</" <IDENTIFIER> ":" > : LexStartTag
  // "<", a JSP directive opener "<%@", or "<" followed by a namespace prefix and ':'.
  215. | <TAG_START: "<" | "<%@" | "<" <IDENTIFIER> ":" > : LexStartTag
  216. | <DECL_START: "<!" > : LexDecl
  // Plain text: everything up to the next '<' or line break.
  217. | <PCDATA: ( ~["<", "\r", "\n"] )+ >
  218. }
  // State entered right after a tag opener: the tag name is expected next.
  // Whitespace before the name is kept only as a special token.
  219. <LexStartTag> SPECIAL_TOKEN :
  220. {
  221. < (<WHITESPACE>)+ >
  222. }
  223. <LexStartTag> TOKEN :
  224. {
  // SCRIPT and STYLE are listed before TAG_NAME so that they win when the
  // match lengths are equal; matching ignores case (IGNORE_CASE option).
  225. <TAG_SCRIPT: "SCRIPT"> : LexInTag
  226. | <TAG_STYLE: "STYLE"> : LexInTag
  227. | <TAG_NAME: <IDENTIFIER> > : LexInTag
  // Anything that cannot start a tag name: emit a one-character error token
  // and go back to ordinary text.
  228. | <LST_ERROR: ~[]> : DEFAULT
  229. }
  // State used after the tag name: attribute names, '=', and the tag closer.
  // Whitespace between them is kept only as a special token.
  230. <LexInTag> SPECIAL_TOKEN :
  231. {
  232. < (<WHITESPACE>)+ >
  233. }
  234. <LexInTag> TOKEN :
  235. {
  236. <ATTR_NAME: <IDENTIFIER> >
  // ">" closes a tag, "%>" closes a JSP directive.
  237. | <TAG_END: ">" | "%>" > : DEFAULT
  238. | <TAG_SLASHEND: "/>" > : DEFAULT
  239. | <ATTR_EQ: "=" > : LexAttrVal
  // A '<' inside a tag means the previous tag was never closed. The action
  // turns the matched token into a TAG_END with image ">" and links a synthetic
  // TAG_START "<" token (same position) right after it, so the parser sees
  // "... > <" and the lexer continues in LexStartTag.
  240. | <IMPLICIT_TAG_END: "<">
  241. {
  242. Token t = new Token();
  243. t.image = "<";
  244. t.kind = TAG_START;
  245. t.next = matchedToken.next;
  246. t.beginLine = matchedToken.beginLine;
  247. t.beginColumn = matchedToken.beginColumn;
  248. t.endLine = matchedToken.endLine;
  249. t.endColumn = matchedToken.endColumn;
  250. matchedToken.next = t;
  251. matchedToken.kind = TAG_END;
  252. matchedToken.image = ">";
  253. } : LexStartTag
  // Any other character becomes a one-character error token; the state is
  // not changed.
  254. | <LIT_ERROR: ~[]>
  255. }
  // State entered after '=' inside a tag: an attribute value is expected.
  // Each whitespace character before the value is a separate special token.
  256. <LexAttrVal> SPECIAL_TOKEN :
  257. {
  258. < <WHITESPACE> >
  259. }
  260. <LexAttrVal> TOKEN :
  261. {
  // A value is either a quoted string (which may span lines) or a run of
  // characters that excludes '>', quotes and whitespace.
  262. <ATTR_VAL: <QUOTED_STRING>
  263. | ( ~[">", "\"", "'", " ", "\t", "\n", "\r"] )+ > : LexInTag
  // Anything else is a one-character error token; the lexer stays in this state.
  264. | <LAV_ERROR: ~[]>
  265. }
  // State used between a comment opener and its closer.
  266. <LexComment> TOKEN :
  267. {
  // Accepted closers: "--" followed by optional blanks and '>', the short form
  // "->", and the JSP comment closer "--%>".
  268. < COMMENT_END: ("--" (" ")* ">" | "->" | "--%>" ) > : DEFAULT
  // A dash that is not part of a closer.
  269. | < DASH: "-" >
  270. | < COMMENT_EOL: <NEWLINE> >
  // Comment text: a run without line breaks, quotes or dashes, a quoted string
  // confined to one line, or a single unmatched quote character.
  271. | < COMMENT_WORD: ( (~[ "\n", "\r", "'", "\"", "-" ])+
  272. | <QUOTED_STRING_NB>
  273. | <QUOTE> ) >
  274. }
  // State used after "<!" (declarations such as a DOCTYPE).
  275. <LexDecl> TOKEN :
  276. {
  // The declaration body: one or more of a single-line quoted string, a lone
  // quote character, or any character other than '>'.
  277. <DECL_ANY: ( <QUOTED_STRING_NB> | <QUOTE> | ~[ ">" ] )+ >
  278. | <DECL_END: ">" > : DEFAULT
  279. }
  // States for the raw contents of SCRIPT and STYLE elements. No token rule
  // switches into them; the ScriptBlock and StyleBlock productions do so
  // explicitly with token_source.SwitchTo(...).
  280. <LexScript> TOKEN : {
  281. <SCRIPT_END: "</SCRIPT>" > : DEFAULT
  282. }
  283. <LexStyle> TOKEN : {
  284. <STYLE_END: "</STYLE>" > : DEFAULT
  285. }
  // Content tokens shared by both states.
  286. <LexScript, LexStyle> TOKEN :
  287. {
  288. <BLOCK_EOL: <NEWLINE> >
  // A '<' that does not begin the closing tag of the current block.
  289. | <BLOCK_LBR: "<" >
  // A single-line quoted string, a lone quote character, or a run of text
  // without line breaks, quotes or '<'.
  290. | <BLOCK_WORD: ( <QUOTED_STRING_NB>
  291. | <QUOTE>
  292. | (~[ "\n", "\r", "'", "\"", "<"])+ ) >
  293. }
  294. HtmlDocument HtmlDocument() :
  295. {
  296. HtmlDocument.ElementSequence s;
  297. }
  298. {
  299. s=ElementSequence() <EOF>
  300. { return new HtmlDocument(s); }
  301. }
  302. HtmlDocument.ElementSequence ElementSequence() :
  303. {
  304. HtmlDocument.ElementSequence s = new HtmlDocument.ElementSequence();
  305. HtmlDocument.HtmlElement h;
  306. }
  307. {
  308. ( h=Element() { s.addElement(h); } ) *
  309. { return s; }
  310. }
  311. HtmlDocument.HtmlElement Element() :
  312. {
  313. HtmlDocument.HtmlElement e;
  314. Token text;
  315. }
  316. {
  317. (
  318. LOOKAHEAD(2)
  319. e = Tag() { return e; }
  320. | e = EndTag() { return e; }
  321. | e = CommentTag() { return e; }
  322. | e = DeclTag() { return e; }
  323. | LOOKAHEAD(2)
  324. e = ScriptBlock() { return e; }
  325. | LOOKAHEAD(2)
  326. e = StyleBlock() { return e; }
  327. | LOOKAHEAD(2)
  328. <TAG_START> text=<LST_ERROR>
  329. { return new HtmlDocument.Text("<" + text.image); }
  330. | text = <PCDATA> { return new HtmlDocument.Text(text.image); }
  331. | <EOL> { return new HtmlDocument.Newline(); }
  332. )
  333. }
  334. HtmlDocument.Attribute Attribute() :
  335. {
  336. HtmlDocument.Attribute a;
  337. Token t1, t2=null;
  338. }
  339. {
  340. try {
  341. t1=<ATTR_NAME> [ <ATTR_EQ> t2=<ATTR_VAL> ]
  342. {
  343. if (t2 == null) {
  344. a = new HtmlDocument.Attribute(t1.image);
  345. a.setStartLocation(t1.beginLine, t1.beginColumn);
  346. a.setEndLocation(t1.endLine, t1.endColumn + 1);
  347. }
  348. else {
  349. a = new HtmlDocument.Attribute(t1.image, t2.image);
  350. a.setStartLocation(t1.beginLine, t1.beginColumn);
  351. a.setValueStartLocation(t2.beginLine,t2.beginColumn);
  352. a.setEndLocation(t2.endLine, t2.endColumn + 1);
  353. if (!isProperAttribute(t2.image)) {
  354. ParseException e = new ParseException("Parse error at line " + t2.beginLine + ", column " + t2.beginColumn + ". Attribute is improperly quoted." );
  355. addException(e);
  356. }
  357. }
  358. return a;
  359. }
  360. }
  361. catch(ParseException e) {
  362. addException(e);
  363. return null;
  364. }
  365. }
  366. HtmlDocument.AttributeList AttributeList() :
  367. {
  368. HtmlDocument.AttributeList alist = new HtmlDocument.AttributeList();
  369. HtmlDocument.Attribute a;
  370. }
  371. {
  372. (a=Attribute() { alist.addAttribute(a); } )*
  373. {
  374. return alist;
  375. }
  376. }
  377. HtmlDocument.HtmlElement Tag() :
  378. {
  379. Token t, et;
  380. HtmlDocument.AttributeList alist;
  381. Token firstToken = getToken(1);
  382. Token st = null;
  383. boolean isJspTag = false;
  384. }
  385. {
  386. try {
  387. st=<TAG_START> t=<TAG_NAME> alist=AttributeList()
  388. ( et=<TAG_END> | et=<TAG_SLASHEND> )
  389. {
  390. String tag_start = "<";
  391. String tag_name = "";
  392. if (st.image.startsWith("<") && st.image.endsWith(":")) {
  393. isJspTag = true;
  394. tag_start = "<";
  395. tag_name = st.image.substring(1) + t.image;
  396. }
  397. else {
  398. tag_name = t.image;
  399. }
  400. if (st.image.startsWith("<%")) {
  401. isJspTag = true;
  402. }
  403. HtmlDocument.Tag rtn_tag = new HtmlDocument.Tag(tag_start, tag_name, alist, et.image);
  404. if (et.kind == TAG_SLASHEND) {
  405. rtn_tag.setEmpty(true);
  406. }
  407. rtn_tag.setStartLocation(st.beginLine, st.beginColumn);
  408. rtn_tag.setEndLocation(et.endLine, et.endColumn + 1);
  409. rtn_tag.setIsJspTag(isJspTag);
  410. return rtn_tag;
  411. }
  412. }
  413. catch (ParseException ex) {
  414. addException(ex);
  415. token_source.SwitchTo(DEFAULT);
  416. String s = getTokenText(firstToken, getNextToken());
  417. return new HtmlDocument.Text(s);
  418. }
  419. }
  420. String StyleBlockContents() :
  421. {
  422. StringBuffer sb = new StringBuffer();
  423. Token t = null;
  424. }
  425. {
  426. try {
  427. ( t=<BLOCK_EOL> { sb.append(t.image); }
  428. | t=<BLOCK_WORD> { sb.append(t.image); }
  429. | t=<BLOCK_LBR> { sb.append(t.image); }
  430. )*
  431. {
  432. // don't trim content, otherwise errors in the first line will be off by
  433. // the amount of whitespace trimmed
  434. return sb.toString();
  435. }
  436. }
  437. catch(ParseException e) {
  438. addException(e);
  439. }
  440. }
  441. String ScriptBlockContents() :
  442. {
  443. //HtmlDocument.ElementSequence e = new HtmlDocument.ElementSequence();
  444. StringBuffer sb = new StringBuffer();
  445. Token t = null;
  446. }
  447. {
  448. try {
  449. ( t=<BLOCK_EOL> { sb.append(t.image); }
  450. | t=<BLOCK_WORD> { sb.append(t.image); }
  451. | t=<BLOCK_LBR> { sb.append(t.image); }
  452. )*
  453. {
  454. String contents = sb.toString();
  455. contents = contents.trim();
  456. // sometimes people wrap the contents of script tags with html comments
  457. // to protect older browsers that don't understand script tags from puking.
  458. // I'm removing them here as they don't serve a purpose as far as a jEdit
  459. // SideKick plugin is concerned.
  460. if (contents.startsWith("<!--")) {
  461. contents = contents.substring(4);
  462. }
  463. if (contents.endsWith("//-->")) {
  464. contents = contents.substring(0, contents.length() - 5);
  465. }
  466. return contents.trim();
  467. //return e;
  468. }
  469. }
  470. catch(ParseException e) {
  471. addException(e);
  472. }
  473. }
  474. HtmlDocument.HtmlElement ScriptBlock() :
  475. {
  476. HtmlDocument.AttributeList alist;
  477. Token firstToken = getToken(1);
  478. Token st, et, ts, est;
  479. String contents = "";
  480. }
  481. {
  482. try {
  483. st=<TAG_START> ts=<TAG_SCRIPT> alist=AttributeList() est=<TAG_END>
  484. {
  485. token_source.SwitchTo(LexScript);
  486. }
  487. contents=ScriptBlockContents()
  488. et=<SCRIPT_END>
  489. {
  490. HtmlDocument.Tag script = new HtmlDocument.Tag(ts.image, alist);
  491. script.setStartLocation(st.beginLine, st.beginColumn);
  492. script.setEndLocation(est.endLine, est.endColumn);
  493. HtmlDocument.EndTag endScript = new HtmlDocument.EndTag( ts.image );
  494. endScript.setStartLocation(et.beginLine, et.beginColumn);
  495. endScript.setEndLocation(et.endLine, et.endColumn);
  496. HtmlDocument.Text text = new HtmlDocument.Text(contents);
  497. HtmlDocument.ElementSequence seq = new HtmlDocument.ElementSequence();
  498. seq.addElement(text);
  499. HtmlDocument.TagBlock b = new HtmlDocument.TagBlock(script, seq, endScript);
  500. b.setStartLocation(st.beginLine, st.beginColumn);
  501. b.setEndLocation(et.endLine, et.endColumn + 1);
  502. return b;
  503. }
  504. }
  505. catch (ParseException ex) {
  506. addException(ex);
  507. token_source.SwitchTo(DEFAULT);
  508. String s = getTokenText(firstToken, getNextToken());
  509. return new HtmlDocument.Text(s);
  510. }
  511. }
  512. HtmlDocument.HtmlElement StyleBlock() :
  513. {
  514. HtmlDocument.AttributeList alist;
  515. Token firstToken = getToken(1);
  516. Token st, est, et;
  517. String contents = "";
  518. }
  519. {
  520. try {
  521. st=<TAG_START> <TAG_STYLE> alist=AttributeList() est=<TAG_END>
  522. {
  523. token_source.SwitchTo(LexStyle);
  524. }
  525. contents=StyleBlockContents()
  526. et=<STYLE_END>
  527. {
  528. HtmlDocument.Text text = new HtmlDocument.Text(contents);
  529. HtmlDocument.ElementSequence seq = new HtmlDocument.ElementSequence();
  530. seq.addElement(text);
  531. HtmlDocument.TagBlock b = new HtmlDocument.TagBlock("STYLE", alist, seq);
  532. b.setStartLocation(st.beginLine, st.beginColumn);
  533. b.setEndLocation(et.endLine, et.endColumn + 1);
  534. b.startTag.setStartLocation(st.beginLine, st.beginColumn);
  535. b.startTag.setEndLocation(est.endLine, est.endColumn + 1);
  536. return b;
  537. }
  538. }
  539. catch (ParseException ex) {
  540. addException(ex);
  541. token_source.SwitchTo(DEFAULT);
  542. String s = getTokenText(firstToken, getNextToken());
  543. return new HtmlDocument.Text(s);
  544. }
  545. }
  546. HtmlDocument.HtmlElement EndTag() :
  547. {
  548. Token t;
  549. Token firstToken = getToken(1);
  550. Token st, et;
  551. }
  552. {
  553. try {
  554. st=<ENDTAG_START> t=<TAG_NAME> et=<TAG_END>
  555. {
  556. String tag_name = "";
  557. if (st.image.startsWith("</") && st.image.endsWith(":")) {
  558. tag_name = st.image.substring(2) + t.image;
  559. }
  560. else
  561. tag_name = t.image;
  562. HtmlDocument.EndTag b = new HtmlDocument.EndTag(tag_name);
  563. b.setStartLocation(st.beginLine, st.beginColumn);
  564. b.setEndLocation(et.endLine, et.endColumn + 1);
  565. return b;
  566. }
  567. }
  568. catch (ParseException ex) {
  569. addException(ex);
  570. token_source.SwitchTo(DEFAULT);
  571. String s = getTokenText(firstToken, getNextToken());
  572. return new HtmlDocument.Text(s);
  573. }
  574. }
  575. HtmlDocument.Comment CommentTag() :
  576. {
  577. Token t, comment_start, comment_end = null;
  578. StringBuffer s = new StringBuffer();
  579. }
  580. {
  581. try {
  582. comment_start=<COMMENT_START>
  583. ( t=<DASH> { s.append(t.image); }
  584. | <COMMENT_EOL> { s.append(NL); }
  585. | t=<COMMENT_WORD> { s.append(t.image); } )*
  586. (<EOF> | comment_end=<COMMENT_END>)
  587. { return new HtmlDocument.Comment(comment_start.image + s.toString() + (comment_end == null ? "" : comment_end.image)); }
  588. }
  589. catch(ParseException e) {
  590. addException(e);
  591. }
  592. }
  593. HtmlDocument.Comment DeclTag() :
  594. {
  595. Token t;
  596. }
  597. {
  598. try {
  599. <DECL_START> t=<DECL_ANY> <DECL_END>
  600. {
  601. return new HtmlDocument.Comment(t.image);
  602. }
  603. }
  604. catch(ParseException e) {
  605. addException(e);
  606. }
  607. }