PageRenderTime 54ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/docs/official-grammars/php/xhp/xhp_orig_when_port.l

http://github.com/facebook/pfff
LEX | 1069 lines | 956 code | 49 blank | 64 comment | 0 complexity | 91501c91788a6ca10b4353e33272c056 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, LGPL-2.0, Apache-2.0
  1. /*
  2. +----------------------------------------------------------------------+
  3. | XHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
  6. | Copyright (c) 2009 - 2010 Facebook, Inc. (http://www.facebook.com) |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 2.00 of the Zend license, |
  9. | that is bundled with this package in the file LICENSE.ZEND, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.zend.com/license/2_00.txt. |
  12. | If you did not receive a copy of the Zend license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@zend.com so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. */
  17. %{
  18. #include "xhp.hpp"
  19. #include <string.h>
/* Start-condition helpers: forward to the xhp_* functions defined at the
 * bottom of this file, supplying the scanner state pointer `yyg`. */
#define push_state(s) xhp_new_push_state(s, yyg)
#define pop_state() xhp_new_pop_state(yyg)
#define set_state(s) xhp_set_state(s, yyg)
/* Top of yyextra->curly_stack, or 0 when the stack is empty. */
#define last_curly_token() (yyextra->curly_stack.empty() ? 0 : yyextra->curly_stack.top())
/* The value most recently stored in yyextra->last_token. */
#define last_token() yyextra->last_token
/* Flex expands YY_USER_ACTION ahead of each matched rule's action.
 * - Once yyextra->terminated is set, every further match returns 0.
 * - first_lineno is re-synchronised with lineno only when yy_more_len is zero
 *   (presumably: when this match is not continuing a yymore() -- confirm
 *   against the flex manual). */
#define YY_USER_ACTION \
if (yyextra->terminated) \
return 0; \
if (!yyg->yy_more_len) \
yyextra->first_lineno = yyextra->lineno;
/* Emit token `t`: store a code_rope built from yytext, first_lineno and the
 * number of lines spanned into *yylval (prefixed with the pending doc block
 * when has_doc_block is set, which is then cleared), and return through
 * yy_token(). Note that this expands to an unbraced if/else plus a return. */
#define tok(t) \
if (yyextra->has_doc_block) { \
*yylval = yyextra->doc_block + code_rope(yytext, yyextra->first_lineno, yyextra->lineno - yyextra->first_lineno); \
yyextra->has_doc_block = false; \
} else { \
*yylval = code_rope(yytext, yyextra->first_lineno, yyextra->lineno - yyextra->first_lineno); \
} \
return yy_token(t, yyg)
/* Emit replacement text `t` as a T_XHP_TEXT token: store `t` in *yylval, push
 * XHP_AFTER_ENT, record last_token and return. This does not go through
 * yy_token(). The expansion already ends in ';', so callers work with or
 * without their own trailing semicolon. The DEBUG variant additionally logs
 * the token. */
#ifdef DEBUG
static void yy_log_token(int tok);
#define tokt(t) *yylval = t; push_state(XHP_AFTER_ENT); yy_log_token(T_XHP_TEXT); return yyextra->last_token = T_XHP_TEXT;
#else
#define tokt(t) *yylval = t; push_state(XHP_AFTER_ENT); return yyextra->last_token = T_XHP_TEXT;
#endif
/* Scanner start-up hook: when yyextra->insert_token is non-zero, clear it,
 * reset yy_init to 0 and return that token through yy_token() before any
 * input is scanned. */
#define YY_USER_INIT \
if (yyextra->insert_token) { \
yyg->yy_init = 0; \
int ft = yyextra->insert_token; \
yyextra->insert_token = 0; \
return yy_token(ft, yyg); \
}
  51. using namespace std;
  52. const char* yytokname(int tok);
  53. static int yy_token(int tok, struct yyguts_t* yyg);
  54. static void yy_scan_newlines(const char* text, struct yyguts_t* yyg);
  55. static bool utf8ize(uint32_t v, char* buf /* [5] */) {
  56. if (v <= 0x7f) { // 0xxxxxxx
  57. buf[0] = v;
  58. buf[1] = 0;
  59. } else if (v <= 0x7ff) { // 110yyyxx 10xxxxxx
  60. buf[0] = 0xc0 | (v >> 6);
  61. buf[1] = 0x80 | (v & 0x3f);
  62. buf[2] = 0;
  63. } else if (v <= 0xffff) { // 1110yyyy 10yyyyxx 10xxxxxx
  64. buf[0] = 0xe0 | (v >> 12);
  65. buf[1] = 0x80 | ((v >> 6) & 0x3f);
  66. buf[2] = 0x80 | (v & 0x3f);
  67. buf[3] = 0;
  68. } else if (v <= 0x1fffff) { // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
  69. buf[0] = 0xf0 | (v >> 18);
  70. buf[1] = 0x80 | ((v >> 12) & 0x3f);
  71. buf[2] = 0x80 | ((v >> 6) & 0x3f);
  72. buf[3] = 0x80 | (v & 0x3f);
  73. buf[4] = 0;
  74. } else {
  75. return false;
  76. }
  77. return true;
  78. }
  79. %}
  80. %option prefix="xhp"
  81. %option reentrant
  82. %option case-insensitive
  83. %option noyywrap nodefault
  84. %option stack
  85. %option bison-bridge
  86. %option 8bit
  87. /* I think an interactive scanner is required because of the bison state
  88. * pushing we do. I'm putting an explicit interactive declaration here in case
  89. * someone tries adding -CF or whatever to the make flags. */
  90. %option interactive
  91. %s PHP
  92. %s PHP_COMMENT
  93. %s PHP_EOL_COMMENT
  94. %s PHP_DOC_COMMENT
  95. %s PHP_HEREDOC_START
  96. %s PHP_HEREDOC_NSTART
  97. %s PHP_HEREDOC_NEWLINE
  98. %s PHP_HEREDOC_DATA
  99. %s PHP_NO_RESERVED_WORDS
  100. %s PHP_NO_RESERVED_WORDS_PERSIST
  101. %s XHP_LABEL
  102. %s XHP_LABEL_WHITESPACE
  103. %s XHP_ATTRS
  104. %s XHP_ATTR_VAL
  105. %s XHP_AFTER_ENT
  106. %s XHP_CHILD
  107. %s XHP_CHILD_START
  108. %s XHP_INVALID_ENTITY
  109. %s XHP_ATTR_TYPE_DECL
  110. %s XHP_CHILDREN_DECL
  111. LNUM [0-9]+
  112. DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
  113. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  114. HNUM "0x"[0-9a-fA-F]+
  115. LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
  116. BYTE (.|\n)
  117. WHITESPACE [ \n\r\t]+
  118. TABS_AND_SPACES [ \t]*
  119. NEWLINE ("\r\n"|"\n"|"\r")
  120. %%
  121. <XHP_ATTR_TYPE_DECL>{
  122. bool tok(T_XHP_BOOLEAN);
  123. int tok(T_XHP_NUMBER);
  124. float tok(T_XHP_FLOAT);
  125. var tok(T_VAR);
  126. array tok(T_XHP_ARRAY);
  127. string tok(T_XHP_STRING);
  128. enum tok(T_XHP_ENUM);
  129. @required tok(T_XHP_REQUIRED);
  130. "(" tok('(');
  131. ":" tok(T_XHP_COLON);
  132. }
  133. /* Open / close PHP + inline HTML */
  134. <INITIAL>{
  135. "<?php"([ \t]|{NEWLINE}) {
  136. yy_scan_newlines(yytext + 5, yyg);
  137. tok(T_OPEN_TAG);
  138. }
  139. "<?" {
  140. if (yyextra->short_tags) {
  141. tok(T_OPEN_TAG);
  142. } else {
  143. tok(T_INLINE_HTML);
  144. }
  145. }
  146. "<?=" {
  147. if (yyextra->short_tags) {
  148. tok(T_OPEN_TAG_WITH_ECHO);
  149. } else {
  150. tok(T_INLINE_HTML);
  151. }
  152. }
  153. "<%" {
  154. if (yyextra->asp_tags) {
  155. tok(T_OPEN_TAG);
  156. } else {
  157. tok(T_INLINE_HTML);
  158. }
  159. }
  160. "<%=" {
  161. if (yyextra->asp_tags) {
  162. tok(T_OPEN_TAG_WITH_ECHO);
  163. } else {
  164. tok(T_INLINE_HTML);
  165. }
  166. }
  167. "<"|[^<]* {
  168. yy_scan_newlines(yytext, yyg);
  169. tok(T_INLINE_HTML);
  170. }
  171. }
  172. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST>{
  173. ("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
  174. yy_scan_newlines(yytext + 2, yyg);
  175. tok(T_CLOSE_TAG);
  176. }
  177. "%>" {
  178. if (yyextra->asp_tags) {
  179. tok(T_CLOSE_TAG);
  180. } else {
  181. yyless(1);
  182. tok(yytext[0]);
  183. }
  184. }
  185. }
  186. /* Comments and whitespace */
  187. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST,XHP_CHILDREN_DECL,XHP_ATTR_TYPE_DECL>{
  188. "#"|"//" {
  189. push_state(PHP_EOL_COMMENT);
  190. yymore();
  191. }
  192. "/**"{WHITESPACE} {
  193. yy_scan_newlines(yytext + 3, yyg);
  194. push_state(PHP_DOC_COMMENT);
  195. yymore();
  196. }
  197. "/*" {
  198. push_state(PHP_COMMENT);
  199. yymore();
  200. }
  201. {WHITESPACE}+ yy_scan_newlines(yytext, yyg);
  202. }
  203. <PHP_EOL_COMMENT>{
  204. {NEWLINE} {
  205. ++yyextra->lineno;
  206. pop_state();
  207. }
  208. [^\r\n?]+ yymore();
  209. "?>" {
  210. yyless(yyleng - 2);
  211. pop_state();
  212. }
  213. . yymore();
  214. }
  215. <PHP_DOC_COMMENT,PHP_COMMENT>{
  216. {NEWLINE} {
  217. ++yyextra->lineno;
  218. yymore();
  219. }
  220. [^*\r\n]+|"*" yymore();
  221. }
  222. <PHP_DOC_COMMENT>"*/" {
  223. yyextra->doc_block = code_rope(yytext, yyextra->first_lineno, yyextra->lineno - yyextra->first_lineno);
  224. yyextra->has_doc_block = true;
  225. pop_state();
  226. }
  227. <PHP_COMMENT>"*/" pop_state();
  228. /* Reserved words */
  229. <PHP>{
  230. include tok(T_INCLUDE);
  231. include_once tok(T_INCLUDE_ONCE);
  232. eval tok(T_EVAL);
  233. require tok(T_REQUIRE);
  234. require_once tok(T_REQUIRE_ONCE);
  235. or tok(T_LOGICAL_OR);
  236. xor tok(T_LOGICAL_XOR);
  237. and tok(T_LOGICAL_AND);
  238. print tok(T_PRINT);
  239. instanceof tok(T_INSTANCEOF);
  240. new tok(T_NEW);
  241. clone tok(T_CLONE);
  242. exit tok(T_EXIT);
  243. if tok(T_IF);
  244. elseif tok(T_ELSEIF);
  245. else tok(T_ELSE);
  246. endif tok(T_ENDIF);
  247. echo tok(T_ECHO);
  248. do tok(T_DO);
  249. while tok(T_WHILE);
  250. endwhile tok(T_ENDWHILE);
  251. for tok(T_FOR);
  252. endfor tok(T_ENDFOR);
  253. foreach tok(T_FOREACH);
  254. endforeach tok(T_ENDFOREACH);
  255. declare tok(T_DECLARE);
  256. enddeclare tok(T_ENDDECLARE);
  257. as tok(T_AS);
  258. switch tok(T_SWITCH);
  259. endswitch tok(T_ENDSWITCH);
  260. case tok(T_CASE);
  261. default tok(T_DEFAULT);
  262. break tok(T_BREAK);
  263. continue tok(T_CONTINUE);
  264. goto tok(T_GOTO);
  265. function tok(T_FUNCTION);
  266. const tok(T_CONST);
  267. return tok(T_RETURN);
  268. try tok(T_TRY);
  269. catch tok(T_CATCH);
  270. throw tok(T_THROW);
  271. use tok(T_USE);
  272. global tok(T_GLOBAL);
  273. static tok(T_STATIC);
  274. abstract tok(T_ABSTRACT);
  275. final tok(T_FINAL);
  276. private tok(T_PRIVATE);
  277. protected tok(T_PROTECTED);
  278. public tok(T_PUBLIC);
  279. var tok(T_VAR);
  280. unset tok(T_UNSET);
  281. isset tok(T_ISSET);
  282. empty tok(T_EMPTY);
  283. __halt_compiler tok(T_HALT_COMPILER);
  284. class tok(T_CLASS);
  285. interface tok(T_INTERFACE);
  286. extends tok(T_EXTENDS);
  287. implements tok(T_IMPLEMENTS);
  288. list tok(T_LIST);
  289. array tok(T_ARRAY);
  290. __class__ tok(T_CLASS_C);
  291. __method__ tok(T_METHOD_C);
  292. __function__ tok(T_FUNC_C);
  293. __line__ tok(T_LINE);
  294. __file__ tok(T_FILE);
  295. namespace tok(T_NAMESPACE);
  296. __namespace__ tok(T_NS_C);
  297. __dir__ tok(T_DIR);
  298. attribute {
  299. if ((last_token() == '{' || last_token() == '}' || last_token() == ';') &&
  300. (yyextra->expecting_xhp_class_statements)) {
  301. tok(T_XHP_ATTRIBUTE);
  302. } else {
  303. tok(T_STRING);
  304. }
  305. }
  306. category {
  307. if ((last_token() == '{' || last_token() == '}' || last_token() == ';') &&
  308. (yyextra->expecting_xhp_class_statements)) {
  309. tok(T_XHP_CATEGORY);
  310. } else {
  311. tok(T_STRING);
  312. }
  313. }
  314. children {
  315. if ((last_token() == '{' || last_token() == '}' || last_token() == ';') &&
  316. (yyextra->expecting_xhp_class_statements)) {
  317. tok(T_XHP_CHILDREN);
  318. } else {
  319. tok(T_STRING);
  320. }
  321. }
  322. }
  323. /* Operators */
  324. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST,XHP_ATTR_TYPE_DECL>{
  325. "+=" tok(T_PLUS_EQUAL);
  326. "-=" tok(T_MINUS_EQUAL);
  327. "*=" tok(T_MUL_EQUAL);
  328. "/=" tok(T_DIV_EQUAL);
  329. ".=" tok(T_CONCAT_EQUAL);
  330. "%=" tok(T_MOD_EQUAL);
  331. "&=" tok(T_AND_EQUAL);
  332. "|=" tok(T_OR_EQUAL);
  333. "^=" tok(T_XOR_EQUAL);
  334. "<<=" tok(T_SL_EQUAL);
  335. ">>=" tok(T_SR_EQUAL);
  336. "||" tok(T_BOOLEAN_OR);
  337. "&&" tok(T_BOOLEAN_AND);
  338. "==" tok(T_IS_EQUAL);
  339. "!="|"<>" tok(T_IS_NOT_EQUAL);
  340. "===" tok(T_IS_IDENTICAL);
  341. "!==" tok(T_IS_NOT_IDENTICAL);
  342. "<=" tok(T_IS_SMALLER_OR_EQUAL);
  343. ">=" tok(T_IS_GREATER_OR_EQUAL);
  344. "<<" tok(T_SL);
  345. ">>" tok(T_SR);
  346. "++" tok(T_INC);
  347. "--" tok(T_DEC);
  348. "->" tok(T_OBJECT_OPERATOR);
  349. "=>" tok(T_DOUBLE_ARROW);
  350. "::" tok(T_PAAMAYIM_NEKUDOTAYIM);
  351. "\\" tok(T_NS_SEPARATOR);
  352. ":" {
  353. switch (yyextra->last_token) {
  354. case ',': case '=': case '|': case '^': case '&': case '<': case '>':
  355. case '+': case '-': case '%': case '!': case '~': case '[': case '(':
  356. case '{': case '.':
  357. case T_LOGICAL_OR: case T_LOGICAL_XOR: case T_LOGICAL_AND:
  358. case T_PLUS_EQUAL: case T_MINUS_EQUAL: case T_MUL_EQUAL:
  359. case T_DIV_EQUAL: case T_CONCAT_EQUAL: case T_MOD_EQUAL:
  360. case T_AND_EQUAL: case T_OR_EQUAL: case T_XOR_EQUAL:
  361. case T_SL_EQUAL: case T_SR_EQUAL: case T_BOOLEAN_OR:
  362. case T_BOOLEAN_AND: case T_IS_EQUAL: case T_IS_NOT_EQUAL:
  363. case T_IS_IDENTICAL: case T_IS_NOT_IDENTICAL: case T_IS_SMALLER_OR_EQUAL:
  364. case T_IS_GREATER_OR_EQUAL: case T_ECHO: case T_RETURN:
  365. case T_EXTENDS: case T_INSTANCEOF: case T_DOUBLE_ARROW:
  366. case T_XHP_ATTRIBUTE:
  367. tok(T_XHP_COLON);
  368. break;
  369. default:
  370. tok(':');
  371. break;
  372. }
  373. }
  374. }
  375. /* Casts */
  376. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST>{
  377. "("{TABS_AND_SPACES}(int|integer){TABS_AND_SPACES}")" tok(T_INT_CAST);
  378. "("{TABS_AND_SPACES}(real|double|float){TABS_AND_SPACES}")" tok(T_DOUBLE_CAST);
  379. "("{TABS_AND_SPACES}string{TABS_AND_SPACES}")" tok(T_STRING_CAST);
  380. "("{TABS_AND_SPACES}unicode{TABS_AND_SPACES}")" tok(T_UNICODE_CAST);
  381. "("{TABS_AND_SPACES}binary{TABS_AND_SPACES}")" tok(T_BINARY_CAST);
  382. "("{TABS_AND_SPACES}array{TABS_AND_SPACES}")" tok(T_ARRAY_CAST);
  383. "("{TABS_AND_SPACES}object{TABS_AND_SPACES}")" tok(T_OBJECT_CAST);
  384. "("{TABS_AND_SPACES}(bool|boolean){TABS_AND_SPACES}")" tok(T_BOOL_CAST);
  385. "("{TABS_AND_SPACES}unset{TABS_AND_SPACES}")" tok(T_UNSET_CAST);
  386. }
  387. /* Scalars (parsing these doesn't really matter since we just pass them through literally) */
  388. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST,XHP_ATTR_TYPE_DECL>{
  389. {LNUM}|{HNUM} tok(T_LNUMBER);
  390. {DNUM}|{EXPONENT_DNUM} tok(T_DNUMBER);
  391. {LABEL} tok(T_STRING);
  392. "$"{LABEL} tok(T_VARIABLE);
  393. b?'(\\.|\\\n|[^\\']+)*'|b?\"(\\.|\\\n|[^\\\"]+)*\" {
  394. yy_scan_newlines(yytext, yyg);
  395. tok(T_CONSTANT_ENCAPSED_STRING);
  396. }
  397. `[^`]*` {
  398. yy_scan_newlines(yytext, yyg);
  399. tok(T_BACKTICKS_EXPR);
  400. }
  401. }
  402. /* (HERE|NOW)DOC's */
  403. <PHP,PHP_NO_RESERVED_WORDS,PHP_NO_RESERVED_WORDS_PERSIST>b?"<<<"{TABS_AND_SPACES} {
  404. push_state(PHP_HEREDOC_START);
  405. yyextra->heredoc_yyleng = yyleng;
  406. yymore();
  407. }
  408. <PHP_HEREDOC_START>{
  409. "'"{LABEL}"'"|\"{LABEL}\" {
// Create a new string for the heredoc label. Because of the yymore() above,
// yytext starts at the "<<<" rather than at the label, so heredoc_yyleng is
// used to skip that prefix, plus 1 to step over the opening " or '. The length
// is computed the same way: drop the prefix and both quote characters.
  414. yyextra->heredoc_label = string(yytext + yyextra->heredoc_yyleng + 1, yyleng - yyextra->heredoc_yyleng - 2);
  415. set_state(PHP_HEREDOC_NSTART);
  416. yyextra->heredoc_yyleng = yyleng;
  417. yymore();
  418. }
  419. {LABEL} {
  420. yyextra->heredoc_label = string(yytext + yyextra->heredoc_yyleng);
  421. set_state(PHP_HEREDOC_NSTART);
  422. yyextra->heredoc_yyleng = yyleng;
  423. yymore();
  424. }
  425. }
  426. <PHP_HEREDOC_NSTART>{NEWLINE} {
  427. ++yyextra->lineno;
  428. yyextra->heredoc_data = yytext + yyleng;
  429. set_state(PHP_HEREDOC_DATA);
  430. yymore();
  431. }
  432. <PHP_HEREDOC_DATA>{
  433. [^\r\n]*{NEWLINE} {
  434. ++yyextra->lineno;
  435. set_state(PHP_HEREDOC_NEWLINE);
  436. yyextra->heredoc_yyleng = yyleng;
  437. yymore();
  438. }
  439. }
  440. <PHP_HEREDOC_NEWLINE>{
  441. {LABEL};?{NEWLINE} {
  442. if (strncmp(yyextra->heredoc_label.c_str(), yytext + yyextra->heredoc_yyleng, yyextra->heredoc_label.size()) == 0) {
  443. switch (yytext[yyextra->heredoc_yyleng + yyextra->heredoc_label.size()]) {
  444. case ';': case '\n': case '\r':
  445. yyless(yyleng - (yyleng - yyextra->heredoc_yyleng - yyextra->heredoc_label.size()));
  446. pop_state();
  447. tok(T_HEREDOC);
  448. }
  449. }
  450. ++yyextra->lineno;
  451. yyextra->heredoc_yyleng = yyleng;
  452. yymore();
  453. }
  454. [^\r\n]+ {
  455. set_state(PHP_HEREDOC_DATA);
  456. yyextra->heredoc_yyleng = yyleng;
  457. yymore();
  458. }
  459. {NEWLINE} {
  460. ++yyextra->lineno;
  461. yyextra->heredoc_yyleng = yyleng;
  462. yymore();
  463. }
  464. }
  465. /* XHP */
  466. <XHP_LABEL_WHITESPACE>{
  467. {WHITESPACE}+ yy_scan_newlines(yytext, yyg);
  468. }
  469. <XHP_LABEL,XHP_LABEL_WHITESPACE>{
  470. ":" tok(T_XHP_COLON);
  471. "-" tok(T_XHP_HYPHEN);
  472. "::" {
  473. pop_state();
  474. // Hack: Please don't expect this to work: $foo = <a href={Thing::if} />;
  475. // PHP will let you use reserved words for member variables and methods, but
  476. // they are verboten in XHP classes now.
  477. //
  478. // We don't use tok() because that pushes PHP_NO_RESERVED_WORDS, which the
  479. // scanner expects to pop at some point, but XHP_LABEL (in parser.y) will pop
  480. // sooner and then you're left with an imbalanced tag stack and that's when
  481. // the fun stops.
  482. *yylval = code_rope(yytext, yyextra->first_lineno, yyextra->lineno - yyextra->first_lineno);
  483. #ifdef DEBUG
  484. yy_log_token(T_PAAMAYIM_NEKUDOTAYIM);
  485. #endif
  486. return T_PAAMAYIM_NEKUDOTAYIM;
  487. }
  488. "--" {
  489. pop_state();
  490. tok(T_DEC);
  491. }
  492. {WHITESPACE} {
  493. yy_scan_newlines(yytext, yyg);
  494. pop_state();
  495. tok(T_XHP_WHITESPACE);
  496. }
  497. {LABEL} tok(T_STRING);
  498. . {
  499. pop_state();
  500. tok(yytext[0]);
  501. }
  502. }
  503. <XHP_ATTRS>{
  504. "="|"/"|">" tok(yytext[0]);
  505. {WHITESPACE}+ yy_scan_newlines(yytext, yyg);
  506. {LABEL} tok(T_STRING);
  507. }
  508. <XHP_ATTR_VAL>{
  509. [^&'\\"]+ tok(T_XHP_TEXT);
  510. \" {
  511. pop_state();
  512. tok('"');
  513. }
  514. }
  515. <XHP_CHILD_START>{
  516. {WHITESPACE}+ {
  517. /* ignore whitespace at the start */
  518. yy_scan_newlines(yytext, yyg);
  519. unput(' ');
  520. set_state(XHP_CHILD);
  521. }
  522. . {
  523. yyless(0);
  524. set_state(XHP_CHILD);
  525. }
  526. }
  527. <XHP_CHILD,XHP_AFTER_ENT,XHP_ATTR_VAL>{
  528. /* xml entities */
  529. (?-i:&quot;) tokt("\"");
  530. (?-i:&amp;) tokt("&");
  531. (?-i:&apos;) tokt("\\'");
  532. (?-i:&lt;) tokt("<")
  533. (?-i:&gt;) tokt(">");
  534. /* html entities */
  535. (?-i:&nbsp;) tokt("\u00A0");
  536. (?-i:&iexcl;) tokt("\u00A1");
  537. (?-i:&cent;) tokt("\u00A2");
  538. (?-i:&pound;) tokt("\u00A3");
  539. (?-i:&curren;) tokt("\u00A4");
  540. (?-i:&yen;) tokt("\u00A5");
  541. (?-i:&brvbar;) tokt("\u00A6");
  542. (?-i:&sect;) tokt("\u00A7");
  543. (?-i:&uml;) tokt("\u00A8");
  544. (?-i:&copy;) tokt("\u00A9");
  545. (?-i:&ordf;) tokt("\u00AA");
  546. (?-i:&laquo;) tokt("\u00AB");
  547. (?-i:&not;) tokt("\u00AC");
  548. (?-i:&shy;) tokt("\u00AD");
  549. (?-i:&reg;) tokt("\u00AE");
  550. (?-i:&macr;) tokt("\u00AF");
  551. (?-i:&deg;) tokt("\u00B0");
  552. (?-i:&plusmn;) tokt("\u00B1");
  553. (?-i:&sup2;) tokt("\u00B2");
  554. (?-i:&sup3;) tokt("\u00B3");
  555. (?-i:&acute;) tokt("\u00B4");
  556. (?-i:&micro;) tokt("\u00B5");
  557. (?-i:&para;) tokt("\u00B6");
  558. (?-i:&middot;) tokt("\u00B7");
  559. (?-i:&cedil;) tokt("\u00B8");
  560. (?-i:&sup1;) tokt("\u00B9");
  561. (?-i:&ordm;) tokt("\u00BA");
  562. (?-i:&raquo;) tokt("\u00BB");
  563. (?-i:&frac14;) tokt("\u00BC");
  564. (?-i:&frac12;) tokt("\u00BD");
  565. (?-i:&frac34;) tokt("\u00BE");
  566. (?-i:&iquest;) tokt("\u00BF");
  567. (?-i:&Agrave;) tokt("\u00C0");
  568. (?-i:&Aacute;) tokt("\u00C1");
  569. (?-i:&Acirc;) tokt("\u00C2");
  570. (?-i:&Atilde;) tokt("\u00C3");
  571. (?-i:&Auml;) tokt("\u00C4");
  572. (?-i:&Aring;) tokt("\u00C5");
  573. (?-i:&AElig;) tokt("\u00C6");
  574. (?-i:&Ccedil;) tokt("\u00C7");
  575. (?-i:&Egrave;) tokt("\u00C8");
  576. (?-i:&Eacute;) tokt("\u00C9");
  577. (?-i:&Ecirc;) tokt("\u00CA");
  578. (?-i:&Euml;) tokt("\u00CB");
  579. (?-i:&Igrave;) tokt("\u00CC");
  580. (?-i:&Iacute;) tokt("\u00CD");
  581. (?-i:&Icirc;) tokt("\u00CE");
  582. (?-i:&Iuml;) tokt("\u00CF");
  583. (?-i:&ETH;) tokt("\u00D0");
  584. (?-i:&Ntilde;) tokt("\u00D1");
  585. (?-i:&Ograve;) tokt("\u00D2");
  586. (?-i:&Oacute;) tokt("\u00D3");
  587. (?-i:&Ocirc;) tokt("\u00D4");
  588. (?-i:&Otilde;) tokt("\u00D5");
  589. (?-i:&Ouml;) tokt("\u00D6");
  590. (?-i:&times;) tokt("\u00D7");
  591. (?-i:&Oslash;) tokt("\u00D8");
  592. (?-i:&Ugrave;) tokt("\u00D9");
  593. (?-i:&Uacute;) tokt("\u00DA");
  594. (?-i:&Ucirc;) tokt("\u00DB");
  595. (?-i:&Uuml;) tokt("\u00DC");
  596. (?-i:&Yacute;) tokt("\u00DD");
  597. (?-i:&THORN;) tokt("\u00DE");
  598. (?-i:&szlig;) tokt("\u00DF");
  599. (?-i:&agrave;) tokt("\u00E0");
  600. (?-i:&aacute;) tokt("\u00E1");
  601. (?-i:&acirc;) tokt("\u00E2");
  602. (?-i:&atilde;) tokt("\u00E3");
  603. (?-i:&auml;) tokt("\u00E4");
  604. (?-i:&aring;) tokt("\u00E5");
  605. (?-i:&aelig;) tokt("\u00E6");
  606. (?-i:&ccedil;) tokt("\u00E7");
  607. (?-i:&egrave;) tokt("\u00E8");
  608. (?-i:&eacute;) tokt("\u00E9");
  609. (?-i:&ecirc;) tokt("\u00EA");
  610. (?-i:&euml;) tokt("\u00EB");
  611. (?-i:&igrave;) tokt("\u00EC");
  612. (?-i:&iacute;) tokt("\u00ED");
  613. (?-i:&icirc;) tokt("\u00EE");
  614. (?-i:&iuml;) tokt("\u00EF");
  615. (?-i:&eth;) tokt("\u00F0");
  616. (?-i:&ntilde;) tokt("\u00F1");
  617. (?-i:&ograve;) tokt("\u00F2");
  618. (?-i:&oacute;) tokt("\u00F3");
  619. (?-i:&ocirc;) tokt("\u00F4");
  620. (?-i:&otilde;) tokt("\u00F5");
  621. (?-i:&ouml;) tokt("\u00F6");
  622. (?-i:&divide;) tokt("\u00F7");
  623. (?-i:&oslash;) tokt("\u00F8");
  624. (?-i:&ugrave;) tokt("\u00F9");
  625. (?-i:&uacute;) tokt("\u00FA");
  626. (?-i:&ucirc;) tokt("\u00FB");
  627. (?-i:&uuml;) tokt("\u00FC");
  628. (?-i:&yacute;) tokt("\u00FD");
  629. (?-i:&thorn;) tokt("\u00FE");
  630. (?-i:&yuml;) tokt("\u00FF");
  631. (?-i:&OElig;) tokt("\u0152");
  632. (?-i:&oelig;) tokt("\u0153");
  633. (?-i:&Scaron;) tokt("\u0160");
  634. (?-i:&scaron;) tokt("\u0161");
  635. (?-i:&Yuml;) tokt("\u0178");
  636. (?-i:&fnof;) tokt("\u0192");
  637. (?-i:&circ;) tokt("\u02C6");
  638. (?-i:&tilde;) tokt("\u02DC");
  639. (?-i:&Alpha;) tokt("\u0391");
  640. (?-i:&Beta;) tokt("\u0392");
  641. (?-i:&Gamma;) tokt("\u0393");
  642. (?-i:&Delta;) tokt("\u0394");
  643. (?-i:&Epsilon;) tokt("\u0395");
  644. (?-i:&Zeta;) tokt("\u0396");
  645. (?-i:&Eta;) tokt("\u0397");
  646. (?-i:&Theta;) tokt("\u0398");
  647. (?-i:&Iota;) tokt("\u0399");
  648. (?-i:&Kappa;) tokt("\u039A");
  649. (?-i:&Lambda;) tokt("\u039B");
  650. (?-i:&Mu;) tokt("\u039C");
  651. (?-i:&Nu;) tokt("\u039D");
  652. (?-i:&Xi;) tokt("\u039E");
  653. (?-i:&Omicron;) tokt("\u039F");
  654. (?-i:&Pi;) tokt("\u03A0");
  655. (?-i:&Rho;) tokt("\u03A1");
  656. (?-i:&Sigma;) tokt("\u03A3");
  657. (?-i:&Tau;) tokt("\u03A4");
  658. (?-i:&Upsilon;) tokt("\u03A5");
  659. (?-i:&Phi;) tokt("\u03A6");
  660. (?-i:&Chi;) tokt("\u03A7");
  661. (?-i:&Psi;) tokt("\u03A8");
  662. (?-i:&Omega;) tokt("\u03A9");
  663. (?-i:&alpha;) tokt("\u03B1");
  664. (?-i:&beta;) tokt("\u03B2");
  665. (?-i:&gamma;) tokt("\u03B3");
  666. (?-i:&delta;) tokt("\u03B4");
  667. (?-i:&epsilon;) tokt("\u03B5");
  668. (?-i:&zeta;) tokt("\u03B6");
  669. (?-i:&eta;) tokt("\u03B7");
  670. (?-i:&theta;) tokt("\u03B8");
  671. (?-i:&iota;) tokt("\u03B9");
  672. (?-i:&kappa;) tokt("\u03BA");
  673. (?-i:&lambda;) tokt("\u03BB");
  674. (?-i:&mu;) tokt("\u03BC");
  675. (?-i:&nu;) tokt("\u03BD");
  676. (?-i:&xi;) tokt("\u03BE");
  677. (?-i:&omicron;) tokt("\u03BF");
  678. (?-i:&pi;) tokt("\u03C0");
  679. (?-i:&rho;) tokt("\u03C1");
  680. (?-i:&sigmaf;) tokt("\u03C2");
  681. (?-i:&sigma;) tokt("\u03C3");
  682. (?-i:&tau;) tokt("\u03C4");
  683. (?-i:&upsilon;) tokt("\u03C5");
  684. (?-i:&phi;) tokt("\u03C6");
  685. (?-i:&chi;) tokt("\u03C7");
  686. (?-i:&psi;) tokt("\u03C8");
  687. (?-i:&omega;) tokt("\u03C9");
  688. (?-i:&thetasym;) tokt("\u03D1");
  689. (?-i:&upsih;) tokt("\u03D2");
  690. (?-i:&piv;) tokt("\u03D6");
  691. (?-i:&ensp;) tokt("\u2002");
  692. (?-i:&emsp;) tokt("\u2003");
  693. (?-i:&thinsp;) tokt("\u2009");
  694. (?-i:&zwnj;) tokt("\u200C");
  695. (?-i:&zwj;) tokt("\u200D");
  696. (?-i:&lrm;) tokt("\u200E");
  697. (?-i:&rlm;) tokt("\u200F");
  698. (?-i:&ndash;) tokt("\u2013");
  699. (?-i:&mdash;) tokt("\u2014");
  700. (?-i:&lsquo;) tokt("\u2018");
  701. (?-i:&rsquo;) tokt("\u2019");
  702. (?-i:&sbquo;) tokt("\u201A");
  703. (?-i:&ldquo;) tokt("\u201C");
  704. (?-i:&rdquo;) tokt("\u201D");
  705. (?-i:&bdquo;) tokt("\u201E");
  706. (?-i:&dagger;) tokt("\u2020");
  707. (?-i:&Dagger;) tokt("\u2021");
  708. (?-i:&bull;) tokt("\u2022");
  709. (?-i:&hellip;) tokt("\u2026");
  710. (?-i:&permil;) tokt("\u2030");
  711. (?-i:&prime;) tokt("\u2032");
  712. (?-i:&Prime;) tokt("\u2033");
  713. (?-i:&lsaquo;) tokt("\u2039");
  714. (?-i:&rsaquo;) tokt("\u203A");
  715. (?-i:&oline;) tokt("\u203E");
  716. (?-i:&frasl;) tokt("\u2044");
  717. (?-i:&euro;) tokt("\u20AC");
  718. (?-i:&image;) tokt("\u2111");
  719. (?-i:&weierp;) tokt("\u2118");
  720. (?-i:&real;) tokt("\u211C");
  721. (?-i:&trade;) tokt("\u2122");
  722. (?-i:&alefsym;) tokt("\u2135");
  723. (?-i:&larr;) tokt("\u2190");
  724. (?-i:&uarr;) tokt("\u2191");
  725. (?-i:&rarr;) tokt("\u2192");
  726. (?-i:&darr;) tokt("\u2193");
  727. (?-i:&harr;) tokt("\u2194");
  728. (?-i:&crarr;) tokt("\u21B5");
  729. (?-i:&lArr;) tokt("\u21D0");
  730. (?-i:&uArr;) tokt("\u21D1");
  731. (?-i:&rArr;) tokt("\u21D2");
  732. (?-i:&dArr;) tokt("\u21D3");
  733. (?-i:&hArr;) tokt("\u21D4");
  734. (?-i:&forall;) tokt("\u2200");
  735. (?-i:&part;) tokt("\u2202");
  736. (?-i:&exist;) tokt("\u2203");
  737. (?-i:&empty;) tokt("\u2205");
  738. (?-i:&nabla;) tokt("\u2207");
  739. (?-i:&isin;) tokt("\u2208");
  740. (?-i:&notin;) tokt("\u2209");
  741. (?-i:&ni;) tokt("\u220B");
  742. (?-i:&prod;) tokt("\u220F");
  743. (?-i:&sum;) tokt("\u2211");
  744. (?-i:&minus;) tokt("\u2212");
  745. (?-i:&lowast;) tokt("\u2217");
  746. (?-i:&radic;) tokt("\u221A");
  747. (?-i:&prop;) tokt("\u221D");
  748. (?-i:&infin;) tokt("\u221E");
  749. (?-i:&ang;) tokt("\u2220");
  750. (?-i:&and;) tokt("\u2227");
  751. (?-i:&or;) tokt("\u2228");
  752. (?-i:&cap;) tokt("\u2229");
  753. (?-i:&cup;) tokt("\u222A");
  754. (?-i:&int;) tokt("\u222B");
  755. (?-i:&there4;) tokt("\u2234");
  756. (?-i:&sim;) tokt("\u223C");
  757. (?-i:&cong;) tokt("\u2245");
  758. (?-i:&asymp;) tokt("\u2248");
  759. (?-i:&ne;) tokt("\u2260");
  760. (?-i:&equiv;) tokt("\u2261");
  761. (?-i:&le;) tokt("\u2264");
  762. (?-i:&ge;) tokt("\u2265");
  763. (?-i:&sub;) tokt("\u2282");
  764. (?-i:&sup;) tokt("\u2283");
  765. (?-i:&nsub;) tokt("\u2284");
  766. (?-i:&sube;) tokt("\u2286");
  767. (?-i:&supe;) tokt("\u2287");
  768. (?-i:&oplus;) tokt("\u2295");
  769. (?-i:&otimes;) tokt("\u2297");
  770. (?-i:&perp;) tokt("\u22A5");
  771. (?-i:&sdot;) tokt("\u22C5");
  772. (?-i:&lceil;) tokt("\u2308");
  773. (?-i:&rceil;) tokt("\u2309");
  774. (?-i:&lfloor;) tokt("\u230A");
  775. (?-i:&rfloor;) tokt("\u230B");
  776. (?-i:&lang;) tokt("\u2329");
  777. (?-i:&rang;) tokt("\u232A");
  778. (?-i:&loz;) tokt("\u25CA");
  779. (?-i:&spades;) tokt("\u2660");
  780. (?-i:&clubs;) tokt("\u2663");
  781. (?-i:&hearts;) tokt("\u2665");
  782. (?-i:&diams;) tokt("\u2666");
  783. /* awesome entities */
  784. (?-i:&cloud;) tokt("\u2601");
  785. (?-i:&umbrella;) tokt("\u2602");
  786. (?-i:&snowman;) tokt("\u2603");
  787. (?-i:&snowflake;) tokt("\u2745");
  788. (?-i:&comet;) tokt("\u2604");
  789. (?-i:&thunderstorm;) tokt("\u2608");
  790. /* pseudo entities */
  791. ' tokt("\\'");
  792. "\\" tokt("\\\\");
  793. /* meta entities */
  794. (?-i:&#[0-9]+;) {
  795. char buf[5];
  796. utf8ize(atoi(yytext + 2), buf);
  797. tokt(buf);
  798. }
  799. (?-i:&#x)[A-F0-9]+; {
  800. char buf[5];
  801. char *_;
  802. utf8ize(strtol(yytext + 3, &_, 16), buf);
  803. tokt(buf);
  804. }
  805. /* not entities */
  806. & {
  807. yymore();
  808. BEGIN(XHP_INVALID_ENTITY);
  809. }
  810. }
  811. <XHP_INVALID_ENTITY>{
  812. {BYTE}{1,10} {
  813. for (char* ii = yytext; *ii; ++ii) {
  814. if (*ii == ';') {
  815. ii[1] = 0;
  816. break;
  817. }
  818. }
  819. if (!yyextra->terminated) {
  820. yyextra->error = string("Invalid entity: (") + yytext + ")";
  821. yyextra->terminated = true;
  822. }
  823. }
  824. }
  825. <XHP_AFTER_ENT>{
  826. [ \t\x0b\x0c\xa0\r\n]|\r\n {
  827. if (*yytext == '\r' || *yytext == '\n') {
  828. // Since we rewrite newlines into space we need to increment both line
  829. // counters. The first_lineno increment is quite a hack, and makes it so
  830. // that this ent is on the wrong line but it doesn't mess up the rest of
  831. // the file.
  832. ++yyextra->lineno;
  833. ++yyextra->first_lineno;
  834. }
  835. pop_state();
  836. yytext[0] = ' ';
  837. yytext[1] = 0;
  838. tok(T_XHP_TEXT);
  839. }
  840. . {
  841. pop_state();
  842. yyless(0);
  843. }
  844. }
  845. <XHP_CHILD>{
  846. [^&'<>\\{ \t\x0b\x0c\xa0\r\n]+{WHITESPACE}? {
  847. yy_scan_newlines(yytext, yyg);
  848. // Crunch white space at the end
  849. char* ii = yytext + yyleng - 1;
  850. while (*ii == ' ' || *ii == '\t' || *ii == '\n' || *ii == '\r') {
  851. --ii;
  852. }
  853. if (ii != yytext + yyleng - 1) {
  854. ii[1] = ' ';
  855. ii[2] = 0;
  856. }
  857. tok(T_XHP_TEXT);
  858. }
  859. {WHITESPACE}* {
  860. yy_scan_newlines(yytext, yyg);
  861. yytext[0] = ' ';
  862. yytext[1] = 0;
  863. tok(T_XHP_TEXT);
  864. }
  865. {WHITESPACE}*"{" {
  866. yy_scan_newlines(yytext, yyg);
  867. tok('{');
  868. }
  869. {WHITESPACE}*"<" {
  870. yy_scan_newlines(yytext, yyg);
  871. tok('<');
  872. }
  873. {WHITESPACE}*"</" {
  874. yy_scan_newlines(yytext, yyg);
  875. tok(T_XHP_LT_DIV);
  876. }
  877. {WHITESPACE}*"</>" {
  878. yy_scan_newlines(yytext, yyg);
  879. tok(T_XHP_LT_DIV_GT);
  880. }
  881. }
  882. <XHP_CHILDREN_DECL>{
  883. any tok(T_XHP_ANY);
  884. pcdata tok(T_XHP_PCDATA);
  885. empty tok(T_XHP_EMPTY);
  886. {LABEL} tok(T_STRING);
  887. ";" {
  888. pop_state();
  889. tok(';');
  890. }
  891. ":" {
  892. tok(T_XHP_COLON);
  893. }
  894. }
  895. /* Other */
  896. <*>{BYTE} {
  897. tok(yytext[0]);
  898. // fix unused function warnings
  899. yy_top_state(NULL);
  900. yyunput(0, 0, NULL);
  901. }
  902. %%
  903. #ifdef DEBUG
  904. static const char* yy_state_name(int state) {
  905. switch (state) {
  906. case INITIAL:
  907. return "INITIAL";
  908. case PHP:
  909. return "PHP";
  910. case PHP_COMMENT:
  911. return "PHP_COMMENT";
  912. case PHP_EOL_COMMENT:
  913. return "PHP_EOL_COMMENT";
  914. case PHP_DOC_COMMENT:
  915. return "PHP_DOC_COMMENT";
  916. case PHP_HEREDOC_START:
  917. return "PHP_HEREDOC_START";
  918. case PHP_HEREDOC_NSTART:
  919. return "PHP_HEREDOC_NSTART";
  920. case PHP_HEREDOC_NEWLINE:
  921. return "PHP_HEREDOC_NEWLINE";
  922. case PHP_HEREDOC_DATA:
  923. return "PHP_HEREDOC_DATA";
  924. case PHP_NO_RESERVED_WORDS:
  925. return "PHP_NO_RESERVED_WORDS";
  926. case PHP_NO_RESERVED_WORDS_PERSIST:
  927. return "PHP_NO_RESERVED_WORDS_PERSIST";
  928. case XHP_LABEL:
  929. return "XHP_LABEL";
  930. case XHP_LABEL_WHITESPACE:
  931. return "XHP_LABEL_WHITESPACE";
  932. case XHP_ATTRS:
  933. return "XHP_ATTRS";
  934. case XHP_ATTR_VAL:
  935. return "XHP_ATTR_VAL";
  936. case XHP_AFTER_ENT:
  937. return "XHP_AFTER_ENT";
  938. case XHP_CHILD:
  939. return "XHP_CHILD";
  940. case XHP_CHILD_START:
  941. return "XHP_CHILD_START";
  942. case XHP_INVALID_ENTITY:
  943. return "XHP_INVALID_ENTITY";
  944. case XHP_ATTR_TYPE_DECL:
  945. return "XHP_ATTR_TYPE_DECL";
  946. case XHP_CHILDREN_DECL:
  947. return "XHP_CHILDREN_DECL";
  948. default:
  949. return "???";
  950. }
  951. }
  952. static void yy_log_token(int tok) {
  953. const char* tokname = yytokname(tok);
  954. if (tokname) {
  955. fprintf(stderr, "--> %s\n", tokname);
  956. } else {
  957. fprintf(stderr, "--> '%c'\n", tok);
  958. }
  959. }
  960. #endif
  961. static int yy_token(int tok, yyguts_t* yyg) {
  962. if (YY_START == PHP_NO_RESERVED_WORDS) {
  963. pop_state();
  964. }
  965. switch (tok) {
  966. case T_OPEN_TAG:
  967. case T_OPEN_TAG_WITH_ECHO:
  968. case T_OPEN_TAG_FAKE:
  969. push_state(PHP);
  970. break;
  971. case T_CLOSE_TAG:
  972. pop_state();
  973. return ';';
  974. case T_OBJECT_OPERATOR:
  975. case T_PAAMAYIM_NEKUDOTAYIM:
  976. case T_FUNCTION:
  977. push_state(PHP_NO_RESERVED_WORDS);
  978. break;
  979. case '{':
  980. yyextra->curly_stack.push(tok);
  981. break;
  982. }
  983. #ifdef DEBUG
  984. yy_log_token(tok);
  985. #endif
  986. return yyextra->last_token = tok;
  987. }
  988. static inline void yy_scan_newlines(const char* text, struct yyguts_t* yyg) {
  989. for (; *text; ++text) {
  990. if (*text == '\r') {
  991. if (text[1] == '\n') {
  992. ++text;
  993. }
  994. ++yyextra->lineno;
  995. } else if (*text == '\n') {
  996. ++yyextra->lineno;
  997. }
  998. }
  999. }
  1000. void xhp_new_push_state(int s, struct yyguts_t* yyg) {
  1001. #ifdef DEBUG
  1002. fprintf(stderr, "--> PUSH(%s -> %s)\n", yy_state_name(YY_START), yy_state_name(s));
  1003. #endif
  1004. yy_push_state(s, yyg);
  1005. }
  1006. void xhp_new_pop_state(struct yyguts_t* yyg) {
  1007. #ifdef DEBUG
  1008. int s = YY_START;
  1009. #endif
  1010. yy_pop_state(yyg);
  1011. #ifdef DEBUG
  1012. fprintf(stderr, "--> POP(%s -> %s)\n", yy_state_name(s), yy_state_name(YY_START));
  1013. #endif
  1014. }
  1015. void xhp_set_state(int s, struct yyguts_t* yyg) {
  1016. #ifdef DEBUG
  1017. fprintf(stderr, "--> SET(%s)\n", yy_state_name(s));
  1018. #endif
  1019. BEGIN(s);
  1020. }