PageRenderTime 52ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/php/zend_language_scanner.l

http://yaxx.googlecode.com/
LEX | 1910 lines | 1211 code | 228 blank | 471 comment | 0 complexity | 7ba0f0afee391e560b60aec127463b4b MD5 | raw file
Possible License(s): AGPL-1.0
  1. %{
  2. /*
  3. +----------------------------------------------------------------------+
  4. | Zend Engine |
  5. +----------------------------------------------------------------------+
  6. | Copyright (c) 1998-2006 Zend Technologies Ltd. (http://www.zend.com) |
  7. +----------------------------------------------------------------------+
  8. | This source file is subject to version 2.00 of the Zend license, |
  9. | that is bundled with this package in the file LICENSE, and is |
  10. | available through the world-wide-web at the following url: |
  11. | http://www.zend.com/license/2_00.txt. |
  12. | If you did not receive a copy of the Zend license and are unable to |
  13. | obtain it through the world-wide-web, please send a note to |
  14. | license@zend.com so we can mail you a copy immediately. |
  15. +----------------------------------------------------------------------+
  16. | Authors: Andi Gutmans <andi@zend.com> |
  17. | Zeev Suraski <zeev@zend.com> |
  18. +----------------------------------------------------------------------+
  19. */
  20. /* $Id: zend_language_scanner.l,v 1.1 2006/09/14 19:32:15 yijunsf Exp $ */
  21. #define yyleng SCNG(yy_leng)
  22. #define yytext SCNG(yy_text)
  23. #define yytext_ptr SCNG(yy_text)
  24. #define yyin SCNG(yy_in)
  25. #define yyout SCNG(yy_out)
  26. #define yy_last_accepting_state SCNG(_yy_last_accepting_state)
  27. #define yy_last_accepting_cpos SCNG(_yy_last_accepting_cpos)
  28. #define yy_more_flag SCNG(_yy_more_flag)
  29. #define yy_more_len SCNG(_yy_more_len)
  30. %}
  /* Exclusive start conditions: while one of these is active, only rules
   * explicitly tagged with it are candidates for matching. */
  31. %x ST_IN_SCRIPTING
  32. %x ST_DOUBLE_QUOTES
  33. %x ST_SINGLE_QUOTE
  34. %x ST_BACKQUOTE
  35. %x ST_HEREDOC
  36. %x ST_LOOKING_FOR_PROPERTY
  37. %x ST_LOOKING_FOR_VARNAME
  38. %x ST_COMMENT
  39. %x ST_DOC_COMMENT
  40. %x ST_ONE_LINE_COMMENT
  /* Start-condition stack; the rules use yy_push_state()/yy_pop_state(). */
  41. %option stack
  42. %{
  43. #include <errno.h>
  44. #ifdef HAVE_STDARG_H
  45. # include <stdarg.h>
  46. #endif
  47. #ifdef HAVE_UNISTD_H
  48. # include <unistd.h>
  49. #endif
  50. #define YY_DECL int lex_scan(zval *zendlval TSRMLS_DC)
  51. #define ECHO { ZEND_WRITE( yytext, yyleng ); }
  52. #ifdef ZTS
  53. # define MY_INPUT yyinput
  54. #else
  55. # define MY_INPUT input
  56. #endif
  57. /* Globals Macros */
  58. #define SCNG LANG_SCNG
  59. #ifdef ZTS
  60. ZEND_API ts_rsrc_id language_scanner_globals_id;
  61. #else
  62. ZEND_API zend_scanner_globals language_scanner_globals;
  63. #endif
  64. #define YY_FATAL_ERROR zend_fatal_scanner_error
  /*
   * HANDLE_NEWLINES(s, l): bump CG(zend_lineno) once per line break found in
   * the l bytes starting at s.
   *
   * A "\n" always counts.  A "\r" counts only when another byte follows inside
   * the range and that byte is not "\n", so "\r\n" is a single break and a
   * "\r" in the very last position is not counted here.
   *
   * The locals use a macro-specific prefix so that an argument expression
   * naming a caller variable called `p` or `boundary` is not captured by the
   * macro's own declarations.
   */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *zend_hn_cur = (s); \
      char *zend_hn_end = zend_hn_cur + (l); \
      \
      for (; zend_hn_cur < zend_hn_end; zend_hn_cur++) { \
          if (*zend_hn_cur == '\n') { \
              CG(zend_lineno)++; \
          } else if ((*zend_hn_cur == '\r') \
                  && (zend_hn_cur + 1 < zend_hn_end) \
                  && (zend_hn_cur[1] != '\n')) { \
              CG(zend_lineno)++; \
          } \
      } \
  } while (0)
  /*
   * HANDLE_NEWLINE(c): bump CG(zend_lineno) when the character c is "\n" or
   * "\r".
   *
   * The argument is parenthesized so that a compound expression (for example
   * a conditional expression) is compared as a whole.  It is still expanded
   * twice, so it must not have side effects.  The expansion stays a plain
   * brace block, exactly as before, so existing call sites keep compiling.
   */
  #define HANDLE_NEWLINE(c) \
  { \
      if ((c) == '\n' || (c) == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* Character-class tests for octal and hexadecimal digits.  Both are plain
   * macros that expand their argument several times, so the argument must be
   * free of side effects. */
  84. #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
  85. #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
  /* Reports `message` through zend_error() at E_COMPILE_ERROR level.  This is
   * the function that YY_FATAL_ERROR is defined to in this file. */
  86. void zend_fatal_scanner_error(char *message)
  87. {
  88. zend_error(E_COMPILE_ERROR, "%s", message);
  89. }
  90. BEGIN_EXTERN_C()
  91. void startup_scanner(TSRMLS_D)
  92. {
  93. CG(heredoc) = NULL;
  94. CG(heredoc_len) = 0;
  95. CG(doc_comment) = NULL;
  96. CG(doc_comment_len) = 0;
  97. SCNG(yy_start_stack_ptr) = 0;
  98. SCNG(yy_start_stack_depth) = 0;
  99. SCNG(current_buffer) = NULL;
  100. #ifdef ZEND_MULTIBYTE
  101. SCNG(script_org) = NULL;
  102. SCNG(script_org_size) = 0;
  103. SCNG(script_filtered) = NULL;
  104. SCNG(script_filtered_size) = 0;
  105. SCNG(input_filter) = NULL;
  106. SCNG(output_filter) = NULL;
  107. SCNG(script_encoding) = NULL;
  108. SCNG(internal_encoding) = NULL;
  109. #endif /* ZEND_MULTIBYTE */
  110. }
  111. void shutdown_scanner(TSRMLS_D)
  112. {
  113. if (CG(heredoc)) {
  114. efree(CG(heredoc));
  115. CG(heredoc_len)=0;
  116. }
  117. if (SCNG(yy_start_stack)) {
  118. yy_flex_free(SCNG(yy_start_stack));
  119. SCNG(yy_start_stack) = NULL;
  120. }
  121. RESET_DOC_COMMENT();
  122. #ifdef ZEND_MULTIBYTE
  123. if (SCNG(script_org)) {
  124. efree(SCNG(script_org));
  125. SCNG(script_org) = NULL;
  126. }
  127. if (SCNG(script_filtered)) {
  128. efree(SCNG(script_filtered));
  129. SCNG(script_filtered) = NULL;
  130. }
  131. SCNG(script_org_size) = 0;
  132. SCNG(script_filtered_size) = 0;
  133. SCNG(input_filter) = NULL;
  134. SCNG(output_filter) = NULL;
  135. SCNG(script_encoding) = NULL;
  136. SCNG(internal_encoding) = NULL;
  137. #endif /* ZEND_MULTIBYTE */
  138. }
  139. END_EXTERN_C()
  140. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  141. {
  142. memcpy(&lex_state->buffer_state, &YY_CURRENT_BUFFER, sizeof(YY_BUFFER_STATE));
  143. lex_state->in = SCNG(yy_in);
  144. lex_state->state = YYSTATE;
  145. lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
  146. lex_state->lineno = CG(zend_lineno);
  147. #ifdef ZEND_MULTIBYTE
  148. lex_state->script_org = SCNG(script_org);
  149. lex_state->script_org_size = SCNG(script_org_size);
  150. lex_state->script_filtered = SCNG(script_filtered);
  151. lex_state->script_filtered_size = SCNG(script_filtered_size);
  152. lex_state->input_filter = SCNG(input_filter);
  153. lex_state->output_filter = SCNG(output_filter);
  154. lex_state->script_encoding = SCNG(script_encoding);
  155. lex_state->internal_encoding = SCNG(internal_encoding);
  156. #endif /* ZEND_MULTIBYTE */
  157. }
  /*
   * Puts the scanner back into the state stored in *lex_state: flex buffer,
   * input handle, start condition, line number and compiled filename.  The
   * buffer that was current on entry is deleted.  With ZEND_MULTIBYTE the
   * current script copies are freed and replaced by the stored pointers,
   * sizes, filters and encodings.
   */
  158. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  159. {
  /* Remember the buffer in use now so it can be deleted after the switch. */
  160. YY_BUFFER_STATE original_buffer_state = YY_CURRENT_BUFFER;
  161. if (lex_state->buffer_state) {
  162. yy_switch_to_buffer(lex_state->buffer_state TSRMLS_CC);
  163. } else {
  /* No buffer was stored: just clear the current-buffer slot. */
  164. YY_CURRENT_BUFFER = NULL;
  165. }
  166. yy_delete_buffer(original_buffer_state TSRMLS_CC);
  167. SCNG(yy_in) = lex_state->in;
  168. BEGIN(lex_state->state);
  169. CG(zend_lineno) = lex_state->lineno;
  170. zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
  171. #ifdef ZEND_MULTIBYTE
  /* Free the script copies held right now before taking over the stored ones. */
  172. if (SCNG(script_org)) {
  173. efree(SCNG(script_org));
  174. SCNG(script_org) = NULL;
  175. }
  176. if (SCNG(script_filtered)) {
  177. efree(SCNG(script_filtered));
  178. SCNG(script_filtered) = NULL;
  179. }
  180. SCNG(script_org) = lex_state->script_org;
  181. SCNG(script_org_size) = lex_state->script_org_size;
  182. SCNG(script_filtered) = lex_state->script_filtered;
  183. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  184. SCNG(input_filter) = lex_state->input_filter;
  185. SCNG(output_filter) = lex_state->output_filter;
  186. SCNG(script_encoding) = lex_state->script_encoding;
  187. SCNG(internal_encoding) = lex_state->internal_encoding;
  188. #endif /* ZEND_MULTIBYTE */
  189. }
  190. BEGIN_EXTERN_C()
  191. ZEND_API void zend_file_handle_dtor(zend_file_handle *fh)
  192. {
  193. TSRMLS_FETCH();
  194. switch (fh->type) {
  195. case ZEND_HANDLE_FP:
  196. fclose(fh->handle.fp);
  197. break;
  198. case ZEND_HANDLE_STREAM:
  199. if (fh->handle.stream.closer) {
  200. fh->handle.stream.closer(fh->handle.stream.handle TSRMLS_CC);
  201. }
  202. break;
  203. case ZEND_HANDLE_FILENAME:
  204. /* We're only supposed to get here when destructing the used_files hash,
  205. * which doesn't really contain open files, but references to their names/paths
  206. */
  207. break;
  208. }
  209. if (fh->opened_path) {
  210. efree(fh->opened_path);
  211. fh->opened_path = NULL;
  212. }
  213. if (fh->free_filename && fh->filename) {
  214. efree(fh->filename);
  215. fh->filename = NULL;
  216. }
  217. }
  218. int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2)
  219. {
  220. if (fh1->type != fh2->type) {
  221. return 0;
  222. }
  223. switch (fh1->type) {
  224. case ZEND_HANDLE_FP:
  225. return fh1->handle.fp==fh2->handle.fp;
  226. break;
  227. case ZEND_HANDLE_STREAM:
  228. return fh1->handle.stream.handle == fh2->handle.stream.handle;
  229. break;
  230. }
  231. return 0;
  232. }
  /* Removes file_handle from the CG(open_files) list, using
   * zend_compare_file_handles() to locate the matching element. */
  233. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
  234. {
  235. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  236. }
  237. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
  238. {
  239. char *file_path=NULL;
  240. if (FAILURE == zend_stream_fixup(file_handle TSRMLS_CC)) {
  241. return FAILURE;
  242. }
  243. zend_llist_add_element(&CG(open_files), file_handle);
  244. /* Reset the scanner for scanning the new file */
  245. SCNG(yy_in) = file_handle;
  246. #ifdef ZEND_MULTIBYTE
  247. if (file_handle->handle.stream.interactive == 0) {
  248. if (zend_multibyte_read_script(TSRMLS_C) != 0) {
  249. return FAILURE;
  250. }
  251. /* force flex to use buffer only */
  252. SCNG(yy_in) = NULL;
  253. SCNG(init) = 0;
  254. SCNG(start) = 1;
  255. zend_multibyte_set_filter(NULL TSRMLS_CC);
  256. if (!SCNG(input_filter)) {
  257. SCNG(script_filtered) = (char*)emalloc(SCNG(script_org_size)+1);
  258. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  259. SCNG(script_filtered_size) = SCNG(script_org_size);
  260. } else {
  261. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  262. }
  263. /* flex requires doubled null */
  264. SCNG(script_filtered) = (char*)erealloc(SCNG(script_filtered), SCNG(script_filtered_size)+2);
  265. *(SCNG(script_filtered)+SCNG(script_filtered_size)) = (char)NULL;
  266. *(SCNG(script_filtered)+SCNG(script_filtered_size)+1) = (char)NULL;
  267. yy_scan_buffer(SCNG(script_filtered), SCNG(script_filtered_size)+2 TSRMLS_CC);
  268. } else {
  269. yy_switch_to_buffer(yy_create_buffer(SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC);
  270. }
  271. #else /* !ZEND_MULTIBYTE */
  272. yy_switch_to_buffer(yy_create_buffer(SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC);
  273. #endif /* ZEND_MULTIBYTE */
  274. BEGIN(INITIAL);
  275. if (file_handle->opened_path) {
  276. file_path = file_handle->opened_path;
  277. } else {
  278. file_path = file_handle->filename;
  279. }
  280. zend_set_compiled_filename(file_path TSRMLS_CC);
  281. if (CG(start_lineno)) {
  282. CG(zend_lineno) = CG(start_lineno);
  283. CG(start_lineno) = 0;
  284. } else {
  285. CG(zend_lineno) = 1;
  286. }
  287. CG(increment_lineno) = 0;
  288. return SUCCESS;
  289. }
  290. END_EXTERN_C()
  /*
   * Compiles the script behind file_handle into a freshly allocated op_array.
   *
   * Returns the op_array after a successful parse, or NULL when the file could
   * not be opened for scanning (the op_array is freed in that case).  A failed
   * open with type == ZEND_REQUIRE, and a parser result of 1, both end in
   * zend_bailout().
   */
  291. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
  292. {
  293. zend_lex_state original_lex_state;
  294. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  295. zend_op_array *original_active_op_array = CG(active_op_array);
  296. zend_op_array *retval=NULL;
  297. int compiler_result;
  298. zend_bool compilation_successful=0;
  299. znode retval_znode;
  300. zend_bool original_in_compilation = CG(in_compilation);
  /* Constant long 1: the value passed to zend_do_return() once parsing is done. */
  301. retval_znode.op_type = IS_CONST;
  302. retval_znode.u.constant.type = IS_LONG;
  303. retval_znode.u.constant.value.lval = 1;
  304. retval_znode.u.constant.is_ref = 0;
  305. retval_znode.u.constant.refcount = 1;
  306. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  307. retval = op_array; /* success oriented */
  308. if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
  309. if (type==ZEND_REQUIRE) {
  310. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
  311. zend_bailout();
  312. } else {
  313. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
  314. }
  315. compilation_successful=0;
  316. } else {
  317. init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  318. CG(in_compilation) = 1;
  319. CG(active_op_array) = op_array;
  320. compiler_result = zendparse(TSRMLS_C);
  321. zend_do_return(&retval_znode, 0 TSRMLS_CC);
  322. zend_do_handle_exception(TSRMLS_C);
  323. CG(in_compilation) = original_in_compilation;
  324. if (compiler_result==1) { /* parser error */
  325. zend_bailout();
  326. }
  327. compilation_successful=1;
  328. }
  /* retval still equals op_array here, so this branch is always taken. */
  329. if (retval) {
  330. CG(active_op_array) = original_active_op_array;
  331. if (compilation_successful) {
  332. pass_two(op_array TSRMLS_CC);
  333. } else {
  334. efree(op_array);
  335. retval = NULL;
  336. }
  337. }
  /* The saved scanner state is reinstated only after a successful compile. */
  338. if (compilation_successful) {
  339. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  340. }
  341. return retval;
  342. }
  343. zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
  344. {
  345. zend_file_handle file_handle;
  346. zval tmp;
  347. zend_op_array *retval;
  348. char *opened_path = NULL;
  349. if (filename->type != IS_STRING) {
  350. tmp = *filename;
  351. zval_copy_ctor(&tmp);
  352. convert_to_string(&tmp);
  353. filename = &tmp;
  354. }
  355. file_handle.filename = filename->value.str.val;
  356. file_handle.free_filename = 0;
  357. file_handle.type = ZEND_HANDLE_FILENAME;
  358. file_handle.opened_path = NULL;
  359. file_handle.handle.fp = NULL;
  360. retval = zend_compile_file(&file_handle, type TSRMLS_CC);
  361. if (retval && file_handle.handle.stream.handle) {
  362. int dummy = 1;
  363. if (!file_handle.opened_path) {
  364. file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
  365. }
  366. zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
  367. if (opened_path) {
  368. efree(opened_path);
  369. }
  370. }
  371. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  372. if (filename==&tmp) {
  373. zval_dtor(&tmp);
  374. }
  375. return retval;
  376. }
  377. ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
  378. {
  379. /* enforce two trailing NULLs for flex... */
  380. STR_REALLOC(str->value.str.val, str->value.str.len+2);
  381. str->value.str.val[str->value.str.len+1]=0;
  382. SCNG(yy_in)=NULL;
  383. #ifdef ZEND_MULTIBYTE
  384. SCNG(script_org) = estrdup(str->value.str.val);
  385. SCNG(script_org_size) = str->value.str.len;
  386. zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
  387. if (!SCNG(input_filter)) {
  388. SCNG(script_filtered) = (char*)emalloc(SCNG(script_org_size)+1);
  389. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  390. SCNG(script_filtered_size) = SCNG(script_org_size);
  391. } else {
  392. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  393. }
  394. /* flex requires doubled null */
  395. SCNG(script_filtered) = (char*)erealloc(SCNG(script_filtered), SCNG(script_filtered_size)+2);
  396. *(SCNG(script_filtered)+SCNG(script_filtered_size)) = (char)NULL;
  397. *(SCNG(script_filtered)+SCNG(script_filtered_size)+1) = (char)NULL;
  398. yy_scan_buffer(SCNG(script_filtered), SCNG(script_filtered_size)+2 TSRMLS_CC);
  399. #else /* !ZEND_MULTIBYTE */
  400. yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC);
  401. #endif /* ZEND_MULTIBYTE */
  402. zend_set_compiled_filename(filename TSRMLS_CC);
  403. CG(zend_lineno) = 1;
  404. CG(increment_lineno) = 0;
  405. return SUCCESS;
  406. }
  407. ZEND_API int zend_get_scanned_file_offset(TSRMLS_D)
  408. {
  409. if (yyin) {
  410. int offset_in_buffer = (yy_c_buf_p - (YY_CURRENT_BUFFER)->yy_ch_buf);
  411. int read_bytes = SCNG(yy_n_chars);
  412. int offset_from_the_end = read_bytes - offset_in_buffer;
  413. return zend_stream_ftell(yyin TSRMLS_CC) - offset_from_the_end;
  414. } else {
  415. /* The entire file is in the buffer; probably zend multibyte
  416. is enabled */
  417. return (yy_c_buf_p - (YY_CURRENT_BUFFER)->yy_ch_buf);
  418. }
  419. }
  /*
   * Compiles the code in source_string into a new op_array of type
   * ZEND_EVAL_CODE; `filename` becomes the compiled filename.
   *
   * Returns NULL when the string length is 0, when preparing the string for
   * scanning fails, or when zendparse() returns 1 (CG(unclean_shutdown) is
   * set in that last case).  Otherwise returns the finished op_array.
   */
  420. zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
  421. {
  422. zend_lex_state original_lex_state;
  423. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  424. zend_op_array *original_active_op_array = CG(active_op_array);
  425. zend_op_array *retval;
  426. zval tmp;
  427. int compiler_result;
  428. zend_bool original_in_compilation = CG(in_compilation);
  429. if (source_string->value.str.len==0) {
  430. efree(op_array);
  431. return NULL;
  432. }
  433. CG(in_compilation) = 1;
  /* Scan a private string copy; it is destroyed again before returning. */
  434. tmp = *source_string;
  435. zval_copy_ctor(&tmp);
  436. convert_to_string(&tmp);
  437. source_string = &tmp;
  438. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  439. if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
  440. efree(op_array);
  441. retval = NULL;
  442. } else {
  443. init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  444. CG(active_op_array) = op_array;
  /* Start directly in ST_IN_SCRIPTING rather than INITIAL. */
  445. BEGIN(ST_IN_SCRIPTING);
  446. compiler_result = zendparse(TSRMLS_C);
  447. #ifdef ZEND_MULTIBYTE
  448. if (SCNG(script_org)) {
  449. efree(SCNG(script_org));
  450. SCNG(script_org) = NULL;
  451. }
  452. if (SCNG(script_filtered)) {
  453. efree(SCNG(script_filtered));
  454. SCNG(script_filtered) = NULL;
  455. }
  456. #endif /* ZEND_MULTIBYTE */
  457. if (compiler_result==1) {
  /* NOTE(review): op_array is not freed on this path - confirm who owns it. */
  458. CG(active_op_array) = original_active_op_array;
  459. CG(unclean_shutdown)=1;
  460. retval = NULL;
  461. } else {
  462. zend_do_return(NULL, 0 TSRMLS_CC);
  463. zend_do_handle_exception(TSRMLS_C);
  464. CG(active_op_array) = original_active_op_array;
  465. pass_two(op_array TSRMLS_CC);
  466. retval = op_array;
  467. }
  468. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  469. }
  470. zval_dtor(&tmp);
  471. CG(in_compilation) = original_in_compilation;
  472. return retval;
  473. }
  474. BEGIN_EXTERN_C()
  475. int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
  476. {
  477. zend_lex_state original_lex_state;
  478. zend_file_handle file_handle;
  479. file_handle.type = ZEND_HANDLE_FILENAME;
  480. file_handle.filename = filename;
  481. file_handle.free_filename = 0;
  482. file_handle.opened_path = NULL;
  483. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  484. if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
  485. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
  486. return FAILURE;
  487. }
  488. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  489. #ifdef ZEND_MULTIBYTE
  490. if (SCNG(script_org)) {
  491. efree(SCNG(script_org));
  492. SCNG(script_org) = NULL;
  493. }
  494. if (SCNG(script_filtered)) {
  495. efree(SCNG(script_filtered));
  496. SCNG(script_filtered) = NULL;
  497. }
  498. #endif /* ZEND_MULTIBYTE */
  499. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  500. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  501. return SUCCESS;
  502. }
  503. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
  504. {
  505. zend_lex_state original_lex_state;
  506. zval tmp = *str;
  507. str = &tmp;
  508. zval_copy_ctor(str);
  509. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  510. if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
  511. return FAILURE;
  512. }
  513. BEGIN(INITIAL);
  514. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  515. #ifdef ZEND_MULTIBYTE
  516. if (SCNG(script_org)) {
  517. efree(SCNG(script_org));
  518. SCNG(script_org) = NULL;
  519. }
  520. if (SCNG(script_filtered)) {
  521. efree(SCNG(script_filtered));
  522. SCNG(script_filtered) = NULL;
  523. }
  524. #endif /* ZEND_MULTIBYTE */
  525. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  526. zval_dtor(str);
  527. return SUCCESS;
  528. }
  529. END_EXTERN_C()
  530. #ifdef ZEND_MULTIBYTE
  531. BEGIN_EXTERN_C()
  /*
   * Rebuilds the unread part of the current flex buffer from SCNG(script_org).
   *
   * Steps, as written below:
   *  1. Take the scan position as an offset into the current buffer.
   *  2. If an old input filter is given and that offset is > 0, search for the
   *     offset into script_org whose output under the old filter (with
   *     old_encoding temporarily installed as script_encoding) has exactly
   *     that length.  Returns early if the filter yields no output.
   *  3. Convert the rest of script_org with the current input filter, or use
   *     it unconverted when no filter is set.
   *  4. Grow the buffer if needed, copy the data in after the scan position,
   *     and update the character counts and the two end-of-buffer markers.
   */
  532. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
  533. {
  534. YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
  535. int offset, original_offset, length, free_flag;
  536. char *p;
  537. zend_encoding *new_encoding;
  538. /* calculate current position */
  539. offset = original_offset = yy_c_buf_p - b->yy_ch_buf;
  540. if (old_input_filter && original_offset > 0) {
  541. new_encoding = SCNG(script_encoding);
  542. SCNG(script_encoding) = old_encoding;
  /* Step offset by one per pass until the filtered length equals
   * original_offset.
   * NOTE(review): nothing here bounds the number of passes - confirm that
   * the filtered length can always hit original_offset exactly. */
  543. do {
  544. (old_input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
  545. if (!p) {
  546. SCNG(script_encoding) = new_encoding;
  547. return;
  548. }
  549. efree(p);
  550. if (length > original_offset) {
  551. offset--;
  552. } else if (length < original_offset) {
  553. offset++;
  554. }
  555. } while (original_offset != length);
  556. SCNG(script_encoding) = new_encoding;
  557. }
  558. /* convert and set */
  559. if (!SCNG(input_filter)) {
  /* No filter: p points into script_org itself, so it must not be freed. */
  560. length = SCNG(script_org_size)-offset-1;
  561. p = SCNG(script_org)+offset+1;
  562. free_flag = 0;
  563. } else {
  564. SCNG(input_filter)(&p, &length, SCNG(script_org)+offset+1, SCNG(script_org_size)-offset-1 TSRMLS_CC);
  565. free_flag = 1;
  566. }
  567. if (original_offset+length+1 > (int)b->yy_buf_size) {
  /* Two extra bytes are allocated for the end-of-buffer markers set below. */
  568. b->yy_buf_size = original_offset+length+1;
  569. b->yy_ch_buf = (char*)erealloc(b->yy_ch_buf, b->yy_buf_size+2);
  570. SCNG(script_filtered) = b->yy_ch_buf;
  571. SCNG(script_filtered_size) = b->yy_buf_size;
  572. }
  573. yy_c_buf_p = b->yy_ch_buf + original_offset;
  /* NOTE(review): strncpy() stops copying source bytes at the first NUL in p
   * and zero-fills the remainder - confirm the data cannot contain NULs. */
  574. strncpy(yy_c_buf_p+1, p, length);
  575. b->yy_n_chars = original_offset + length + 1;
  576. SCNG(yy_n_chars) = b->yy_n_chars;
  577. b->yy_ch_buf[SCNG(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
  578. b->yy_ch_buf[SCNG(yy_n_chars)+1] = YY_END_OF_BUFFER_CHAR;
  579. if (free_flag) {
  580. efree(p);
  581. }
  582. }
  583. ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
  584. {
  585. int c = '*', n;
  586. if (file_handle->handle.stream.interactive == 0) {
  587. return zend_stream_read(file_handle, buf, len TSRMLS_CC);
  588. }
  589. /* interactive */
  590. if (SCNG(script_org)) {
  591. efree(SCNG(script_org));
  592. }
  593. if (SCNG(script_filtered)) {
  594. efree(SCNG(script_filtered));
  595. }
  596. SCNG(script_org) = NULL;
  597. SCNG(script_org_size) = 0;
  598. /* TODO: support widechars */
  599. for (n = 0; n < sizeof(buf) && (c = zend_stream_getc(yyin TSRMLS_CC)) != EOF && c != '\n'; ++n) {
  600. buf[n] = (char)c;
  601. }
  602. if (c == '\n') {
  603. buf[n++] = (char) c;
  604. }
  605. SCNG(script_org_size) = n;
  606. SCNG(script_org) = (char*)emalloc(SCNG(script_org_size)+1);
  607. memcpy(SCNG(script_org)+SCNG(script_org_size)-n, buf, n);
  608. return n;
  609. }
  610. ZEND_API int zend_multibyte_read_script(TSRMLS_D)
  611. {
  612. char buf[8192];
  613. int n;
  614. if (SCNG(script_org)) {
  615. efree(SCNG(script_org));
  616. }
  617. SCNG(script_org) = NULL;
  618. SCNG(script_org_size) = 0;
  619. for (;;) {
  620. n = zend_stream_read(yyin, buf, sizeof(buf) TSRMLS_CC);
  621. if (n <= 0) {
  622. break;
  623. }
  624. SCNG(script_org_size) += n;
  625. if (SCNG(script_org)) {
  626. SCNG(script_org) = (char*)erealloc(SCNG(script_org), SCNG(script_org_size)+1);
  627. } else {
  628. SCNG(script_org) = (char*)emalloc(SCNG(script_org_size)+1);
  629. }
  630. memcpy(SCNG(script_org)+SCNG(script_org_size)-n, buf, n);
  631. }
  632. if (n < 0) {
  633. return -1;
  634. }
  635. if (!SCNG(script_org)) {
  636. SCNG(script_org) = emalloc(SCNG(script_org_size)+1);
  637. }
  638. *(SCNG(script_org)+SCNG(script_org_size)) = (char)NULL;
  639. return 0;
  640. }
  /*
   * zend_copy_value(zendlval, yytext, yyleng): stores a copy of the matched
   * text in zendlval's string value and sets its length.  The ZEND_MULTIBYTE
   * variant passes the text through SCNG(output_filter) when one is set; the
   * other variant always duplicates it with estrndup().
   * Both expand to more than one statement without a do { } while (0)
   * wrapper, so the macro must not be the sole body of an unbraced if/else.
   */
  641. # define zend_copy_value(zendlval, yytext, yyleng) \
  642. if (SCNG(output_filter)) { \
  643. SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), yytext, yyleng TSRMLS_CC); \
  644. } else { \
  645. zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \
  646. zendlval->value.str.len = yyleng; \
  647. }
  648. #else /* ZEND_MULTIBYTE */
  649. # define zend_copy_value(zendlval, yytext, yyleng) \
  650. zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \
  651. zendlval->value.str.len = yyleng;
  652. #endif /* ZEND_MULTIBYTE */
  653. %}
  /* Named patterns referenced as {NAME} by the rules.
   *  LNUM          one or more decimal digits
   *  DNUM          digits with a decimal point; digits may be missing on one side
   *  EXPONENT_DNUM LNUM or DNUM followed by e/E, an optional sign and LNUM
   *  HNUM          "0x" followed by hexadecimal digits
   *  LABEL         identifier: letter, "_" or byte 0x7f-0xff, then those or digits
   * NOTE(review): in TOKENS and ESCAPED_AND_WHITESPACE the sequence "+-/"
   * sits inside a character class and is therefore a range from '+' to '/',
   * which covers + , - . / - the same characters as listing them one by one. */
  654. LNUM [0-9]+
  655. DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
  656. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  657. HNUM "0x"[0-9a-fA-F]+
  658. LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
  659. WHITESPACE [ \n\r\t]+
  660. TABS_AND_SPACES [ \t]*
  661. TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
  662. ENCAPSED_TOKENS [\[\]{}$]
  663. ESCAPED_AND_WHITESPACE [\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+
  664. ANY_CHAR (.|[\n])
  665. NEWLINE ("\r"|"\n"|"\r\n")
  666. %option noyylineno
  667. %option noyywrap
  668. %%
  /* Reserved words matched in the ST_IN_SCRIPTING state.  Each rule returns
   * its token code and sets no semantic value.  "exit" and "die" both return
   * T_EXIT. */
  669. <ST_IN_SCRIPTING>"exit" {
  670. return T_EXIT;
  671. }
  672. <ST_IN_SCRIPTING>"die" {
  673. return T_EXIT;
  674. }
  675. <ST_IN_SCRIPTING>"function" {
  676. return T_FUNCTION;
  677. }
  678. <ST_IN_SCRIPTING>"const" {
  679. return T_CONST;
  680. }
  681. <ST_IN_SCRIPTING>"return" {
  682. return T_RETURN;
  683. }
  684. <ST_IN_SCRIPTING>"try" {
  685. return T_TRY;
  686. }
  687. <ST_IN_SCRIPTING>"catch" {
  688. return T_CATCH;
  689. }
  690. <ST_IN_SCRIPTING>"throw" {
  691. return T_THROW;
  692. }
  693. <ST_IN_SCRIPTING>"if" {
  694. return T_IF;
  695. }
  696. <ST_IN_SCRIPTING>"elseif" {
  697. return T_ELSEIF;
  698. }
  699. <ST_IN_SCRIPTING>"endif" {
  700. return T_ENDIF;
  701. }
  702. <ST_IN_SCRIPTING>"else" {
  703. return T_ELSE;
  704. }
  705. <ST_IN_SCRIPTING>"while" {
  706. return T_WHILE;
  707. }
  708. <ST_IN_SCRIPTING>"endwhile" {
  709. return T_ENDWHILE;
  710. }
  711. <ST_IN_SCRIPTING>"do" {
  712. return T_DO;
  713. }
  714. <ST_IN_SCRIPTING>"for" {
  715. return T_FOR;
  716. }
  717. <ST_IN_SCRIPTING>"endfor" {
  718. return T_ENDFOR;
  719. }
  720. <ST_IN_SCRIPTING>"foreach" {
  721. return T_FOREACH;
  722. }
  723. <ST_IN_SCRIPTING>"endforeach" {
  724. return T_ENDFOREACH;
  725. }
  726. <ST_IN_SCRIPTING>"declare" {
  727. return T_DECLARE;
  728. }
  729. <ST_IN_SCRIPTING>"enddeclare" {
  730. return T_ENDDECLARE;
  731. }
  732. <ST_IN_SCRIPTING>"instanceof" {
  733. return T_INSTANCEOF;
  734. }
  735. <ST_IN_SCRIPTING>"as" {
  736. return T_AS;
  737. }
  738. <ST_IN_SCRIPTING>"switch" {
  739. return T_SWITCH;
  740. }
  741. <ST_IN_SCRIPTING>"endswitch" {
  742. return T_ENDSWITCH;
  743. }
  744. <ST_IN_SCRIPTING>"case" {
  745. return T_CASE;
  746. }
  747. <ST_IN_SCRIPTING>"default" {
  748. return T_DEFAULT;
  749. }
  750. <ST_IN_SCRIPTING>"break" {
  751. return T_BREAK;
  752. }
  753. <ST_IN_SCRIPTING>"continue" {
  754. return T_CONTINUE;
  755. }
  756. <ST_IN_SCRIPTING>"echo" {
  757. return T_ECHO;
  758. }
  759. <ST_IN_SCRIPTING>"print" {
  760. return T_PRINT;
  761. }
  762. <ST_IN_SCRIPTING>"class" {
  763. return T_CLASS;
  764. }
  765. <ST_IN_SCRIPTING>"interface" {
  766. return T_INTERFACE;
  767. }
  768. <ST_IN_SCRIPTING>"extends" {
  769. return T_EXTENDS;
  770. }
  771. <ST_IN_SCRIPTING>"implements" {
  772. return T_IMPLEMENTS;
  773. }
  /* "->" in code or inside a double-quoted, backquoted or heredoc string:
   * return T_OBJECT_OPERATOR and push ST_LOOKING_FOR_PROPERTY for the next
   * match. */
  774. <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"->" {
  775. yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
  776. return T_OBJECT_OPERATOR;
  777. }
  /* A label right after "->": pop back to the previous state and return the
   * name as a T_STRING carrying a copy of the matched text. */
  778. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  779. yy_pop_state(TSRMLS_C);
  780. zend_copy_value(zendlval, yytext, yyleng);
  781. zendlval->type = IS_STRING;
  782. return T_STRING;
  783. }
  /* Anything else: put the character back with yyless(0) and pop the state so
   * it is scanned again by the rules of the previous state.  No token is
   * returned from this rule. */
  784. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  785. yyless(0);
  786. yy_pop_state(TSRMLS_C);
  787. }
  /* "::" and further reserved words matched in ST_IN_SCRIPTING. */
  788. <ST_IN_SCRIPTING>"::" {
  789. return T_PAAMAYIM_NEKUDOTAYIM;
  790. }
  791. <ST_IN_SCRIPTING>"new" {
  792. return T_NEW;
  793. }
  794. <ST_IN_SCRIPTING>"clone" {
  795. return T_CLONE;
  796. }
  797. <ST_IN_SCRIPTING>"var" {
  798. return T_VAR;
  799. }
  /* Cast operators: a type name in parentheses, with optional spaces or tabs
   * (TABS_AND_SPACES) on either side of the name, is matched as one token.
   * Several spellings map to the same token (int/integer, real/double/float,
   * bool/boolean). */
  800. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  801. return T_INT_CAST;
  802. }
  803. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  804. return T_DOUBLE_CAST;
  805. }
  806. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" {
  807. return T_STRING_CAST;
  808. }
  809. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  810. return T_ARRAY_CAST;
  811. }
  812. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  813. return T_OBJECT_CAST;
  814. }
  815. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  816. return T_BOOL_CAST;
  817. }
  818. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  819. return T_UNSET_CAST;
  820. }
  /* Remaining reserved words, plus the "=>" operator. */
  821. <ST_IN_SCRIPTING>"eval" {
  822. return T_EVAL;
  823. }
  824. <ST_IN_SCRIPTING>"include" {
  825. return T_INCLUDE;
  826. }
  827. <ST_IN_SCRIPTING>"include_once" {
  828. return T_INCLUDE_ONCE;
  829. }
  830. <ST_IN_SCRIPTING>"require" {
  831. return T_REQUIRE;
  832. }
  833. <ST_IN_SCRIPTING>"require_once" {
  834. return T_REQUIRE_ONCE;
  835. }
  836. <ST_IN_SCRIPTING>"use" {
  837. return T_USE;
  838. }
  839. <ST_IN_SCRIPTING>"global" {
  840. return T_GLOBAL;
  841. }
  842. <ST_IN_SCRIPTING>"isset" {
  843. return T_ISSET;
  844. }
  845. <ST_IN_SCRIPTING>"empty" {
  846. return T_EMPTY;
  847. }
  848. <ST_IN_SCRIPTING>"__halt_compiler" {
  849. return T_HALT_COMPILER;
  850. }
  851. <ST_IN_SCRIPTING>"static" {
  852. return T_STATIC;
  853. }
  854. <ST_IN_SCRIPTING>"abstract" {
  855. return T_ABSTRACT;
  856. }
  857. <ST_IN_SCRIPTING>"final" {
  858. return T_FINAL;
  859. }
  860. <ST_IN_SCRIPTING>"private" {
  861. return T_PRIVATE;
  862. }
  863. <ST_IN_SCRIPTING>"protected" {
  864. return T_PROTECTED;
  865. }
  866. <ST_IN_SCRIPTING>"public" {
  867. return T_PUBLIC;
  868. }
  869. <ST_IN_SCRIPTING>"unset" {
  870. return T_UNSET;
  871. }
  872. <ST_IN_SCRIPTING>"=>" {
  873. return T_DOUBLE_ARROW;
  874. }
  875. <ST_IN_SCRIPTING>"list" {
  876. return T_LIST;
  877. }
  878. <ST_IN_SCRIPTING>"array" {
  879. return T_ARRAY;
  880. }
  881. <ST_IN_SCRIPTING>"++" {
  882. return T_INC;
  883. }
  884. <ST_IN_SCRIPTING>"--" {
  885. return T_DEC;
  886. }
  887. <ST_IN_SCRIPTING>"===" {
  888. return T_IS_IDENTICAL;
  889. }
  890. <ST_IN_SCRIPTING>"!==" {
  891. return T_IS_NOT_IDENTICAL;
  892. }
  893. <ST_IN_SCRIPTING>"==" {
  894. return T_IS_EQUAL;
  895. }
  896. <ST_IN_SCRIPTING>"!="|"<>" {
  897. return T_IS_NOT_EQUAL;
  898. }
  899. <ST_IN_SCRIPTING>"<=" {
  900. return T_IS_SMALLER_OR_EQUAL;
  901. }
  902. <ST_IN_SCRIPTING>">=" {
  903. return T_IS_GREATER_OR_EQUAL;
  904. }
  905. <ST_IN_SCRIPTING>"+=" {
  906. return T_PLUS_EQUAL;
  907. }
  908. <ST_IN_SCRIPTING>"-=" {
  909. return T_MINUS_EQUAL;
  910. }
  911. <ST_IN_SCRIPTING>"*=" {
  912. return T_MUL_EQUAL;
  913. }
  914. <ST_IN_SCRIPTING>"/=" {
  915. return T_DIV_EQUAL;
  916. }
  917. <ST_IN_SCRIPTING>".=" {
  918. return T_CONCAT_EQUAL;
  919. }
  920. <ST_IN_SCRIPTING>"%=" {
  921. return T_MOD_EQUAL;
  922. }
  923. <ST_IN_SCRIPTING>"<<=" {
  924. return T_SL_EQUAL;
  925. }
  926. <ST_IN_SCRIPTING>">>=" {
  927. return T_SR_EQUAL;
  928. }
  929. <ST_IN_SCRIPTING>"&=" {
  930. return T_AND_EQUAL;
  931. }
  932. <ST_IN_SCRIPTING>"|=" {
  933. return T_OR_EQUAL;
  934. }
  935. <ST_IN_SCRIPTING>"^=" {
  936. return T_XOR_EQUAL;
  937. }
  938. <ST_IN_SCRIPTING>"||" { /* symbolic boolean operators get T_BOOLEAN_* tokens, distinct from the word forms below */
  939. return T_BOOLEAN_OR;
  940. }
  941. <ST_IN_SCRIPTING>"&&" {
  942. return T_BOOLEAN_AND;
  943. }
  944. <ST_IN_SCRIPTING>"OR" { /* NOTE(review): the word operators are spelled upper-case only; presumably the scanner is generated case-insensitively - confirm in the build rules */
  945. return T_LOGICAL_OR;
  946. }
  947. <ST_IN_SCRIPTING>"AND" {
  948. return T_LOGICAL_AND;
  949. }
  950. <ST_IN_SCRIPTING>"XOR" {
  951. return T_LOGICAL_XOR;
  952. }
  953. <ST_IN_SCRIPTING>"<<" { /* shift operators */
  954. return T_SL;
  955. }
  956. <ST_IN_SCRIPTING>">>" {
  957. return T_SR;
  958. }
  959. <ST_IN_SCRIPTING>{TOKENS} { /* anything matching the TOKENS definition is handed to the parser as a character token */
  960. return yytext[0]; /* the token code is the first matched character itself */
  961. }
  962. <ST_IN_SCRIPTING>"{" { /* opening brace in script code */
  963. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); /* push a state so that the closing-brace rule has something to pop */
  964. return '{';
  965. }
  966. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" { /* dollar-brace interpolation inside a double-quoted, backquoted or heredoc string */
  967. yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC); /* the ST_LOOKING_FOR_VARNAME rules decide how the following text is scanned */
  968. return T_DOLLAR_OPEN_CURLY_BRACES;
  969. }
  970. <ST_IN_SCRIPTING>"}" { /* closing brace in script code: drop any pending doc comment and unwind one pushed state */
  971. RESET_DOC_COMMENT();
  972. /* This is a temporary fix which is dependent on flex and its implementation: yy_start_stack_ptr is non-zero only while states are pushed, so an unbalanced closing brace does not pop an empty stack */
  973. if (yy_start_stack_ptr) {
  974. yy_pop_state(TSRMLS_C);
  975. }
  976. return '}';
  977. }
  978. <ST_LOOKING_FOR_VARNAME>{LABEL} { /* a label while in ST_LOOKING_FOR_VARNAME: return it as the variable name */
  979. zend_copy_value(zendlval, yytext, yyleng);
  980. zendlval->type = IS_STRING;
  981. yy_pop_state(TSRMLS_C); /* replace ST_LOOKING_FOR_VARNAME on the state stack ... */
  982. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); /* ... with ST_IN_SCRIPTING, so scanning continues as script code */
  983. return T_STRING_VARNAME;
  984. }
  985. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} { /* not a label: switch to script scanning without consuming anything and without returning a token */
  986. yyless(0); /* give the matched character back to the input */
  987. yy_pop_state(TSRMLS_C);
  988. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  989. }
  990. <ST_IN_SCRIPTING>{LNUM} { /* integer literal: T_LNUMBER when it fits in a long, otherwise re-parsed as a double */
  991. errno = 0; /* clear errno so that ERANGE below can only come from this strtol call */
  992. zendlval->value.lval = strtol(yytext, NULL, 0); /* base 0: strtol derives the base from the literal's prefix */
  993. if (errno == ERANGE) { /* overflow */
  994. zendlval->value.dval = zend_strtod(yytext, NULL);
  995. zendlval->type = IS_DOUBLE;
  996. return T_DNUMBER;
  997. } else {
  998. zendlval->type = IS_LONG;
  999. return T_LNUMBER;
  1000. }
  1001. }
  1002. <ST_IN_SCRIPTING>{HNUM} { /* hexadecimal literal: T_LNUMBER normally, T_DNUMBER when the parsed value does not fit a signed long */
  1003. errno = 0; /* clear errno so that ERANGE below can only come from this strtoul call */
  1004. zendlval->value.lval = strtoul(yytext, NULL, 16);
  1005. if (errno == ERANGE) { /* overflow */
  1006. /* not trying strtod - it returns trash on 0x-es */
  1007. zendlval->value.lval = LONG_MAX; /* maximal long */
  1008. zend_error(E_NOTICE,"Hex number is too big: %s", yytext);
  1009. } else {
  1010. if (zendlval->value.lval < 0) { /* the unsigned result exceeded LONG_MAX and shows up negative once stored in the long */
  1011. /* maintain consistency with the old way */
  1012. zendlval->value.dval = (unsigned long) zendlval->value.lval;
  1013. zendlval->type = IS_DOUBLE;
  1014. return T_DNUMBER;
  1015. }
  1017. }
  1018. zendlval->type = IS_LONG; /* single store covering both the clamped-overflow and the in-range case */
  1019. return T_LNUMBER;
  1020. }
  1021. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */
  1022. zendlval->value.str.val = (char *)estrndup(yytext, yyleng); /* the digits are copied verbatim; no numeric conversion happens here */
  1023. zendlval->value.str.len = yyleng;
  1024. zendlval->type = IS_STRING;
  1025. return T_NUM_STRING;
  1026. }
  1027. <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} { /* floating-point literal, with or without exponent */
  1028. zendlval->value.dval = zend_strtod(yytext, NULL);
  1029. zendlval->type = IS_DOUBLE;
  1030. return T_DNUMBER;
  1031. }
  1032. <ST_IN_SCRIPTING>"__CLASS__" { /* replaced at scan time by a copy of the active class entry's name, or "" when there is none */
  1033. char *class_name = NULL;
  1034. if (CG(active_class_entry)) {
  1035. class_name = CG(active_class_entry)->name;
  1036. }
  1037. if (!class_name) { /* covers both "no active class entry" and "entry with a NULL name" */
  1038. class_name = "";
  1039. }
  1040. zendlval->value.str.len = strlen(class_name);
  1041. zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len); /* the token carries its own copy */
  1042. zendlval->type = IS_STRING;
  1043. return T_CLASS_C;
  1044. }
  1045. <ST_IN_SCRIPTING>"__FUNCTION__" { /* replaced at scan time by a copy of the active op array's function name, or "" when there is none */
  1046. char *func_name = NULL;
  1047. if (CG(active_op_array)) {
  1048. func_name = CG(active_op_array)->function_name;
  1049. }
  1050. if (!func_name) { /* covers both "no active op array" and "op array with a NULL function name" */
  1051. func_name = "";
  1052. }
  1053. zendlval->value.str.len = strlen(func_name);
  1054. zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len); /* the token carries its own copy */
  1055. zendlval->type = IS_STRING;
  1056. return T_FUNC_C;
  1057. }
  1058. <ST_IN_SCRIPTING>"__METHOD__" { /* builds "Class::function", or just the part that is available, or "" when neither is */
  1059. char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
  1060. char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
  1061. size_t len = 0; /* upper bound on the formatted length, used only to size the buffer */
  1062. if (class_name) {
  1063. len += strlen(class_name) + 2; /* +2 reserves room for the "::" separator, even if no function name follows */
  1064. }
  1065. if (func_name) {
  1066. len += strlen(func_name);
  1067. }
  1068. zendlval->value.str.val = emalloc(len+1); /* +1 for the terminating NUL */
  1069. sprintf(zendlval->value.str.val, "%s%s%s", /* return value deliberately unused: the length is taken with strlen below, so it is stored exactly once */
  1070. class_name ? class_name : "",
  1071. class_name && func_name ? "::" : "", /* separator only when both parts exist */
  1072. func_name ? func_name : ""
  1073. );
  1074. zendlval->value.str.len = strlen(zendlval->value.str.val); /* actual length; can be smaller than len when the separator was not written */
  1075. zendlval->type = IS_STRING;
  1076. return T_METHOD_C;
  1077. }
  1078. <ST_IN_SCRIPTING>"__LINE__" { /* replaced at scan time by the scanner's current line counter */
  1079. zendlval->value.lval = CG(zend_lineno);
  1080. zendlval->type = IS_LONG;
  1081. return T_LINE;
  1082. }
  1083. <ST_IN_SCRIPTING>"__FILE__" { /* replaced at scan time by a copy of the compiled file name, or "" when none is reported */
  1084. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1085. if (!filename) {
  1086. filename = "";
  1087. }
  1088. zendlval->value.str.len = strlen(filename);
  1089. zendlval->value.str.val = estrndup(filename, zendlval->value.str.len); /* the token carries its own copy */
  1090. zendlval->type = IS_STRING;
  1091. return T_FILE;
  1092. }
  1093. <INITIAL>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" { /* text outside script tags: at most 400 pattern repetitions per token, or a lone "<s" / "<" */
  1094. #ifdef ZEND_MULTIBYTE
  1095. if (SCNG(output_filter)) {
  1096. int readsize;
  1097. readsize = SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), yytext, yyleng TSRMLS_CC); /* the filter fills in the token value itself */
  1098. if (readsize < yyleng) { /* the filter used fewer bytes than were matched: give the remainder back to the input */
  1099. yyless(readsize);
  1100. }
  1101. } else {
  1102. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1103. zendlval->value.str.len = yyleng;
  1104. }
  1105. #else /* !ZEND_MULTIBYTE */
  1106. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1107. zendlval->value.str.len = yyleng;
  1108. #endif /* ZEND_MULTIBYTE */
  1109. zendlval->type = IS_STRING;
  1110. HANDLE_NEWLINES(yytext, yyleng); /* runs after any yyless above, i.e. on the text that is actually kept */
  1111. return T_INLINE_HTML;
  1112. }
  1113. <INITIAL>"<?"|"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"\'php\'"){WHITESPACE}*">" { /* short open tag or script-element open tag */
  1114. HANDLE_NEWLINES(yytext, yyleng);
  1115. if (CG(short_tags) || yyleng>2) { /* yyleng>2 means it's not <? but <script> */
  1116. zendlval->value.str.val = yytext; /* no copying - intentional */
  1117. zendlval->value.str.len = yyleng;
  1118. zendlval->type = IS_STRING;
  1119. BEGIN(ST_IN_SCRIPTING);
  1120. return T_OPEN_TAG;
  1121. } else { /* short tags disabled: the two characters are passed through as inline HTML, this time as a copy */
  1122. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1123. zendlval->value.str.len = yyleng;
  1124. zendlval->type = IS_STRING;
  1125. return T_INLINE_HTML;
  1126. }
  1127. }
  1128. <INITIAL>"<%="|"<?=" { /* echo-style open tags; each spelling is honoured only when its own setting is enabled */
  1129. if ((yytext[1]=='%' && CG(asp_tags)) || (yytext[1]=='?' && CG(short_tags))) { /* yytext[1] tells the two spellings apart */
  1130. zendlval->value.str.val = yytext; /* no copying - intentional */
  1131. zendlval->value.str.len = yyleng;
  1132. zendlval->type = IS_STRING;
  1133. BEGIN(ST_IN_SCRIPTING);
  1134. return T_OPEN_TAG_WITH_ECHO;
  1135. } else { /* setting disabled: the text is passed through as inline HTML */
  1136. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1137. zendlval->value.str.len = yyleng;
  1138. zendlval->type = IS_STRING;
  1139. return T_INLINE_HTML;
  1140. }
  1141. }
  1142. <INITIAL>"<%" { /* asp-style open tag, honoured only when CG(asp_tags) is set */
  1143. if (CG(asp_tags)) {
  1144. zendlval->value.str.val = yytext; /* no copying - intentional */
  1145. zendlval->value.str.len = yyleng;
  1146. zendlval->type = IS_STRING;
  1147. BEGIN(ST_IN_SCRIPTING);
  1148. return T_OPEN_TAG;
  1149. } else { /* asp tags disabled: the text is passed through as inline HTML */
  1150. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1151. zendlval->value.str.len = yyleng;
  1152. zendlval->type = IS_STRING;
  1153. return T_INLINE_HTML;
  1154. }
  1155. }
  1156. <INITIAL>"<?php"([ \t]|{NEWLINE}) { /* full open tag; it must be followed by a blank, a tab or a newline, which becomes part of the token */
  1157. zendlval->value.str.val = yytext; /* no copying - intentional */
  1158. zendlval->value.str.len = yyleng;
  1159. zendlval->type = IS_STRING;
  1160. HANDLE_NEWLINE(yytext[yyleng-1]); /* only the last matched character is passed to the newline handler */
  1161. BEGIN(ST_IN_SCRIPTING);
  1162. return T_OPEN_TAG;
  1163. }
  1164. <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} { /* variable reference, in script code and inside interpolating strings */
  1165. zend_copy_value(zendlval, (yytext+1), (yyleng-1)); /* skip the leading dollar sign: the value is the bare name */
  1166. zendlval->type = IS_STRING;
  1167. return T_VARIABLE;
  1168. }
  1169. <ST_IN_SCRIPTING>{LABEL} { /* a label in script code that no earlier, equally long rule claimed */
  1170. zend_copy_value(zendlval, yytext, yyleng);
  1171. zendlval->type = IS_STRING;
  1172. return T_STRING;
  1173. }
  1174. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LABEL} { /* same action for labels inside interpolating strings */
  1175. zend_copy_value(zendlval, yytext, yyleng);
  1176. zendlval->type = IS_STRING;
  1177. return T_STRING;
  1178. }
  1179. <ST_IN_SCRIPTING>{WHITESPACE} { /* whitespace in script code is returned as a token of its own */
  1180. zendlval->value.str.val = yytext; /* no copying - intentional */
  1181. zendlval->value.str.len = yyleng;
  1182. zendlval->type = IS_STRING;
  1183. HANDLE_NEWLINES(yytext, yyleng); /* line accounting for any newlines inside the run */
  1184. return T_WHITESPACE;
  1185. }
  1186. <ST_IN_SCRIPTING>"#"|"//" { /* start of a one-line comment */
  1187. BEGIN(ST_ONE_LINE_COMMENT);
  1188. yymore(); /* keep the marker in yytext; the comment body is appended by the ST_ONE_LINE_COMMENT rules */
  1189. }
  1190. <ST_ONE_LINE_COMMENT>"?"|"%"|">" { /* a lone character that could belong to a close tag but does not form one here */
  1191. yymore(); /* accumulate it into the comment text and keep scanning */
  1192. }
  1193. <ST_ONE_LINE_COMMENT>[^\n\r?%>]*{ANY_CHAR} { /* comment text up to and including the first character that may end it */
  1194. switch (yytext[yyleng-1]) { /* dispatch on the last matched character */
  1195. case '?': case '%': case '>':
  1196. yyless(yyleng-1); /* possible close tag: give that character back so the dedicated rules can examine it */
  1197. yymore();
  1198. break;
  1199. case '\n':
  1200. CG(zend_lineno)++;
  1201. /* intentional fall through */
  1202. default: /* NOTE(review): a terminating '\r' lands here without bumping zend_lineno in this rule - confirm bare-CR input is counted elsewhere */
  1203. zendlval->value.str.val = yytext; /* no copying - intentional */
  1204. zendlval->value.str.len = yyleng;
  1205. zendlval->type = IS_STRING;
  1206. BEGIN(ST_IN_SCRIPTING);
  1207. return T_COMMENT;
  1208. }
  1209. }
  1210. <ST_ONE_LINE_COMMENT>{NEWLINE} { /* a newline ends the comment and is included in the returned token */
  1211. zendlval->value.str.val = yytext; /* no copying - intentional */
  1212. zendlval->value.str.len = yyleng;
  1213. zendlval->type = IS_STRING;
  1214. BEGIN(ST_IN_SCRIPTING);
  1215. CG(zend_lineno)++;
  1216. return T_COMMENT;
  1217. }
  1218. <ST_ONE_LINE_COMMENT>"?>"|"%>" { /* a close tag also terminates a one-line comment */
  1219. if (CG(asp_tags) || yytext[yyleng-2] != '%') { /* asp comment? */
  1220. zendlval->value.str.val = yytext; /* no copying - intentional */
  1221. zendlval->value.str.len = yyleng-2; /* the comment token excludes the two tag characters */
  1222. zendlval->type = IS_STRING;
  1223. yyless(yyleng-2); /* give the tag back so the ST_IN_SCRIPTING close-tag rules see it */
  1224. BEGIN(ST_IN_SCRIPTING);
  1225. return T_COMMENT;
  1226. } else { /* asp-style close tag while asp tags are off: ordinary comment text */
  1227. yymore();
  1228. }
  1229. }
  1230. <ST_IN_SCRIPTING>"/**"{WHITESPACE} { /* doc-comment opener: slash, two stars, then whitespace */
  1231. CG(comment_start_line) = CG(zend_lineno); /* remembered for the unterminated-comment warning issued at end of file */
  1232. RESET_DOC_COMMENT();
  1233. BEGIN(ST_DOC_COMMENT);
  1234. yymore(); /* keep the opener in yytext; the body is appended by the comment rules */
  1235. }
  1236. <ST_IN_SCRIPTING>"/*" { /* ordinary block-comment opener */
  1237. CG(comment_start_line) = CG(zend_lineno); /* remembered for the unterminated-comment warning issued at end of file */
  1238. BEGIN(ST_COMMENT);
  1239. yymore(); /* keep the opener in yytext; the body is appended by the comment rules */
  1240. }
  1241. <ST_COMMENT,ST_DOC_COMMENT>[^*]+ { /* comment body: everything up to the next star */
  1242. yymore(); /* accumulate; no token is returned yet */
  1243. }
  1244. <ST_DOC_COMMENT>"*/" { /* end of a doc comment: yytext holds the whole comment thanks to the earlier yymore calls */
  1245. CG(doc_comment) = estrndup(yytext, yyleng); /* copy kept in compiler globals */
  1246. CG(doc_comment_len) = yyleng;
  1247. HANDLE_NEWLINES(yytext, yyleng); /* line accounting for the whole comment happens here, in one go */
  1248. BEGIN(ST_IN_SCRIPTING);
  1249. return T_DOC_COMMENT;
  1250. }
  1251. <ST_COMMENT>"*/" { /* end of an ordinary block comment; zendlval is not filled in by this rule */
  1252. HANDLE_NEWLINES(yytext, yyleng); /* line accounting for the whole accumulated comment */
  1253. BEGIN(ST_IN_SCRIPTING);
  1254. return T_COMMENT;
  1255. }
  1256. <ST_COMMENT,ST_DOC_COMMENT>"*" { /* a star that does not start the two-character terminator */
  1257. yymore(); /* accumulate it as comment text */
  1258. }
  1259. <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? { /* close tag; one directly following newline is swallowed into the token */
  1260. zendlval->value.str.val = yytext; /* no copying - intentional */
  1261. zendlval->value.str.len = yyleng;
  1262. zendlval->type = IS_STRING;
  1263. BEGIN(INITIAL); /* back to inline-HTML scanning */
  1264. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1265. }
  1266. <ST_IN_SCRIPTING>"%>"{NEWLINE}? { /* asp-style close tag, honoured only when CG(asp_tags) is set */
  1267. if (CG(asp_tags)) {
  1268. BEGIN(INITIAL);
  1269. zendlval->value.str.len = yyleng;
  1270. zendlval->type = IS_STRING;
  1271. zendlval->value.str.val = yytext; /* no copying - intentional */
  1272. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1273. } else { /* asp tags off: only the percent sign is consumed and returned as a character token */
  1274. yyless(1); /* give everything after the first character back to the input */
  1275. return yytext[0];
  1276. }
  1277. }
  1278. <ST_IN_SCRIPTING>(["]([^$"\\]|("\\".))*["]) { /* complete double-quoted string with no unescaped dollar sign: decoded here into one constant-string token */
  1279. register char *s, *t; /* s reads and t writes within the same buffer; t never runs ahead of s */
  1280. char *end;
  1281. zendlval->value.str.val = estrndup(yytext+1, yyleng-2); /* copy without the surrounding quotes */
  1282. zendlval->value.str.len = yyleng-2;
  1283. zendlval->type = IS_STRING;
  1284. HANDLE_NEWLINES(yytext, yyleng);
  1285. /* convert escape sequences */
  1286. s = t = zendlval->value.str.val;
  1287. end = s+zendlval->value.str.len;
  1288. while (s<end) {
  1289. if (*s=='\\') {
  1290. s++;
  1291. if (s>=end) { /* backslash was the last byte: nothing left to decode, the loop condition ends the scan */
  1292. continue;
  1293. }
  1294. switch(*s) { /* each recognised escape shrinks the length by the number of bytes it drops */
  1295. case 'n':
  1296. *t++ = '\n';
  1297. zendlval->value.str.len--;
  1298. break;
  1299. case 'r':
  1300. *t++ = '\r';
  1301. zendlval->value.str.len--;
  1302. break;
  1303. case 't':
  1304. *t++ = '\t';
  1305. zendlval->value.str.len--;
  1306. break;
  1307. case '\\':
  1308. case '$':
  1309. case '"':
  1310. *t++ = *s; /* escaped backslash, dollar or quote: keep the character, drop the backslash */
  1311. zendlval->value.str.len--;
  1312. break;
  1313. default:
  1314. /* check for an octal */
  1315. if (ZEND_IS_OCT(*s)) {
  1316. char octal_buf[4] = { 0, 0, 0, 0 }; /* up to three digits plus NUL */
  1317. octal_buf[0] = *s;
  1318. zendlval->value.str.len--;
  1319. if ((s+1)<end && ZEND_IS_OCT(*(s+1))) {
  1320. octal_buf[1] = *(++s);
  1321. zendlval->value.str.len--;
  1322. if ((s+1)<end && ZEND_IS_OCT(*(s+1))) {
  1323. octal_buf[2] = *(++s);
  1324. zendlval->value.str.len--;
  1325. }
  1326. }
  1327. *t++ = (char) strtol(octal_buf, NULL, 8); /* the value is narrowed to a single char */
  1328. } else if (*s=='x' && (s+1)<end && ZEND_IS_HEX(*(s+1))) { /* hex escape needs at least one hex digit after the x */
  1329. char hex_buf[3] = { 0, 0, 0}; /* up to two digits plus NUL */
  1330. zendlval->value.str.len--; /* for the 'x' */
  1331. hex_buf[0] = *(++s);
  1332. zendlval->value.str.len--;
  1333. if ((s+1)<end && ZEND_IS_HEX(*(s+1))) {
  1334. hex_buf[1] = *(++s);
  1335. zendlval->value.str.len--;
  1336. }
  1337. *t++ = (char) strtol(hex_buf, NULL, 16);
  1338. } else { /* unknown escape: backslash and character are both kept, length unchanged */
  1339. *t++ = '\\';
  1340. *t++ = *s;
  1341. }
  1342. break;
  1343. }
  1344. s++;
  1345. } else {
  1346. *t++ = *s++; /* ordinary byte: copied as is */
  1347. }
  1348. }
  1349. *t = 0; /* terminate the decoded, possibly shorter string */
  1350. #ifdef ZEND_MULTIBYTE
  1351. if (SCNG(output_filter)) {
  1352. s = zendlval->value.str.val; /* s is reused to hold the unfiltered buffer */
  1353. SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC);
  1354. efree(s); /* presumably the filter stored a new buffer in str.val, so the old one is released - confirm against the filter contract */
  1355. }
  1356. #endif /* ZEND_MULTIBYTE */
  1357. return T_CONSTANT_ENCAPSED_STRING;
  1358. }
  1359. <ST_IN_SCRIPTING>([']([^'\\]|("\\".))*[']) { /* complete single-quoted string: only backslash-backslash and backslash-quote are decoded */
  1360. register char *s, *t; /* s reads and t writes within the same buffer; t never runs ahead of s */
  1361. char *end;
  1362. zendlval->value.str.val = estrndup(yytext+1, yyleng-2); /* copy without the surrounding quotes */
  1363. zendlval->value.str.len = yyleng-2;
  1364. zendlval->type = IS_STRING;
  1365. HANDLE_NEWLINES(yytext, yyleng);
  1366. /* convert escape sequences */
  1367. s = t = zendlval->value.str.val;
  1368. end = s+zendlval->value.str.len;
  1369. while (s<end) {
  1370. if (*s=='\\') {
  1371. s++;
  1372. if (s>=end) { /* backslash was the last byte: nothing left to decode, the loop condition ends the scan */
  1373. continue;
  1374. }
  1375. switch(*s) {
  1376. case '\\':
  1377. case '\'':
  1378. *t++ = *s; /* keep the escaped character, drop the backslash */
  1379. zendlval->value.str.len--;
  1380. break;
  1381. default: /* any other escape stays literal: backslash and character are both kept */
  1382. *t++ = '\\';
  1383. *t++ = *s;
  1384. break;
  1385. }
  1386. s++;
  1387. } else {
  1388. *t++ = *s++; /* ordinary byte: copied as is */
  1389. }
  1390. }
  1391. *t = 0; /* terminate the decoded, possibly shorter string */
  1392. #ifdef ZEND_MULTIBYTE
  1393. if (SCNG(output_filter)) {
  1394. s = zendlval->value.str.val; /* s is reused to hold the unfiltered buffer */
  1395. SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC);
  1396. efree(s); /* presumably the filter stored a new buffer in str.val, so the old one is released - confirm against the filter contract */
  1397. }
  1398. #endif /* ZEND_MULTIBYTE */
  1399. return T_CONSTANT_ENCAPSED_STRING;
  1400. }
  1401. <ST_IN_SCRIPTING>["] { /* a double quote that did not match as a complete constant string: scan the string piecewise */
  1402. BEGIN(ST_DOUBLE_QUOTES);
  1403. return '\"';
  1404. }
  1405. <ST_IN_SCRIPTING>"<<<"{TABS_AND_SPACES}{LABEL}{NEWLINE} { /* heredoc opener: the label is saved in CG(heredoc) for the end-of-heredoc rule to compare against */
  1406. char *s;
  1407. CG(zend_lineno)++; /* the opener line always ends in a newline */
  1408. CG(heredoc_len) = yyleng-3-1-(yytext[yyleng-2]=='\r'?1:0); /* minus the three angle brackets, minus the newline, minus a CR when the newline is CR LF */
  1409. s = yytext+3;
  1410. while ((*s == ' ') || (*s == '\t')) { /* skip blanks between the opener and the label, shrinking the length to match */
  1411. s++;
  1412. CG(heredoc_len)--;
  1413. }
  1414. CG(heredoc) = estrndup(s, CG(heredoc_len)); /* own copy of the label */
  1415. BEGIN(ST_HEREDOC);
  1416. return T_START_HEREDOC;
  1417. }
  1418. <ST_IN_SCRIPTING>[`] { /* opening backquote: switch to backquote-string scanning */
  1419. BEGIN(ST_BACKQUOTE);
  1420. return '`';
  1421. }
  1422. <ST_IN_SCRIPTING>['] { /* a single quote that did not match as a complete constant string: scan the string piecewise */
  1423. BEGIN(ST_SINGLE_QUOTE);
  1424. return '\'';
  1425. }
  1426. <ST_HEREDOC>^{LABEL}(";")?{NEWLINE} { /* a line made of a label, an optional semicolon and a newline: ends the heredoc when the label equals the saved one */
  1427. int label_len;
  1428. if (yytext[yyleng-2]=='\r') { /* strip the newline, which is two bytes when it is CR LF */
  1429. label_len = yyleng-2;
  1430. } else {
  1431. label_len = yyleng-1;
  1432. }
  1433. if (yytext[label_len-1]==';') { /* the optional semicolon is not part of the label */
  1434. label_len--;
  1435. }
  1436. if (label_len==CG(heredoc_len) && !memcmp(yytext, CG(heredoc), label_len)) {
  1437. zendlval->value.str.val = estrndup(yytext, label_len); /* unput destroys yytext */
  1438. zendlval->value.str.len = label_len;
  1439. yyless(label_len); /* keep only the label; the semicolon and newline go back to the input for the script rules */
  1440. efree(CG(heredoc));
  1441. CG(heredoc)=NULL;
  1442. CG(heredoc_len)=0;
  1443. BEGIN(ST_IN_SCRIPTING);
  1444. return T_END_HEREDOC;
  1445. } else { /* some other label-like line: ordinary heredoc content, newline included */
  1446. CG(zend_lineno)++;
  1447. zend_copy_value(zendlval, yytext, yyleng);
  1448. zendlval->type = IS_STRING;
  1449. return T_STRING;
  1450. }
  1451. }
  1452. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ESCAPED_AND_WHITESPACE} { /* text matching ESCAPED_AND_WHITESPACE inside an interpolating string is returned as a copied string chunk */
  1453. HANDLE_NEWLINES(yytext, yyleng);
  1454. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1455. zendlval->value.str.len = yyleng;
  1456. zendlval->type = IS_STRING;
  1457. return T_ENCAPSED_AND_WHITESPACE;
  1458. }
  1459. <ST_SINGLE_QUOTE>([^'\\]|\\[^'\\])+ { /* single-quoted body: any run without a quote, where a backslash may precede anything except a quote or a backslash */
  1460. HANDLE_NEWLINES(yytext, yyleng);
  1461. zend_copy_value(zendlval, yytext, yyleng); /* kept verbatim, backslashes included */
  1462. zendlval->type = IS_STRING;
  1463. return T_ENCAPSED_AND_WHITESPACE;
  1464. }
  1465. <ST_DOUBLE_QUOTES>[`]+ { /* backquotes have no special meaning inside a double-quoted string: plain text */
  1466. zend_copy_value(zendlval, yytext, yyleng);
  1467. zendlval->type = IS_STRING;
  1468. return T_ENCAPSED_AND_WHITESPACE;
  1469. }
  1470. <ST_BACKQUOTE>["]+ { /* likewise, double quotes are plain text inside a backquoted string */
  1471. zend_copy_value(zendlval, yytext, yyleng);
  1472. zendlval->type = IS_STRING;
  1473. return T_ENCAPSED_AND_WHITESPACE;
  1474. }
  1475. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"$"[^a-zA-Z_\x7f-\xff{] { /* a dollar sign not followed by a label start or an opening brace: a literal character */
  1476. zendlval->value.lval = (long) yytext[0]; /* the value is the dollar sign itself */
  1477. if (yyleng == 2) { /* NOTE(review): the pattern appears to always match exactly two characters, so this looks always true - confirm */
  1478. yyless(1); /* the following character was only lookahead: give it back */
  1479. }
  1480. return T_CHARACTER;
  1481. }
  1482. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{ENCAPSED_TOKENS} { /* characters matching ENCAPSED_TOKENS are returned as character tokens inside interpolating strings */
  1483. zendlval->value.lval = (long) yytext[0]; /* the character is also stored as the semantic value */
  1484. return yytext[0];
  1485. }
  1486. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\{" { /* backslash followed by an opening brace is kept as the literal two characters */
  1487. zendlval->value.str.val = estrndup("\\{", sizeof("\\{") - 1); /* sizeof - 1 excludes the terminating NUL, giving a length of two */
  1488. zendlval->value.str.len = sizeof("\\{") - 1;
  1489. zendlval->type = IS_STRING;
  1490. return T_STRING;
  1491. }
  1492. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" { /* opening brace directly followed by a dollar sign: start of a braced expression inside a string */
  1493. zendlval->value.lval = (long) yytext[0];
  1494. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); /* the expression is scanned as script code until the closing-brace rule pops this state */
  1495. yyless(1); /* only the brace is consumed; the dollar sign is rescanned in the new state */
  1496. return T_CURLY_OPEN;
  1497. }
  1498. <ST_SINGLE_QUOTE>"\\'" { /* the four rules below turn an escaped delimiter or backslash into a single T_CHARACTER carrying the unescaped character */
  1499. zendlval->value.lval = (long) '\'';
  1500. return T_CHARACTER;
  1501. }
  1502. <ST_SINGLE_QUOTE>"\\\\" { /* escaped backslash in a single-quoted string */
  1503. zendlval->value.lval = (long)'\\';
  1504. return T_CHARACTER;
  1505. }
  1506. <ST_DOUBLE_QUOTES>"\\\"" { /* escaped double quote in a double-quoted string */
  1507. zendlval->value.lval = (long) '"';
  1508. return T_CHARACTER;
  1509. }
  1510. <ST_BACKQUOTE>"\\`" { /* escaped backquote in a backquoted string */
  1511. zendlval->value.lval = (long) '`';
  1512. return T_CHARACTER;
  1513. }
  1514. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"[0-7]{1,3} { /* octal escape: one to three octal digits after the backslash */
  1515. zendlval->value.lval = strtol(yytext+1, NULL, 8); /* skip the backslash; the result is stored as a long without narrowing */
  1516. return T_CHARACTER;
  1517. }
  1518. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\x"[0-9A-Fa-f]{1,2} { /* hex escape: one or two hex digits after backslash-x */
  1519. zendlval->value.lval = strtol (yytext+2, NULL, 16); /* skip the backslash and the x */
  1520. return T_CHARACTER;
  1521. }
  1522. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"\\"{ANY_CHAR} { /* any remaining backslash escape inside an interpolating string */
  1523. switch (yytext[1]) { /* yytext[1] is the character after the backslash */
  1524. case 'n':
  1525. zendlval->value.lval = (long) '\n';
  1526. break;
  1527. case 't':
  1528. zendlval->value.lval = (long) '\t';
  1529. break;
  1530. case 'r':
  1531. zendlval->value.lval = (long) '\r';
  1532. break;
  1533. case '\\':
  1534. zendlval->value.lval = (long) '\\';
  1535. break;
  1536. case '$':
  1537. zendlval->value.lval = (long) yytext[1]; /* escaped dollar sign: the dollar itself */
  1538. break;
  1539. default: /* unrecognised escape: both characters are returned verbatim as a string under a distinct token */
  1540. zendlval->value.str.val = estrndup(yytext, yyleng);
  1541. zendlval->value.str.len = yyleng;
  1542. zendlval->type = IS_STRING;
  1543. return T_BAD_CHARACTER;
  1544. break; /* never reached: the return above leaves the action */
  1545. }
  1546. return T_CHARACTER; /* all recognised escapes end up here with the decoded character in lval */
  1547. }
  1548. <ST_HEREDOC>["'`]+ { /* quote characters of any kind are plain text inside a heredoc */
  1549. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1550. zendlval->value.str.len = yyleng;
  1551. zendlval->type = IS_STRING;
  1552. return T_ENCAPSED_AND_WHITESPACE;
  1553. }
  1554. <ST_DOUBLE_QUOTES>["] { /* closing delimiters: each string state returns to script scanning on its own quote character */
  1555. BEGIN(ST_IN_SCRIPTING);
  1556. return '\"';
  1557. }
  1558. <ST_BACKQUOTE>[`] {
  1559. BEGIN(ST_IN_SCRIPTING);
  1560. return '`';
  1561. }
  1562. <ST_SINGLE_QUOTE>['] {
  1563. BEGIN(ST_IN_SCRIPTING);
  1564. return '\'';
  1565. }
  1566. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,INITIAL,ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY><<EOF>> { /* end of input in the listed states: token 0, no diagnostics */
  1567. return 0;
  1568. }
  1569. <ST_COMMENT,ST_DOC_COMMENT><<EOF>> { /* end of input inside a block or doc comment: warn, citing the line recorded when the comment opened */
  1570. zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line));
  1571. return 0;
  1572. }
  1573. <ST_IN_SCRIPTING,INITIAL,ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_SINGLE_QUOTE,ST_HEREDOC>{ANY_CHAR} { /* catch-all for a character no other rule in these states accepted */
  1574. zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); /* warn only: no token is returned, so scanning resumes with the next character */
  1575. }