PageRenderTime 60ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/Zend/zend_language_scanner.l

http://github.com/infusion/PHP
LEX | 2195 lines | 1693 code | 380 blank | 122 comment | 0 complexity | 1dfa5ee22c952d0fc5c4bd0b40a5fe3c MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2011 Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Marcus Boerger <helly@php.net> |
  16. | Nuno Lopes <nlopess@php.net> |
  17. | Scott MacVicar <scottmac@php.net> |
  18. | Flex version authors: |
  19. | Andi Gutmans <andi@zend.com> |
  20. | Zeev Suraski <zeev@zend.com> |
  21. +----------------------------------------------------------------------+
  22. */
  23. /* $Id: zend_language_scanner.l 307028 2011-01-03 14:39:48Z iliaa $ */
  /* Scanner tracing hook. Tracing is compiled out; change the condition to 0
   * to print every (state, character) pair passed to YYDEBUG. */
  #if 1
  # define YYDEBUG(s, c)
  #else
  # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
  #endif
  29. #include "zend_language_scanner_defs.h"
  30. #include <errno.h>
  31. #include "zend.h"
  32. #include "zend_alloc.h"
  33. #include <zend_language_parser.h>
  34. #include "zend_compile.h"
  35. #include "zend_language_scanner.h"
  36. #include "zend_highlight.h"
  37. #include "zend_constants.h"
  38. #include "zend_variables.h"
  39. #include "zend_operators.h"
  40. #include "zend_API.h"
  41. #include "zend_strtod.h"
  42. #include "zend_exceptions.h"
  43. #include "tsrm_virtual_cwd.h"
  44. #include "tsrm_config_common.h"
  /* re2c glue: the generated scanner reads and advances the scanner globals
   * through the macros below. */
  #define YYCTYPE unsigned char
  /* Makes the enclosing function return 0 once YYCURSOR + n reaches
   * YYLIMIT + ZEND_MMAP_AHEAD. */
  #define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
  #define YYCURSOR SCNG(yy_cursor)
  #define YYLIMIT SCNG(yy_limit)
  #define YYMARKER SCNG(yy_marker)
  #define YYGETCONDITION() SCNG(yy_state)
  #define YYSETCONDITION(s) SCNG(yy_state) = s
  #define STATE(name) yyc##name
  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE YYGETCONDITION()
  #define yytext ((char*)SCNG(yy_text))
  #define yyleng SCNG(yy_leng)
  /* Shrink the current token to its first x bytes and rewind the cursor to
   * match. x is parenthesized so compound expressions expand correctly; it is
   * still evaluated twice, so it must be free of side effects. */
  #define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
                         yyleng = (unsigned int)(x); } while(0)
  #define yymore() goto yymore_restart
  /* perform sanity check. If this message is triggered you should
     increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
  /*!max:re2c */
  #if ZEND_MMAP_AHEAD < YYMAXFILL
  # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  #endif
  67. #ifdef HAVE_STDARG_H
  68. # include <stdarg.h>
  69. #endif
  70. #ifdef HAVE_UNISTD_H
  71. # include <unistd.h>
  72. #endif
  73. /* Globals Macros */
  74. #define SCNG LANG_SCNG
  75. #ifdef ZTS
  76. ZEND_API ts_rsrc_id language_scanner_globals_id;
  77. #else
  78. ZEND_API zend_php_scanner_globals language_scanner_globals;
  79. #endif
  /* Walk the l bytes starting at s and bump CG(zend_lineno) once per line
   * break: a '\n', or a '\r' that is not immediately followed by '\n'.
   * The byte after the range is read when the range ends in '\r'. */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *p = (s); \
      char *boundary = p + (l); \
      \
      for (; p < boundary; p++) { \
          if (*p == '\n' || (*p == '\r' && p[1] != '\n')) { \
              CG(zend_lineno)++; \
          } \
      } \
  } while (0)
  /* Bump CG(zend_lineno) when the single character c is '\n' or '\r'.
   * c is parenthesized so compound arguments compare as a whole; it may be
   * evaluated twice, so it must be free of side effects. */
  #define HANDLE_NEWLINE(c) \
  { \
      if ((c) == '\n' || (c) == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* CG(doc_comment_len) doubles as scratch storage for the length scanned up
   * to the first variable of a double-quoted string. */
  #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(scanned) CG(doc_comment_len) = (scanned)
  #define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len)
  /* Character-class predicates. Each evaluates its argument more than once.
   * IS_LABEL_START: '_', an ASCII letter, or any value >= 0x7F. */
  #define IS_LABEL_START(c) ((c) == '_' || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) >= 0x7F)
  /* ZEND_IS_OCT: '0'..'7'. */
  #define ZEND_IS_OCT(c) ((c) <= '7' && (c) >= '0')
  /* ZEND_IS_HEX: '0'..'9', 'a'..'f' or 'A'..'F'. */
  #define ZEND_IS_HEX(c) (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f') || ((c) >= '0' && (c) <= '9'))
  103. BEGIN_EXTERN_C()
  104. static void _yy_push_state(int new_state TSRMLS_DC)
  105. {
  106. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
  107. YYSETCONDITION(new_state);
  108. }
  109. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  110. static void yy_pop_state(TSRMLS_D)
  111. {
  112. int *stack_state;
  113. zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
  114. YYSETCONDITION(*stack_state);
  115. zend_stack_del_top(&SCNG(state_stack));
  116. }
  117. static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
  118. {
  119. YYCURSOR = (YYCTYPE*)str;
  120. YYLIMIT = YYCURSOR + len;
  121. if (!SCNG(yy_start)) {
  122. SCNG(yy_start) = YYCURSOR;
  123. }
  124. }
  125. void startup_scanner(TSRMLS_D)
  126. {
  127. CG(heredoc) = NULL;
  128. CG(heredoc_len) = 0;
  129. CG(doc_comment) = NULL;
  130. CG(doc_comment_len) = 0;
  131. zend_stack_init(&SCNG(state_stack));
  132. }
  133. void shutdown_scanner(TSRMLS_D)
  134. {
  135. if (CG(heredoc)) {
  136. efree(CG(heredoc));
  137. CG(heredoc_len)=0;
  138. }
  139. zend_stack_destroy(&SCNG(state_stack));
  140. RESET_DOC_COMMENT();
  141. }
  142. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  143. {
  144. lex_state->yy_leng = SCNG(yy_leng);
  145. lex_state->yy_start = SCNG(yy_start);
  146. lex_state->yy_text = SCNG(yy_text);
  147. lex_state->yy_cursor = SCNG(yy_cursor);
  148. lex_state->yy_marker = SCNG(yy_marker);
  149. lex_state->yy_limit = SCNG(yy_limit);
  150. lex_state->state_stack = SCNG(state_stack);
  151. zend_stack_init(&SCNG(state_stack));
  152. lex_state->in = SCNG(yy_in);
  153. lex_state->yy_state = YYSTATE;
  154. lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
  155. lex_state->lineno = CG(zend_lineno);
  156. #ifdef ZEND_MULTIBYTE
  157. lex_state->script_org = SCNG(script_org);
  158. lex_state->script_org_size = SCNG(script_org_size);
  159. lex_state->script_filtered = SCNG(script_filtered);
  160. lex_state->script_filtered_size = SCNG(script_filtered_size);
  161. lex_state->input_filter = SCNG(input_filter);
  162. lex_state->output_filter = SCNG(output_filter);
  163. lex_state->script_encoding = SCNG(script_encoding);
  164. lex_state->internal_encoding = SCNG(internal_encoding);
  165. #endif /* ZEND_MULTIBYTE */
  166. }
  167. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  168. {
  169. SCNG(yy_leng) = lex_state->yy_leng;
  170. SCNG(yy_start) = lex_state->yy_start;
  171. SCNG(yy_text) = lex_state->yy_text;
  172. SCNG(yy_cursor) = lex_state->yy_cursor;
  173. SCNG(yy_marker) = lex_state->yy_marker;
  174. SCNG(yy_limit) = lex_state->yy_limit;
  175. zend_stack_destroy(&SCNG(state_stack));
  176. SCNG(state_stack) = lex_state->state_stack;
  177. SCNG(yy_in) = lex_state->in;
  178. YYSETCONDITION(lex_state->yy_state);
  179. CG(zend_lineno) = lex_state->lineno;
  180. zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
  181. #ifdef ZEND_MULTIBYTE
  182. if (SCNG(script_org)) {
  183. efree(SCNG(script_org));
  184. SCNG(script_org) = NULL;
  185. }
  186. if (SCNG(script_filtered)) {
  187. efree(SCNG(script_filtered));
  188. SCNG(script_filtered) = NULL;
  189. }
  190. SCNG(script_org) = lex_state->script_org;
  191. SCNG(script_org_size) = lex_state->script_org_size;
  192. SCNG(script_filtered) = lex_state->script_filtered;
  193. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  194. SCNG(input_filter) = lex_state->input_filter;
  195. SCNG(output_filter) = lex_state->output_filter;
  196. SCNG(script_encoding) = lex_state->script_encoding;
  197. SCNG(internal_encoding) = lex_state->internal_encoding;
  198. #endif /* ZEND_MULTIBYTE */
  199. if (CG(heredoc)) {
  200. efree(CG(heredoc));
  201. CG(heredoc) = NULL;
  202. CG(heredoc_len) = 0;
  203. }
  204. }
  205. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
  206. {
  207. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  208. /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
  209. file_handle->opened_path = NULL;
  210. if (file_handle->free_filename) {
  211. file_handle->filename = NULL;
  212. }
  213. }
  214. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
  215. {
  216. char *file_path = NULL, *buf;
  217. size_t size, offset = 0;
  218. /* The shebang line was read, get the current position to obtain the buffer start */
  219. if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
  220. if ((offset = ftell(file_handle->handle.fp)) == -1) {
  221. offset = 0;
  222. }
  223. }
  224. if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
  225. return FAILURE;
  226. }
  227. zend_llist_add_element(&CG(open_files), file_handle);
  228. if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
  229. zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
  230. size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
  231. fh->handle.stream.handle = (void*)(((char*)fh) + diff);
  232. file_handle->handle.stream.handle = fh->handle.stream.handle;
  233. }
  234. /* Reset the scanner for scanning the new file */
  235. SCNG(yy_in) = file_handle;
  236. SCNG(yy_start) = NULL;
  237. if (size != -1) {
  238. #ifdef ZEND_MULTIBYTE
  239. if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) {
  240. return FAILURE;
  241. }
  242. SCNG(yy_in) = NULL;
  243. zend_multibyte_set_filter(NULL TSRMLS_CC);
  244. if (!SCNG(input_filter)) {
  245. SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
  246. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  247. SCNG(script_filtered_size) = SCNG(script_org_size);
  248. } else {
  249. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  250. if (SCNG(script_filtered) == NULL) {
  251. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  252. "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name);
  253. }
  254. }
  255. SCNG(yy_start) = SCNG(script_filtered) - offset;
  256. yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
  257. #else /* !ZEND_MULTIBYTE */
  258. SCNG(yy_start) = buf - offset;
  259. yy_scan_buffer(buf, size TSRMLS_CC);
  260. #endif /* ZEND_MULTIBYTE */
  261. } else {
  262. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  263. }
  264. BEGIN(INITIAL);
  265. if (file_handle->opened_path) {
  266. file_path = file_handle->opened_path;
  267. } else {
  268. file_path = file_handle->filename;
  269. }
  270. zend_set_compiled_filename(file_path TSRMLS_CC);
  271. if (CG(start_lineno)) {
  272. CG(zend_lineno) = CG(start_lineno);
  273. CG(start_lineno) = 0;
  274. } else {
  275. CG(zend_lineno) = 1;
  276. }
  277. CG(increment_lineno) = 0;
  278. return SUCCESS;
  279. }
  280. END_EXTERN_C()
  281. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
  282. {
  283. zend_lex_state original_lex_state;
  284. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  285. zend_op_array *original_active_op_array = CG(active_op_array);
  286. zend_op_array *retval=NULL;
  287. int compiler_result;
  288. zend_bool compilation_successful=0;
  289. znode retval_znode;
  290. zend_bool original_in_compilation = CG(in_compilation);
  291. retval_znode.op_type = IS_CONST;
  292. retval_znode.u.constant.type = IS_LONG;
  293. retval_znode.u.constant.value.lval = 1;
  294. Z_UNSET_ISREF(retval_znode.u.constant);
  295. Z_SET_REFCOUNT(retval_znode.u.constant, 1);
  296. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  297. retval = op_array; /* success oriented */
  298. if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
  299. if (type==ZEND_REQUIRE) {
  300. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
  301. zend_bailout();
  302. } else {
  303. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
  304. }
  305. compilation_successful=0;
  306. } else {
  307. init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  308. CG(in_compilation) = 1;
  309. CG(active_op_array) = op_array;
  310. compiler_result = zendparse(TSRMLS_C);
  311. zend_do_return(&retval_znode, 0 TSRMLS_CC);
  312. CG(in_compilation) = original_in_compilation;
  313. if (compiler_result==1) { /* parser error */
  314. zend_bailout();
  315. }
  316. compilation_successful=1;
  317. }
  318. if (retval) {
  319. CG(active_op_array) = original_active_op_array;
  320. if (compilation_successful) {
  321. pass_two(op_array TSRMLS_CC);
  322. zend_release_labels(TSRMLS_C);
  323. } else {
  324. efree(op_array);
  325. retval = NULL;
  326. }
  327. }
  328. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  329. return retval;
  330. }
  331. zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
  332. {
  333. zend_file_handle file_handle;
  334. zval tmp;
  335. zend_op_array *retval;
  336. char *opened_path = NULL;
  337. if (filename->type != IS_STRING) {
  338. tmp = *filename;
  339. zval_copy_ctor(&tmp);
  340. convert_to_string(&tmp);
  341. filename = &tmp;
  342. }
  343. file_handle.filename = filename->value.str.val;
  344. file_handle.free_filename = 0;
  345. file_handle.type = ZEND_HANDLE_FILENAME;
  346. file_handle.opened_path = NULL;
  347. file_handle.handle.fp = NULL;
  348. retval = zend_compile_file(&file_handle, type TSRMLS_CC);
  349. if (retval && file_handle.handle.stream.handle) {
  350. int dummy = 1;
  351. if (!file_handle.opened_path) {
  352. file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
  353. }
  354. zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
  355. if (opened_path) {
  356. efree(opened_path);
  357. }
  358. }
  359. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  360. if (filename==&tmp) {
  361. zval_dtor(&tmp);
  362. }
  363. return retval;
  364. }
  365. ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
  366. {
  367. /* enforce two trailing NULLs for flex... */
  368. str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
  369. memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
  370. SCNG(yy_in)=NULL;
  371. SCNG(yy_start) = NULL;
  372. #ifdef ZEND_MULTIBYTE
  373. SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
  374. SCNG(script_org_size) = str->value.str.len;
  375. zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
  376. if (!SCNG(input_filter)) {
  377. SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
  378. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  379. SCNG(script_filtered_size) = SCNG(script_org_size);
  380. } else {
  381. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  382. }
  383. yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
  384. #else /* !ZEND_MULTIBYTE */
  385. yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
  386. #endif /* ZEND_MULTIBYTE */
  387. zend_set_compiled_filename(filename TSRMLS_CC);
  388. CG(zend_lineno) = 1;
  389. CG(increment_lineno) = 0;
  390. return SUCCESS;
  391. }
  392. ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
  393. {
  394. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  395. #ifdef ZEND_MULTIBYTE
  396. if (SCNG(input_filter)) {
  397. size_t original_offset = offset, length = 0; do {
  398. unsigned char *p = NULL;
  399. SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
  400. if (!p) {
  401. break;
  402. }
  403. efree(p);
  404. if (length > original_offset) {
  405. offset--;
  406. } else if (length < original_offset) {
  407. offset++;
  408. }
  409. } while (original_offset != length);
  410. }
  411. #endif
  412. return offset;
  413. }
  414. zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
  415. {
  416. zend_lex_state original_lex_state;
  417. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  418. zend_op_array *original_active_op_array = CG(active_op_array);
  419. zend_op_array *retval;
  420. zval tmp;
  421. int compiler_result;
  422. zend_bool original_in_compilation = CG(in_compilation);
  423. if (source_string->value.str.len==0) {
  424. efree(op_array);
  425. return NULL;
  426. }
  427. CG(in_compilation) = 1;
  428. tmp = *source_string;
  429. zval_copy_ctor(&tmp);
  430. convert_to_string(&tmp);
  431. source_string = &tmp;
  432. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  433. if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
  434. efree(op_array);
  435. retval = NULL;
  436. } else {
  437. zend_bool orig_interactive = CG(interactive);
  438. CG(interactive) = 0;
  439. init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  440. CG(interactive) = orig_interactive;
  441. CG(active_op_array) = op_array;
  442. BEGIN(ST_IN_SCRIPTING);
  443. compiler_result = zendparse(TSRMLS_C);
  444. #ifdef ZEND_MULTIBYTE
  445. if (SCNG(script_org)) {
  446. efree(SCNG(script_org));
  447. SCNG(script_org) = NULL;
  448. }
  449. if (SCNG(script_filtered)) {
  450. efree(SCNG(script_filtered));
  451. SCNG(script_filtered) = NULL;
  452. }
  453. #endif /* ZEND_MULTIBYTE */
  454. if (compiler_result==1) {
  455. CG(active_op_array) = original_active_op_array;
  456. CG(unclean_shutdown)=1;
  457. retval = NULL;
  458. } else {
  459. zend_do_return(NULL, 0 TSRMLS_CC);
  460. CG(active_op_array) = original_active_op_array;
  461. pass_two(op_array TSRMLS_CC);
  462. zend_release_labels(TSRMLS_C);
  463. retval = op_array;
  464. }
  465. }
  466. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  467. zval_dtor(&tmp);
  468. CG(in_compilation) = original_in_compilation;
  469. return retval;
  470. }
  471. BEGIN_EXTERN_C()
  472. int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
  473. {
  474. zend_lex_state original_lex_state;
  475. zend_file_handle file_handle;
  476. file_handle.type = ZEND_HANDLE_FILENAME;
  477. file_handle.filename = filename;
  478. file_handle.free_filename = 0;
  479. file_handle.opened_path = NULL;
  480. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  481. if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
  482. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
  483. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  484. return FAILURE;
  485. }
  486. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  487. #ifdef ZEND_MULTIBYTE
  488. if (SCNG(script_org)) {
  489. efree(SCNG(script_org));
  490. SCNG(script_org) = NULL;
  491. }
  492. if (SCNG(script_filtered)) {
  493. efree(SCNG(script_filtered));
  494. SCNG(script_filtered) = NULL;
  495. }
  496. #endif /* ZEND_MULTIBYTE */
  497. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  498. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  499. return SUCCESS;
  500. }
  501. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
  502. {
  503. zend_lex_state original_lex_state;
  504. zval tmp = *str;
  505. str = &tmp;
  506. zval_copy_ctor(str);
  507. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  508. if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
  509. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  510. return FAILURE;
  511. }
  512. BEGIN(INITIAL);
  513. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  514. #ifdef ZEND_MULTIBYTE
  515. if (SCNG(script_org)) {
  516. efree(SCNG(script_org));
  517. SCNG(script_org) = NULL;
  518. }
  519. if (SCNG(script_filtered)) {
  520. efree(SCNG(script_filtered));
  521. SCNG(script_filtered) = NULL;
  522. }
  523. #endif /* ZEND_MULTIBYTE */
  524. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  525. zval_dtor(str);
  526. return SUCCESS;
  527. }
  528. END_EXTERN_C()
  529. #ifdef ZEND_MULTIBYTE
  530. BEGIN_EXTERN_C()
  531. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
  532. {
  533. size_t original_offset, offset, free_flag, new_len, length;
  534. unsigned char *p;
  535. /* calculate current position */
  536. offset = original_offset = YYCURSOR - SCNG(yy_start);
  537. if (old_input_filter && offset > 0) {
  538. zend_encoding *new_encoding = SCNG(script_encoding);
  539. zend_encoding_filter new_filter = SCNG(input_filter);
  540. SCNG(script_encoding) = old_encoding;
  541. SCNG(input_filter) = old_input_filter;
  542. offset = zend_get_scanned_file_offset(TSRMLS_C);
  543. SCNG(script_encoding) = new_encoding;
  544. SCNG(input_filter) = new_filter;
  545. }
  546. /* convert and set */
  547. if (!SCNG(input_filter)) {
  548. length = SCNG(script_org_size) - offset;
  549. p = SCNG(script_org) + offset;
  550. free_flag = 0;
  551. } else {
  552. SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC);
  553. free_flag = 1;
  554. }
  555. new_len = original_offset + length;
  556. if (new_len > YYLIMIT - SCNG(yy_start)) {
  557. unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len);
  558. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  559. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  560. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  561. SCNG(yy_start) = new_yy_start;
  562. SCNG(script_filtered) = new_yy_start;
  563. SCNG(script_filtered_size) = new_len;
  564. }
  565. SCNG(yy_limit) = SCNG(yy_start) + new_len;
  566. memmove(SCNG(yy_start) + original_offset, p, length);
  567. if (free_flag) {
  568. efree(p);
  569. }
  570. }
  571. ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
  572. {
  573. size_t n;
  574. if (CG(interactive) == 0) {
  575. if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
  576. return FAILURE;
  577. }
  578. n = len;
  579. return n;
  580. }
  581. /* interactive */
  582. if (SCNG(script_org)) {
  583. efree(SCNG(script_org));
  584. }
  585. if (SCNG(script_filtered)) {
  586. efree(SCNG(script_filtered));
  587. }
  588. SCNG(script_org) = NULL;
  589. SCNG(script_org_size) = 0;
  590. /* TODO: support widechars */
  591. if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
  592. return FAILURE;
  593. }
  594. n = len;
  595. SCNG(script_org_size) = n;
  596. SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
  597. memcpy(SCNG(script_org), buf, n);
  598. return n;
  599. }
  600. ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC)
  601. {
  602. if (SCNG(script_org)) {
  603. efree(SCNG(script_org));
  604. SCNG(script_org) = NULL;
  605. }
  606. SCNG(script_org_size) = n;
  607. SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
  608. memcpy(SCNG(script_org), buf, n);
  609. *(SCNG(script_org)+SCNG(script_org_size)) = '\0';
  610. return 0;
  611. }
  612. # define zend_copy_value(zendlval, yytext, yyleng) \
  613. if (SCNG(output_filter)) { \
  614. size_t sz = 0; \
  615. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \
  616. zendlval->value.str.len = sz; \
  617. } else { \
  618. zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \
  619. zendlval->value.str.len = yyleng; \
  620. }
  621. #else /* ZEND_MULTIBYTE */
  622. # define zend_copy_value(zendlval, yytext, yyleng) \
  623. zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \
  624. zendlval->value.str.len = yyleng;
  625. #endif /* ZEND_MULTIBYTE */
  626. static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
  627. {
  628. register char *s, *t;
  629. char *end;
  630. ZVAL_STRINGL(zendlval, str, len, 1);
  631. /* convert escape sequences */
  632. s = t = zendlval->value.str.val;
  633. end = s+zendlval->value.str.len;
  634. while (s<end) {
  635. if (*s=='\\') {
  636. s++;
  637. if (s >= end) {
  638. *t++ = '\\';
  639. break;
  640. }
  641. switch(*s) {
  642. case 'n':
  643. *t++ = '\n';
  644. zendlval->value.str.len--;
  645. break;
  646. case 'r':
  647. *t++ = '\r';
  648. zendlval->value.str.len--;
  649. break;
  650. case 't':
  651. *t++ = '\t';
  652. zendlval->value.str.len--;
  653. break;
  654. case 'f':
  655. *t++ = '\f';
  656. zendlval->value.str.len--;
  657. break;
  658. case 'v':
  659. *t++ = '\v';
  660. zendlval->value.str.len--;
  661. break;
  662. case '"':
  663. case '`':
  664. if (*s != quote_type) {
  665. *t++ = '\\';
  666. *t++ = *s;
  667. break;
  668. }
  669. case '\\':
  670. case '$':
  671. *t++ = *s;
  672. zendlval->value.str.len--;
  673. break;
  674. case 'x':
  675. case 'X':
  676. if (ZEND_IS_HEX(*(s+1))) {
  677. char hex_buf[3] = { 0, 0, 0 };
  678. zendlval->value.str.len--; /* for the 'x' */
  679. hex_buf[0] = *(++s);
  680. zendlval->value.str.len--;
  681. if (ZEND_IS_HEX(*(s+1))) {
  682. hex_buf[1] = *(++s);
  683. zendlval->value.str.len--;
  684. }
  685. *t++ = (char) strtol(hex_buf, NULL, 16);
  686. } else {
  687. *t++ = '\\';
  688. *t++ = *s;
  689. }
  690. break;
  691. default:
  692. /* check for an octal */
  693. if (ZEND_IS_OCT(*s)) {
  694. char octal_buf[4] = { 0, 0, 0, 0 };
  695. octal_buf[0] = *s;
  696. zendlval->value.str.len--;
  697. if (ZEND_IS_OCT(*(s+1))) {
  698. octal_buf[1] = *(++s);
  699. zendlval->value.str.len--;
  700. if (ZEND_IS_OCT(*(s+1))) {
  701. octal_buf[2] = *(++s);
  702. zendlval->value.str.len--;
  703. }
  704. }
  705. *t++ = (char) strtol(octal_buf, NULL, 8);
  706. } else {
  707. *t++ = '\\';
  708. *t++ = *s;
  709. }
  710. break;
  711. }
  712. } else {
  713. *t++ = *s;
  714. }
  715. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  716. CG(zend_lineno)++;
  717. }
  718. s++;
  719. }
  720. *t = 0;
  721. #ifdef ZEND_MULTIBYTE
  722. if (SCNG(output_filter)) {
  723. size_t sz = 0;
  724. s = zendlval->value.str.val;
  725. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
  726. zendlval->value.str.len = sz;
  727. efree(s);
  728. }
  729. #endif /* ZEND_MULTIBYTE */
  730. }
  731. int lex_scan(zval *zendlval TSRMLS_DC)
  732. {
  733. restart:
  734. SCNG(yy_text) = YYCURSOR;
  735. yymore_restart:
  736. /*!re2c
  737. re2c:yyfill:check = 0;
  738. LNUM [0-9]+
  739. DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
  740. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  741. HNUM "0x"[0-9a-fA-F]+
  742. BNUM "0b"[01]+
  743. LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
  744. WHITESPACE [ \n\r\t]+
  745. TABS_AND_SPACES [ \t]*
  746. TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
  747. ANY_CHAR [^]
  748. NEWLINE ("\r"|"\n"|"\r\n")
  749. /* compute yyleng before each rule */
  750. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  751. <ST_IN_SCRIPTING>"exit" {
  752. return T_EXIT;
  753. }
  754. <ST_IN_SCRIPTING>"die" {
  755. return T_EXIT;
  756. }
  757. <ST_IN_SCRIPTING>"function" {
  758. return T_FUNCTION;
  759. }
  760. <ST_IN_SCRIPTING>"const" {
  761. return T_CONST;
  762. }
  763. <ST_IN_SCRIPTING>"return" {
  764. return T_RETURN;
  765. }
  766. <ST_IN_SCRIPTING>"try" {
  767. return T_TRY;
  768. }
  769. <ST_IN_SCRIPTING>"catch" {
  770. return T_CATCH;
  771. }
  772. <ST_IN_SCRIPTING>"throw" {
  773. return T_THROW;
  774. }
  775. <ST_IN_SCRIPTING>"if" {
  776. return T_IF;
  777. }
  778. <ST_IN_SCRIPTING>"elseif" {
  779. return T_ELSEIF;
  780. }
  781. <ST_IN_SCRIPTING>"endif" {
  782. return T_ENDIF;
  783. }
  784. <ST_IN_SCRIPTING>"else" {
  785. return T_ELSE;
  786. }
  787. <ST_IN_SCRIPTING>"while" {
  788. return T_WHILE;
  789. }
  790. <ST_IN_SCRIPTING>"count" {
  791. return T_COUNT;
  792. }
  793. <ST_IN_SCRIPTING>"strlen" {
  794. return T_STRLEN;
  795. }
  796. <ST_IN_SCRIPTING>"xifset" {
  797. return T_IFSET;
  798. }
  799. <ST_IN_SCRIPTING>"exists" {
  800. return T_EXISTS;
  801. }
  802. <ST_IN_SCRIPTING>"endwhile" {
  803. return T_ENDWHILE;
  804. }
  805. <ST_IN_SCRIPTING>"do" {
  806. return T_DO;
  807. }
  808. <ST_IN_SCRIPTING>"for" {
  809. return T_FOR;
  810. }
  811. <ST_IN_SCRIPTING>"endfor" {
  812. return T_ENDFOR;
  813. }
  814. <ST_IN_SCRIPTING>"foreach" {
  815. return T_FOREACH;
  816. }
  817. <ST_IN_SCRIPTING>"endforeach" {
  818. return T_ENDFOREACH;
  819. }
  820. <ST_IN_SCRIPTING>"declare" {
  821. return T_DECLARE;
  822. }
  823. <ST_IN_SCRIPTING>"enddeclare" {
  824. return T_ENDDECLARE;
  825. }
  826. <ST_IN_SCRIPTING>"instanceof" {
  827. return T_INSTANCEOF;
  828. }
  829. <ST_IN_SCRIPTING>"as" {
  830. return T_AS;
  831. }
  832. <ST_IN_SCRIPTING>"switch" {
  833. return T_SWITCH;
  834. }
  835. <ST_IN_SCRIPTING>"endswitch" {
  836. return T_ENDSWITCH;
  837. }
  838. <ST_IN_SCRIPTING>"case" {
  839. return T_CASE;
  840. }
  841. <ST_IN_SCRIPTING>"default" {
  842. return T_DEFAULT;
  843. }
  844. <ST_IN_SCRIPTING>"break" {
  845. return T_BREAK;
  846. }
  847. <ST_IN_SCRIPTING>"continue" {
  848. return T_CONTINUE;
  849. }
  850. <ST_IN_SCRIPTING>"goto" {
  851. return T_GOTO;
  852. }
  853. <ST_IN_SCRIPTING>"echo" {
  854. return T_ECHO;
  855. }
  856. <ST_IN_SCRIPTING>"print" {
  857. return T_PRINT;
  858. }
  859. <ST_IN_SCRIPTING>"class" {
  860. return T_CLASS;
  861. }
  862. <ST_IN_SCRIPTING>"interface" {
  863. return T_INTERFACE;
  864. }
  865. <ST_IN_SCRIPTING>"extends" {
  866. return T_EXTENDS;
  867. }
  868. <ST_IN_SCRIPTING>"implements" {
  869. return T_IMPLEMENTS;
  870. }
/* "->" in scripting mode: enter ST_LOOKING_FOR_PROPERTY so that the label
 * which follows is handled by the property rules below.
 */
<ST_IN_SCRIPTING>"->" {
	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
	return T_OBJECT_OPERATOR;
}

/* Whitespace is returned as its own token. The string value points straight
 * at yytext instead of a copy.
 */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
	zendlval->value.str.val = yytext; /* no copying - intentional */
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	HANDLE_NEWLINES(yytext, yyleng);
	return T_WHITESPACE;
}

/* A further "->" while already looking for a property: the state is kept. */
<ST_LOOKING_FOR_PROPERTY>"->" {
	return T_OBJECT_OPERATOR;
}

/* The property name itself: leave the property state and return a copy of the label. */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
	yy_pop_state(TSRMLS_C);
	zend_copy_value(zendlval, yytext, yyleng);
	zendlval->type = IS_STRING;
	return T_STRING;
}

/* Anything else: give the character back (yyless(0)), leave the property
 * state and rescan it in the state that was active before.
 */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
	yyless(0);
	yy_pop_state(TSRMLS_C);
	goto restart;
}
/* Scope resolution, namespace separator and object-related keywords:
 * plain literal-to-token mappings with no semantic value.
 */
<ST_IN_SCRIPTING>"::" {
	return T_PAAMAYIM_NEKUDOTAYIM;
}

<ST_IN_SCRIPTING>"\\" {
	return T_NS_SEPARATOR;
}

<ST_IN_SCRIPTING>"new" {
	return T_NEW;
}

<ST_IN_SCRIPTING>"clone" {
	return T_CLONE;
}

<ST_IN_SCRIPTING>"var" {
	return T_VAR;
}
/* Type casts: "(" type ")" with optional {TABS_AND_SPACES} around the type
 * name is recognised as a single token. Several spellings share one token
 * (int/integer, real/double/float, bool/boolean); "binary" yields the same
 * T_STRING_CAST as "string".
 */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
	return T_INT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
	return T_DOUBLE_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" {
	return T_STRING_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"binary"{TABS_AND_SPACES}")" {
	return T_STRING_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
	return T_ARRAY_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
	return T_OBJECT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
	return T_BOOL_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
	return T_UNSET_CAST;
}
/* Language constructs, inclusion keywords, member modifiers and "=>":
 * plain literal-to-token mappings with no semantic value.
 */
<ST_IN_SCRIPTING>"eval" {
	return T_EVAL;
}

<ST_IN_SCRIPTING>"include" {
	return T_INCLUDE;
}

<ST_IN_SCRIPTING>"include_once" {
	return T_INCLUDE_ONCE;
}

<ST_IN_SCRIPTING>"require" {
	return T_REQUIRE;
}

<ST_IN_SCRIPTING>"require_once" {
	return T_REQUIRE_ONCE;
}

<ST_IN_SCRIPTING>"namespace" {
	return T_NAMESPACE;
}

<ST_IN_SCRIPTING>"use" {
	return T_USE;
}

<ST_IN_SCRIPTING>"global" {
	return T_GLOBAL;
}

<ST_IN_SCRIPTING>"isset" {
	return T_ISSET;
}

<ST_IN_SCRIPTING>"empty" {
	return T_EMPTY;
}

<ST_IN_SCRIPTING>"__halt_compiler" {
	return T_HALT_COMPILER;
}

<ST_IN_SCRIPTING>"static" {
	return T_STATIC;
}

<ST_IN_SCRIPTING>"abstract" {
	return T_ABSTRACT;
}

<ST_IN_SCRIPTING>"final" {
	return T_FINAL;
}

<ST_IN_SCRIPTING>"private" {
	return T_PRIVATE;
}

<ST_IN_SCRIPTING>"protected" {
	return T_PROTECTED;
}

<ST_IN_SCRIPTING>"public" {
	return T_PUBLIC;
}

<ST_IN_SCRIPTING>"unset" {
	return T_UNSET;
}

<ST_IN_SCRIPTING>"=>" {
	return T_DOUBLE_ARROW;
}

<ST_IN_SCRIPTING>"list" {
	return T_LIST;
}

<ST_IN_SCRIPTING>"array" {
	return T_ARRAY;
}
/* Multi-character operators: increment/decrement, comparison, compound
 * assignment, boolean/logical and shift operators. Each maps to one token.
 * "!=" and "<>" both yield T_IS_NOT_EQUAL.
 */
<ST_IN_SCRIPTING>"++" {
	return T_INC;
}

<ST_IN_SCRIPTING>"--" {
	return T_DEC;
}

<ST_IN_SCRIPTING>"===" {
	return T_IS_IDENTICAL;
}

<ST_IN_SCRIPTING>"!==" {
	return T_IS_NOT_IDENTICAL;
}

<ST_IN_SCRIPTING>"==" {
	return T_IS_EQUAL;
}

<ST_IN_SCRIPTING>"!="|"<>" {
	return T_IS_NOT_EQUAL;
}

<ST_IN_SCRIPTING>"<=" {
	return T_IS_SMALLER_OR_EQUAL;
}

<ST_IN_SCRIPTING>">=" {
	return T_IS_GREATER_OR_EQUAL;
}

<ST_IN_SCRIPTING>"+=" {
	return T_PLUS_EQUAL;
}

<ST_IN_SCRIPTING>"-=" {
	return T_MINUS_EQUAL;
}

<ST_IN_SCRIPTING>"*=" {
	return T_MUL_EQUAL;
}

<ST_IN_SCRIPTING>"/=" {
	return T_DIV_EQUAL;
}

<ST_IN_SCRIPTING>".=" {
	return T_CONCAT_EQUAL;
}

<ST_IN_SCRIPTING>"%=" {
	return T_MOD_EQUAL;
}

<ST_IN_SCRIPTING>"<<=" {
	return T_SL_EQUAL;
}

<ST_IN_SCRIPTING>">>=" {
	return T_SR_EQUAL;
}

<ST_IN_SCRIPTING>"&=" {
	return T_AND_EQUAL;
}

<ST_IN_SCRIPTING>"|=" {
	return T_OR_EQUAL;
}

<ST_IN_SCRIPTING>"^=" {
	return T_XOR_EQUAL;
}

<ST_IN_SCRIPTING>"||" {
	return T_BOOLEAN_OR;
}

<ST_IN_SCRIPTING>"&&" {
	return T_BOOLEAN_AND;
}

<ST_IN_SCRIPTING>"OR" {
	return T_LOGICAL_OR;
}

<ST_IN_SCRIPTING>"AND" {
	return T_LOGICAL_AND;
}

<ST_IN_SCRIPTING>"XOR" {
	return T_LOGICAL_XOR;
}

<ST_IN_SCRIPTING>"<<" {
	return T_SL;
}

<ST_IN_SCRIPTING>">>" {
	return T_SR;
}
/* Single-character tokens are returned as their own character code. */
<ST_IN_SCRIPTING>{TOKENS} {
	return yytext[0];
}

/* "{" pushes another ST_IN_SCRIPTING level; the "}" rule below pops it. */
<ST_IN_SCRIPTING>"{" {
	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
	return '{';
}

/* "${" inside a double-quoted string, backquote or heredoc: the next token
 * is scanned in ST_LOOKING_FOR_VARNAME.
 */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
	yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
	return T_DOLLAR_OPEN_CURLY_BRACES;
}

/* "}" resets the pending doc comment and pops one state, unless the state
 * stack is already empty.
 */
<ST_IN_SCRIPTING>"}" {
	RESET_DOC_COMMENT();
	if (!zend_stack_is_empty(&SCNG(state_stack))) {
		yy_pop_state(TSRMLS_C);
	}
	return '}';
}

/* A label after "${": return a copy of it as T_STRING_VARNAME and replace
 * ST_LOOKING_FOR_VARNAME with ST_IN_SCRIPTING on the state stack.
 */
<ST_LOOKING_FOR_VARNAME>{LABEL} {
	zend_copy_value(zendlval, yytext, yyleng);
	zendlval->type = IS_STRING;
	yy_pop_state(TSRMLS_C);
	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
	return T_STRING_VARNAME;
}

/* Not a label after "${": give the character back, switch to
 * ST_IN_SCRIPTING in the same way and rescan.
 */
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
	yyless(0);
	yy_pop_state(TSRMLS_C);
	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
	goto restart;
}
  1107. <ST_IN_SCRIPTING>{LNUM} {
  1108. if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1109. zendlval->value.lval = strtol(yytext, NULL, 0);
  1110. } else {
  1111. errno = 0;
  1112. zendlval->value.lval = strtol(yytext, NULL, 0);
  1113. if (errno == ERANGE) { /* Overflow */
  1114. if (yytext[0] == '0') { /* octal overflow */
  1115. zendlval->value.dval = zend_oct_strtod(yytext, NULL);
  1116. } else {
  1117. zendlval->value.dval = zend_strtod(yytext, NULL);
  1118. }
  1119. zendlval->type = IS_DOUBLE;
  1120. return T_DNUMBER;
  1121. }
  1122. }
  1123. zendlval->type = IS_LONG;
  1124. return T_LNUMBER;
  1125. }
  1126. <ST_IN_SCRIPTING>{HNUM} {
  1127. char *hex = yytext + 2; /* Skip "0x" */
  1128. int len = yyleng - 2;
  1129. /* Skip any leading 0s */
  1130. while (*hex == '0') {
  1131. hex++;
  1132. len--;
  1133. }
  1134. if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
  1135. zendlval->value.lval = strtol(hex, NULL, 16);
  1136. zendlval->type = IS_LONG;
  1137. return T_LNUMBER;
  1138. } else {
  1139. zendlval->value.dval = zend_hex_strtod(hex, NULL);
  1140. zendlval->type = IS_DOUBLE;
  1141. return T_DNUMBER;
  1142. }
  1143. }
  1144. <ST_IN_SCRIPTING>{BNUM} {
  1145. char *hex = yytext + 2; /* Skip "0x" */
  1146. int len = yyleng - 2;
  1147. /* Skip any leading 0s */
  1148. while (*hex == '0') {
  1149. hex++;
  1150. len--;
  1151. }
  1152. if (len < SIZEOF_LONG * 2 * 4) {
  1153. zendlval->value.lval = strtol(hex, NULL, 2);
  1154. zendlval->type = IS_LONG;
  1155. return T_LNUMBER;
  1156. } else {
  1157. zendlval->value.dval = zend_bin_strtod(hex, NULL);
  1158. zendlval->type = IS_DOUBLE;
  1159. return T_DNUMBER;
  1160. }
  1161. }
/* "false", "true" and "null" are scanned as literals here: all three are
 * returned as T_LNUMBER, and the zval type (IS_BOOL / IS_NULL) tells them
 * apart. For "null" only the type is set.
 */
<ST_IN_SCRIPTING>"false" {
	zendlval->value.lval = 0;
	zendlval->type = IS_BOOL;
	return T_LNUMBER;
}

<ST_IN_SCRIPTING>"true" {
	zendlval->value.lval = 1;
	zendlval->type = IS_BOOL;
	return T_LNUMBER;
}

<ST_IN_SCRIPTING>"null" {
	zendlval->type = IS_NULL;
	return T_LNUMBER;
}
/* Canonical decimal offset ("0" or no leading zero) inside "$var[...]".
 * It becomes a long when it is short enough, or when it has the maximum
 * length and compares below long_min_digits; otherwise it is kept as a
 * string copy. Either way the token is T_NUM_STRING.
 */
<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
	if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
		zendlval->value.lval = strtol(yytext, NULL, 10);
		zendlval->type = IS_LONG;
	} else {
		zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
		zendlval->value.str.len = yyleng;
		zendlval->type = IS_STRING;
	}
	return T_NUM_STRING;
}

/* Any other numeric spelling that the rule above did not take is kept
 * verbatim as a string copy.
 */
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
	zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	return T_NUM_STRING;
}

/* Floating-point literal, with or without exponent. */
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
	zendlval->value.dval = zend_strtod(yytext, NULL);
	zendlval->type = IS_DOUBLE;
	return T_DNUMBER;
}
  1198. <ST_IN_SCRIPTING>"__CLASS__" {
  1199. char *class_name = NULL;
  1200. if (CG(active_class_entry)) {
  1201. class_name = CG(active_class_entry)->name;
  1202. }
  1203. if (!class_name) {
  1204. class_name = "";
  1205. }
  1206. zendlval->value.str.len = strlen(class_name);
  1207. zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
  1208. zendlval->type = IS_STRING;
  1209. return T_CLASS_C;
  1210. }
  1211. <ST_IN_SCRIPTING>"__FUNCTION__" {
  1212. char *func_name = NULL;
  1213. if (CG(active_op_array)) {
  1214. func_name = CG(active_op_array)->function_name;
  1215. }
  1216. if (!func_name) {
  1217. func_name = "";
  1218. }
  1219. zendlval->value.str.len = strlen(func_name);
  1220. zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
  1221. zendlval->type = IS_STRING;
  1222. return T_FUNC_C;
  1223. }
  1224. <ST_IN_SCRIPTING>"__METHOD__" {
  1225. char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
  1226. char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
  1227. size_t len = 0;
  1228. if (class_name) {
  1229. len += strlen(class_name) + 2;
  1230. }
  1231. if (func_name) {
  1232. len += strlen(func_name);
  1233. }
  1234. zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
  1235. class_name ? class_name : "",
  1236. class_name && func_name ? "::" : "",
  1237. func_name ? func_name : ""
  1238. );
  1239. zendlval->type = IS_STRING;
  1240. return T_METHOD_C;
  1241. }
/* __LINE__: the compiler's current line counter as a long. */
<ST_IN_SCRIPTING>"__LINE__" {
	zendlval->value.lval = CG(zend_lineno);
	zendlval->type = IS_LONG;
	return T_LINE;
}
  1247. <ST_IN_SCRIPTING>"__FILE__" {
  1248. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1249. if (!filename) {
  1250. filename = "";
  1251. }
  1252. zendlval->value.str.len = strlen(filename);
  1253. zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
  1254. zendlval->type = IS_STRING;
  1255. return T_FILE;
  1256. }
  1257. <ST_IN_SCRIPTING>"__DIR__" {
  1258. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1259. const size_t filename_len = strlen(filename);
  1260. char *dirname;
  1261. if (!filename) {
  1262. filename = "";
  1263. }
  1264. dirname = estrndup(filename, filename_len);
  1265. zend_dirname(dirname, filename_len);
  1266. if (strcmp(dirname, ".") == 0) {
  1267. dirname = erealloc(dirname, MAXPATHLEN);
  1268. #if HAVE_GETCWD
  1269. VCWD_GETCWD(dirname, MAXPATHLEN);
  1270. #elif HAVE_GETWD
  1271. VCWD_GETWD(dirname);
  1272. #endif
  1273. }
  1274. zendlval->value.str.len = strlen(dirname);
  1275. zendlval->value.str.val = dirname;
  1276. zendlval->type = IS_STRING;
  1277. return T_DIR;
  1278. }
/* __NAMESPACE__: a duplicated copy of the current namespace zval, or the
 * empty string when no namespace is active.
 */
<ST_IN_SCRIPTING>"__NAMESPACE__" {
	if (CG(current_namespace)) {
		*zendlval = *CG(current_namespace);
		/* shallow struct copy above; duplicate what it refers to */
		zval_copy_ctor(zendlval);
	} else {
		ZVAL_EMPTY_STRING(zendlval);
	}
	return T_NS_C;
}
/* Opening tags. Both switch the scanner to ST_IN_SCRIPTING; the token text
 * points at yytext instead of a copy. "<?php=" is reported as an open tag
 * with implicit echo.
 */
<INITIAL>"<?php=" {
	zendlval->value.str.val = yytext; /* no copying - intentional */
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	BEGIN(ST_IN_SCRIPTING);
	return T_OPEN_TAG_WITH_ECHO;
}

<INITIAL>"<?php" {
	zendlval->value.str.val = yytext; /* no copying - intentional */
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	BEGIN(ST_IN_SCRIPTING);
	return T_OPEN_TAG;
}
/* Inline HTML: everything outside PHP tags. Starting from one matched
 * character, the action scans ahead by hand to the next "<?php" (or to the
 * end of input) and returns the whole chunk as T_INLINE_HTML.
 */
<INITIAL>{ANY_CHAR} {
	/* Cursor beyond the limit: end of input */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

inline_char_handler:

	while (1) {
		/* Jump to the next '<'; without one the chunk runs to YYLIMIT */
		YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

		YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

		if (YYCURSOR < YYLIMIT) {
			/* YYCURSOR is now on the character right after '<' */
			switch (*YYCURSOR) {
				case '?':
					if (!strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */
						break;
					}
					continue;
				case 's':
				case 'S':
					/* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
					 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
					YYCURSOR--;
					/* NOTE(review): yymore() is defined outside this section;
					 * confirm whether control ever reaches the default label below */
					yymore();
				default:
					continue;
			}

			/* Step back onto '<' so the open tag is not part of the HTML chunk */
			YYCURSOR--;
		}

		break;
	}

inline_html:
	yyleng = YYCURSOR - SCNG(yy_text);

#ifdef ZEND_MULTIBYTE
	if (SCNG(output_filter)) {
		int readsize;
		size_t sz = 0;
		readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
		zendlval->value.str.len = sz;
		/* Give back whatever the filter did not consume */
		if (readsize < yyleng) {
			yyless(readsize);
		}
	} else {
	  zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
	  zendlval->value.str.len = yyleng;
	}
#else /* !ZEND_MULTIBYTE */
	zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
	zendlval->value.str.len = yyleng;
#endif
	zendlval->type = IS_STRING;
	HANDLE_NEWLINES(yytext, yyleng);
	return T_INLINE_HTML;
}
/* Make sure a label character follows "->", otherwise there is no property
 * and "->" will be taken literally
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
	/* Give back "->" and the first property character; only "$name" stays matched */
	yyless(yyleng - 3);
	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
	/* The value is the variable name without the leading '$' */
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	zendlval->type = IS_STRING;
	return T_VARIABLE;
}

/* A [ always designates a variable offset, regardless of what follows
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
	/* Give back the '[' so it is scanned in ST_VAR_OFFSET */
	yyless(yyleng - 1);
	yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	zendlval->type = IS_STRING;
	return T_VARIABLE;
}

/* Plain "$name" in code, inside interpolating strings, or inside an offset.
 * The value is the name without the leading '$'.
 */
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
	zendlval->type = IS_STRING;
	return T_VARIABLE;
}
/* "]" ends the variable offset and returns to the enclosing state. */
<ST_VAR_OFFSET>"]" {
	yy_pop_state(TSRMLS_C);
	return ']';
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
	/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
	return yytext[0];
}

<ST_VAR_OFFSET>[ \n\r\t\\'#] {
	/* Invalid rule to return a more explicit parse error with proper line number */
	yyless(0);
	yy_pop_state(TSRMLS_C);
	return T_ENCAPSED_AND_WHITESPACE;
}

/* Bare identifier: returned as T_STRING with a copy of the text. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
	zend_copy_value(zendlval, yytext, yyleng);
	zendlval->type = IS_STRING;
	return T_STRING;
}
/* One-line comment ("#" or "//"). The action consumes input by hand up to
 * and including the end of the line, or up to (but not including) a "?>".
 */
<ST_IN_SCRIPTING>"#"|"//" {
	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				/* "\r\n" counts as a single line ending */
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				CG(zend_lineno)++;
				break;
			case '?':
				if (*YYCURSOR == '>') {
					/* Step back onto '?' so "?>" is left for the next token */
					YYCURSOR--;
					break;
				}
				/* fall through */
			default:
				continue;
		}

		break;
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	return T_COMMENT;
}
/* Block comment. A match longer than two characters means the "/**" +
 * whitespace alternative was taken, i.e. a doc comment. The body is consumed
 * by hand up to the closing star-slash; reaching the end of input first
 * raises a compile warning.
 */
<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
	int doc_com;

	if (yyleng > 2) {
		doc_com = 1;
		RESET_DOC_COMMENT();
	} else {
		doc_com = 0;
	}

	while (YYCURSOR < YYLIMIT) {
		if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
			break;
		}
	}

	if (YYCURSOR < YYLIMIT) {
		/* Consume the closing '/' */
		YYCURSOR++;
	} else {
		zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
	}

	yyleng = YYCURSOR - SCNG(yy_text);
	HANDLE_NEWLINES(yytext, yyleng);

	if (doc_com) {
		/* Keep a copy of the whole comment text in the compiler globals */
		CG(doc_comment) = estrndup(yytext, yyleng);
		CG(doc_comment_len) = yyleng;
		return T_DOC_COMMENT;
	}

	return T_COMMENT;
}
/* Closing tag, optionally with one following newline: switch back to the
 * INITIAL (inline HTML) state. The token text points at yytext instead of
 * a copy.
 */
<ST_IN_SCRIPTING>("?>"){NEWLINE}? {
	zendlval->value.str.val = yytext; /* no copying - intentional */
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	BEGIN(INITIAL);
	return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
}
/* Single-quoted string, with optional "b" prefix. The rule matches only the
 * opening quote; the action finds the closing quote by hand, copies the
 * contents and resolves the two escapes \\ and \' in place.
 */
<ST_IN_SCRIPTING>b?['] {
	register char *s, *t;
	char *end;
	/* 1 when the match starts with the prefix character instead of the quote */
	int bprefix = (yytext[0] != '\'') ? 1 : 0;

	while (1) {
		if (YYCURSOR < YYLIMIT) {
			if (*YYCURSOR == '\'') {
				YYCURSOR++;
				yyleng = YYCURSOR - SCNG(yy_text);

				break;
			} else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
				/* Skip the escaped character so an escaped quote does not end the string */
				YYCURSOR++;
			}
		} else {
			yyleng = YYLIMIT - SCNG(yy_text);

			/* Unclosed single quotes; treat similar to double quotes, but without a separate token
			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
			return T_ENCAPSED_AND_WHITESPACE;
		}
	}

	/* Copy the contents between the quotes (prefix and both quotes excluded) */
	zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
	zendlval->value.str.len = yyleng-bprefix-2;
	zendlval->type = IS_STRING;

	/* convert escape sequences */
	s = t = zendlval->value.str.val;
	end = s+zendlval->value.str.len;
	while (s<end) {
		if (*s=='\\') {
			s++;

			switch(*s) {
				case '\\':
				case '\'':
					/* The escape collapses to one character */
					*t++ = *s;
					zendlval->value.str.len--;
					break;
				default:
					/* Any other backslash sequence is kept verbatim */
					*t++ = '\\';
					*t++ = *s;
					break;
			}
		} else {
			*t++ = *s;
		}

		/* Count "\n", and "\r" when it is not followed by "\n" */
		if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
			CG(zend_lineno)++;
		}
		s++;
	}
	*t = 0;

#ifdef ZEND_MULTIBYTE
	if (SCNG(output_filter)) {
		size_t sz = 0;
		s = zendlval->value.str.val;
		SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
		zendlval->value.str.len = sz;
		/* The filter stored a new buffer; release the unfiltered one */
		efree(s);
	}
#endif /* ZEND_MULTIBYTE */
	return T_CONSTANT_ENCAPSED_STRING;
}
/* Opening double quote, with optional "b" prefix. The action scans ahead.
 * If the closing quote comes before anything that starts interpolation, the
 * whole string is returned at once as T_CONSTANT_ENCAPSED_STRING. Otherwise
 * only the opening quote is returned and scanning continues in
 * ST_DOUBLE_QUOTES.
 */
<ST_IN_SCRIPTING>b?["] {
	/* 1 when the match starts with the prefix character instead of the quote */
	int bprefix = (yytext[0] != '"') ? 1 : 0;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '"':
				yyleng = YYCURSOR - SCNG(yy_text);
				zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
				return T_CONSTANT_ENCAPSED_STRING;
			case '$':
				/* "$label" or "${" starts interpolation */
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				/* "{$" starts interpolation */
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				/* Skip the escaped character */
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		YYCURSOR--;
		break;
	}

	/* Remember how much was scanned to save rescanning */
	SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);

	/* Rewind to just after the opening quote; only the quote is returned now */
	YYCURSOR = SCNG(yy_text) + yyleng;

	BEGIN(ST_DOUBLE_QUOTES);
	return '"';
}
/* Heredoc / nowdoc opener: optional "b", "<<<", optional blanks, a label
 * (bare, single-quoted or double-quoted) and a newline. The label is stored
 * in CG(heredoc); a single-quoted label selects ST_NOWDOC, any other form
 * selects ST_HEREDOC. If the closing label follows immediately on the next
 * line, the scanner goes straight to ST_END_HEREDOC.
 */
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
	char *s;
	/* 1 when the match starts with the prefix character instead of '<' */
	int bprefix = (yytext[0] != '<') ? 1 : 0;

	/* save old heredoc label */
	Z_STRVAL_P(zendlval) = CG(heredoc);
	Z_STRLEN_P(zendlval) = CG(heredoc_len);

	CG(zend_lineno)++;
	/* Match length minus the prefix, "<<<" and the newline ("\r\n" counts as two) */
	CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
	s = yytext+bprefix+3;
	/* Drop the blanks between "<<<" and the label */
	while ((*s == ' ') || (*s == '\t')) {
		s++;
		CG(heredoc_len)--;
	}

	if (*s == '\'') {
		/* Quoted label: exclude both quotes from its length */
		s++;
		CG(heredoc_len) -= 2;

		BEGIN(ST_NOWDOC);
	} else {
		if (*s == '"') {
			s++;
			CG(heredoc_len) -= 2;
		}

		BEGIN(ST_HEREDOC);
	}

	CG(heredoc) = estrndup(s, CG(heredoc_len));

	/* Check for ending label on the next line */
	if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
		YYCTYPE *end = YYCURSOR + CG(heredoc_len);

		/* An optional ';' may follow the label */
		if (*end == ';') {
			end++;
		}

		if (*end == '\n' || *end == '\r') {
			BEGIN(ST_END_HEREDOC);
		}
	}

	return T_START_HEREDOC;
}
/* Opening backtick: scan the contents in ST_BACKQUOTE. */
<ST_IN_SCRIPTING>[`] {
	BEGIN(ST_BACKQUOTE);
	return '`';
}

/* Closing heredoc/nowdoc label. The rule matches one character and the
 * action advances over the rest of the label. The stored label is handed to
 * the token value and cleared from the compiler globals.
 */
<ST_END_HEREDOC>{ANY_CHAR} {
	/* One character is already consumed by the match */
	YYCURSOR += CG(heredoc_len) - 1;
	yyleng = CG(heredoc_len);

	Z_STRVAL_P(zendlval) = CG(heredoc);
	Z_STRLEN_P(zendlval) = CG(heredoc_len);
	CG(heredoc) = NULL;
	CG(heredoc_len) = 0;
	BEGIN(ST_IN_SCRIPTING);
	return T_END_HEREDOC;
}

/* "{$" inside an interpolating string: only the "{" is consumed
 * (yyless(1)), and the "$..." expression is scanned in ST_IN_SCRIPTING.
 */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
	zendlval->value.lval = (long) '{';
	yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
	yyless(1);
	return T_CURLY_OPEN;
}

/* Closing double quote. */
<ST_DOUBLE_QUOTES>["] {
	BEGIN(ST_IN_SCRIPTING);
	return '"';
}

/* Closing backtick. */
<ST_BACKQUOTE>[`] {
	BEGIN(ST_IN_SCRIPTING);
	return '`';
}
/* Literal text inside a double-quoted string. Starting from one matched
 * character, the action extends the token up to the closing quote or the
 * next interpolation start ("$label", "${", "{$") and returns the unescaped
 * text.
 */
<ST_DOUBLE_QUOTES>{ANY_CHAR} {
	/* Reuse the length already measured when the opening quote was scanned */
	if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
		YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
		SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);

		goto double_quotes_scan_done;
	}

	/* Cursor beyond the limit: end of input */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}
	/* The matched character was a backslash: skip what it escapes */
	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '"':
				break;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* Leave the terminating character for the next token */
		YYCURSOR--;
		break;
	}

double_quotes_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
/* Literal text inside backticks. It is scanned the same way as the
 * double-quote body, but ends at a backtick, and '`' is passed to
 * zend_scan_escape_string() as the quote character.
 */
<ST_BACKQUOTE>{ANY_CHAR} {
	/* Cursor beyond the limit: end of input */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}
	/* The matched character was a backslash: skip what it escapes */
	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
		YYCURSOR++;
	}

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '`':
				break;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				if (YYCURSOR < YYLIMIT) {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* Leave the terminating character for the next token */
		YYCURSOR--;
		break;
	}

	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
/* Literal text inside a heredoc. The action scans up to the next
 * interpolation start, or up to a line that begins with the stored label
 * followed by an optional ';' and a line ending.
 */
<ST_HEREDOC>{ANY_CHAR} {
	/* Length of the line ending before the closing label (0 if none found) */
	int newline = 0;

	/* Cursor beyond the limit: end of input */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* Re-examine the matched character inside the loop below */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				/* Check for ending label on the next line */
				if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
					YYCTYPE *end = YYCURSOR + CG(heredoc_len);

					if (*end == ';') {
						end++;
					}

					if (*end == '\n' || *end == '\r') {
						/* newline before label will be subtracted from returned text, but
						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
							newline = 2; /* Windows newline */
						} else {
							newline = 1;
						}

						CG(increment_lineno) = 1; /* For newline before label */
						BEGIN(ST_END_HEREDOC);

						goto heredoc_scan_done;
					}
				}
				continue;
			case '$':
				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
					break;
				}
				continue;
			case '{':
				if (*YYCURSOR == '$') {
					break;
				}
				continue;
			case '\\':
				/* Skip the escaped character, but never a line ending, so the
				 * label check above still sees it */
				if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
					YYCURSOR++;
				}
				/* fall through */
			default:
				continue;
		}

		/* Leave the interpolation start for the next token */
		YYCURSOR--;
		break;
	}

heredoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
	return T_ENCAPSED_AND_WHITESPACE;
}
/* Literal text inside a nowdoc. There is no interpolation or escape
 * handling: the action scans up to a line that begins with the stored label
 * (followed by an optional ';' and a line ending), or to the end of input,
 * and returns a verbatim copy.
 */
<ST_NOWDOC>{ANY_CHAR} {
	/* Length of the line ending before the closing label (0 if none found) */
	int newline = 0;

	/* Cursor beyond the limit: end of input */
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	/* Re-examine the matched character inside the loop below */
	YYCURSOR--;

	while (YYCURSOR < YYLIMIT) {
		switch (*YYCURSOR++) {
			case '\r':
				if (*YYCURSOR == '\n') {
					YYCURSOR++;
				}
				/* fall through */
			case '\n':
				/* Check for ending label on the next line */
				if (IS_LABEL_START(*YYCURSOR) && CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, CG(heredoc), CG(heredoc_len))) {
					YYCTYPE *end = YYCURSOR + CG(heredoc_len);

					if (*end == ';') {
						end++;
					}

					if (*end == '\n' || *end == '\r') {
						/* newline before label will be subtracted from returned text, but
						 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
						if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
							newline = 2; /* Windows newline */
						} else {
							newline = 1;
						}

						CG(increment_lineno) = 1; /* For newline before label */
						BEGIN(ST_END_HEREDOC);

						goto nowdoc_scan_done;
					}
				}
				/* fall through */
			default:
				continue;
		}
	}

nowdoc_scan_done:
	yyleng = YYCURSOR - SCNG(yy_text);

	/* The line ending before the closing label is not part of the value */
	zend_copy_value(zendlval, yytext, yyleng - newline);
	zendlval->type = IS_STRING;
	HANDLE_NEWLINES(yytext, yyleng - newline);
	return T_ENCAPSED_AND_WHITESPACE;
}
/* Catch-all for characters no other rule accepts: return 0 once the cursor
 * is beyond the limit, otherwise emit a compile warning naming the character
 * and scanner state, then continue scanning after it.
 */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
	if (YYCURSOR > YYLIMIT) {
		return 0;
	}

	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
	goto restart;
}
  1806. */
  1807. }