PageRenderTime 80ms CodeModel.GetById 39ms RepoModel.GetById 0ms app.codeStats 1ms

/docs/official-grammars/php/5.4.0rc1/zend_language_scanner.l

http://github.com/facebook/pfff
LEX | 2400 lines | 1857 code | 405 blank | 138 comment | 0 complexity | f024c9be011729963b76866c721d4b7d MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, LGPL-2.0, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2011 Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Marcus Boerger <helly@php.net> |
  16. | Nuno Lopes <nlopess@php.net> |
  17. | Scott MacVicar <scottmac@php.net> |
  18. | Flex version authors: |
  19. | Andi Gutmans <andi@zend.com> |
  20. | Zeev Suraski <zeev@zend.com> |
  21. +----------------------------------------------------------------------+
  22. */
  23. /* $Id: zend_language_scanner.l 316627 2011-09-13 13:29:35Z dmitry $ */
  /* Scanner trace hook. When the condition below is switched on it prints the
   * state number and the character; as shipped it expands to nothing, so its
   * arguments are never evaluated. */
  #if 0
  #  define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
  #else
  #  define YYDEBUG(s, c)
  #endif
  29. #include "zend_language_scanner_defs.h"
  30. #include <errno.h>
  31. #include "zend.h"
  32. #include "zend_alloc.h"
  33. #include <zend_language_parser.h>
  34. #include "zend_compile.h"
  35. #include "zend_language_scanner.h"
  36. #include "zend_highlight.h"
  37. #include "zend_constants.h"
  38. #include "zend_variables.h"
  39. #include "zend_operators.h"
  40. #include "zend_API.h"
  41. #include "zend_strtod.h"
  42. #include "zend_exceptions.h"
  43. #include "tsrm_virtual_cwd.h"
  44. #include "tsrm_config_common.h"
  /* re2c interface, mapped onto the scanner globals (SCNG). */
  #define YYCTYPE   unsigned char
  /* The buffer is never refilled: leave the enclosing function with 0 once a
   * request for n more bytes would reach the limit plus the ZEND_MMAP_AHEAD
   * padding. */
  #define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
  #define YYCURSOR  SCNG(yy_cursor)
  #define YYLIMIT   SCNG(yy_limit)
  #define YYMARKER  SCNG(yy_marker)

  /* Current start condition. YYGETCONDITION() stays an lvalue because its
   * address is taken when the condition is pushed on the state stack. */
  #define YYGETCONDITION()  SCNG(yy_state)
  #define YYSETCONDITION(s) (SCNG(yy_state) = (s))

  /* Maps a condition name onto its yyc-prefixed identifier. */
  #define STATE(name)  yyc##name

  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE      YYGETCONDITION()
  #define yytext       ((char*)SCNG(yy_text))
  #define yyleng       SCNG(yy_leng)
  /* Shrink the current token to its first x bytes. Note: x is evaluated twice. */
  #define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                            yyleng   = (unsigned int)(x); } while(0)
  /* Continue scanning without resetting the token start. */
  #define yymore()     goto yymore_restart

  /* perform sanity check. If this message is triggered you should
     increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
  /*!max:re2c */
  #if ZEND_MMAP_AHEAD < YYMAXFILL
  # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  #endif
  67. #ifdef HAVE_STDARG_H
  68. # include <stdarg.h>
  69. #endif
  70. #ifdef HAVE_UNISTD_H
  71. # include <unistd.h>
  72. #endif
  73. /* Globals Macros */
  74. #define SCNG LANG_SCNG
  75. #ifdef ZTS
  76. ZEND_API ts_rsrc_id language_scanner_globals_id;
  77. #else
  78. ZEND_API zend_php_scanner_globals language_scanner_globals;
  79. #endif
  /* Bump CG(zend_lineno) once for every line break in the l bytes starting at s.
   * A "\n" counts, and so does a "\r" that is not directly followed by "\n",
   * so a "\r\n" pair counts once. The look-ahead reads the byte after the last
   * one when that byte is '\r'. */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *p = (s), *boundary = p+(l); \
      \
      for (; p<boundary; p++) { \
          if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
              CG(zend_lineno)++; \
          } \
      } \
  } while (0)
  /* Bump CG(zend_lineno) when the single character c is '\n' or '\r'.
   * The argument is parenthesized so that expressions such as `ch & 0xff`
   * compare as a whole; c may be evaluated twice. Expands to a brace block. */
  #define HANDLE_NEWLINE(c) \
  { \
      if ((c) == '\n' || (c) == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
  #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
  #define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

  /* True for ASCII letters, '_' and any value from 0x7F upwards. */
  #define IS_LABEL_START(c) ( \
      ((c) >= 'a' && (c) <= 'z') || \
      ((c) >= 'A' && (c) <= 'Z') || \
      (c) == '_' || \
      (c) >= 0x7F)

  /* True for the octal digits '0'..'7'. */
  #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')

  /* True for hexadecimal digits in either letter case. */
  #define ZEND_IS_HEX(c) ( \
      ((c)>='0' && (c)<='9') || \
      ((c)>='a' && (c)<='f') || \
      ((c)>='A' && (c)<='F'))
  103. BEGIN_EXTERN_C()
  104. static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
  105. {
  106. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
  107. assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
  108. return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
  109. }
  110. static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
  111. {
  112. return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
  113. }
  114. static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
  115. {
  116. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  117. LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
  118. }
  119. static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
  120. {
  121. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
  122. assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
  123. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  124. internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
  125. }
  126. static void _yy_push_state(int new_state TSRMLS_DC)
  127. {
  128. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
  129. YYSETCONDITION(new_state);
  130. }
  131. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  132. static void yy_pop_state(TSRMLS_D)
  133. {
  134. int *stack_state;
  135. zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
  136. YYSETCONDITION(*stack_state);
  137. zend_stack_del_top(&SCNG(state_stack));
  138. }
  139. static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
  140. {
  141. YYCURSOR = (YYCTYPE*)str;
  142. YYLIMIT = YYCURSOR + len;
  143. if (!SCNG(yy_start)) {
  144. SCNG(yy_start) = YYCURSOR;
  145. }
  146. }
  147. void startup_scanner(TSRMLS_D)
  148. {
  149. CG(parse_error) = 0;
  150. CG(heredoc) = NULL;
  151. CG(heredoc_len) = 0;
  152. CG(doc_comment) = NULL;
  153. CG(doc_comment_len) = 0;
  154. zend_stack_init(&SCNG(state_stack));
  155. }
  156. void shutdown_scanner(TSRMLS_D)
  157. {
  158. if (CG(heredoc)) {
  159. efree(CG(heredoc));
  160. CG(heredoc_len)=0;
  161. }
  162. CG(parse_error) = 0;
  163. zend_stack_destroy(&SCNG(state_stack));
  164. RESET_DOC_COMMENT();
  165. }
  166. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  167. {
  168. lex_state->yy_leng = SCNG(yy_leng);
  169. lex_state->yy_start = SCNG(yy_start);
  170. lex_state->yy_text = SCNG(yy_text);
  171. lex_state->yy_cursor = SCNG(yy_cursor);
  172. lex_state->yy_marker = SCNG(yy_marker);
  173. lex_state->yy_limit = SCNG(yy_limit);
  174. lex_state->state_stack = SCNG(state_stack);
  175. zend_stack_init(&SCNG(state_stack));
  176. lex_state->in = SCNG(yy_in);
  177. lex_state->yy_state = YYSTATE;
  178. lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
  179. lex_state->lineno = CG(zend_lineno);
  180. lex_state->script_org = SCNG(script_org);
  181. lex_state->script_org_size = SCNG(script_org_size);
  182. lex_state->script_filtered = SCNG(script_filtered);
  183. lex_state->script_filtered_size = SCNG(script_filtered_size);
  184. lex_state->input_filter = SCNG(input_filter);
  185. lex_state->output_filter = SCNG(output_filter);
  186. lex_state->script_encoding = SCNG(script_encoding);
  187. }
  188. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  189. {
  190. SCNG(yy_leng) = lex_state->yy_leng;
  191. SCNG(yy_start) = lex_state->yy_start;
  192. SCNG(yy_text) = lex_state->yy_text;
  193. SCNG(yy_cursor) = lex_state->yy_cursor;
  194. SCNG(yy_marker) = lex_state->yy_marker;
  195. SCNG(yy_limit) = lex_state->yy_limit;
  196. zend_stack_destroy(&SCNG(state_stack));
  197. SCNG(state_stack) = lex_state->state_stack;
  198. SCNG(yy_in) = lex_state->in;
  199. YYSETCONDITION(lex_state->yy_state);
  200. CG(zend_lineno) = lex_state->lineno;
  201. zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
  202. if (SCNG(script_filtered)) {
  203. efree(SCNG(script_filtered));
  204. SCNG(script_filtered) = NULL;
  205. }
  206. SCNG(script_org) = lex_state->script_org;
  207. SCNG(script_org_size) = lex_state->script_org_size;
  208. SCNG(script_filtered) = lex_state->script_filtered;
  209. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  210. SCNG(input_filter) = lex_state->input_filter;
  211. SCNG(output_filter) = lex_state->output_filter;
  212. SCNG(script_encoding) = lex_state->script_encoding;
  213. if (CG(heredoc)) {
  214. efree(CG(heredoc));
  215. CG(heredoc) = NULL;
  216. CG(heredoc_len) = 0;
  217. }
  218. }
  219. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
  220. {
  221. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  222. /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
  223. file_handle->opened_path = NULL;
  224. if (file_handle->free_filename) {
  225. file_handle->filename = NULL;
  226. }
  227. }
  /* Byte order marks as string literals; use sizeof(X)-1 for the BOM length,
   * since the literals carry a terminating NUL. */
  #define BOM_UTF32_BE    "\x00\x00\xfe\xff"
  #define BOM_UTF32_LE    "\xff\xfe\x00\x00"
  #define BOM_UTF16_BE    "\xfe\xff"
  #define BOM_UTF16_LE    "\xff\xfe"
  #define BOM_UTF8        "\xef\xbb\xbf"
  233. static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
  234. {
  235. const unsigned char *p;
  236. int wchar_size = 2;
  237. int le = 0;
  238. /* utf-16 or utf-32? */
  239. p = script;
  240. while ((p-script) < script_size) {
  241. p = memchr(p, 0, script_size-(p-script)-2);
  242. if (!p) {
  243. break;
  244. }
  245. if (*(p+1) == '\0' && *(p+2) == '\0') {
  246. wchar_size = 4;
  247. break;
  248. }
  249. /* searching for UTF-32 specific byte orders, so this will do */
  250. p += 4;
  251. }
  252. /* BE or LE? */
  253. p = script;
  254. while ((p-script) < script_size) {
  255. if (*p == '\0' && *(p+wchar_size-1) != '\0') {
  256. /* BE */
  257. le = 0;
  258. break;
  259. } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
  260. /* LE* */
  261. le = 1;
  262. break;
  263. }
  264. p += wchar_size;
  265. }
  266. if (wchar_size == 2) {
  267. return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
  268. } else {
  269. return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
  270. }
  271. return NULL;
  272. }
  273. static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
  274. {
  275. const zend_encoding *script_encoding = NULL;
  276. int bom_size;
  277. unsigned char *pos1, *pos2;
  278. if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
  279. return NULL;
  280. }
  281. /* check out BOM */
  282. if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
  283. script_encoding = zend_multibyte_encoding_utf32be;
  284. bom_size = sizeof(BOM_UTF32_BE)-1;
  285. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
  286. script_encoding = zend_multibyte_encoding_utf32le;
  287. bom_size = sizeof(BOM_UTF32_LE)-1;
  288. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
  289. script_encoding = zend_multibyte_encoding_utf16be;
  290. bom_size = sizeof(BOM_UTF16_BE)-1;
  291. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
  292. script_encoding = zend_multibyte_encoding_utf16le;
  293. bom_size = sizeof(BOM_UTF16_LE)-1;
  294. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
  295. script_encoding = zend_multibyte_encoding_utf8;
  296. bom_size = sizeof(BOM_UTF8)-1;
  297. }
  298. if (script_encoding) {
  299. /* remove BOM */
  300. LANG_SCNG(script_org) += bom_size;
  301. LANG_SCNG(script_org_size) -= bom_size;
  302. return script_encoding;
  303. }
  304. /* script contains NULL bytes -> auto-detection */
  305. if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
  306. /* check if the NULL byte is after the __HALT_COMPILER(); */
  307. pos2 = LANG_SCNG(script_org);
  308. while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
  309. pos2 = memchr(pos2, '_', pos1 - pos2);
  310. if (!pos2) break;
  311. pos2++;
  312. if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
  313. pos2 += sizeof("_HALT_COMPILER")-1;
  314. while (*pos2 == ' ' ||
  315. *pos2 == '\t' ||
  316. *pos2 == '\r' ||
  317. *pos2 == '\n') {
  318. pos2++;
  319. }
  320. if (*pos2 == '(') {
  321. pos2++;
  322. while (*pos2 == ' ' ||
  323. *pos2 == '\t' ||
  324. *pos2 == '\r' ||
  325. *pos2 == '\n') {
  326. pos2++;
  327. }
  328. if (*pos2 == ')') {
  329. pos2++;
  330. while (*pos2 == ' ' ||
  331. *pos2 == '\t' ||
  332. *pos2 == '\r' ||
  333. *pos2 == '\n') {
  334. pos2++;
  335. }
  336. if (*pos2 == ';') {
  337. return NULL;
  338. }
  339. }
  340. }
  341. }
  342. }
  343. /* make best effort if BOM is missing */
  344. return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
  345. }
  346. return NULL;
  347. }
  348. static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
  349. {
  350. const zend_encoding *script_encoding;
  351. if (CG(detect_unicode)) {
  352. /* check out bom(byte order mark) and see if containing wchars */
  353. script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
  354. if (script_encoding != NULL) {
  355. /* bom or wchar detection is prior to 'script_encoding' option */
  356. return script_encoding;
  357. }
  358. }
  359. /* if no script_encoding specified, just leave alone */
  360. if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
  361. return NULL;
  362. }
  363. /* if multiple encodings specified, detect automagically */
  364. if (CG(script_encoding_list_size) > 1) {
  365. return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
  366. }
  367. return CG(script_encoding_list)[0];
  368. }
  369. ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
  370. {
  371. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
  372. const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
  373. if (!script_encoding) {
  374. return FAILURE;
  375. }
  376. /* judge input/output filter */
  377. LANG_SCNG(script_encoding) = script_encoding;
  378. LANG_SCNG(input_filter) = NULL;
  379. LANG_SCNG(output_filter) = NULL;
  380. if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
  381. if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  382. /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
  383. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  384. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
  385. } else {
  386. LANG_SCNG(input_filter) = NULL;
  387. LANG_SCNG(output_filter) = NULL;
  388. }
  389. return SUCCESS;
  390. }
  391. if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
  392. LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
  393. LANG_SCNG(output_filter) = NULL;
  394. } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  395. LANG_SCNG(input_filter) = NULL;
  396. LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
  397. } else {
  398. /* both script and internal encodings are incompatible w/ flex */
  399. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  400. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
  401. }
  402. return 0;
  403. }
  404. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
  405. {
  406. const char *file_path = NULL;
  407. char *buf;
  408. size_t size, offset = 0;
  409. /* The shebang line was read, get the current position to obtain the buffer start */
  410. if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
  411. if ((offset = ftell(file_handle->handle.fp)) == -1) {
  412. offset = 0;
  413. }
  414. }
  415. if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
  416. return FAILURE;
  417. }
  418. zend_llist_add_element(&CG(open_files), file_handle);
  419. if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
  420. zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
  421. size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
  422. fh->handle.stream.handle = (void*)(((char*)fh) + diff);
  423. file_handle->handle.stream.handle = fh->handle.stream.handle;
  424. }
  425. /* Reset the scanner for scanning the new file */
  426. SCNG(yy_in) = file_handle;
  427. SCNG(yy_start) = NULL;
  428. if (size != -1) {
  429. if (CG(multibyte)) {
  430. SCNG(script_org) = (unsigned char*)buf;
  431. SCNG(script_org_size) = size;
  432. SCNG(script_filtered) = NULL;
  433. zend_multibyte_set_filter(NULL TSRMLS_CC);
  434. if (SCNG(input_filter)) {
  435. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
  436. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  437. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  438. }
  439. buf = (char*)SCNG(script_filtered);
  440. size = SCNG(script_filtered_size);
  441. }
  442. }
  443. SCNG(yy_start) = (unsigned char *)buf - offset;
  444. yy_scan_buffer(buf, size TSRMLS_CC);
  445. } else {
  446. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  447. }
  448. BEGIN(INITIAL);
  449. if (file_handle->opened_path) {
  450. file_path = file_handle->opened_path;
  451. } else {
  452. file_path = file_handle->filename;
  453. }
  454. zend_set_compiled_filename(file_path TSRMLS_CC);
  455. if (CG(start_lineno)) {
  456. CG(zend_lineno) = CG(start_lineno);
  457. CG(start_lineno) = 0;
  458. } else {
  459. CG(zend_lineno) = 1;
  460. }
  461. CG(increment_lineno) = 0;
  462. return SUCCESS;
  463. }
  464. END_EXTERN_C()
  465. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
  466. {
  467. zend_lex_state original_lex_state;
  468. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  469. zend_op_array *original_active_op_array = CG(active_op_array);
  470. zend_op_array *retval=NULL;
  471. int compiler_result;
  472. zend_bool compilation_successful=0;
  473. znode retval_znode;
  474. zend_bool original_in_compilation = CG(in_compilation);
  475. retval_znode.op_type = IS_CONST;
  476. retval_znode.u.constant.type = IS_LONG;
  477. retval_znode.u.constant.value.lval = 1;
  478. Z_UNSET_ISREF(retval_znode.u.constant);
  479. Z_SET_REFCOUNT(retval_znode.u.constant, 1);
  480. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  481. retval = op_array; /* success oriented */
  482. if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
  483. if (type==ZEND_REQUIRE) {
  484. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
  485. zend_bailout();
  486. } else {
  487. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
  488. }
  489. compilation_successful=0;
  490. } else {
  491. init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  492. CG(in_compilation) = 1;
  493. CG(active_op_array) = op_array;
  494. zend_init_compiler_context(TSRMLS_C);
  495. compiler_result = zendparse(TSRMLS_C);
  496. zend_do_return(&retval_znode, 0 TSRMLS_CC);
  497. CG(in_compilation) = original_in_compilation;
  498. if (compiler_result==1) { /* parser error */
  499. zend_bailout();
  500. }
  501. compilation_successful=1;
  502. }
  503. if (retval) {
  504. CG(active_op_array) = original_active_op_array;
  505. if (compilation_successful) {
  506. pass_two(op_array TSRMLS_CC);
  507. zend_release_labels(TSRMLS_C);
  508. } else {
  509. efree(op_array);
  510. retval = NULL;
  511. }
  512. }
  513. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  514. return retval;
  515. }
  516. zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
  517. {
  518. zend_file_handle file_handle;
  519. zval tmp;
  520. zend_op_array *retval;
  521. char *opened_path = NULL;
  522. if (filename->type != IS_STRING) {
  523. tmp = *filename;
  524. zval_copy_ctor(&tmp);
  525. convert_to_string(&tmp);
  526. filename = &tmp;
  527. }
  528. file_handle.filename = filename->value.str.val;
  529. file_handle.free_filename = 0;
  530. file_handle.type = ZEND_HANDLE_FILENAME;
  531. file_handle.opened_path = NULL;
  532. file_handle.handle.fp = NULL;
  533. retval = zend_compile_file(&file_handle, type TSRMLS_CC);
  534. if (retval && file_handle.handle.stream.handle) {
  535. int dummy = 1;
  536. if (!file_handle.opened_path) {
  537. file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
  538. }
  539. zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
  540. if (opened_path) {
  541. efree(opened_path);
  542. }
  543. }
  544. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  545. if (filename==&tmp) {
  546. zval_dtor(&tmp);
  547. }
  548. return retval;
  549. }
  550. ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
  551. {
  552. char *buf;
  553. size_t size;
  554. /* enforce two trailing NULLs for flex... */
  555. if (IS_INTERNED(str->value.str.val)) {
  556. char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
  557. memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
  558. str->value.str.val = tmp;
  559. } else {
  560. str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
  561. }
  562. memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
  563. SCNG(yy_in) = NULL;
  564. SCNG(yy_start) = NULL;
  565. buf = str->value.str.val;
  566. size = str->value.str.len;
  567. if (CG(multibyte)) {
  568. SCNG(script_org) = (unsigned char*)buf;
  569. SCNG(script_org_size) = size;
  570. SCNG(script_filtered) = NULL;
  571. zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
  572. if (SCNG(input_filter)) {
  573. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
  574. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  575. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  576. }
  577. buf = (char*)SCNG(script_filtered);
  578. size = SCNG(script_filtered_size);
  579. }
  580. }
  581. yy_scan_buffer(buf, size TSRMLS_CC);
  582. zend_set_compiled_filename(filename TSRMLS_CC);
  583. CG(zend_lineno) = 1;
  584. CG(increment_lineno) = 0;
  585. return SUCCESS;
  586. }
  587. ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
  588. {
  589. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  590. if (SCNG(input_filter)) {
  591. size_t original_offset = offset, length = 0;
  592. do {
  593. unsigned char *p = NULL;
  594. if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
  595. return (size_t)-1;
  596. }
  597. efree(p);
  598. if (length > original_offset) {
  599. offset--;
  600. } else if (length < original_offset) {
  601. offset++;
  602. }
  603. } while (original_offset != length);
  604. }
  605. return offset;
  606. }
  607. zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
  608. {
  609. zend_lex_state original_lex_state;
  610. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  611. zend_op_array *original_active_op_array = CG(active_op_array);
  612. zend_op_array *retval;
  613. zval tmp;
  614. int compiler_result;
  615. zend_bool original_in_compilation = CG(in_compilation);
  616. if (source_string->value.str.len==0) {
  617. efree(op_array);
  618. return NULL;
  619. }
  620. CG(in_compilation) = 1;
  621. tmp = *source_string;
  622. zval_copy_ctor(&tmp);
  623. convert_to_string(&tmp);
  624. source_string = &tmp;
  625. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  626. if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
  627. efree(op_array);
  628. retval = NULL;
  629. } else {
  630. zend_bool orig_interactive = CG(interactive);
  631. CG(interactive) = 0;
  632. init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  633. CG(interactive) = orig_interactive;
  634. CG(active_op_array) = op_array;
  635. zend_init_compiler_context(TSRMLS_C);
  636. BEGIN(ST_IN_SCRIPTING);
  637. compiler_result = zendparse(TSRMLS_C);
  638. if (SCNG(script_filtered)) {
  639. efree(SCNG(script_filtered));
  640. SCNG(script_filtered) = NULL;
  641. }
  642. if (compiler_result==1) {
  643. CG(active_op_array) = original_active_op_array;
  644. CG(unclean_shutdown)=1;
  645. retval = NULL;
  646. } else {
  647. zend_do_return(NULL, 0 TSRMLS_CC);
  648. CG(active_op_array) = original_active_op_array;
  649. pass_two(op_array TSRMLS_CC);
  650. zend_release_labels(TSRMLS_C);
  651. retval = op_array;
  652. }
  653. }
  654. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  655. zval_dtor(&tmp);
  656. CG(in_compilation) = original_in_compilation;
  657. return retval;
  658. }
  659. BEGIN_EXTERN_C()
  660. int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
  661. {
  662. zend_lex_state original_lex_state;
  663. zend_file_handle file_handle;
  664. file_handle.type = ZEND_HANDLE_FILENAME;
  665. file_handle.filename = filename;
  666. file_handle.free_filename = 0;
  667. file_handle.opened_path = NULL;
  668. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  669. if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
  670. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
  671. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  672. return FAILURE;
  673. }
  674. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  675. if (SCNG(script_filtered)) {
  676. efree(SCNG(script_filtered));
  677. SCNG(script_filtered) = NULL;
  678. }
  679. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  680. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  681. return SUCCESS;
  682. }
  683. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
  684. {
  685. zend_lex_state original_lex_state;
  686. zval tmp = *str;
  687. str = &tmp;
  688. zval_copy_ctor(str);
  689. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  690. if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
  691. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  692. return FAILURE;
  693. }
  694. BEGIN(INITIAL);
  695. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  696. if (SCNG(script_filtered)) {
  697. efree(SCNG(script_filtered));
  698. SCNG(script_filtered) = NULL;
  699. }
  700. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  701. zval_dtor(str);
  702. return SUCCESS;
  703. }
  704. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
  705. {
  706. size_t length;
  707. unsigned char *new_yy_start;
  708. /* convert and set */
  709. if (!SCNG(input_filter)) {
  710. if (SCNG(script_filtered)) {
  711. efree(SCNG(script_filtered));
  712. SCNG(script_filtered) = NULL;
  713. }
  714. SCNG(script_filtered_size) = 0;
  715. length = SCNG(script_org_size);
  716. new_yy_start = SCNG(script_org);
  717. } else {
  718. if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
  719. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  720. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  721. }
  722. SCNG(script_filtered) = new_yy_start;
  723. SCNG(script_filtered_size) = length;
  724. }
  725. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  726. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  727. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  728. SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
  729. SCNG(yy_start) = new_yy_start;
  730. }
  /* Store a copy of the yyleng bytes at yytext as the string value of zendlval.
   * With an output filter installed the bytes go through the filter, which
   * supplies the new buffer and length; otherwise they are duplicated with
   * estrndup(). Arguments are parenthesized so that expressions can be passed.
   * The macro expands to a bare if/else: use it as a complete statement inside
   * braces, never as the unbraced body of an if that has its own else. */
  # define zend_copy_value(zendlval, yytext, yyleng) \
      if (SCNG(output_filter)) { \
          size_t sz = 0; \
          SCNG(output_filter)((unsigned char **)&((zendlval)->value.str.val), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
          (zendlval)->value.str.len = sz; \
      } else { \
          (zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
          (zendlval)->value.str.len = (yyleng); \
      }
  /*
   * Build the string value of zendlval from the len bytes at str, decoding
   * backslash escapes in place.
   *
   * quote_type is the delimiter of the literal being scanned: an escaped '"'
   * or '`' loses its backslash only when it equals quote_type.
   *
   * Recognised escapes: \n \r \t \f \v, \\ and \$, \x or \X followed by one or
   * two hex digits, and one to three octal digits. Any other escape, and a
   * lone backslash at the very end, is kept verbatim including the backslash.
   * Numeric escapes are converted with strtol() and cast to char.
   *
   * Side effects: CG(zend_lineno) is incremented for line breaks in the
   * source bytes; if SCNG(output_filter) is set the decoded buffer is passed
   * through it and the intermediate buffer is freed.
   */
  740. static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
  741. {
  /* s is the read cursor and t the write cursor over the same buffer; t never
     gets ahead of s because every escape decodes to at most as many bytes. */
  742. register char *s, *t;
  743. char *end;
  744. ZVAL_STRINGL(zendlval, str, len, 1);
  745. /* convert escape sequences */
  746. s = t = zendlval->value.str.val;
  747. end = s+zendlval->value.str.len;
  748. while (s<end) {
  749. if (*s=='\\') {
  750. s++;
  /* backslash was the last byte: keep it as is */
  751. if (s >= end) {
  752. *t++ = '\\';
  753. break;
  754. }
  /* each str.len-- below accounts for one source byte that is dropped */
  755. switch(*s) {
  756. case 'n':
  757. *t++ = '\n';
  758. zendlval->value.str.len--;
  759. break;
  760. case 'r':
  761. *t++ = '\r';
  762. zendlval->value.str.len--;
  763. break;
  764. case 't':
  765. *t++ = '\t';
  766. zendlval->value.str.len--;
  767. break;
  768. case 'f':
  769. *t++ = '\f';
  770. zendlval->value.str.len--;
  771. break;
  772. case 'v':
  773. *t++ = '\v';
  774. zendlval->value.str.len--;
  775. break;
  776. case '"':
  777. case '`':
  /* a quote character other than the active delimiter keeps its backslash */
  778. if (*s != quote_type) {
  779. *t++ = '\\';
  780. *t++ = *s;
  781. break;
  782. }
  /* matching delimiter: intentional fall through to the unescape case */
  783. case '\\':
  784. case '$':
  785. *t++ = *s;
  786. zendlval->value.str.len--;
  787. break;
  788. case 'x':
  789. case 'X':
  /* \x needs at least one hex digit, otherwise it is copied literally */
  790. if (ZEND_IS_HEX(*(s+1))) {
  791. char hex_buf[3] = { 0, 0, 0 };
  792. zendlval->value.str.len--; /* for the 'x' */
  793. hex_buf[0] = *(++s);
  794. zendlval->value.str.len--;
  795. if (ZEND_IS_HEX(*(s+1))) {
  796. hex_buf[1] = *(++s);
  797. zendlval->value.str.len--;
  798. }
  799. *t++ = (char) strtol(hex_buf, NULL, 16);
  800. } else {
  801. *t++ = '\\';
  802. *t++ = *s;
  803. }
  804. break;
  805. default:
  806. /* check for an octal */
  807. if (ZEND_IS_OCT(*s)) {
  808. char octal_buf[4] = { 0, 0, 0, 0 };
  809. octal_buf[0] = *s;
  810. zendlval->value.str.len--;
  811. if (ZEND_IS_OCT(*(s+1))) {
  812. octal_buf[1] = *(++s);
  813. zendlval->value.str.len--;
  814. if (ZEND_IS_OCT(*(s+1))) {
  815. octal_buf[2] = *(++s);
  816. zendlval->value.str.len--;
  817. }
  818. }
  819. *t++ = (char) strtol(octal_buf, NULL, 8);
  820. } else {
  /* unknown escape: keep the backslash and the character */
  821. *t++ = '\\';
  822. *t++ = *s;
  823. }
  824. break;
  825. }
  826. } else {
  827. *t++ = *s;
  828. }
  /* line counting looks at the source byte at s: "\n", or "\r" not followed by "\n" */
  829. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  830. CG(zend_lineno)++;
  831. }
  832. s++;
  833. }
  834. *t = 0;
  /* hand the decoded bytes to the output filter and free the intermediate buffer */
  835. if (SCNG(output_filter)) {
  836. size_t sz = 0;
  837. s = zendlval->value.str.val;
  838. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
  839. zendlval->value.str.len = sz;
  840. efree(s);
  841. }
  842. }
  843. int lex_scan(zval *zendlval TSRMLS_DC)
  844. {
  845. restart:
  846. SCNG(yy_text) = YYCURSOR;
  847. yymore_restart:
  848. /*!re2c
  849. re2c:yyfill:check = 0;
  850. LNUM [0-9]+
  851. DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
  852. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  853. HNUM "0x"[0-9a-fA-F]+
  854. BNUM "0b"[01]+
  855. LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
  856. WHITESPACE [ \n\r\t]+
  857. TABS_AND_SPACES [ \t]*
  858. TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
  859. ANY_CHAR [^]
  860. NEWLINE ("\r"|"\n"|"\r\n")
  861. /* compute yyleng before each rule */
  862. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  863. <ST_IN_SCRIPTING>"exit" {
  864. return T_EXIT;
  865. }
  866. <ST_IN_SCRIPTING>"die" {
  867. return T_EXIT;
  868. }
  869. <ST_IN_SCRIPTING>"function" {
  870. return T_FUNCTION;
  871. }
  872. <ST_IN_SCRIPTING>"const" {
  873. return T_CONST;
  874. }
  875. <ST_IN_SCRIPTING>"return" {
  876. return T_RETURN;
  877. }
  878. <ST_IN_SCRIPTING>"try" {
  879. return T_TRY;
  880. }
  881. <ST_IN_SCRIPTING>"catch" {
  882. return T_CATCH;
  883. }
  884. <ST_IN_SCRIPTING>"throw" {
  885. return T_THROW;
  886. }
  887. <ST_IN_SCRIPTING>"if" {
  888. return T_IF;
  889. }
  890. <ST_IN_SCRIPTING>"elseif" {
  891. return T_ELSEIF;
  892. }
  893. <ST_IN_SCRIPTING>"endif" {
  894. return T_ENDIF;
  895. }
  896. <ST_IN_SCRIPTING>"else" {
  897. return T_ELSE;
  898. }
  899. <ST_IN_SCRIPTING>"while" {
  900. return T_WHILE;
  901. }
  902. <ST_IN_SCRIPTING>"endwhile" {
  903. return T_ENDWHILE;
  904. }
  905. <ST_IN_SCRIPTING>"do" {
  906. return T_DO;
  907. }
  908. <ST_IN_SCRIPTING>"for" {
  909. return T_FOR;
  910. }
  911. <ST_IN_SCRIPTING>"endfor" {
  912. return T_ENDFOR;
  913. }
  914. <ST_IN_SCRIPTING>"foreach" {
  915. return T_FOREACH;
  916. }
  917. <ST_IN_SCRIPTING>"endforeach" {
  918. return T_ENDFOREACH;
  919. }
  920. <ST_IN_SCRIPTING>"declare" {
  921. return T_DECLARE;
  922. }
  923. <ST_IN_SCRIPTING>"enddeclare" {
  924. return T_ENDDECLARE;
  925. }
  926. <ST_IN_SCRIPTING>"instanceof" {
  927. return T_INSTANCEOF;
  928. }
  929. <ST_IN_SCRIPTING>"as" {
  930. return T_AS;
  931. }
  932. <ST_IN_SCRIPTING>"switch" {
  933. return T_SWITCH;
  934. }
  935. <ST_IN_SCRIPTING>"endswitch" {
  936. return T_ENDSWITCH;
  937. }
  938. <ST_IN_SCRIPTING>"case" {
  939. return T_CASE;
  940. }
  941. <ST_IN_SCRIPTING>"default" {
  942. return T_DEFAULT;
  943. }
  944. <ST_IN_SCRIPTING>"break" {
  945. return T_BREAK;
  946. }
  947. <ST_IN_SCRIPTING>"continue" {
  948. return T_CONTINUE;
  949. }
  950. <ST_IN_SCRIPTING>"goto" {
  951. return T_GOTO;
  952. }
  953. <ST_IN_SCRIPTING>"echo" {
  954. return T_ECHO;
  955. }
  956. <ST_IN_SCRIPTING>"print" {
  957. return T_PRINT;
  958. }
  959. <ST_IN_SCRIPTING>"class" {
  960. return T_CLASS;
  961. }
  962. <ST_IN_SCRIPTING>"interface" {
  963. return T_INTERFACE;
  964. }
  965. <ST_IN_SCRIPTING>"trait" {
  966. return T_TRAIT;
  967. }
  968. <ST_IN_SCRIPTING>"extends" {
  969. return T_EXTENDS;
  970. }
  971. <ST_IN_SCRIPTING>"implements" {
  972. return T_IMPLEMENTS;
  973. }
  974. <ST_IN_SCRIPTING>"->" {
  975. yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
  976. return T_OBJECT_OPERATOR;
  977. }
  978. <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
  979. zendlval->value.str.val = yytext; /* no copying - intentional */
  980. zendlval->value.str.len = yyleng;
  981. zendlval->type = IS_STRING;
  982. HANDLE_NEWLINES(yytext, yyleng);
  983. return T_WHITESPACE;
  984. }
  985. <ST_LOOKING_FOR_PROPERTY>"->" {
  986. return T_OBJECT_OPERATOR;
  987. }
  988. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  989. yy_pop_state(TSRMLS_C);
  990. zend_copy_value(zendlval, yytext, yyleng);
  991. zendlval->type = IS_STRING;
  992. return T_STRING;
  993. }
  994. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  995. yyless(0);
  996. yy_pop_state(TSRMLS_C);
  997. goto restart;
  998. }
  999. <ST_IN_SCRIPTING>"::" {
  1000. return T_PAAMAYIM_NEKUDOTAYIM;
  1001. }
  1002. <ST_IN_SCRIPTING>"\\" {
  1003. return T_NS_SEPARATOR;
  1004. }
  1005. <ST_IN_SCRIPTING>"new" {
  1006. return T_NEW;
  1007. }
  1008. <ST_IN_SCRIPTING>"clone" {
  1009. return T_CLONE;
  1010. }
  1011. <ST_IN_SCRIPTING>"var" {
  1012. return T_VAR;
  1013. }
  1014. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  1015. return T_INT_CAST;
  1016. }
  1017. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  1018. return T_DOUBLE_CAST;
  1019. }
  1020. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  1021. return T_STRING_CAST;
  1022. }
  1023. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  1024. return T_ARRAY_CAST;
  1025. }
  1026. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  1027. return T_OBJECT_CAST;
  1028. }
  1029. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  1030. return T_BOOL_CAST;
  1031. }
  1032. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  1033. return T_UNSET_CAST;
  1034. }
  1035. <ST_IN_SCRIPTING>"eval" {
  1036. return T_EVAL;
  1037. }
  1038. <ST_IN_SCRIPTING>"include" {
  1039. return T_INCLUDE;
  1040. }
  1041. <ST_IN_SCRIPTING>"include_once" {
  1042. return T_INCLUDE_ONCE;
  1043. }
  1044. <ST_IN_SCRIPTING>"require" {
  1045. return T_REQUIRE;
  1046. }
  1047. <ST_IN_SCRIPTING>"require_once" {
  1048. return T_REQUIRE_ONCE;
  1049. }
  1050. <ST_IN_SCRIPTING>"namespace" {
  1051. return T_NAMESPACE;
  1052. }
  1053. <ST_IN_SCRIPTING>"use" {
  1054. return T_USE;
  1055. }
  1056. <ST_IN_SCRIPTING>"insteadof" {
  1057. return T_INSTEADOF;
  1058. }
  1059. <ST_IN_SCRIPTING>"global" {
  1060. return T_GLOBAL;
  1061. }
  1062. <ST_IN_SCRIPTING>"isset" {
  1063. return T_ISSET;
  1064. }
  1065. <ST_IN_SCRIPTING>"empty" {
  1066. return T_EMPTY;
  1067. }
  1068. <ST_IN_SCRIPTING>"__halt_compiler" {
  1069. return T_HALT_COMPILER;
  1070. }
  1071. <ST_IN_SCRIPTING>"static" {
  1072. return T_STATIC;
  1073. }
  1074. <ST_IN_SCRIPTING>"abstract" {
  1075. return T_ABSTRACT;
  1076. }
  1077. <ST_IN_SCRIPTING>"final" {
  1078. return T_FINAL;
  1079. }
  1080. <ST_IN_SCRIPTING>"private" {
  1081. return T_PRIVATE;
  1082. }
  1083. <ST_IN_SCRIPTING>"protected" {
  1084. return T_PROTECTED;
  1085. }
  1086. <ST_IN_SCRIPTING>"public" {
  1087. return T_PUBLIC;
  1088. }
  1089. <ST_IN_SCRIPTING>"unset" {
  1090. return T_UNSET;
  1091. }
  1092. <ST_IN_SCRIPTING>"=>" {
  1093. return T_DOUBLE_ARROW;
  1094. }
  1095. <ST_IN_SCRIPTING>"list" {
  1096. return T_LIST;
  1097. }
  1098. <ST_IN_SCRIPTING>"array" {
  1099. return T_ARRAY;
  1100. }
  1101. <ST_IN_SCRIPTING>"callable" {
  1102. return T_CALLABLE;
  1103. }
  1104. <ST_IN_SCRIPTING>"++" {
  1105. return T_INC;
  1106. }
  1107. <ST_IN_SCRIPTING>"--" {
  1108. return T_DEC;
  1109. }
  1110. <ST_IN_SCRIPTING>"===" {
  1111. return T_IS_IDENTICAL;
  1112. }
  1113. <ST_IN_SCRIPTING>"!==" {
  1114. return T_IS_NOT_IDENTICAL;
  1115. }
  1116. <ST_IN_SCRIPTING>"==" {
  1117. return T_IS_EQUAL;
  1118. }
  1119. <ST_IN_SCRIPTING>"!="|"<>" {
  1120. return T_IS_NOT_EQUAL;
  1121. }
  1122. <ST_IN_SCRIPTING>"<=" {
  1123. return T_IS_SMALLER_OR_EQUAL;
  1124. }
  1125. <ST_IN_SCRIPTING>">=" {
  1126. return T_IS_GREATER_OR_EQUAL;
  1127. }
  1128. <ST_IN_SCRIPTING>"+=" {
  1129. return T_PLUS_EQUAL;
  1130. }
  1131. <ST_IN_SCRIPTING>"-=" {
  1132. return T_MINUS_EQUAL;
  1133. }
  1134. <ST_IN_SCRIPTING>"*=" {
  1135. return T_MUL_EQUAL;
  1136. }
  1137. <ST_IN_SCRIPTING>"/=" {
  1138. return T_DIV_EQUAL;
  1139. }
  1140. <ST_IN_SCRIPTING>".=" {
  1141. return T_CONCAT_EQUAL;
  1142. }
  1143. <ST_IN_SCRIPTING>"%=" {
  1144. return T_MOD_EQUAL;
  1145. }
  1146. <ST_IN_SCRIPTING>"<<=" {
  1147. return T_SL_EQUAL;
  1148. }
  1149. <ST_IN_SCRIPTING>">>=" {
  1150. return T_SR_EQUAL;
  1151. }
  1152. <ST_IN_SCRIPTING>"&=" {
  1153. return T_AND_EQUAL;
  1154. }
  1155. <ST_IN_SCRIPTING>"|=" {
  1156. return T_OR_EQUAL;
  1157. }
  1158. <ST_IN_SCRIPTING>"^=" {
  1159. return T_XOR_EQUAL;
  1160. }
  1161. <ST_IN_SCRIPTING>"||" {
  1162. return T_BOOLEAN_OR;
  1163. }
  1164. <ST_IN_SCRIPTING>"&&" {
  1165. return T_BOOLEAN_AND;
  1166. }
  1167. <ST_IN_SCRIPTING>"OR" {
  1168. return T_LOGICAL_OR;
  1169. }
  1170. <ST_IN_SCRIPTING>"AND" {
  1171. return T_LOGICAL_AND;
  1172. }
  1173. <ST_IN_SCRIPTING>"XOR" {
  1174. return T_LOGICAL_XOR;
  1175. }
  1176. <ST_IN_SCRIPTING>"<<" {
  1177. return T_SL;
  1178. }
  1179. <ST_IN_SCRIPTING>">>" {
  1180. return T_SR;
  1181. }
  1182. <ST_IN_SCRIPTING>{TOKENS} {
  1183. return yytext[0];
  1184. }
  1185. <ST_IN_SCRIPTING>"{" {
  1186. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1187. return '{';
  1188. }
  1189. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
  1190. yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
  1191. return T_DOLLAR_OPEN_CURLY_BRACES;
  1192. }
  1193. <ST_IN_SCRIPTING>"}" {
  1194. RESET_DOC_COMMENT();
  1195. if (!zend_stack_is_empty(&SCNG(state_stack))) {
  1196. yy_pop_state(TSRMLS_C);
  1197. }
  1198. return '}';
  1199. }
  1200. <ST_LOOKING_FOR_VARNAME>{LABEL} {
  1201. zend_copy_value(zendlval, yytext, yyleng);
  1202. zendlval->type = IS_STRING;
  1203. yy_pop_state(TSRMLS_C);
  1204. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1205. return T_STRING_VARNAME;
  1206. }
  1207. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
  1208. yyless(0);
  1209. yy_pop_state(TSRMLS_C);
  1210. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1211. goto restart;
  1212. }
  1213. <ST_IN_SCRIPTING>{BNUM} {
  1214. char *bin = yytext + 2; /* Skip "0b" */
  1215. int len = yyleng - 2;
  1216. /* Skip any leading 0s */
  1217. while (*bin == '0') {
  1218. ++bin;
  1219. --len;
  1220. }
  1221. if (len < SIZEOF_LONG * 8) {
  1222. zendlval->value.lval = strtol(bin, NULL, 2);
  1223. zendlval->type = IS_LONG;
  1224. return T_LNUMBER;
  1225. } else {
  1226. zendlval->value.dval = zend_bin_strtod(bin, NULL);
  1227. zendlval->type = IS_DOUBLE;
  1228. return T_DNUMBER;
  1229. }
  1230. }
  1231. <ST_IN_SCRIPTING>{LNUM} {
  1232. if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1233. zendlval->value.lval = strtol(yytext, NULL, 0);
  1234. } else {
  1235. errno = 0;
  1236. zendlval->value.lval = strtol(yytext, NULL, 0);
  1237. if (errno == ERANGE) { /* Overflow */
  1238. if (yytext[0] == '0') { /* octal overflow */
  1239. zendlval->value.dval = zend_oct_strtod(yytext, NULL);
  1240. } else {
  1241. zendlval->value.dval = zend_strtod(yytext, NULL);
  1242. }
  1243. zendlval->type = IS_DOUBLE;
  1244. return T_DNUMBER;
  1245. }
  1246. }
  1247. zendlval->type = IS_LONG;
  1248. return T_LNUMBER;
  1249. }
  1250. <ST_IN_SCRIPTING>{HNUM} {
  1251. char *hex = yytext + 2; /* Skip "0x" */
  1252. int len = yyleng - 2;
  1253. /* Skip any leading 0s */
  1254. while (*hex == '0') {
  1255. hex++;
  1256. len--;
  1257. }
  1258. if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
  1259. zendlval->value.lval = strtol(hex, NULL, 16);
  1260. zendlval->type = IS_LONG;
  1261. return T_LNUMBER;
  1262. } else {
  1263. zendlval->value.dval = zend_hex_strtod(hex, NULL);
  1264. zendlval->type = IS_DOUBLE;
  1265. return T_DNUMBER;
  1266. }
  1267. }
  1268. <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
  1269. if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
  1270. zendlval->value.lval = strtol(yytext, NULL, 10);
  1271. zendlval->type = IS_LONG;
  1272. } else {
  1273. zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
  1274. zendlval->value.str.len = yyleng;
  1275. zendlval->type = IS_STRING;
  1276. }
  1277. return T_NUM_STRING;
  1278. }
  1279. <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
  1280. zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
  1281. zendlval->value.str.len = yyleng;
  1282. zendlval->type = IS_STRING;
  1283. return T_NUM_STRING;
  1284. }
  1285. <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
  1286. zendlval->value.dval = zend_strtod(yytext, NULL);
  1287. zendlval->type = IS_DOUBLE;
  1288. return T_DNUMBER;
  1289. }
  1290. <ST_IN_SCRIPTING>"__CLASS__" {
  1291. const char *class_name = NULL;
  1292. if (CG(active_class_entry)
  1293. && (ZEND_ACC_TRAIT ==
  1294. (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
// This is a hack, we abuse IS_NULL to indicate an invalid value
// if __CLASS__ is encountered in a trait, however, we also note that we
// should fix it up when we copy the method into an actual class
  1298. zendlval->value.lval = ZEND_ACC_TRAIT;
  1299. zendlval->type = IS_NULL;
  1300. } else {
  1301. if (CG(active_class_entry)) {
  1302. class_name = CG(active_class_entry)->name;
  1303. }
  1304. if (!class_name) {
  1305. class_name = "";
  1306. }
  1307. zendlval->value.str.len = strlen(class_name);
  1308. zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
  1309. zendlval->type = IS_STRING;
  1310. }
  1311. return T_CLASS_C;
  1312. }
  1313. <ST_IN_SCRIPTING>"__TRAIT__" {
  1314. const char *trait_name = NULL;
  1315. if (CG(active_class_entry)
  1316. && (ZEND_ACC_TRAIT ==
  1317. (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
  1318. trait_name = CG(active_class_entry)->name;
  1319. }
  1320. if (!trait_name) {
  1321. trait_name = "";
  1322. }
  1323. zendlval->value.str.len = strlen(trait_name);
  1324. zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
  1325. zendlval->type = IS_STRING;
  1326. return T_TRAIT_C;
  1327. }
  1328. <ST_IN_SCRIPTING>"__FUNCTION__" {
  1329. const char *func_name = NULL;
  1330. if (CG(active_op_array)) {
  1331. func_name = CG(active_op_array)->function_name;
  1332. }
  1333. if (!func_name) {
  1334. func_name = "";
  1335. }
  1336. zendlval->value.str.len = strlen(func_name);
  1337. zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
  1338. zendlval->type = IS_STRING;
  1339. return T_FUNC_C;
  1340. }
  1341. <ST_IN_SCRIPTING>"__METHOD__" {
  1342. const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
  1343. const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
  1344. size_t len = 0;
  1345. if (class_name) {
  1346. len += strlen(class_name) + 2;
  1347. }
  1348. if (func_name) {
  1349. len += strlen(func_name);
  1350. }
  1351. zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
  1352. class_name ? class_name : "",
  1353. class_name && func_name ? "::" : "",
  1354. func_name ? func_name : ""
  1355. );
  1356. zendlval->type = IS_STRING;
  1357. return T_METHOD_C;
  1358. }
  1359. <ST_IN_SCRIPTING>"__LINE__" {
  1360. zendlval->value.lval = CG(zend_lineno);
  1361. zendlval->type = IS_LONG;
  1362. return T_LINE;
  1363. }
  1364. <ST_IN_SCRIPTING>"__FILE__" {
  1365. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1366. if (!filename) {
  1367. filename = "";
  1368. }
  1369. zendlval->value.str.len = strlen(filename);
  1370. zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
  1371. zendlval->type = IS_STRING;
  1372. return T_FILE;
  1373. }
  1374. <ST_IN_SCRIPTING>"__DIR__" {
  1375. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1376. const size_t filename_len = strlen(filename);
  1377. char *dirname;
  1378. if (!filename) {
  1379. filename = "";
  1380. }
  1381. dirname = estrndup(filename, filename_len);
  1382. zend_dirname(dirname, filename_len);
  1383. if (strcmp(dirname, ".") == 0) {
  1384. dirname = erealloc(dirname, MAXPATHLEN);
  1385. #if HAVE_GETCWD
  1386. VCWD_GETCWD(dirname, MAXPATHLEN);
  1387. #elif HAVE_GETWD
  1388. VCWD_GETWD(dirname);
  1389. #endif
  1390. }
  1391. zendlval->value.str.len = strlen(dirname);
  1392. zendlval->value.str.val = dirname;
  1393. zendlval->type = IS_STRING;
  1394. return T_DIR;
  1395. }
  1396. <ST_IN_SCRIPTING>"__NAMESPACE__" {
  1397. if (CG(current_namespace)) {
  1398. *zendlval = *CG(current_namespace);
  1399. zval_copy_ctor(zendlval);
  1400. } else {
  1401. ZVAL_EMPTY_STRING(zendlval);
  1402. }
  1403. return T_NS_C;
  1404. }
  1405. <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
  1406. YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
  1407. if (bracket != SCNG(yy_text)) {
  1408. /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
  1409. YYCURSOR = bracket;
  1410. goto inline_html;
  1411. }
  1412. HANDLE_NEWLINES(yytext, yyleng);
  1413. zendlval->value.str.val = yytext; /* no copying - intentional */
  1414. zendlval->value.str.len = yyleng;
  1415. zendlval->type = IS_STRING;
  1416. BEGIN(ST_IN_SCRIPTING);
  1417. return T_OPEN_TAG;
  1418. }
  1419. <INITIAL>"<%=" {
  1420. if (CG(asp_tags)) {
  1421. zendlval->value.str.val = yytext; /* no copying - intentional */
  1422. zendlval->value.str.len = yyleng;
  1423. zendlval->type = IS_STRING;
  1424. BEGIN(ST_IN_SCRIPTING);
  1425. return T_OPEN_TAG_WITH_ECHO;
  1426. } else {
  1427. goto inline_char_handler;
  1428. }
  1429. }
  1430. <INITIAL>"<?=" {
  1431. zendlval->value.str.val = yytext; /* no copying - intentional */
  1432. zendlval->value.str.len = yyleng;
  1433. zendlval->type = IS_STRING;
  1434. BEGIN(ST_IN_SCRIPTING);
  1435. return T_OPEN_TAG_WITH_ECHO;
  1436. }
  1437. <INITIAL>"<%" {
  1438. if (CG(asp_tags)) {
  1439. zendlval->value.str.val = yytext; /* no copying - intentional */
  1440. zendlval->value.str.len = yyleng;
  1441. zendlval->type = IS_STRING;
  1442. BEGIN(ST_IN_SCRIPTING);
  1443. return T_OPEN_TAG;
  1444. } else {
  1445. goto inline_char_handler;
  1446. }
  1447. }
  1448. <INITIAL>"<?php"([ \t]|{NEWLINE}) {
  1449. zendlval->value.str.val = yytext; /* no copying - intentional */
  1450. zendlval->value.str.len = yyleng;
  1451. zendlval->type = IS_STRING;
  1452. HANDLE_NEWLINE(yytext[yyleng-1]);
  1453. BEGIN(ST_IN_SCRIPTING);
  1454. return T_OPEN_TAG;
  1455. }
  1456. <INITIAL>"<?" {
  1457. if (CG(short_tags)) {
  1458. zendlval->value.str.val = yytext; /* no copying - intentional */
  1459. zendlval->value.str.len = yyleng;
  1460. zendlval->type = IS_STRING;
  1461. BEGIN(ST_IN_SCRIPTING);
  1462. return T_OPEN_TAG;
  1463. } else {
  1464. goto inline_char_handler;
  1465. }
  1466. }
  1467. <INITIAL>{ANY_CHAR} {
  1468. if (YYCURSOR > YYLIMIT) {
  1469. return 0;
  1470. }
  1471. inline_char_handler:
  1472. while (1) {
  1473. YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
  1474. YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
  1475. if (YYCURSOR < YYLIMIT) {
  1476. switch (*YYCURSOR) {
  1477. case '?':
  1478. if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
  1479. break;
  1480. }
  1481. continue;
  1482. case '%':
  1483. if (CG(asp_tags)) {
  1484. break;
  1485. }
  1486. continue;
  1487. case 's':
  1488. case 'S':
  1489. /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
  1490. * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
  1491. YYCURSOR--;
  1492. yymore();
  1493. default:
  1494. continue;
  1495. }
  1496. YYCURSOR--;
  1497. }
  1498. break;
  1499. }
  1500. inline_html:
  1501. yyleng = YYCURSOR - SCNG(yy_text);
  1502. if (SCNG(output_filter)) {
  1503. int readsize;
  1504. size_t sz = 0;
  1505. readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
  1506. zendlval->value.str.len = sz;
  1507. if (readsize < yyleng) {
  1508. yyless(readsize);
  1509. }
  1510. } else {
  1511. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1512. zendlval->value.str.len = yyleng;
  1513. }
  1514. zendlval->type = IS_STRING;
  1515. HANDLE_NEWLINES(yytext, yyleng);
  1516. return T_INLINE_HTML;
  1517. }
  1518. /* Make sure a label character follows "->", otherwise there is no property
  1519. * and "->" will be taken literally
  1520. */
  1521. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
  1522. yyless(yyleng - 3);
  1523. yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
  1524. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1525. zendlval->type = IS_STRING;
  1526. return T_VARIABLE;
  1527. }
  1528. /* A [ always designates a variable offset, regardless of what follows
  1529. */
  1530. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
  1531. yyless(yyleng - 1);
  1532. yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
  1533. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1534. zendlval->type = IS_STRING;
  1535. return T_VARIABLE;
  1536. }
  1537. <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
  1538. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1539. zendlval->type = IS_STRING;
  1540. return T_VARIABLE;
  1541. }
  1542. <ST_VAR_OFFSET>"]" {
  1543. yy_pop_state(TSRMLS_C);
  1544. return ']';
  1545. }
  1546. <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
  1547. /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
  1548. return yytext[0];
  1549. }
  1550. <ST_VAR_OFFSET>[ \n\r\t\\'#] {
  1551. /* Invalid rule to return a more explicit parse error with proper line number */
  1552. yyless(0);
  1553. yy_pop_state(TSRMLS_C);
  1554. return T_ENCAPSED_AND_WHITESPACE;
  1555. }
  1556. <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
  1557. zend_copy_value(zendlval, yytext, yyleng);
  1558. zendlval->type = IS_STRING;
  1559. return T_STRING;
  1560. }
  1561. <ST_IN_SCRIPTING>"#"|"//" {
  1562. while (YYCURSOR < YYLIMIT) {
  1563. switch (*YYCURSOR++) {
  1564. case '\r':
  1565. if (*YYCURSOR == '\n') {
  1566. YYCURSOR++;
  1567. }
  1568. /* fall through */
  1569. case '\n':
  1570. CG(zend_lineno)++;
  1571. break;
  1572. case '%':
  1573. if (!CG(asp_tags)) {
  1574. continue;
  1575. }
  1576. /* fall through */
  1577. case '?':
  1578. if (*YYCURSOR == '>') {
  1579. YYCURSOR--;
  1580. break;
  1581. }
  1582. /* fall through */
  1583. default:
  1584. continue;
  1585. }
  1586. break;
  1587. }
  1588. yyleng = YYCURSOR - SCNG(yy_text);
  1589. return T_COMMENT;
  1590. }
  1591. <ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
  1592. int doc_com;
  1593. if (yyleng > 2) {
  1594. doc_com = 1;
  1595. RESET_DOC_COMMENT();
  1596. } else {
  1597. doc_com = 0;
  1598. }
  1599. while (YYCURSOR < YYLIMIT) {
  1600. if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
  1601. break;
  1602. }
  1603. }
  1604. if (YYCURSOR < YYLIMIT) {
  1605. YYCURSOR++;
  1606. } else {
  1607. zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
  1608. }
  1609. yyleng = YYCURSOR - SCNG(yy_text);
  1610. HANDLE_NEWLINES(yytext, yyleng);
  1611. if (doc_com) {
  1612. CG(doc_comment) = estrndup(yytext, yyleng);
  1613. CG(doc_comment_len) = yyleng;
  1614. return T_DOC_COMMENT;
  1615. }
  1616. return T_COMMENT;
  1617. }
  1618. <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
  1619. zendlval->value.str.val = yytext; /* no copying - intentional */
  1620. zendlval->value.str.len = yyleng;
  1621. zendlval->type = IS_STRING;
  1622. BEGIN(INITIAL);
  1623. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1624. }
  1625. <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
  1626. if (CG(asp_tags)) {
  1627. BEGIN(INITIAL);
  1628. zendlval->value.str.len = yyleng;
  1629. zendlval->type = IS_STRING;
  1630. zendlval->value.str.val = yytext; /* no copying - intentional */
  1631. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1632. } else {
  1633. yyless(1);
  1634. return yytext[0];
  1635. }
  1636. }
  1637. <ST_IN_SCRIPTING>b?['] {
  1638. register char *s, *t;
  1639. char *end;
  1640. int bprefix = (yytext[0] != '\'') ? 1 : 0;
  1641. while (1) {
  1642. if (YYCURSOR < YYLIMIT) {
  1643. if (*YYCURSOR == '\'') {
  1644. YYCURSOR++;
  1645. yyleng = YYCURSOR - SCNG(yy_text);
  1646. break;
  1647. } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
  1648. YYCURSOR++;
  1649. }
  1650. } else {
  1651. yyleng = YYLIMIT - SCNG(yy_text);
  1652. /* Unclosed single quotes; treat similar to double quotes, but without a separate token
  1653. * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
  1654. * rule, which continued in ST_IN_SCRIPTING state after the quote */
  1655. return T_ENCAPSED_AND_WHITESPACE;
  1656. }
  1657. }
  1658. zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
  1659. zendlval->value.str.len = yyleng-bprefix-2;
  1660. zendlval->type = IS_STRING;
  1661. /* convert escape sequences */
  1662. s = t = zendlval->value.str.val;
  1663. end = s+zendlval->value.str.len;
  1664. while (s<end) {
  1665. if (*s=='\\') {
  1666. s++;
  1667. switch(*s) {
  1668. case '\\':
  1669. case '\'':
  1670. *t++ = *s;
  1671. zendlval->value.str.len--;
  1672. break;
  1673. default:
  1674. *t++ = '\\';
  1675. *t++ = *s;
  1676. break;
  1677. }
  1678. } else {
  1679. *t++ = *s;
  1680. }
  1681. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  1682. CG(zend_lineno)++;
  1683. }
  1684. s++;
  1685. }
  1686. *t = 0;
  1687. if (SCNG(output_filter)) {
  1688. size_t sz = 0;
  1689. s = zendlval->value.str.val;
  1690. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz,

Large files files are truncated, but you can click here to view the full file