PageRenderTime 58ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/Zend/zend_language_scanner.l

http://github.com/php/php-src
LEX | 3038 lines | 2370 code | 502 blank | 166 comment | 0 complexity | 66cd4909e5ed6918ba506c45e3f1ebb8 MD5 | raw file
Possible License(s): BSD-2-Clause, BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Marcus Boerger <helly@php.net> |
  16. | Nuno Lopes <nlopess@php.net> |
  17. | Scott MacVicar <scottmac@php.net> |
  18. | Flex version authors: |
  19. | Andi Gutmans <andi@php.net> |
  20. | Zeev Suraski <zeev@php.net> |
  21. +----------------------------------------------------------------------+
  22. */
  23. #if 0
  24. # define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
  25. #else
  26. # define YYDEBUG(s, c)
  27. #endif
  28. #include "zend_language_scanner_defs.h"
  29. #include <errno.h>
  30. #include "zend.h"
  31. #ifdef ZEND_WIN32
  32. # include <Winuser.h>
  33. #endif
  34. #include "zend_alloc.h"
  35. #include <zend_language_parser.h>
  36. #include "zend_compile.h"
  37. #include "zend_language_scanner.h"
  38. #include "zend_highlight.h"
  39. #include "zend_constants.h"
  40. #include "zend_variables.h"
  41. #include "zend_operators.h"
  42. #include "zend_API.h"
  43. #include "zend_strtod.h"
  44. #include "zend_exceptions.h"
  45. #include "zend_virtual_cwd.h"
  /* re2c interface: input character type and buffer accessors. */
  #define YYCTYPE   unsigned char
  /* Makes the enclosing function return 0 once the cursor advanced by n
   * reaches YYLIMIT + ZEND_MMAP_AHEAD. */
  #define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
  #define YYCURSOR  SCNG(yy_cursor)
  #define YYLIMIT   SCNG(yy_limit)
  #define YYMARKER  SCNG(yy_marker)

  /* Scanner condition accessors; STATE() maps a condition name to its yyc* constant. */
  #define YYGETCONDITION()  SCNG(yy_state)
  #define YYSETCONDITION(s) SCNG(yy_state) = s
  #define STATE(name)  yyc##name

  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE      YYGETCONDITION()
  #define yytext       ((char*)SCNG(yy_text))
  #define yyleng       SCNG(yy_leng)
  /* Rewind the cursor so that only the first x bytes of the token are kept. */
  #define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + x; \
                            yyleng   = (unsigned int)x; } while(0)
  #define yymore()     goto yymore_restart
  62. /* perform sanity check. If this message is triggered you should
  63. increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
  64. /*!max:re2c */
  65. #if ZEND_MMAP_AHEAD < YYMAXFILL
  66. # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  67. #endif
  68. #include <stdarg.h>
  69. #ifdef HAVE_UNISTD_H
  70. # include <unistd.h>
  71. #endif
  72. /* Globals Macros */
  73. #define SCNG LANG_SCNG
  74. #ifdef ZTS
  75. ZEND_API ts_rsrc_id language_scanner_globals_id;
  76. ZEND_API size_t language_scanner_globals_offset;
  77. #else
  78. ZEND_API zend_php_scanner_globals language_scanner_globals;
  79. #endif
  /*
   * Bump CG(zend_lineno) for every line break in the l bytes starting at s.
   * A "\r\n" pair counts once: the '\r' is skipped when followed by '\n'.
   */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *p = (s), *boundary = p+(l); \
      \
      while (p<boundary) { \
          if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \
              CG(zend_lineno)++; \
          } \
          p++; \
      } \
  } while (0)

  /* Bump CG(zend_lineno) if the single character c is '\n' or '\r'.
   * Note: c is expanded (and therefore evaluated) up to two times. */
  #define HANDLE_NEWLINE(c) \
  { \
      if (c == '\n' || c == '\r') { \
          CG(zend_lineno)++; \
      } \
  }

  /* To save initial string length after scanning to first variable */
  #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
  #define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

  /* Label characters: ASCII letters, '_', any byte >= 0x80; digits only after the first character. */
  #define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x80)
  #define IS_LABEL_SUCCESSOR(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= '0' && (c) <= '9') || (c) == '_' || (c) >= 0x80)

  /* Digit classification for octal and hexadecimal digits. */
  #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7')
  #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
  104. BEGIN_EXTERN_C()
  /*
   * Remove every '_' from the NUL-terminated string `str`, in place.
   *
   * `*len` is decremented once for each underscore dropped, so a caller that
   * passed the string length gets the new length back. The scan stops at the
   * terminating NUL, not at `*len`.
   */
  static void strip_underscores(char *str, size_t *len)
  {
      const char *in;
      char *out = str;

      for (in = str; *in != '\0'; in++) {
          if (*in == '_') {
              --(*len);
              continue;
          }
          *out++ = *in;
      }
      *out = '\0';
  }
  119. static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  120. {
  121. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  122. ZEND_ASSERT(internal_encoding);
  123. return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
  124. }
  125. static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  126. {
  127. return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
  128. }
  129. static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  130. {
  131. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  132. LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
  133. }
  134. static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
  135. {
  136. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  137. ZEND_ASSERT(internal_encoding);
  138. return zend_multibyte_encoding_converter(to, to_length, from, from_length,
  139. internal_encoding, zend_multibyte_encoding_utf8);
  140. }
  141. static void _yy_push_state(int new_state)
  142. {
  143. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
  144. YYSETCONDITION(new_state);
  145. }
  146. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  147. static void yy_pop_state(void)
  148. {
  149. int *stack_state = zend_stack_top(&SCNG(state_stack));
  150. YYSETCONDITION(*stack_state);
  151. zend_stack_del_top(&SCNG(state_stack));
  152. }
  153. static void yy_scan_buffer(char *str, unsigned int len)
  154. {
  155. YYCURSOR = (YYCTYPE*)str;
  156. YYLIMIT = YYCURSOR + len;
  157. if (!SCNG(yy_start)) {
  158. SCNG(yy_start) = YYCURSOR;
  159. }
  160. }
  161. void startup_scanner(void)
  162. {
  163. CG(parse_error) = 0;
  164. CG(doc_comment) = NULL;
  165. CG(extra_fn_flags) = 0;
  166. zend_stack_init(&SCNG(state_stack), sizeof(int));
  167. zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
  168. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  169. SCNG(heredoc_scan_ahead) = 0;
  170. }
  171. static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
  172. efree(heredoc_label->label);
  173. }
  174. void shutdown_scanner(void)
  175. {
  176. CG(parse_error) = 0;
  177. RESET_DOC_COMMENT();
  178. zend_stack_destroy(&SCNG(state_stack));
  179. zend_stack_destroy(&SCNG(nest_location_stack));
  180. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  181. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  182. SCNG(heredoc_scan_ahead) = 0;
  183. SCNG(on_event) = NULL;
  184. }
  185. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
  186. {
  187. lex_state->yy_leng = SCNG(yy_leng);
  188. lex_state->yy_start = SCNG(yy_start);
  189. lex_state->yy_text = SCNG(yy_text);
  190. lex_state->yy_cursor = SCNG(yy_cursor);
  191. lex_state->yy_marker = SCNG(yy_marker);
  192. lex_state->yy_limit = SCNG(yy_limit);
  193. lex_state->state_stack = SCNG(state_stack);
  194. zend_stack_init(&SCNG(state_stack), sizeof(int));
  195. lex_state->nest_location_stack = SCNG(nest_location_stack);
  196. zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
  197. lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
  198. zend_ptr_stack_init(&SCNG(heredoc_label_stack));
  199. lex_state->in = SCNG(yy_in);
  200. lex_state->yy_state = YYSTATE;
  201. lex_state->filename = zend_get_compiled_filename();
  202. lex_state->lineno = CG(zend_lineno);
  203. lex_state->script_org = SCNG(script_org);
  204. lex_state->script_org_size = SCNG(script_org_size);
  205. lex_state->script_filtered = SCNG(script_filtered);
  206. lex_state->script_filtered_size = SCNG(script_filtered_size);
  207. lex_state->input_filter = SCNG(input_filter);
  208. lex_state->output_filter = SCNG(output_filter);
  209. lex_state->script_encoding = SCNG(script_encoding);
  210. lex_state->on_event = SCNG(on_event);
  211. lex_state->on_event_context = SCNG(on_event_context);
  212. lex_state->ast = CG(ast);
  213. lex_state->ast_arena = CG(ast_arena);
  214. }
  215. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
  216. {
  217. SCNG(yy_leng) = lex_state->yy_leng;
  218. SCNG(yy_start) = lex_state->yy_start;
  219. SCNG(yy_text) = lex_state->yy_text;
  220. SCNG(yy_cursor) = lex_state->yy_cursor;
  221. SCNG(yy_marker) = lex_state->yy_marker;
  222. SCNG(yy_limit) = lex_state->yy_limit;
  223. zend_stack_destroy(&SCNG(state_stack));
  224. SCNG(state_stack) = lex_state->state_stack;
  225. zend_stack_destroy(&SCNG(nest_location_stack));
  226. SCNG(nest_location_stack) = lex_state->nest_location_stack;
  227. zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
  228. zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
  229. SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
  230. SCNG(yy_in) = lex_state->in;
  231. YYSETCONDITION(lex_state->yy_state);
  232. CG(zend_lineno) = lex_state->lineno;
  233. zend_restore_compiled_filename(lex_state->filename);
  234. if (SCNG(script_filtered)) {
  235. efree(SCNG(script_filtered));
  236. SCNG(script_filtered) = NULL;
  237. }
  238. SCNG(script_org) = lex_state->script_org;
  239. SCNG(script_org_size) = lex_state->script_org_size;
  240. SCNG(script_filtered) = lex_state->script_filtered;
  241. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  242. SCNG(input_filter) = lex_state->input_filter;
  243. SCNG(output_filter) = lex_state->output_filter;
  244. SCNG(script_encoding) = lex_state->script_encoding;
  245. SCNG(on_event) = lex_state->on_event;
  246. SCNG(on_event_context) = lex_state->on_event_context;
  247. CG(ast) = lex_state->ast;
  248. CG(ast_arena) = lex_state->ast_arena;
  249. RESET_DOC_COMMENT();
  250. }
  251. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
  252. {
  253. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  254. /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
  255. file_handle->opened_path = NULL;
  256. if (file_handle->free_filename) {
  257. file_handle->filename = NULL;
  258. }
  259. }
  260. ZEND_API void zend_lex_tstring(zval *zv)
  261. {
  262. if (SCNG(on_event)) {
  263. SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
  264. }
  265. ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
  266. }
  /* Byte order marks, as string literals (use sizeof(...)-1 for the byte count).
   * Note that BOM_UTF32_LE starts with the bytes of BOM_UTF16_LE. */
  #define BOM_UTF32_BE    "\x00\x00\xfe\xff"
  #define BOM_UTF32_LE    "\xff\xfe\x00\x00"
  #define BOM_UTF16_BE    "\xfe\xff"
  #define BOM_UTF16_LE    "\xff\xfe"
  #define BOM_UTF8        "\xef\xbb\xbf"
  272. static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
  273. {
  274. const unsigned char *p;
  275. int wchar_size = 2;
  276. int le = 0;
  277. /* utf-16 or utf-32? */
  278. p = script;
  279. assert(p >= script);
  280. while ((size_t)(p-script) < script_size) {
  281. p = memchr(p, 0, script_size-(p-script)-2);
  282. if (!p) {
  283. break;
  284. }
  285. if (*(p+1) == '\0' && *(p+2) == '\0') {
  286. wchar_size = 4;
  287. break;
  288. }
  289. /* searching for UTF-32 specific byte orders, so this will do */
  290. p += 4;
  291. }
  292. /* BE or LE? */
  293. p = script;
  294. assert(p >= script);
  295. while ((size_t)(p-script) < script_size) {
  296. if (*p == '\0' && *(p+wchar_size-1) != '\0') {
  297. /* BE */
  298. le = 0;
  299. break;
  300. } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
  301. /* LE* */
  302. le = 1;
  303. break;
  304. }
  305. p += wchar_size;
  306. }
  307. if (wchar_size == 2) {
  308. return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
  309. } else {
  310. return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
  311. }
  312. return NULL;
  313. }
  314. static const zend_encoding* zend_multibyte_detect_unicode(void)
  315. {
  316. const zend_encoding *script_encoding = NULL;
  317. int bom_size;
  318. unsigned char *pos1, *pos2;
  319. if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
  320. return NULL;
  321. }
  322. /* check out BOM */
  323. if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
  324. script_encoding = zend_multibyte_encoding_utf32be;
  325. bom_size = sizeof(BOM_UTF32_BE)-1;
  326. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
  327. script_encoding = zend_multibyte_encoding_utf32le;
  328. bom_size = sizeof(BOM_UTF32_LE)-1;
  329. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
  330. script_encoding = zend_multibyte_encoding_utf16be;
  331. bom_size = sizeof(BOM_UTF16_BE)-1;
  332. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
  333. script_encoding = zend_multibyte_encoding_utf16le;
  334. bom_size = sizeof(BOM_UTF16_LE)-1;
  335. } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
  336. script_encoding = zend_multibyte_encoding_utf8;
  337. bom_size = sizeof(BOM_UTF8)-1;
  338. }
  339. if (script_encoding) {
  340. /* remove BOM */
  341. LANG_SCNG(script_org) += bom_size;
  342. LANG_SCNG(script_org_size) -= bom_size;
  343. return script_encoding;
  344. }
  345. /* script contains NULL bytes -> auto-detection */
  346. if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
  347. /* check if the NULL byte is after the __HALT_COMPILER(); */
  348. pos2 = LANG_SCNG(script_org);
  349. while ((size_t)(pos1 - pos2) >= sizeof("__HALT_COMPILER();")-1) {
  350. pos2 = memchr(pos2, '_', pos1 - pos2);
  351. if (!pos2) break;
  352. pos2++;
  353. if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
  354. pos2 += sizeof("_HALT_COMPILER")-1;
  355. while (*pos2 == ' ' ||
  356. *pos2 == '\t' ||
  357. *pos2 == '\r' ||
  358. *pos2 == '\n') {
  359. pos2++;
  360. }
  361. if (*pos2 == '(') {
  362. pos2++;
  363. while (*pos2 == ' ' ||
  364. *pos2 == '\t' ||
  365. *pos2 == '\r' ||
  366. *pos2 == '\n') {
  367. pos2++;
  368. }
  369. if (*pos2 == ')') {
  370. pos2++;
  371. while (*pos2 == ' ' ||
  372. *pos2 == '\t' ||
  373. *pos2 == '\r' ||
  374. *pos2 == '\n') {
  375. pos2++;
  376. }
  377. if (*pos2 == ';') {
  378. return NULL;
  379. }
  380. }
  381. }
  382. }
  383. }
  384. /* make best effort if BOM is missing */
  385. return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
  386. }
  387. return NULL;
  388. }
  389. static const zend_encoding* zend_multibyte_find_script_encoding(void)
  390. {
  391. const zend_encoding *script_encoding;
  392. if (CG(detect_unicode)) {
  393. /* check out bom(byte order mark) and see if containing wchars */
  394. script_encoding = zend_multibyte_detect_unicode();
  395. if (script_encoding != NULL) {
  396. /* bom or wchar detection is prior to 'script_encoding' option */
  397. return script_encoding;
  398. }
  399. }
  400. /* if no script_encoding specified, just leave alone */
  401. if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
  402. return NULL;
  403. }
  404. /* if multiple encodings specified, detect automagically */
  405. if (CG(script_encoding_list_size) > 1) {
  406. return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
  407. }
  408. return CG(script_encoding_list)[0];
  409. }
  410. ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
  411. {
  412. const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
  413. const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
  414. if (!script_encoding) {
  415. return FAILURE;
  416. }
  417. /* judge input/output filter */
  418. LANG_SCNG(script_encoding) = script_encoding;
  419. LANG_SCNG(input_filter) = NULL;
  420. LANG_SCNG(output_filter) = NULL;
  421. if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
  422. if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  423. /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
  424. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  425. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
  426. } else {
  427. LANG_SCNG(input_filter) = NULL;
  428. LANG_SCNG(output_filter) = NULL;
  429. }
  430. return SUCCESS;
  431. }
  432. if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
  433. LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
  434. LANG_SCNG(output_filter) = NULL;
  435. } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
  436. LANG_SCNG(input_filter) = NULL;
  437. LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
  438. } else {
  439. /* both script and internal encodings are incompatible w/ flex */
  440. LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
  441. LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
  442. }
  443. return 0;
  444. }
  445. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
  446. {
  447. char *buf;
  448. size_t size;
  449. zend_string *compiled_filename;
  450. if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
  451. /* Still add it to open_files to make destroy_file_handle work */
  452. zend_llist_add_element(&CG(open_files), file_handle);
  453. return FAILURE;
  454. }
  455. ZEND_ASSERT(!EG(exception) && "stream_fixup() should have failed");
  456. zend_llist_add_element(&CG(open_files), file_handle);
  457. if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
  458. zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
  459. size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
  460. fh->handle.stream.handle = (void*)(((char*)fh) + diff);
  461. file_handle->handle.stream.handle = fh->handle.stream.handle;
  462. }
  463. /* Reset the scanner for scanning the new file */
  464. SCNG(yy_in) = file_handle;
  465. SCNG(yy_start) = NULL;
  466. if (size != (size_t)-1) {
  467. if (CG(multibyte)) {
  468. SCNG(script_org) = (unsigned char*)buf;
  469. SCNG(script_org_size) = size;
  470. SCNG(script_filtered) = NULL;
  471. zend_multibyte_set_filter(NULL);
  472. if (SCNG(input_filter)) {
  473. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  474. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  475. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  476. }
  477. buf = (char*)SCNG(script_filtered);
  478. size = SCNG(script_filtered_size);
  479. }
  480. }
  481. SCNG(yy_start) = (unsigned char *)buf;
  482. yy_scan_buffer(buf, (unsigned int)size);
  483. } else {
  484. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  485. }
  486. if (CG(skip_shebang)) {
  487. CG(skip_shebang) = 0;
  488. BEGIN(SHEBANG);
  489. } else {
  490. BEGIN(INITIAL);
  491. }
  492. if (file_handle->opened_path) {
  493. compiled_filename = zend_string_copy(file_handle->opened_path);
  494. } else {
  495. compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
  496. }
  497. zend_set_compiled_filename(compiled_filename);
  498. zend_string_release_ex(compiled_filename, 0);
  499. RESET_DOC_COMMENT();
  500. CG(zend_lineno) = 1;
  501. CG(increment_lineno) = 0;
  502. return SUCCESS;
  503. }
  504. END_EXTERN_C()
  505. static zend_op_array *zend_compile(int type)
  506. {
  507. zend_op_array *op_array = NULL;
  508. zend_bool original_in_compilation = CG(in_compilation);
  509. CG(in_compilation) = 1;
  510. CG(ast) = NULL;
  511. CG(ast_arena) = zend_arena_create(1024 * 32);
  512. if (!zendparse()) {
  513. int last_lineno = CG(zend_lineno);
  514. zend_file_context original_file_context;
  515. zend_oparray_context original_oparray_context;
  516. zend_op_array *original_active_op_array = CG(active_op_array);
  517. op_array = emalloc(sizeof(zend_op_array));
  518. init_op_array(op_array, type, INITIAL_OP_ARRAY_SIZE);
  519. CG(active_op_array) = op_array;
  520. /* Use heap to not waste arena memory */
  521. op_array->fn_flags |= ZEND_ACC_HEAP_RT_CACHE;
  522. if (zend_ast_process) {
  523. zend_ast_process(CG(ast));
  524. }
  525. zend_file_context_begin(&original_file_context);
  526. zend_oparray_context_begin(&original_oparray_context);
  527. zend_compile_top_stmt(CG(ast));
  528. CG(zend_lineno) = last_lineno;
  529. zend_emit_final_return(type == ZEND_USER_FUNCTION);
  530. op_array->line_start = 1;
  531. op_array->line_end = last_lineno;
  532. pass_two(op_array);
  533. zend_oparray_context_end(&original_oparray_context);
  534. zend_file_context_end(&original_file_context);
  535. CG(active_op_array) = original_active_op_array;
  536. }
  537. zend_ast_destroy(CG(ast));
  538. zend_arena_destroy(CG(ast_arena));
  539. CG(in_compilation) = original_in_compilation;
  540. return op_array;
  541. }
  542. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
  543. {
  544. zend_lex_state original_lex_state;
  545. zend_op_array *op_array = NULL;
  546. zend_save_lexical_state(&original_lex_state);
  547. if (open_file_for_scanning(file_handle)==FAILURE) {
  548. if (!EG(exception)) {
  549. if (type==ZEND_REQUIRE) {
  550. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
  551. zend_bailout();
  552. } else {
  553. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
  554. }
  555. }
  556. } else {
  557. op_array = zend_compile(ZEND_USER_FUNCTION);
  558. }
  559. zend_restore_lexical_state(&original_lex_state);
  560. return op_array;
  561. }
  562. ZEND_API zend_ast *zend_compile_string_to_ast(
  563. zend_string *code, zend_arena **ast_arena, const char *filename) {
  564. zval code_zv;
  565. zend_bool original_in_compilation;
  566. zend_lex_state original_lex_state;
  567. zend_ast *ast;
  568. ZVAL_STR_COPY(&code_zv, code);
  569. original_in_compilation = CG(in_compilation);
  570. CG(in_compilation) = 1;
  571. zend_save_lexical_state(&original_lex_state);
  572. if (zend_prepare_string_for_scanning(&code_zv, filename) == SUCCESS) {
  573. CG(ast) = NULL;
  574. CG(ast_arena) = zend_arena_create(1024 * 32);
  575. LANG_SCNG(yy_state) = yycINITIAL;
  576. if (zendparse() != 0) {
  577. zend_ast_destroy(CG(ast));
  578. zend_arena_destroy(CG(ast_arena));
  579. CG(ast) = NULL;
  580. }
  581. }
  582. /* restore_lexical_state changes CG(ast) and CG(ast_arena) */
  583. ast = CG(ast);
  584. *ast_arena = CG(ast_arena);
  585. zend_restore_lexical_state(&original_lex_state);
  586. CG(in_compilation) = original_in_compilation;
  587. zval_dtor(&code_zv);
  588. return ast;
  589. }
  590. zend_op_array *compile_filename(int type, zval *filename)
  591. {
  592. zend_file_handle file_handle;
  593. zval tmp;
  594. zend_op_array *retval;
  595. zend_string *opened_path = NULL;
  596. if (Z_TYPE_P(filename) != IS_STRING) {
  597. ZVAL_STR(&tmp, zval_get_string(filename));
  598. filename = &tmp;
  599. }
  600. zend_stream_init_filename(&file_handle, Z_STRVAL_P(filename));
  601. retval = zend_compile_file(&file_handle, type);
  602. if (retval && file_handle.handle.stream.handle) {
  603. if (!file_handle.opened_path) {
  604. file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
  605. }
  606. zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
  607. if (opened_path) {
  608. zend_string_release_ex(opened_path, 0);
  609. }
  610. }
  611. zend_destroy_file_handle(&file_handle);
  612. if (UNEXPECTED(filename == &tmp)) {
  613. zval_ptr_dtor(&tmp);
  614. }
  615. return retval;
  616. }
  617. ZEND_API int zend_prepare_string_for_scanning(zval *str, const char *filename)
  618. {
  619. char *buf;
  620. size_t size, old_len;
  621. zend_string *new_compiled_filename;
  622. /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
  623. old_len = Z_STRLEN_P(str);
  624. Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
  625. Z_TYPE_INFO_P(str) = IS_STRING_EX;
  626. memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
  627. SCNG(yy_in) = NULL;
  628. SCNG(yy_start) = NULL;
  629. buf = Z_STRVAL_P(str);
  630. size = old_len;
  631. if (CG(multibyte)) {
  632. SCNG(script_org) = (unsigned char*)buf;
  633. SCNG(script_org_size) = size;
  634. SCNG(script_filtered) = NULL;
  635. zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
  636. if (SCNG(input_filter)) {
  637. if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
  638. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  639. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  640. }
  641. buf = (char*)SCNG(script_filtered);
  642. size = SCNG(script_filtered_size);
  643. }
  644. }
  645. yy_scan_buffer(buf, (unsigned int)size);
  646. new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
  647. zend_set_compiled_filename(new_compiled_filename);
  648. zend_string_release_ex(new_compiled_filename, 0);
  649. CG(zend_lineno) = 1;
  650. CG(increment_lineno) = 0;
  651. RESET_DOC_COMMENT();
  652. return SUCCESS;
  653. }
  654. ZEND_API size_t zend_get_scanned_file_offset(void)
  655. {
  656. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  657. if (SCNG(input_filter)) {
  658. size_t original_offset = offset, length = 0;
  659. do {
  660. unsigned char *p = NULL;
  661. if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
  662. return (size_t)-1;
  663. }
  664. efree(p);
  665. if (length > original_offset) {
  666. offset--;
  667. } else if (length < original_offset) {
  668. offset++;
  669. }
  670. } while (original_offset != length);
  671. }
  672. return offset;
  673. }
  674. zend_op_array *compile_string(zval *source_string, const char *filename)
  675. {
  676. zend_lex_state original_lex_state;
  677. zend_op_array *op_array = NULL;
  678. zval tmp;
  679. if (UNEXPECTED(Z_TYPE_P(source_string) != IS_STRING)) {
  680. ZVAL_STR(&tmp, zval_get_string_func(source_string));
  681. } else {
  682. ZVAL_COPY(&tmp, source_string);
  683. }
  684. if (Z_STRLEN(tmp)==0) {
  685. zval_ptr_dtor(&tmp);
  686. return NULL;
  687. }
  688. zend_save_lexical_state(&original_lex_state);
  689. if (zend_prepare_string_for_scanning(&tmp, filename) == SUCCESS) {
  690. BEGIN(ST_IN_SCRIPTING);
  691. op_array = zend_compile(ZEND_EVAL_CODE);
  692. }
  693. zend_restore_lexical_state(&original_lex_state);
  694. zval_ptr_dtor(&tmp);
  695. return op_array;
  696. }
  697. BEGIN_EXTERN_C()
  698. int highlight_file(const char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
  699. {
  700. zend_lex_state original_lex_state;
  701. zend_file_handle file_handle;
  702. zend_stream_init_filename(&file_handle, filename);
  703. zend_save_lexical_state(&original_lex_state);
  704. if (open_file_for_scanning(&file_handle)==FAILURE) {
  705. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
  706. zend_restore_lexical_state(&original_lex_state);
  707. return FAILURE;
  708. }
  709. zend_highlight(syntax_highlighter_ini);
  710. if (SCNG(script_filtered)) {
  711. efree(SCNG(script_filtered));
  712. SCNG(script_filtered) = NULL;
  713. }
  714. zend_destroy_file_handle(&file_handle);
  715. zend_restore_lexical_state(&original_lex_state);
  716. return SUCCESS;
  717. }
  718. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, const char *str_name)
  719. {
  720. zend_lex_state original_lex_state;
  721. zval tmp;
  722. if (UNEXPECTED(Z_TYPE_P(str) != IS_STRING)) {
  723. ZVAL_STR(&tmp, zval_get_string_func(str));
  724. str = &tmp;
  725. }
  726. zend_save_lexical_state(&original_lex_state);
  727. if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
  728. zend_restore_lexical_state(&original_lex_state);
  729. if (UNEXPECTED(str == &tmp)) {
  730. zval_ptr_dtor(&tmp);
  731. }
  732. return FAILURE;
  733. }
  734. BEGIN(INITIAL);
  735. zend_highlight(syntax_highlighter_ini);
  736. if (SCNG(script_filtered)) {
  737. efree(SCNG(script_filtered));
  738. SCNG(script_filtered) = NULL;
  739. }
  740. zend_restore_lexical_state(&original_lex_state);
  741. if (UNEXPECTED(str == &tmp)) {
  742. zval_ptr_dtor(&tmp);
  743. }
  744. return SUCCESS;
  745. }
  746. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
  747. {
  748. size_t length;
  749. unsigned char *new_yy_start;
  750. /* convert and set */
  751. if (!SCNG(input_filter)) {
  752. if (SCNG(script_filtered)) {
  753. efree(SCNG(script_filtered));
  754. SCNG(script_filtered) = NULL;
  755. }
  756. SCNG(script_filtered_size) = 0;
  757. length = SCNG(script_org_size);
  758. new_yy_start = SCNG(script_org);
  759. } else {
  760. if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
  761. zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
  762. "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
  763. }
  764. if (SCNG(script_filtered)) {
  765. efree(SCNG(script_filtered));
  766. }
  767. SCNG(script_filtered) = new_yy_start;
  768. SCNG(script_filtered_size) = length;
  769. }
  770. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  771. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  772. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  773. SCNG(yy_limit) = new_yy_start + length;
  774. SCNG(yy_start) = new_yy_start;
  775. }
  /*
   * Store the yyleng bytes at yytext into zendlval as a string.
   *
   * With SCNG(output_filter) set, the bytes are passed through the filter and
   * the filter's buffer is copied into the zval and then freed. Otherwise a
   * one-byte value uses the interned single-character string and anything else
   * is copied.
   *
   * Every macro parameter is parenthesized so that an expression argument
   * (e.g. "yyleng - offset") keeps its meaning. The macro expands to a bare
   * if/else statement rather than do { } while (0); that shape is kept because
   * the call sites are not all visible from here.
   */
  // TODO: avoid reallocation ???
  # define zend_copy_value(zendlval, yytext, yyleng) \
      if (SCNG(output_filter)) { \
          size_t sz = 0; \
          char *s = NULL; \
          SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
          ZVAL_STRINGL((zendlval), s, sz); \
          efree(s); \
      } else if ((yyleng) == 1) { \
          ZVAL_INTERNED_STR((zendlval), ZSTR_CHAR((zend_uchar)*(yytext))); \
      } else { \
          ZVAL_STRINGL((zendlval), (yytext), (yyleng)); \
      }
  789. static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
  790. {
  791. register char *s, *t;
  792. char *end;
  793. if (len <= 1) {
  794. if (len < 1) {
  795. ZVAL_EMPTY_STRING(zendlval);
  796. } else {
  797. zend_uchar c = (zend_uchar)*str;
  798. if (c == '\n' || c == '\r') {
  799. CG(zend_lineno)++;
  800. }
  801. ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
  802. }
  803. goto skip_escape_conversion;
  804. }
  805. ZVAL_STRINGL(zendlval, str, len);
  806. /* convert escape sequences */
  807. s = Z_STRVAL_P(zendlval);
  808. end = s+Z_STRLEN_P(zendlval);
  809. while (1) {
  810. if (UNEXPECTED(*s=='\\')) {
  811. break;
  812. }
  813. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  814. CG(zend_lineno)++;
  815. }
  816. s++;
  817. if (s == end) {
  818. goto skip_escape_conversion;
  819. }
  820. }
  821. t = s;
  822. while (s<end) {
  823. if (*s=='\\') {
  824. s++;
  825. if (s >= end) {
  826. *t++ = '\\';
  827. break;
  828. }
  829. switch(*s) {
  830. case 'n':
  831. *t++ = '\n';
  832. break;
  833. case 'r':
  834. *t++ = '\r';
  835. break;
  836. case 't':
  837. *t++ = '\t';
  838. break;
  839. case 'f':
  840. *t++ = '\f';
  841. break;
  842. case 'v':
  843. *t++ = '\v';
  844. break;
  845. case 'e':
  846. #ifdef ZEND_WIN32
  847. *t++ = VK_ESCAPE;
  848. #else
  849. *t++ = '\e';
  850. #endif
  851. break;
  852. case '"':
  853. case '`':
  854. if (*s != quote_type) {
  855. *t++ = '\\';
  856. *t++ = *s;
  857. break;
  858. }
  859. case '\\':
  860. case '$':
  861. *t++ = *s;
  862. break;
  863. case 'x':
  864. case 'X':
  865. if (ZEND_IS_HEX(*(s+1))) {
  866. char hex_buf[3] = { 0, 0, 0 };
  867. hex_buf[0] = *(++s);
  868. if (ZEND_IS_HEX(*(s+1))) {
  869. hex_buf[1] = *(++s);
  870. }
  871. *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
  872. } else {
  873. *t++ = '\\';
  874. *t++ = *s;
  875. }
  876. break;
  877. /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
  878. case 'u':
  879. {
  880. /* cache where we started so we can parse after validating */
  881. char *start = s + 1;
  882. size_t len = 0;
  883. zend_bool valid = 1;
  884. unsigned long codepoint;
  885. if (*start != '{') {
  886. /* we silently let this pass to avoid breaking code
  887. * with JSON in string literals (e.g. "\"\u202e\""
  888. */
  889. *t++ = '\\';
  890. *t++ = 'u';
  891. break;
  892. } else {
  893. /* on the other hand, invalid \u{blah} errors */
  894. s++;
  895. len++;
  896. s++;
  897. while (*s != '}') {
  898. if (!ZEND_IS_HEX(*s)) {
  899. valid = 0;
  900. break;
  901. } else {
  902. len++;
  903. }
  904. s++;
  905. }
  906. if (*s == '}') {
  907. valid = 1;
  908. len++;
  909. }
  910. }
  911. /* \u{} is invalid */
  912. if (len <= 2) {
  913. valid = 0;
  914. }
  915. if (!valid) {
  916. zend_throw_exception(zend_ce_parse_error,
  917. "Invalid UTF-8 codepoint escape sequence", 0);
  918. zval_ptr_dtor(zendlval);
  919. ZVAL_UNDEF(zendlval);
  920. return FAILURE;
  921. }
  922. errno = 0;
  923. codepoint = strtoul(start + 1, NULL, 16);
  924. /* per RFC 3629, UTF-8 can only represent 21 bits */
  925. if (codepoint > 0x10FFFF || errno) {
  926. zend_throw_exception(zend_ce_parse_error,
  927. "Invalid UTF-8 codepoint escape sequence: Codepoint too large", 0);
  928. zval_ptr_dtor(zendlval);
  929. ZVAL_UNDEF(zendlval);
  930. return FAILURE;
  931. }
  932. /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
  933. if (codepoint < 0x80) {
  934. *t++ = codepoint;
  935. } else if (codepoint <= 0x7FF) {
  936. *t++ = (codepoint >> 6) + 0xC0;
  937. *t++ = (codepoint & 0x3F) + 0x80;
  938. } else if (codepoint <= 0xFFFF) {
  939. *t++ = (codepoint >> 12) + 0xE0;
  940. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  941. *t++ = (codepoint & 0x3F) + 0x80;
  942. } else if (codepoint <= 0x10FFFF) {
  943. *t++ = (codepoint >> 18) + 0xF0;
  944. *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
  945. *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
  946. *t++ = (codepoint & 0x3F) + 0x80;
  947. }
  948. }
  949. break;
  950. default:
  951. /* check for an octal */
  952. if (ZEND_IS_OCT(*s)) {
  953. char octal_buf[4] = { 0, 0, 0, 0 };
  954. octal_buf[0] = *s;
  955. if (ZEND_IS_OCT(*(s+1))) {
  956. octal_buf[1] = *(++s);
  957. if (ZEND_IS_OCT(*(s+1))) {
  958. octal_buf[2] = *(++s);
  959. }
  960. }
  961. if (octal_buf[2] &&
  962. (octal_buf[0] > '3')) {
  963. /* 3 octit values must not overflow 0xFF (\377) */
  964. zend_error(E_COMPILE_WARNING, "Octal escape sequence overflow \\%s is greater than \\377", octal_buf);
  965. }
  966. *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
  967. } else {
  968. *t++ = '\\';
  969. *t++ = *s;
  970. }
  971. break;
  972. }
  973. } else {
  974. *t++ = *s;
  975. }
  976. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  977. CG(zend_lineno)++;
  978. }
  979. s++;
  980. }
  981. *t = 0;
  982. Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval);
  983. skip_escape_conversion:
  984. if (SCNG(output_filter)) {
  985. size_t sz = 0;
  986. unsigned char *str;
  987. // TODO: avoid realocation ???
  988. s = Z_STRVAL_P(zendlval);
  989. SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
  990. zval_ptr_dtor(zendlval);
  991. ZVAL_STRINGL(zendlval, (char *) str, sz);
  992. efree(str);
  993. }
  994. return SUCCESS;
  995. }
  /*
   * Markers for the whitespace character used to indent a heredoc/nowdoc body.
   * NOTE(review): the values are distinct bits, so they can presumably be
   * OR-ed together to detect mixed indentation; the use site is not visible
   * here, so confirm before relying on that.
   */
  #define HEREDOC_USING_SPACES 0x1
  #define HEREDOC_USING_TABS 0x2
  /*
   * Find the first line break in the byte range [str, end).
   *
   * "\n", "\r" and "\r\n" are all recognized as line breaks.
   *
   * str          start of the range to search
   * end          one past the last byte of the range
   * newline_len  receives the length of the line break found (1 or 2), or 0
   *              when there is none
   *
   * Returns a pointer to the first byte of the line break, or NULL if the
   * range contains no line break.
   */
  static const char *next_newline(const char *str, const char *end, size_t *newline_len) {
      for (; str < end; str++) {
          if (*str == '\r') {
              /* "\r\n" counts as one two-byte break, but only when the '\n' lies inside the range */
              *newline_len = str + 1 < end && *(str + 1) == '\n' ? 2 : 1;
              /* Return here: falling through to the next iteration would report
               * the '\n' of "\r\n" with length 1 and would skip a lone '\r' entirely. */
              return str;
          } else if (*str == '\n') {
              *newline_len = 1;
              return str;
          }
      }
      *newline_len = 0;
      return NULL;
  }
  1010. static zend_bool strip_multiline_string_indentation(
  1011. zval *zendlval, int indentation, zend_bool using_spaces,
  1012. zend_bool newline_at_start, zend_bool newline_at_end)
  1013. {
  1014. const char *str = Z_STRVAL_P(zendlval), *end = str + Z_STRLEN_P(zendlval);
  1015. char *copy = Z_STRVAL_P(zendlval);
  1016. int newline_count = 0;
  1017. size_t newline_len;
  1018. const char *nl;
  1019. if (!newline_at_start) {
  1020. nl = next_newline(str, end, &newline_len);
  1021. if (!nl) {
  1022. return 1;
  1023. }
  1024. str = nl + newline_len;
  1025. copy = (char *) nl + newline_len;
  1026. newline_count++;
  1027. } else {
  1028. nl = str;
  1029. }
  1030. /* <= intentional */
  1031. while (str <= end && nl) {
  1032. size_t skip;
  1033. nl = next_newline(str, end, &newline_len);
  1034. if (!nl && newline_at_end) {
  1035. nl = end;
  1036. }
  1037. /* Try to skip indentation */
  1038. for (skip = 0; skip < indentation; skip++, str++) {
  1039. if (str == nl) {
  1040. /* Don't require full indentation on whitespace-only lines */
  1041. break;
  1042. }
  1043. if (str == end || (*str != ' ' && *str != '\t')) {
  1044. CG(zend_lineno) += newline_count;
  1045. zend_throw_exception_ex(zend_ce_parse_error, 0,
  1046. "Invalid body indentation level (expecting an indentation level of at least %d)", indentation);
  1047. goto error;
  1048. }
  1049. if ((!using_spaces && *str == ' ') || (using_spaces && *str == '\t')) {
  1050. CG(zend_lineno) += newline_count;
  1051. zend_throw_exception(zend_ce_parse_error,
  1052. "Invalid indentation - tabs and spaces cannot be mixed", 0);
  1053. goto error;
  1054. }
  1055. }
  1056. if (str == end) {
  1057. break;
  1058. }
  1059. size_t len = nl ? (nl - str + newline_len) : (end - str);
  1060. memmove(copy, str, len);
  1061. str += len;
  1062. copy += len;
  1063. newline_count++;
  1064. }
  1065. *copy = '\0';
  1066. Z_STRLEN_P(zendlval) = copy - Z_STRVAL_P(zendlval);
  1067. return 1;
  1068. error:
  1069. zval_ptr_dtor_str(zendlval);
  1070. ZVAL_UNDEF(zendlval);
  1071. return 0;
  1072. }
  1073. static void copy_heredoc_label_stack(void *void_heredoc_label)
  1074. {
  1075. zend_heredoc_label *heredoc_label = void_heredoc_label;
  1076. zend_heredoc_label *new_heredoc_label = emalloc(sizeof(zend_heredoc_label));
  1077. *new_heredoc_label = *heredoc_label;
  1078. new_heredoc_label->label = estrndup(heredoc_label->label, heredoc_label->length);
  1079. zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
  1080. }
  1081. /* Check that { }, [ ], ( ) are nested correctly */
  1082. static void report_bad_nesting(char opening, int opening_lineno, char closing)
  1083. {
  1084. char buf[256];
  1085. size_t used = 0;
  1086. used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
  1087. if (opening_lineno != CG(zend_lineno)) {
  1088. used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
  1089. }
  1090. if (closing) { /* 'closing' will be 0 if at end of file */
  1091. used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
  1092. }
  1093. zend_throw_exception(zend_ce_parse_error, buf, 0);
  1094. }
  1095. static void enter_nesting(char opening)
  1096. {
  1097. zend_nest_location nest_loc = {opening, CG(zend_lineno)};
  1098. zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
  1099. }
  1100. static int exit_nesting(char closing)
  1101. {
  1102. if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
  1103. zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
  1104. return -1;
  1105. }
  1106. zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
  1107. char opening = nest_loc->text;
  1108. if ((opening == '{' && closing != '}') ||
  1109. (opening == '[' && closing != ']') ||
  1110. (opening == '(' && closing != ')')) {
  1111. report_bad_nesting(opening, nest_loc->lineno, closing);
  1112. return -1;
  1113. }
  1114. zend_stack_del_top(&SCNG(nest_location_stack));
  1115. return 0;
  1116. }
  1117. static int check_nesting_at_end()
  1118. {
  1119. if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
  1120. zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
  1121. report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
  1122. return -1;
  1123. }
  1124. return 0;
  1125. }
  /*
   * Helper macros for the scanner function. They rely on names from the
   * expanding scope: the variables `elem`, `token` and `offset`, and the
   * labels emit_token, emit_token_with_val, emit_token_with_str and
   * skip_token. Macro parameters are parenthesized where they are assigned so
   * that an expression argument keeps its meaning.
   */

  /* True when `elem` is non-NULL. */
  #define PARSER_MODE() \
      EXPECTED(elem != NULL)

  /* Set the token and jump to emit_token. */
  #define RETURN_TOKEN(_token) do { \
          token = (_token); \
          goto emit_token; \
      } while (0)

  /* Set the token and jump to emit_token_with_val. */
  #define RETURN_TOKEN_WITH_VAL(_token) do { \
          token = (_token); \
          goto emit_token_with_val; \
      } while (0)

  /* Set the token and the offset, then jump to emit_token_with_str. */
  #define RETURN_TOKEN_WITH_STR(_token, _offset) do { \
          token = (_token); \
          offset = (_offset); \
          goto emit_token_with_str; \
      } while (0)

  /* Set the token; jump to skip_token in parser mode, to emit_token otherwise. */
  #define RETURN_OR_SKIP_TOKEN(_token) do { \
          token = (_token); \
          if (PARSER_MODE()) { \
              goto skip_token; \
          } \
          goto emit_token; \
      } while (0)

  /*
   * Run exit_nesting() for a closing bracket. If it fails in parser mode,
   * T_ERROR is returned; in every other case the bracket itself is returned.
   */
  #define RETURN_EXIT_NESTING_TOKEN(_token) do { \
          if (exit_nesting(_token) && PARSER_MODE()) { \
              RETURN_TOKEN(T_ERROR); \
          } else { \
              RETURN_TOKEN(_token); \
          } \
      } while(0)

  /*
   * Run check_nesting_at_end(). If it fails in parser mode, T_ERROR is
   * returned; in every other case END is returned.
   */
  #define RETURN_END_TOKEN do { \
          if (check_nesting_at_end() && PARSER_MODE()) { \
              RETURN_TOKEN(T_ERROR); \
          } else { \
              RETURN_TOKEN(END); \
          } \
      } while (0)
  1162. int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
  1163. {
  1164. int token;
  1165. int offset;
  1166. int start_line = CG(zend_lineno);
  1167. ZVAL_UNDEF(zendlval);
  1168. restart:
  1169. SCNG(yy_text) = YYCURSOR;
  1170. /*!re2c
  1171. re2c:yyfill:check = 0;
  1172. LNUM [0-9]+(_[0-9]+)*
  1173. DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
  1174. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  1175. HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
  1176. BNUM "0b"[01]+(_[01]+)*
  1177. LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
  1178. WHITESPACE [ \n\r\t]+
  1179. TABS_AND_SPACES [ \t]*
  1180. TOKENS [;:,.|^&+-/*=%!~$<>?@]
  1181. ANY_CHAR [^]
  1182. NEWLINE ("\r"|"\n"|"\r\n")
  1183. /* compute yyleng before each rule */
  1184. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  1185. <ST_IN_SCRIPTING>"exit" {
  1186. RETURN_TOKEN(T_EXIT);
  1187. }
  1188. <ST_IN_SCRIPTING>"die" {
  1189. RETURN_TOKEN(T_EXIT);
  1190. }
  1191. <ST_IN_SCRIPTING>"fn" {
  1192. RETURN_TOKEN(T_FN);
  1193. }
  1194. <ST_IN_SCRIPTING>"function" {
  1195. RETURN_TOKEN(T_FUNCTION);
  1196. }
  1197. <ST_IN_SCRIPTING>"const" {
  1198. RETURN_TOKEN(T_CONST);
  1199. }
  1200. <ST_IN_SCRIPTING>"return" {
  1201. RETURN_TOKEN(T_RETURN);
  1202. }
  1203. <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
  1204. yyless(yyleng - 1);
  1205. HANDLE_NEWLINES(yytext, yyleng);
  1206. RETURN_TOKEN(T_YIELD_FROM);
  1207. }
  1208. <ST_IN_SCRIPTING>"yield" {
  1209. RETURN_TOKEN(T_YIELD);
  1210. }
  1211. <ST_IN_SCRIPTING>"try" {
  1212. RETURN_TOKEN(T_TRY);
  1213. }
  1214. <ST_IN_SCRIPTING>"catch" {
  1215. RETURN_TOKEN(T_CATCH);
  1216. }
  1217. <ST_IN_SCRIPTING>"finally" {
  1218. RETURN_TOKEN(T_FINALLY);
  1219. }
  1220. <ST_IN_SCRIPTING>"throw" {
  1221. RETURN_TOKEN(T_THROW);
  1222. }
  1223. <ST_IN_SCRIPTING>"if" {
  1224. RETURN_TOKEN(T_IF);
  1225. }
  1226. <ST_IN_SCRIPTING>"elseif" {
  1227. RETURN_TOKEN(T_ELSEIF);
  1228. }
  1229. <ST_IN_SCRIPTING>"endif" {
  1230. RETURN_TOKEN(T_ENDIF);
  1231. }
  1232. <ST_IN_SCRIPTING>"else" {
  1233. RETURN_TOKEN(T_ELSE);
  1234. }
  1235. <ST_IN_SCRIPTING>"while" {
  1236. RETURN_TOKEN(T_WHILE);
  1237. }
  1238. <ST_IN_SCRIPTING>"endwhile" {
  1239. RETURN_TOKEN(T_ENDWHILE);
  1240. }
  1241. <ST_IN_SCRIPTING>"do" {
  1242. RETURN_TOKEN(T_DO);
  1243. }
  1244. <ST_IN_SCRIPTING>"for" {
  1245. RETURN_TOKEN(T_FOR);
  1246. }
  1247. <ST_IN_SCRIPTING>"endfor" {
  1248. RETURN_TOKEN(T_ENDFOR);
  1249. }
  1250. <ST_IN_SCRIPTING>"foreach" {
  1251. RETURN_TOKEN(T_FOREACH);
  1252. }
  1253. <ST_IN_SCRIPTING>"endforeach" {
  1254. RETURN_TOKEN(T_ENDFOREACH);
  1255. }
  1256. <ST_IN_SCRIPTING>"declare" {
  1257. RETURN_TOKEN(T_DECLARE);
  1258. }
  1259. <ST_IN_SCRIPTING>"enddeclare" {
  1260. RETURN_TOKEN(T_ENDDECLARE);
  1261. }
  1262. <ST_IN_SCRIPTING>"instanceof" {
  1263. RETURN_TOKEN(T_INSTANCEOF);
  1264. }
  1265. <ST_IN_SCRIPTING>"as" {
  1266. RETURN_TOKEN(T_AS);
  1267. }
  1268. <ST_IN_SCRIPTING>"switch" {
  1269. RETURN_TOKEN(T_SWITCH);
  1270. }
  1271. <ST_IN_SCRIPTING>"endswitch" {
  1272. RETURN_TOKEN(T_ENDSWITCH);
  1273. }
  1274. <ST_IN_SCRIPTING>"case" {
  1275. RETURN_TOKEN(T_CASE);
  1276. }
  1277. <ST_IN_SCRIPTING>"default" {
  1278. RETURN_TOKEN(T_DEFAULT);
  1279. }
  1280. <ST_IN_SCRIPTING>"break" {
  1281. RETURN_TOKEN(T_BREAK);
  1282. }
  1283. <ST_IN_SCRIPTING>"continue" {
  1284. RETURN_TOKEN(T_CONTINUE);
  1285. }
  1286. <ST_IN_SCRIPTING>"goto" {
  1287. RETURN_TOKEN(T_GOTO);
  1288. }
  1289. <ST_IN_SCRIPTING>"echo" {
  1290. RETURN_TOKEN(T_ECHO);
  1291. }
  1292. <ST_IN_SCRIPTING>"print" {
  1293. RETURN_TOKEN(T_PRINT);
  1294. }
  1295. <ST_IN_SCRIPTING>"class" {
  1296. RETURN_TOKEN(T_CLASS);
  1297. }
  1298. <ST_IN_SCRIPTING>"interface" {
  1299. RETURN_TOKEN(T_INTERFACE);
  1300. }
  1301. <ST_IN_SCRIPTING>"trait" {
  1302. RETURN_TOKEN(T_TRAIT);
  1303. }
  1304. <ST_IN_SCRIPTING>"extends" {
  1305. RETURN_TOKEN(T_EXTENDS);
  1306. }
  1307. <ST_IN_SCRIPTING>"implements" {
  1308. RETURN_TOKEN(T_IMPLEMENTS);
  1309. }
  1310. <ST_IN_SCRIPTING>"->" {
  1311. yy_push_state(ST_LOOKING_FOR_PROPERTY);
  1312. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1313. }
  1314. <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
  1315. goto return_whitespace;
  1316. }
  1317. <ST_LOOKING_FOR_PROPERTY>"->" {
  1318. RETURN_TOKEN(T_OBJECT_OPERATOR);
  1319. }
  1320. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  1321. yy_pop_state();
  1322. RETURN_TOKEN_WITH_STR(T_STRING, 0);
  1323. }
  1324. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  1325. yyless(0);
  1326. yy_pop_state();
  1327. goto restart;
  1328. }
  1329. <ST_IN_SCRIPTING>"::" {
  1330. RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
  1331. }
  1332. <ST_IN_SCRIPTING>"\\" {
  1333. RETURN_TOKEN(T_NS_SEPARATOR);
  1334. }
  1335. <ST_IN_SCRIPTING>"..." {
  1336. RETURN_TOKEN(T_ELLIPSIS);
  1337. }
  1338. <ST_IN_SCRIPTING>"??" {
  1339. RETURN_TOKEN(T_COALESCE);
  1340. }
  1341. <ST_IN_SCRIPTING>"new" {
  1342. RETURN_TOKEN(T_NEW);
  1343. }
  1344. <ST_IN_SCRIPTING>"clone" {
  1345. RETURN_TOKEN(T_CLONE);
  1346. }
  1347. <ST_IN_SCRIPTING>"var" {
  1348. RETURN_TOKEN(T_VAR);
  1349. }
  1350. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  1351. RETURN_TOKEN(T_INT_CAST);
  1352. }
  1353. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("double"|"float"){TABS_AND_SPACES}")" {
  1354. RETURN_TOKEN(T_DOUBLE_CAST);
  1355. }
  1356. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"real"{TABS_AND_SPACES}")" {
  1357. if (PARSER_MODE()) {
  1358. zend_throw_exception(zend_ce_parse_error, "The (real) cast has been removed, use (float) instead", 0);
  1359. RETURN_TOKEN(T_ERROR);
  1360. }
  1361. RETURN_TOKEN(T_DOUBLE_CAST);
  1362. }
  1363. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  1364. RETURN_TOKEN(T_STRING_CAST);
  1365. }
  1366. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  1367. RETURN_TOKEN(T_ARRAY_CAST);
  1368. }
  1369. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  1370. RETURN_TOKEN(T_OBJECT_CAST);
  1371. }
  1372. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  1373. RETURN_TOKEN(T_BOOL_CAST);
  1374. }
  1375. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  1376. RETURN_TOKEN(T_UNSET_CAST);
  1377. }
  1378. <ST_IN_SCRIPTING>"eval" {
  1379. RETURN_TOKEN(T_EVAL);
  1380. }
  1381. <ST_IN_SCRIPTING>"include" {
  1382. RETURN_TOKEN(T_INCLUDE);
  1383. }
  1384. <ST_IN_SCRIPTING>"include_once" {
  1385. RETURN_TOKEN(T_INCLUDE_ONCE);
  1386. }
  1387. <ST_IN_SCRIPTING>"require" {
  1388. RETURN_TOKEN(T_REQUIRE);
  1389. }
  1390. <ST_IN_SCRIPTING>"require_once" {
  1391. RETURN_TOKEN(T_REQUIRE_ONCE);
  1392. }
  1393. <ST_IN_SCRIPTING>"namespace" {
  1394. RETURN_TOKEN(T_NAMESPACE);
  1395. }
  1396. <ST_IN_SCRIPTING>"use" {
  1397. RETURN_TOKEN(T_USE);
  1398. }
  1399. <ST_IN_SCRIPTING>"insteadof" {
  1400. RETURN_TOKEN(T_INSTEADOF);
  1401. }
  1402. <ST_IN_SCRIPTING>"global" {
  1403. RETURN_TOKEN(T_GLOBAL);
  1404. }
  1405. <ST_IN_SCRIPTING>"isset" {
  1406. RETURN_TOKEN(T_ISSET);
  1407. }
  1408. <ST_IN_SCRIPTING>"empty" {
  1409. RETURN_TOKEN(T_EMPTY);
  1410. }
  1411. <ST_IN_SCRIPTING>"__halt_compiler" {
  1412. RETURN_TOKEN(T_HALT_COMPILER);
  1413. }
  1414. <ST_IN_SCRIPTING>"static" {
  1415. RETURN_TOKEN(T_STATIC);
  1416. }
  1417. <ST_IN_SCRIPTING>"abstract" {
  1418. RETURN_TOKEN(T_ABSTRACT);
  1419. }
  1420. <ST_IN_SCRIPTING>"final" {
  1421. RETURN_TOKEN(T_FINAL);
  1422. }
  1423. <ST_IN_SCRIPTING>"private" {
  1424. RETURN_TOKEN(T_PRIVATE);
  1425. }
  1426. <ST_IN_SCRIPTING>"protected" {
  1427. RETURN_TOKEN(T_PROTECTED);
  1428. }
  1429. <ST_IN_SCRIPTING>"public" {
  1430. RETURN_TOKEN(T_PUBLIC);
  1431. }
  1432. <ST_IN_SCRIPTING>"unset" {
  1433. RETURN_TOKEN(T_UNSET);
  1434. }
  1435. <ST_IN_SCRIPTING>"=>" {
  1436. RETURN_TOKEN(T_DOUBLE_ARROW);
  1437. }
  1438. <ST_IN_SCRIPTING>"list" {
  1439. RETURN_TOKEN(T_LIST);
  1440. }
  1441. <ST_IN_SCRIPTING>"array" {
  1442. RETURN_TOKEN(T_ARRAY);
  1443. }
  1444. <ST_IN_SCRIPTING>"callable" {
  1445. RETURN_TOKEN(T_CALLABLE);
  1446. }
  1447. <ST_IN_SCRIPTING>"++" {
  1448. RETURN_TOKEN(T_INC);
  1449. }
  1450. <ST_IN_SCRIPTING>"--" {
  1451. RETURN_TOKEN(T_DEC);
  1452. }
  1453. <ST_IN_SCRIPTING>"===" {
  1454. RETURN_TOKEN(T_IS_IDENTICAL);
  1455. }
  1456. <ST_IN_SCRIPTING>"!==" {
  1457. RETURN_TOKEN(T_IS_NOT_IDENTICAL);
  1458. }
  1459. <ST_IN_SCRIPTING>"==" {
  1460. RETURN_TOKEN(T_IS_EQUAL);
  1461. }
  1462. <ST_IN_SCRIPTING>"!="|"<>" {
  1463. RETURN_TOKEN(T_IS_NOT_EQUAL);
  1464. }
  1465. <ST_IN_SCRIPTING>"<=>" {
  1466. RETURN_TOKEN(T_SPACESHIP);
  1467. }
  1468. <ST_IN_SCRIPTING>"<=" {
  1469. RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
  1470. }
  1471. <ST_IN_SCRIPTING>">=" {
  1472. RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
  1473. }
  1474. <ST_IN_SCRIPTING>"+=" {
  1475. RETURN_TOKEN(T_PLUS_EQUAL);
  1476. }
  1477. <ST_IN_SCRIPTING>"-=" {
  1478. RETURN_TOKEN(T_MINUS_EQUAL);
  1479. }
  1480. <ST_IN_SCRIPTING>"*=" {
  1481. RETURN_TOKEN(T_MUL_EQUAL);
  1482. }
  1483. <ST_IN_SCRIPTING>"*\*" {
  1484. RETURN_TOKEN(T_POW);
  1485. }
  1486. <ST_IN_SCRIPTING>"*\*=" {
  1487. RETURN_TOKEN(T_POW_EQUAL);
  1488. }
  1489. <ST_IN_SCRIPTING>"/=" {
  1490. RETURN_TOKEN(T_DIV_EQUAL);
  1491. }
  1492. <ST_IN_SCRIPTING>".=" {
  1493. RETURN_TOKEN(T_CONCAT_EQUAL);
  1494. }
  1495. <ST_IN_SCRIPTING>"%=" {
  1496. RETURN_TOKEN(T_MOD_EQUAL);
  1497. }
  1498. <ST_IN_SCRIPTING>"<<=" {
  1499. RETURN_TOKEN(T_SL_EQUAL);
  1500. }
  1501. <ST_IN_SCRIPTING>">>=" {
  1502. RETURN_TOKEN(T_SR_EQUAL);
  1503. }
  1504. <ST_IN_SCRIPTING>"&=" {
  1505. RETURN_TOKEN(T_AND_EQUAL);
  1506. }
  1507. <ST_IN_SCRIPTING>"|=" {
  1508. RETURN_TOKEN(T_OR_EQUAL);
  1509. }
  1510. <ST_IN_SCRIPTING>"^=" {
  1511. RETURN_TOKEN(T_XOR_EQUAL);
  1512. }
  1513. <ST_IN_SCRIPTING>"??=" {
  1514. RETURN_TOKEN(T_COALESCE_EQUAL);
  1515. }
  1516. <ST_IN_SCRIPTING>"||" {
  1517. RETURN_TOKEN(T_BOOLEAN_OR);
  1518. }
  1519. <ST_IN_SCRIPTING>"&&" {
  1520. RETURN_TOKEN(T_BOOLEAN_AND);
  1521. }
  1522. <ST_IN_SCRIPTING>"OR" {
  1523. RETURN_TOKEN(T_LOGICAL_OR);
  1524. }
  1525. <ST_IN_SCRIPTING>"AND" {
  1526. RETURN_TOKEN(T_LOGICAL_AND);
  1527. }
  1528. <ST_IN_SCRIPTING>"XOR" {
  1529. RETURN_TOKEN(T_LOGICAL_XOR);
  1530. }
  1531. <ST_IN_SCRIPTING>"<<" {
  1532. RETURN_TOKEN(T_SL);
  1533. }
  1534. <ST_IN_SCRIPTING>">>" {
  1535. RETURN_TOKEN(T_SR);
  1536. }
  1537. <ST_IN_SCRIPTING>"]"|")" {
  1538. /* Check that ] and ) match up properly with a preceding [ or ( */
  1539. RETURN_EXIT_NESTING_TOKEN(yytext[0]);
  1540. }
  1541. <ST_IN_SCRIPTING>"["|"(" {
  1542. enter_nesting(yytext[0]);
  1543. RETURN_TOKEN(yytext[0]);
  1544. }
  1545. <ST_IN_SCRIPTING>{TOKENS} {
  1546. RETURN_TOKEN(yytext[0]);
  1547. }
  1548. <ST_IN_SCRIPTING>"{" {
  1549. yy_push_state(ST_IN_SCRIPTING);
  1550. enter_nesting('{');
  1551. RETURN_TOKEN('{');
  1552. }
  1553. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
  1554. yy_push_state(ST_LOOKING_FOR_VARNAME);
  1555. enter_nesting('{');
  1556. RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
  1557. }
  1558. <ST_IN_SCRIPTING>"}" {
  1559. RESET_DOC_COMMENT();
  1560. if (!zend_stack_is_empty(&SCNG(state_stack))) {
  1561. yy_pop_state();
  1562. }
  1563. RETURN_EXIT_NESTING_TOKEN('}');
  1564. }
  1565. <ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
  1566. yyless(yyleng - 1);
  1567. yy_pop_state();
  1568. yy_push_state(ST_IN_SCRIPTING);
  1569. RETURN_TOKEN_WITH_STR(T_STRING_VARNAME, 0);
  1570. }
  1571. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
  1572. yyless(0);
  1573. yy_pop_state();
  1574. yy_push_state(ST_IN_SCRIPTING);
  1575. goto restart;
  1576. }
  1577. <ST_IN_SCRIPTING>{BNUM} {
  1578. /* The +/- 2 skips "0b" */
  1579. size_t len = yyleng - 2;
  1580. char *end, *bin = yytext + 2;
  1581. zend_bool contains_underscores;
  1582. /* Skip any leading 0s */
  1583. while (len > 0 && (*bin == '0' || *bin == '_')) {
  1584. ++bin;
  1585. --len;
  1586. }
  1587. contains_underscores = (memchr(bin, '_', len) != NULL);
  1588. if (contains_underscores) {
  1589. bin = estrndup(bin, len);
  1590. strip_underscores(bin, &len);
  1591. }
  1592. if (len < SIZEOF_ZEND_LONG * 8) {
  1593. if (len == 0) {
  1594. ZVAL_LONG(zendlval, 0);
  1595. } else {
  1596. errno = 0;
  1597. ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
  1598. ZEND_ASSERT(!errno && end == bin + len);
  1599. }
  1600. if (contains_underscores) {
  1601. efree(bin);
  1602. }
  1603. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1604. } else {
  1605. ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
  1606. /* errno isn't checked since we allow HUGE_VAL/INF overflow */
  1607. ZEND_ASSERT(end == bin + len);
  1608. if (contains_underscores) {
  1609. efree(bin);
  1610. }
  1611. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1612. }
  1613. }
  1614. <ST_IN_SCRIPTING>{LNUM} {
  1615. size_t len = yyleng;
  1616. char *end, *lnum = yytext;
  1617. zend_bool is_octal = lnum[0] == '0';
  1618. zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
  1619. if (contains_underscores) {
  1620. lnum = estrndup(lnum, len);
  1621. strip_underscores(lnum, &len);
  1622. }
  1623. /* Digits 8 and 9 are illegal in octal literals. */
  1624. if (is_octal) {
  1625. size_t i;
  1626. for (i = 0; i < len; i++) {
  1627. if (lnum[i] == '8' || lnum[i] == '9') {
  1628. zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
  1629. if (PARSER_MODE()) {
  1630. if (contains_underscores) {
  1631. efree(lnum);
  1632. }
  1633. ZVAL_UNDEF(zendlval);
  1634. RETURN_TOKEN(T_ERROR);
  1635. }
  1636. /* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
  1637. len = i;
  1638. break;
  1639. }
  1640. }
  1641. }
  1642. if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1643. errno = 0;
  1644. /* base must be passed explicitly for correct parse error on Windows */
  1645. ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
  1646. ZEND_ASSERT(end == lnum + len);
  1647. } else {
  1648. errno = 0;
  1649. ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
  1650. if (errno == ERANGE) { /* Overflow */
  1651. errno = 0;
  1652. if (is_octal) { /* octal overflow */
  1653. ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
  1654. } else {
  1655. ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
  1656. }
  1657. ZEND_ASSERT(end == lnum + len);
  1658. if (contains_underscores) {
  1659. efree(lnum);
  1660. }
  1661. RETURN_TOKEN_WITH_VAL(T_DNUMBER);
  1662. }
  1663. ZEND_ASSERT(end == lnum + len);
  1664. }
  1665. ZEND_ASSERT(!errno);
  1666. if (contains_underscores) {
  1667. efree(lnum);
  1668. }
  1669. RETURN_TOKEN_WITH_VAL(T_LNUMBER);
  1670. }
  1671. <ST_IN_SCRIPTING>{HNUM} {
  1672. /* The …

Large files are truncated, but you can click here to view the full file