PageRenderTime 52ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/ext/tokenizer/tokenizer.c

http://php52-backports.googlecode.com/
C | 259 lines | 146 code | 35 blank | 78 comment | 21 complexity | e9a66236ce780f5826af89c98fc48c18 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-2.1, BSD-3-Clause
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2010 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Andrei Zmievski <andrei@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id: tokenizer.c 293036 2010-01-03 09:23:27Z sebastian $ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_ini.h"
  24. #include "ext/standard/info.h"
  25. #include "php_tokenizer.h"
  /*
   * Values stored in yy_buffer_state.yy_buffer_status.
   *
   * YY_BUFFER_EOF_PENDING marks a buffer for which an EOF has already been
   * seen while some text is still waiting to be processed: the input source
   * must not be read any further, yet tokens may still be matched because of
   * possible backing-up.  Once the EOF is actually reached the status goes
   * back to "new" (via yyrestart()), so scanning can continue simply by
   * pointing yyin at another input file.
   */
  #define YY_BUFFER_NEW 0
  #define YY_BUFFER_NORMAL 1
  #define YY_BUFFER_EOF_PENDING 2

  typedef unsigned int yy_size_t;
  typedef struct yy_buffer_state *YY_BUFFER_STATE;

  /* Bookkeeping record describing one scanner input buffer. */
  struct yy_buffer_state {
      FILE *yy_input_file;

      char *yy_ch_buf;            /* the input buffer itself */
      char *yy_buf_pos;           /* current position inside yy_ch_buf */

      /* Capacity of the input buffer in bytes; the room reserved for the
       * EOB characters is not counted. */
      yy_size_t yy_buf_size;

      /* How many characters were read into yy_ch_buf; the EOB characters
       * are not counted. */
      int yy_n_chars;

      /* Non-zero when the buffer is "ours": we created it, so we may
       * realloc() it to grow it and must free() it to delete it. */
      int yy_is_our_buffer;

      /* Non-zero for an "interactive" input source.  When stdio is used for
       * input, getc() is then preferred over fread() so that fetching stops
       * after each newline. */
      int yy_is_interactive;

      /* Non-zero while positioned at the beginning of a line, in which case
       * '^' rules are active on the next match. */
      int yy_at_bol;

      /* Non-zero when the buffer should be refilled once its end is
       * reached. */
      int yy_fill_buffer;

      /* One of the YY_BUFFER_* status values. */
      int yy_buffer_status;
  };
  76. #include "zend.h"
  77. #include "zend_language_scanner.h"
  78. #include <zend_language_parser.h>
  79. #define zendtext LANG_SCNG(yy_text)
  80. #define zendleng LANG_SCNG(yy_leng)
  81. /* {{{ tokenizer_functions[]
  82. *
  83. * Every user visible function must have an entry in tokenizer_functions[].
  84. */
  85. zend_function_entry tokenizer_functions[] = {
  86. PHP_FE(token_get_all, NULL)
  87. PHP_FE(token_name, NULL)
  88. {NULL, NULL, NULL} /* Must be the last line in tokenizer_functions[] */
  89. };
  90. /* }}} */
  91. /* {{{ tokenizer_module_entry
  92. */
  93. zend_module_entry tokenizer_module_entry = {
  94. #if ZEND_MODULE_API_NO >= 20010901
  95. STANDARD_MODULE_HEADER,
  96. #endif
  97. "tokenizer",
  98. tokenizer_functions,
  99. PHP_MINIT(tokenizer),
  100. NULL,
  101. NULL,
  102. NULL,
  103. PHP_MINFO(tokenizer),
  104. #if ZEND_MODULE_API_NO >= 20010901
  105. "0.1", /* Replace with version number for your extension */
  106. #endif
  107. STANDARD_MODULE_PROPERTIES
  108. };
  109. /* }}} */
  /* Expand ZEND_GET_MODULE() only when COMPILE_DL_TOKENIZER is defined
   * (by convention, a build of the extension as a loadable module). */
  #if defined(COMPILE_DL_TOKENIZER)
  ZEND_GET_MODULE(tokenizer)
  #endif
  113. /* {{{ PHP_MINIT_FUNCTION
  114. */
  115. PHP_MINIT_FUNCTION(tokenizer)
  116. {
  117. tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  118. return SUCCESS;
  119. }
  120. /* }}} */
  /* {{{ PHP_MINFO_FUNCTION
   *
   * Module info handler: prints a one-row, two-column table stating that
   * tokenizer support is enabled.
   */
  PHP_MINFO_FUNCTION(tokenizer)
  {
      php_info_print_table_start();

      /* first argument is the number of cells in the row */
      php_info_print_table_row(2, "Tokenizer Support", "enabled");

      php_info_print_table_end();
  }
  /* }}} */
  130. static void tokenize(zval *return_value TSRMLS_DC)
  131. {
  132. zval token;
  133. zval *keyword;
  134. int token_type;
  135. zend_bool destroy;
  136. int token_line = 1;
  137. array_init(return_value);
  138. ZVAL_NULL(&token);
  139. while ((token_type = lex_scan(&token TSRMLS_CC))) {
  140. destroy = 1;
  141. switch (token_type) {
  142. case T_CLOSE_TAG:
  143. if (zendtext[zendleng - 1] != '>') {
  144. CG(zend_lineno)++;
  145. }
  146. case T_OPEN_TAG:
  147. case T_OPEN_TAG_WITH_ECHO:
  148. case T_WHITESPACE:
  149. case T_COMMENT:
  150. case T_DOC_COMMENT:
  151. destroy = 0;
  152. break;
  153. }
  154. if (token_type >= 256) {
  155. MAKE_STD_ZVAL(keyword);
  156. array_init(keyword);
  157. add_next_index_long(keyword, token_type);
  158. if (token_type == T_END_HEREDOC) {
  159. if (CG(increment_lineno)) {
  160. token_line = ++CG(zend_lineno);
  161. CG(increment_lineno) = 0;
  162. }
  163. add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
  164. efree(Z_STRVAL(token));
  165. } else {
  166. add_next_index_stringl(keyword, zendtext, zendleng, 1);
  167. }
  168. add_next_index_long(keyword, token_line);
  169. add_next_index_zval(return_value, keyword);
  170. } else {
  171. add_next_index_stringl(return_value, zendtext, zendleng, 1);
  172. }
  173. if (destroy && Z_TYPE(token) != IS_NULL) {
  174. zval_dtor(&token);
  175. }
  176. ZVAL_NULL(&token);
  177. token_line = CG(zend_lineno);
  178. if (token_type == T_HALT_COMPILER) {
  179. break;
  180. }
  181. }
  182. }
  183. /* {{{ proto array token_get_all(string source)
  184. */
  185. PHP_FUNCTION(token_get_all)
  186. {
  187. char *source = NULL;
  188. int argc = ZEND_NUM_ARGS();
  189. int source_len;
  190. zval source_z;
  191. zend_lex_state original_lex_state;
  192. if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE)
  193. return;
  194. ZVAL_STRINGL(&source_z, source, source_len, 1);
  195. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  196. if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
  197. RETURN_EMPTY_STRING();
  198. }
  199. LANG_SCNG(start) = 1;
  200. tokenize(return_value TSRMLS_CC);
  201. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  202. zval_dtor(&source_z);
  203. }
  204. /* }}} */
  205. /* {{{ proto string token_name(int type)
  206. */
  207. PHP_FUNCTION(token_name)
  208. {
  209. int argc = ZEND_NUM_ARGS();
  210. long type;
  211. if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
  212. return;
  213. }
  214. RETVAL_STRING(get_token_type_name(type), 1);
  215. }
  216. /* }}} */
  217. /*
  218. * Local variables:
  219. * tab-width: 4
  220. * c-basic-offset: 4
  221. * End:
  222. * vim600: noet sw=4 ts=4 fdm=marker
  223. * vim<600: noet sw=4 ts=4
  224. */