PageRenderTime 25ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/hphp/parser/scanner.h

https://gitlab.com/0072016/0072016-PHP.LLC
C Header | 386 lines | 296 code | 44 blank | 46 comment | 21 complexity | fe1626e2ac5f8d9baddecd72e45057b2 MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #ifndef incl_HPHP_PARSER_SCANNER_H_
  17. #define incl_HPHP_PARSER_SCANNER_H_
  18. #include <sstream>
  19. #include <cstdint>
  20. #include <string>
  21. #include <limits>
  22. #include <cstdlib>
  23. #include <limits.h>
  24. #include "hphp/util/exception.h"
  25. #include "hphp/util/portability.h"
  26. #include "hphp/parser/location.h"
  27. #include "hphp/parser/hphp.tab.hpp"
  28. #ifndef YY_TYPEDEF_YY_SIZE_T
  29. #define YY_TYPEDEF_YY_SIZE_T
  30. typedef size_t yy_size_t;
  31. #endif
  32. namespace HPHP {
  33. ///////////////////////////////////////////////////////////////////////////////
  34. typedef int TokenID;
  35. class ScannerToken {
  36. public:
  37. ScannerToken() : m_num(0), m_check(false), m_id(-1) {}
  38. void reset() { m_num = 0; m_text.clear(); m_id = -1; }
  39. TokenID num() const { return m_num;}
  40. void setNum(TokenID num) {
  41. m_num = num;
  42. }
  43. void set(TokenID num, const char *t) {
  44. m_num = num;
  45. m_text = t;
  46. }
  47. void set(TokenID num, const std::string &t) {
  48. m_num = num;
  49. m_text = t;
  50. }
  51. void operator++(TokenID) {
  52. ++m_num;
  53. }
  54. ScannerToken& operator=(const ScannerToken& other) {
  55. m_num = other.m_num;
  56. m_text = other.m_text;
  57. m_id = other.m_id;
  58. return *this;
  59. }
  60. const std::string &text() const {
  61. return m_text;
  62. }
  63. bool same(const char *s) const {
  64. return strcasecmp(m_text.c_str(), s) == 0;
  65. }
  66. void setText(const char *t, int len) {
  67. m_text = std::string(t, len);
  68. }
  69. void setText(const char *t) {
  70. m_text = t;
  71. }
  72. void setText(const std::string &t) {
  73. m_text = t;
  74. }
  75. void setText(const ScannerToken &token) {
  76. m_text = token.m_text;
  77. }
  78. bool check() const {
  79. return m_check;
  80. }
  81. void setCheck() {
  82. m_check = true;
  83. }
  84. void setID(int id) {
  85. m_id = id;
  86. }
  87. int ID() {
  88. return m_id;
  89. }
  90. void xhpLabel(bool prefix = true);
  91. bool htmlTrim(); // true if non-empty after trimming
  92. void xhpDecode(); // xhp supports more entities than html
  93. protected:
  94. TokenID m_num; // internal token id
  95. std::string m_text;
  96. bool m_check;
  97. int m_id;
  98. };
  // One entry of the lookahead buffer: a scanned token bundled with the
  // Location recorded for it. LookaheadSlab stores these in a fixed array.
  99. struct LookaheadToken {
  100. ScannerToken token; // token id and text
  101. Location loc; // Location object kept alongside the token
  // NOTE(review): presumably the token type code produced by the lexer for
  // this entry -- confirm against the scanner implementation.
  102. int t;
  103. };
  // Fixed-capacity chunk of LookaheadToken storage. Slabs are chained through
  // m_next; TokenStore holds the head/tail of the chain and deletes every slab
  // in its destructor.
  104. struct LookaheadSlab {
  105. static const int SlabSize = 32; // number of token slots in m_data
  106. LookaheadToken m_data[SlabSize];
  // Index at which TokenStore::iterator starts when it moves into this slab.
  107. int m_beginPos;
  // End index: TokenStore::iterator leaves this slab once its position is no
  // longer below m_endPos.
  108. int m_endPos;
  109. LookaheadSlab* m_next; // following slab in the chain, or null
  110. };
  111. struct TokenStore {
  112. LookaheadSlab* m_head;
  113. LookaheadSlab* m_tail;
  114. TokenStore() {
  115. m_head = nullptr;
  116. m_tail = nullptr;
  117. }
  118. ~TokenStore() {
  119. LookaheadSlab* s = m_head;
  120. LookaheadSlab* next;
  121. while (s) {
  122. next = s->m_next;
  123. delete s;
  124. s = next;
  125. }
  126. }
  127. bool empty() {
  128. return !m_head || (m_head->m_beginPos == m_head->m_endPos);
  129. }
  130. struct iterator {
  131. LookaheadSlab* m_slab;
  132. int m_pos;
  133. const LookaheadToken& operator*() const {
  134. return m_slab->m_data[m_pos];
  135. }
  136. LookaheadToken& operator*() {
  137. return m_slab->m_data[m_pos];
  138. }
  139. const LookaheadToken* operator->() const {
  140. return m_slab->m_data + m_pos;
  141. }
  142. LookaheadToken* operator->() {
  143. return m_slab->m_data + m_pos;
  144. }
  145. void next() {
  146. if (!m_slab) return;
  147. ++m_pos;
  148. if (m_pos < m_slab->m_endPos) return;
  149. m_slab = m_slab->m_next;
  150. if (!m_slab) return;
  151. m_pos = m_slab->m_beginPos;
  152. return;
  153. }
  154. iterator& operator++() {
  155. next();
  156. return *this;
  157. }
  158. iterator operator++(int) {
  159. iterator it = *this;
  160. next();
  161. return it;
  162. }
  163. bool operator==(const iterator& it) const {
  164. if (m_slab != it.m_slab) return false;
  165. if (!m_slab) return true;
  166. return (m_pos == it.m_pos);
  167. }
  168. };
  169. iterator begin();
  170. iterator end();
  171. void popFront();
  172. iterator appendNew();
  173. };
  174. ///////////////////////////////////////////////////////////////////////////////
  175. struct TokenListener {
  176. virtual int publish(const char *rawText, int rawLeng, int type) = 0;
  177. virtual ~TokenListener() {}
  178. };
  179. class Scanner {
  180. public:
  181. enum Type {
  182. AllowShortTags = 0x01, // allow <?
  183. AllowAspTags = 0x02, // allow <% %>
  184. ReturnAllTokens = 0x04, // return comments and whitespaces
  185. AllowXHPSyntax = 0x08, // allow XHP syntax
  186. AllowHipHopSyntax = 0x18, // allow HipHop-specific syntax (which
  187. // includes XHP syntax)
  188. };
  189. public:
  190. Scanner(const std::string& filename, int type, bool md5 = false);
  191. Scanner(std::istream &stream, int type, const char *fileName = "",
  192. bool md5 = false);
  193. Scanner(const char *source, int len, int type, const char *fileName = "",
  194. bool md5 = false);
  195. void setListener(TokenListener *listener) { m_listener = listener; }
  196. ~Scanner();
  197. const std::string &getMd5() const {
  198. return m_md5;
  199. }
  200. int scanToken(ScannerToken &t, Location &l);
  201. int fetchToken(ScannerToken &t, Location &l);
  202. void nextLookahead(TokenStore::iterator& pos);
  203. bool tryParseNSType(TokenStore::iterator& pos);
  204. bool tryParseTypeList(TokenStore::iterator& pos);
  205. bool tryParseFuncTypeList(TokenStore::iterator& pos);
  206. bool tryParseNonEmptyLambdaParams(TokenStore::iterator& pos);
  207. void parseApproxParamDefVal(TokenStore::iterator& pos);
  208. /**
  209. * Called by parser or tokenizer.
  210. */
  211. int getNextToken(ScannerToken &t, Location &l);
  212. const std::string &getError() const { return m_error;}
  213. Location *getLocation() const { return m_loc;}
  214. /**
  215. * Implemented in hphp.x, as they need to call yy functions.
  216. */
  217. void init();
  218. void reset();
  219. int scan();
  220. /**
  221. * Called by lex.yy.cpp for YY_INPUT (see hphp.x)
  222. */
  223. int read(char *text, yy_size_t &result, yy_size_t max);
  224. // Overload for older versions of flex.
  225. int read(char *text, int &result, yy_size_t max);
  226. /**
  227. * Called by scanner rules.
  228. */
  229. bool shortTags() const { return (m_type & AllowShortTags) == AllowShortTags;}
  230. bool aspTags() const { return (m_type & AllowAspTags) == AllowAspTags;}
  231. bool full() const { return (m_type & ReturnAllTokens) == ReturnAllTokens;}
  232. int lastToken() const { return m_lastToken;}
  233. void setToken(const char *rawText, int rawLeng, int type = -1) {
  234. m_token->setText(rawText, rawLeng);
  235. incLoc(rawText, rawLeng, type);
  236. }
  237. void stepPos(const char *rawText, int rawLeng, int type = -1) {
  238. if (shortTags()) {
  239. m_token->setText(rawText, rawLeng);
  240. }
  241. incLoc(rawText, rawLeng, type);
  242. }
  243. void setToken(const char *rawText, int rawLeng,
  244. const char *ytext, int yleng, int type = -1) {
  245. if (full()) {
  246. m_token->setText(rawText, rawLeng);
  247. } else {
  248. m_token->setText(ytext, yleng);
  249. }
  250. incLoc(rawText, rawLeng, type);
  251. }
  252. // also used for YY_FATAL_ERROR in hphp.x
  253. void error(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
  254. void warn(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
  255. std::string escape(const char *str, int len, char quote_type) const;
  256. /**
  257. * Called by scanner rules for doc comments.
  258. */
  259. void setDocComment(const char *ytext, int yleng) {
  260. m_docComment.assign(ytext, yleng);
  261. }
  262. void setDocComment(const std::string& com) {
  263. m_docComment = com;
  264. }
  265. std::string detachDocComment() {
  266. std::string dc = m_docComment;
  267. m_docComment.clear();
  268. return dc;
  269. }
  270. /**
  271. * Called by scanner rules for HEREDOC/NOWDOC.
  272. */
  273. void setHeredocLabel(const char *label, int len) {
  274. m_heredocLabel.assign(label, len);
  275. }
  276. int getHeredocLabelLen() const {
  277. return m_heredocLabel.length();
  278. }
  279. const char *getHeredocLabel() const {
  280. return m_heredocLabel.data();
  281. }
  282. void resetHeredoc() {
  283. m_heredocLabel.clear();
  284. }
  285. /**
  286. * Enables HipHop syntax for this file.
  287. */
  288. void setHHFile() {
  289. m_isHHFile = 1;
  290. }
  291. bool isHHFile() const {
  292. return m_isHHFile;
  293. }
  294. bool isXHPSyntaxEnabled() const {
  295. return ((m_type & AllowXHPSyntax) == AllowXHPSyntax) || m_isHHFile;
  296. }
  297. bool isHHSyntaxEnabled() const {
  298. return ((m_type & AllowHipHopSyntax) == AllowHipHopSyntax) || m_isHHFile;
  299. }
  300. int getLookaheadLtDepth() {
  301. return m_lookaheadLtDepth;
  302. }
  303. private:
  304. bool tryParseShapeType(TokenStore::iterator& pos);
  305. bool tryParseShapeMemberList(TokenStore::iterator& pos);
  306. bool nextIfToken(TokenStore::iterator& pos, int tok);
  307. void computeMd5();
  308. std::string m_filename;
  309. bool m_streamOwner;
  310. std::istream *m_stream;
  311. std::stringstream m_sstream; // XHP helper
  312. const char *m_source;
  313. int m_len;
  314. int m_pos;
  315. std::string m_md5;
  316. enum State {
  317. Start = -1,
  318. NoLineFeed,
  319. HadLineFeed,
  320. };
  321. State m_state;
  322. int m_type;
  323. void *m_yyscanner;
  324. // These fields are used to temporarily hold pointers to token/location
  325. // storage while the lexer is active to facilitate functions such as
  326. // setToken() and incLoc()
  327. ScannerToken *m_token;
  328. Location *m_loc;
  329. std::string m_error;
  330. std::string m_docComment;
  331. std::string m_heredocLabel;
  332. // fields for XHP parsing
  333. int m_lastToken;
  334. void incLoc(const char *rawText, int rawLeng, int type);
  335. bool m_isHHFile;
  336. TokenStore m_lookahead;
  337. int m_lookaheadLtDepth;
  338. TokenListener *m_listener;
  339. };
  340. ///////////////////////////////////////////////////////////////////////////////
  341. }
  342. #endif // incl_HPHP_PARSER_SCANNER_H_