PageRenderTime 45ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/Source/ThirdParty/TurboBadger/parser/tb_parser.cpp

https://gitlab.com/Teo-Mirror/AtomicGameEngine
C++ | 496 lines | 384 code | 62 blank | 50 comment | 152 complexity | 42cd3e113e429811d560bfde38582e04 MD5 | raw file
  1. // ================================================================================
  2. // == This file is a part of Turbo Badger. (C) 2011-2014, Emil Segerås ==
  3. // == See tb_core.h for more information. ==
  4. // ================================================================================
  5. #include "parser/tb_parser.h"
  6. #include "tb_tempbuffer.h"
  7. #include "utf8/utf8.h"
  8. #include <assert.h>
  9. #include <ctype.h>
  10. namespace tb {
  11. // == Util functions ====================================================================
  /** Return true if c is an ASCII hexadecimal digit (0-9, a-f or A-F). */
  static bool is_hex(char c)
  {
      if (c >= '0' && c <= '9')
          return true;
      if (c >= 'a' && c <= 'f')
          return true;
      return c >= 'A' && c <= 'F';
  }
  16. static uint32 parse_hex(char *&src, int max_count)
  17. {
  18. uint32 hex = 0;
  19. for (int i = 0; i < max_count; i++)
  20. {
  21. char c = *src;
  22. if (!is_hex(c))
  23. break;
  24. hex <<= 4;
  25. hex |= isdigit(c) ? c - '0' : tolower(c) - 'a' + 10;
  26. src++;
  27. }
  28. return hex;
  29. }
  30. void UnescapeString(char *str)
  31. {
  32. // fast forward to any escape sequence
  33. while (*str && *str != '\\')
  34. str++;
  35. char *dst = str, *src = str;
  36. while (*src)
  37. {
  38. if (*src == '\\')
  39. {
  40. bool code_found = true;
  41. switch (src[1])
  42. {
  43. case 'a': *dst = '\a'; break;
  44. case 'b': *dst = '\b'; break;
  45. case 'f': *dst = '\f'; break;
  46. case 'n': *dst = '\n'; break;
  47. case 'r': *dst = '\r'; break;
  48. case 't': *dst = '\t'; break;
  49. case 'v': *dst = '\v'; break;
  50. case '0': *dst = '\0'; break;
  51. case '\"': *dst = '\"'; break;
  52. case '\'': *dst = '\''; break;
  53. case '\\': *dst = '\\'; break;
  54. case 'x': // \xXX
  55. case 'u': // \uXXXX
  56. {
  57. // This should be safe. A utf-8 character can be at most 4 bytes,
  58. // and we have 4 bytes to use for \xXX and 6 for \uXXXX.
  59. src += 2;
  60. if (UCS4 hex = parse_hex(src, src[1] == 'x' ? 2 : 4))
  61. dst += utf8::encode(hex, dst);
  62. continue;
  63. }
  64. default:
  65. code_found = false;
  66. }
  67. if (code_found)
  68. {
  69. src += 2;
  70. dst++;
  71. continue;
  72. }
  73. }
  74. *dst = *src;
  75. dst++;
  76. src++;
  77. }
  78. *dst = 0;
  79. }
  /** Return true if the first character of str is a space or a tab.
      Line breaks and the terminating zero do not count as white space. */
  bool is_white_space(const char *str)
  {
      const char first = *str;
      return first == ' ' || first == '\t';
  }
  91. /** Return true if the given string starts with a color.
  92. Ex: #ffdd00, #fd0 */
  93. bool is_start_of_color(const char *str)
  94. {
  95. if (*str++ != '#')
  96. return false;
  97. int digit_count = 0;
  98. while (is_hex(*str))
  99. {
  100. str++;
  101. digit_count++;
  102. }
  103. return digit_count == 8 || digit_count == 6 || digit_count == 4 || digit_count == 3;
  104. }
  /** Return true if the given string may be a node reference, such
      as language strings or TBNodeRefTree references.
      The string must begin with '@', and the first token (everything up to
      the next space or the end of the string) must not contain a colon. */
  bool is_start_of_reference(const char *str)
  {
      if (*str != '@')
          return false;

      for (const char *p = str + 1; *p != 0 && *p != ' '; ++p)
      {
          // A colon inside the token means it is a key, not a value.
          if (*p == ':')
              return false;
      }
      return true;
  }
  120. /** Check if the line is a comment or empty space. If it is, consume the leading
  121. whitespace from line. */
  122. bool is_space_or_comment(char *&line)
  123. {
  124. char *tmp = line;
  125. while (is_white_space(tmp))
  126. tmp++;
  127. if (*tmp == '#' || *tmp == 0)
  128. {
  129. line = tmp;
  130. return true;
  131. }
  132. return false;
  133. }
  134. bool is_pending_multiline(const char *str)
  135. {
  136. while (is_white_space(str))
  137. str++;
  138. return str[0] == '\\' && str[1] == 0;
  139. }
  /** Return true if the character at buf is an unescaped quote of the given type.
      The quote counts as escaped when it is preceded by an odd number of
      backslashes. The backwards scan never goes before buf_start. */
  bool IsEndQuote(const char *buf_start, const char *buf, const char quote_type)
  {
      if (*buf != quote_type)
          return false;

      // Count the unbroken run of backslashes directly in front of the quote.
      int backslash_run = 0;
      for (const char *p = buf; p > buf_start && p[-1] == '\\'; --p)
          ++backslash_run;

      // Pairs of backslashes escape each other; a leftover one escapes the quote.
      return (backslash_run % 2) == 0;
  }
  149. // == Parser ============================================================================
  150. TBParser::STATUS TBParser::Read(TBParserStream *stream, TBParserTarget *target)
  151. {
  152. TBTempBuffer line, work;
  153. if (!line.Reserve(1024) || !work.Reserve(1024))
  154. return STATUS_OUT_OF_MEMORY;
  155. current_indent = 0;
  156. current_line_nr = 1;
  157. pending_multiline = false;
  158. multi_line_sub_level = 0;
  159. while (int read_len = stream->GetMoreData((char *)work.GetData(), work.GetCapacity()))
  160. {
  161. char *buf = work.GetData();
  162. // Skip BOM (BYTE ORDER MARK) character, often in the beginning of UTF-8 documents.
  163. if (current_line_nr == 1 && read_len > 3 &&
  164. (uint8)buf[0] == 239 &&
  165. (uint8)buf[1] == 187 &&
  166. (uint8)buf[2] == 191)
  167. {
  168. read_len -= 3;
  169. buf += 3;
  170. }
  171. int line_pos = 0;
  172. while (true)
  173. {
  174. // Find line end
  175. int line_start = line_pos;
  176. while (line_pos < read_len && buf[line_pos] != '\n')
  177. line_pos++;
  178. if (line_pos < read_len)
  179. {
  180. // We have a line
  181. // Skip preceding \r (if we have one)
  182. int line_len = line_pos - line_start;
  183. if (!line.Append(buf + line_start, line_len))
  184. return STATUS_OUT_OF_MEMORY;
  185. // Strip away trailing '\r' if the line has it
  186. char *linebuf = line.GetData();
  187. int linebuf_len = line.GetAppendPos();
  188. if (linebuf_len > 0 && linebuf[linebuf_len - 1] == '\r')
  189. linebuf[linebuf_len - 1] = 0;
  190. // Terminate the line string
  191. if (!line.Append("", 1))
  192. return STATUS_OUT_OF_MEMORY;
  193. // Handle line
  194. OnLine(line.GetData(), target);
  195. current_line_nr++;
  196. line.ResetAppendPos();
  197. line_pos++; // Skip this \n
  198. // Find next line
  199. continue;
  200. }
  201. // No more lines here so push the rest and break for more data
  202. if (!line.Append(buf + line_start, read_len - line_start))
  203. return STATUS_OUT_OF_MEMORY;
  204. break;
  205. }
  206. }
  207. if (line.GetAppendPos())
  208. {
  209. if (!line.Append("", 1))
  210. return STATUS_OUT_OF_MEMORY;
  211. OnLine(line.GetData(), target);
  212. current_line_nr++;
  213. }
  214. return STATUS_OK;
  215. }
  216. void TBParser::OnLine(char *line, TBParserTarget *target)
  217. {
  218. if (is_space_or_comment(line))
  219. {
  220. if (*line == '#')
  221. target->OnComment(current_line_nr, line + 1);
  222. return;
  223. }
  224. if (pending_multiline)
  225. {
  226. OnMultiline(line, target);
  227. return;
  228. }
  229. int i = 0;
  230. int tabs = 0;
  231. int spaces = 0;
  232. while (line[i] != 0)
  233. {
  234. if (line[i] == '\t')
  235. tabs++;
  236. else if (line[i] == ' ')
  237. spaces++;
  238. else
  239. break;
  240. i++;
  241. }
  242. if (spaces && indent_spaces == -1)
  243. {
  244. indent_spaces = spaces;
  245. }
  246. if ((tabs || indent_tabs) && spaces)
  247. {
  248. target->OnError(current_line_nr, "Indentation error. Mixed tabs and spaces (Line skipped)");
  249. return;
  250. }
  251. // Check indent
  252. int indent = 0;
  253. if (tabs)
  254. {
  255. indent_tabs = true;
  256. indent += tabs;
  257. line += tabs;
  258. }
  259. else
  260. {
  261. i = 0;
  262. int c = 0;
  263. while (line[i] == ' ' && line[i] != 0)
  264. {
  265. c++;
  266. i++;
  267. if (indent_spaces == c)
  268. {
  269. c = 0;
  270. indent++;
  271. }
  272. }
  273. line += i;
  274. }
  275. if (indent - current_indent > 1)
  276. {
  277. target->OnError(current_line_nr, "Indentation error. (Line skipped)");
  278. return;
  279. }
  280. if (indent > current_indent)
  281. {
  282. // FIX: Report indentation error if more than 1 higher!
  283. assert(indent - current_indent == 1);
  284. target->Enter();
  285. current_indent++;
  286. }
  287. else if (indent < current_indent)
  288. {
  289. while (indent < current_indent)
  290. {
  291. target->Leave();
  292. current_indent--;
  293. }
  294. }
  295. if (*line == 0)
  296. return;
  297. else
  298. {
  299. char *token = line;
  300. // Read line while consuming it and copy over to token buf
  301. while (!is_white_space(line) && *line != 0)
  302. line++;
  303. int token_len = line - token;
  304. // Consume any white space after the token
  305. while (is_white_space(line))
  306. line++;
  307. bool is_compact_line = token_len && token[token_len - 1] == ':';
  308. TBValue value;
  309. if (is_compact_line)
  310. {
  311. token_len--;
  312. token[token_len] = 0;
  313. // Check if the first argument is not a child but the value for this token
  314. if (*line == '[' || *line == '\"' || *line == '\'' ||
  315. is_start_of_number(line) ||
  316. is_start_of_color(line) ||
  317. is_start_of_reference(line))
  318. {
  319. ConsumeValue(value, line);
  320. if (pending_multiline)
  321. {
  322. // The value wrapped to the next line, so we should remember the token and continue.
  323. multi_line_token.Set(token);
  324. return;
  325. }
  326. }
  327. }
  328. else if (token[token_len])
  329. {
  330. token[token_len] = 0;
  331. UnescapeString(line);
  332. value.SetFromStringAuto(line, TBValue::SET_AS_STATIC);
  333. }
  334. target->OnToken(current_line_nr, token, value);
  335. if (is_compact_line)
  336. OnCompactLine(line, target);
  337. }
  338. }
  339. void TBParser::OnCompactLine(char *line, TBParserTarget *target)
  340. {
  341. target->Enter();
  342. while (*line)
  343. {
  344. // consume any whitespace
  345. while (is_white_space(line))
  346. line++;
  347. // Find token
  348. char *token = line;
  349. while (*line != ':' && *line != 0)
  350. line++;
  351. if (!*line)
  352. break; // Syntax error, expected token
  353. *line++ = 0;
  354. // consume any whitespace
  355. while (is_white_space(line))
  356. line++;
  357. TBValue v;
  358. ConsumeValue(v, line);
  359. if (pending_multiline)
  360. {
  361. // The value wrapped to the next line, so we should remember the token and continue.
  362. multi_line_token.Set(token);
  363. // Since we need to call target->Leave when the multiline is ready, set multi_line_sub_level.
  364. multi_line_sub_level = 1;
  365. return;
  366. }
  367. // Ready
  368. target->OnToken(current_line_nr, token, v);
  369. }
  370. target->Leave();
  371. }
  372. void TBParser::OnMultiline(char *line, TBParserTarget *target)
  373. {
  374. // consume any whitespace
  375. while (is_white_space(line))
  376. line++;
  377. TBValue value;
  378. ConsumeValue(value, line);
  379. if (!pending_multiline)
  380. {
  381. // Ready with all lines
  382. value.SetString(multi_line_value.GetData(), TBValue::SET_AS_STATIC);
  383. target->OnToken(current_line_nr, multi_line_token, value);
  384. if (multi_line_sub_level)
  385. target->Leave();
  386. // Reset
  387. multi_line_value.SetAppendPos(0);
  388. multi_line_sub_level = 0;
  389. }
  390. }
  391. void TBParser::ConsumeValue(TBValue &dst_value, char *&line)
  392. {
  393. // Find value (As quoted string, or as auto)
  394. char *value = line;
  395. if (*line == '\"' || *line == '\'')
  396. {
  397. const char quote_type = *line;
  398. // Consume starting quote
  399. line++;
  400. value++;
  401. // Find ending quote or end
  402. while (!IsEndQuote(value, line, quote_type) && *line != 0)
  403. line++;
  404. // Terminate away the quote
  405. if (*line == quote_type)
  406. *line++ = 0;
  407. // consume any whitespace
  408. while (is_white_space(line))
  409. line++;
  410. // consume any comma
  411. if (*line == ',')
  412. line++;
  413. UnescapeString(value);
  414. dst_value.SetString(value, TBValue::SET_AS_STATIC);
  415. }
  416. else
  417. {
  418. // Find next comma or end
  419. while (*line != ',' && *line != 0)
  420. line++;
  421. // Terminate away the comma
  422. if (*line == ',')
  423. *line++ = 0;
  424. UnescapeString(value);
  425. dst_value.SetFromStringAuto(value, TBValue::SET_AS_STATIC);
  426. }
  427. // Check if we still have pending value data on the following line and set pending_multiline.
  428. bool continuing_multiline = pending_multiline;
  429. pending_multiline = is_pending_multiline(line);
  430. // Append the multi line value to the buffer.
  431. if (continuing_multiline || pending_multiline)
  432. multi_line_value.AppendString(dst_value.GetString());
  433. }
  434. }; // namespace tb