PageRenderTime 278ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 1ms

/src/libyaml/scanner.c

https://code.google.com/
C | 2455 lines | 1043 code | 552 blank | 860 comment | 247 complexity | c13f0124b3e2df97f153328bd89f1a6e MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. * Introduction
  3. * ************
  4. *
  5. * The following notes assume that you are familiar with the YAML specification
  6. * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
  7. * some cases we are less restrictive than it requires.
  8. *
  9. * The process of transforming a YAML stream into a sequence of events is
  10. * divided into two steps: Scanning and Parsing.
  11. *
  12. * The Scanner transforms the input stream into a sequence of tokens, while the
  13. * Parser transforms the sequence of tokens produced by the Scanner into a
  14. * sequence of parsing events.
  15. *
  16. * The Scanner is rather clever and complicated. The Parser, on the contrary,
  17. * is a straightforward implementation of a recursive-descent parser (or,
  18. * LL(1) parser, as it is usually called).
  19. *
  20. * Actually there are two issues of Scanning that might be called "clever", the
  21. * rest is quite straightforward. The issues are "block collection start" and
  22. * "simple keys". Both issues are explained below in detail.
  23. *
  24. * Here the Scanning step is explained and implemented. We start with the list
  25. * of all the tokens produced by the Scanner together with short descriptions.
  26. *
  27. * Now, tokens:
  28. *
  29. * STREAM-START(encoding) # The stream start.
  30. * STREAM-END # The stream end.
  31. * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
  32. * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
  33. * DOCUMENT-START # '---'
  34. * DOCUMENT-END # '...'
  35. * BLOCK-SEQUENCE-START # Indentation increase denoting a block
  36. * BLOCK-MAPPING-START # sequence or a block mapping.
  37. * BLOCK-END # Indentation decrease.
  38. * FLOW-SEQUENCE-START # '['
  39. * FLOW-SEQUENCE-END # ']'
  40. * FLOW-MAPPING-START # '{'
  41. * FLOW-MAPPING-END # '}'
  42. * BLOCK-ENTRY # '-'
  43. * FLOW-ENTRY # ','
  44. * KEY # '?' or nothing (simple keys).
  45. * VALUE # ':'
  46. * ALIAS(anchor) # '*anchor'
  47. * ANCHOR(anchor) # '&anchor'
  48. * TAG(handle,suffix) # '!handle!suffix'
  49. * SCALAR(value,style) # A scalar.
  50. *
  51. * The following two tokens are "virtual" tokens denoting the beginning and the
  52. * end of the stream:
  53. *
  54. * STREAM-START(encoding)
  55. * STREAM-END
  56. *
  57. * We pass the information about the input stream encoding with the
  58. * STREAM-START token.
  59. *
  60. * The next two tokens are responsible for directives:
  61. *
  62. * VERSION-DIRECTIVE(major,minor)
  63. * TAG-DIRECTIVE(handle,prefix)
  64. *
  65. * Example:
  66. *
  67. * %YAML 1.1
  68. * %TAG ! !foo
  69. * %TAG !yaml! tag:yaml.org,2002:
  70. * ---
  71. *
  72. * The corresponding sequence of tokens:
  73. *
  74. * STREAM-START(utf-8)
  75. * VERSION-DIRECTIVE(1,1)
  76. * TAG-DIRECTIVE("!","!foo")
  77. * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
  78. * DOCUMENT-START
  79. * STREAM-END
  80. *
  81. * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
  82. * line.
  83. *
  84. * The document start and end indicators are represented by:
  85. *
  86. * DOCUMENT-START
  87. * DOCUMENT-END
  88. *
  89. * Note that if a YAML stream contains an implicit document (without '---'
  90. * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
  91. * produced.
  92. *
  93. * In the following examples, we present whole documents together with the
  94. * produced tokens.
  95. *
  96. * 1. An implicit document:
  97. *
  98. * 'a scalar'
  99. *
  100. * Tokens:
  101. *
  102. * STREAM-START(utf-8)
  103. * SCALAR("a scalar",single-quoted)
  104. * STREAM-END
  105. *
  106. * 2. An explicit document:
  107. *
  108. * ---
  109. * 'a scalar'
  110. * ...
  111. *
  112. * Tokens:
  113. *
  114. * STREAM-START(utf-8)
  115. * DOCUMENT-START
  116. * SCALAR("a scalar",single-quoted)
  117. * DOCUMENT-END
  118. * STREAM-END
  119. *
  120. * 3. Several documents in a stream:
  121. *
  122. * 'a scalar'
  123. * ---
  124. * 'another scalar'
  125. * ---
  126. * 'yet another scalar'
  127. *
  128. * Tokens:
  129. *
  130. * STREAM-START(utf-8)
  131. * SCALAR("a scalar",single-quoted)
  132. * DOCUMENT-START
  133. * SCALAR("another scalar",single-quoted)
  134. * DOCUMENT-START
  135. * SCALAR("yet another scalar",single-quoted)
  136. * STREAM-END
  137. *
  138. * We have already introduced the SCALAR token above. The following tokens are
  139. * used to describe aliases, anchors, tags, and scalars:
  140. *
  141. * ALIAS(anchor)
  142. * ANCHOR(anchor)
  143. * TAG(handle,suffix)
  144. * SCALAR(value,style)
  145. *
  146. * The following series of examples illustrate the usage of these tokens:
  147. *
  148. * 1. A recursive sequence:
  149. *
  150. * &A [ *A ]
  151. *
  152. * Tokens:
  153. *
  154. * STREAM-START(utf-8)
  155. * ANCHOR("A")
  156. * FLOW-SEQUENCE-START
  157. * ALIAS("A")
  158. * FLOW-SEQUENCE-END
  159. * STREAM-END
  160. *
  161. * 2. A tagged scalar:
  162. *
  163. * !!float "3.14" # A good approximation.
  164. *
  165. * Tokens:
  166. *
  167. * STREAM-START(utf-8)
  168. * TAG("!!","float")
  169. * SCALAR("3.14",double-quoted)
  170. * STREAM-END
  171. *
  172. * 3. Various scalar styles:
  173. *
  174. * --- # Implicit empty plain scalars do not produce tokens.
  175. * --- a plain scalar
  176. * --- 'a single-quoted scalar'
  177. * --- "a double-quoted scalar"
  178. * --- |-
  179. * a literal scalar
  180. * --- >-
  181. * a folded
  182. * scalar
  183. *
  184. * Tokens:
  185. *
  186. * STREAM-START(utf-8)
  187. * DOCUMENT-START
  188. * DOCUMENT-START
  189. * SCALAR("a plain scalar",plain)
  190. * DOCUMENT-START
  191. * SCALAR("a single-quoted scalar",single-quoted)
  192. * DOCUMENT-START
  193. * SCALAR("a double-quoted scalar",double-quoted)
  194. * DOCUMENT-START
  195. * SCALAR("a literal scalar",literal)
  196. * DOCUMENT-START
  197. * SCALAR("a folded scalar",folded)
  198. * STREAM-END
  199. *
  200. * Now it's time to review collection-related tokens. We will start with
  201. * flow collections:
  202. *
  203. * FLOW-SEQUENCE-START
  204. * FLOW-SEQUENCE-END
  205. * FLOW-MAPPING-START
  206. * FLOW-MAPPING-END
  207. * FLOW-ENTRY
  208. * KEY
  209. * VALUE
  210. *
  211. * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
  212. * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
  213. * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
  214. * indicators '?' and ':', which are used for denoting mapping keys and values,
  215. * are represented by the KEY and VALUE tokens.
  216. *
  217. * The following examples show flow collections:
  218. *
  219. * 1. A flow sequence:
  220. *
  221. * [item 1, item 2, item 3]
  222. *
  223. * Tokens:
  224. *
  225. * STREAM-START(utf-8)
  226. * FLOW-SEQUENCE-START
  227. * SCALAR("item 1",plain)
  228. * FLOW-ENTRY
  229. * SCALAR("item 2",plain)
  230. * FLOW-ENTRY
  231. * SCALAR("item 3",plain)
  232. * FLOW-SEQUENCE-END
  233. * STREAM-END
  234. *
  235. * 2. A flow mapping:
  236. *
  237. * {
  238. * a simple key: a value, # Note that the KEY token is produced.
  239. * ? a complex key: another value,
  240. * }
  241. *
  242. * Tokens:
  243. *
  244. * STREAM-START(utf-8)
  245. * FLOW-MAPPING-START
  246. * KEY
  247. * SCALAR("a simple key",plain)
  248. * VALUE
  249. * SCALAR("a value",plain)
  250. * FLOW-ENTRY
  251. * KEY
  252. * SCALAR("a complex key",plain)
  253. * VALUE
  254. * SCALAR("another value",plain)
  255. * FLOW-ENTRY
  256. * FLOW-MAPPING-END
  257. * STREAM-END
  258. *
  259. * A simple key is a key which is not denoted by the '?' indicator. Note that
  260. * the Scanner still produces the KEY token whenever it encounters a simple key.
  261. *
  262. * For scanning block collections, the following tokens are used (note that we
  263. * repeat KEY and VALUE here):
  264. *
  265. * BLOCK-SEQUENCE-START
  266. * BLOCK-MAPPING-START
  267. * BLOCK-END
  268. * BLOCK-ENTRY
  269. * KEY
  270. * VALUE
  271. *
  272. * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
  273. * increase that precedes a block collection (cf. the INDENT token in Python).
  274. * The token BLOCK-END denotes indentation decrease that ends a block collection
  275. * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
  276. * that make detection of these tokens more complex.
  277. *
  278. * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
  279. * '-', '?', and ':' correspondingly.
  280. *
  281. * The following examples show how the tokens BLOCK-SEQUENCE-START,
  282. * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
  283. *
  284. * 1. Block sequences:
  285. *
  286. * - item 1
  287. * - item 2
  288. * -
  289. * - item 3.1
  290. * - item 3.2
  291. * -
  292. * key 1: value 1
  293. * key 2: value 2
  294. *
  295. * Tokens:
  296. *
  297. * STREAM-START(utf-8)
  298. * BLOCK-SEQUENCE-START
  299. * BLOCK-ENTRY
  300. * SCALAR("item 1",plain)
  301. * BLOCK-ENTRY
  302. * SCALAR("item 2",plain)
  303. * BLOCK-ENTRY
  304. * BLOCK-SEQUENCE-START
  305. * BLOCK-ENTRY
  306. * SCALAR("item 3.1",plain)
  307. * BLOCK-ENTRY
  308. * SCALAR("item 3.2",plain)
  309. * BLOCK-END
  310. * BLOCK-ENTRY
  311. * BLOCK-MAPPING-START
  312. * KEY
  313. * SCALAR("key 1",plain)
  314. * VALUE
  315. * SCALAR("value 1",plain)
  316. * KEY
  317. * SCALAR("key 2",plain)
  318. * VALUE
  319. * SCALAR("value 2",plain)
  320. * BLOCK-END
  321. * BLOCK-END
  322. * STREAM-END
  323. *
  324. * 2. Block mappings:
  325. *
  326. * a simple key: a value # The KEY token is produced here.
  327. * ? a complex key
  328. * : another value
  329. * a mapping:
  330. * key 1: value 1
  331. * key 2: value 2
  332. * a sequence:
  333. * - item 1
  334. * - item 2
  335. *
  336. * Tokens:
  337. *
  338. * STREAM-START(utf-8)
  339. * BLOCK-MAPPING-START
  340. * KEY
  341. * SCALAR("a simple key",plain)
  342. * VALUE
  343. * SCALAR("a value",plain)
  344. * KEY
  345. * SCALAR("a complex key",plain)
  346. * VALUE
  347. * SCALAR("another value",plain)
  348. * KEY
  349. * SCALAR("a mapping",plain)
  350. * BLOCK-MAPPING-START
  351. * KEY
  352. * SCALAR("key 1",plain)
  353. * VALUE
  354. * SCALAR("value 1",plain)
  355. * KEY
  356. * SCALAR("key 2",plain)
  357. * VALUE
  358. * SCALAR("value 2",plain)
  359. * BLOCK-END
  360. * KEY
  361. * SCALAR("a sequence",plain)
  362. * VALUE
  363. * BLOCK-SEQUENCE-START
  364. * BLOCK-ENTRY
  365. * SCALAR("item 1",plain)
  366. * BLOCK-ENTRY
  367. * SCALAR("item 2",plain)
  368. * BLOCK-END
  369. * BLOCK-END
  370. * STREAM-END
  371. *
  372. * YAML does not always require a new block collection to start on a new
  373. * line. If the current line contains only '-', '?', and ':' indicators, a new
  374. * block collection may start at the current line. The following examples
  375. * illustrate this case:
  376. *
  377. * 1. Collections in a sequence:
  378. *
  379. * - - item 1
  380. * - item 2
  381. * - key 1: value 1
  382. * key 2: value 2
  383. * - ? complex key
  384. * : complex value
  385. *
  386. * Tokens:
  387. *
  388. * STREAM-START(utf-8)
  389. * BLOCK-SEQUENCE-START
  390. * BLOCK-ENTRY
  391. * BLOCK-SEQUENCE-START
  392. * BLOCK-ENTRY
  393. * SCALAR("item 1",plain)
  394. * BLOCK-ENTRY
  395. * SCALAR("item 2",plain)
  396. * BLOCK-END
  397. * BLOCK-ENTRY
  398. * BLOCK-MAPPING-START
  399. * KEY
  400. * SCALAR("key 1",plain)
  401. * VALUE
  402. * SCALAR("value 1",plain)
  403. * KEY
  404. * SCALAR("key 2",plain)
  405. * VALUE
  406. * SCALAR("value 2",plain)
  407. * BLOCK-END
  408. * BLOCK-ENTRY
  409. * BLOCK-MAPPING-START
  410. * KEY
  411. * SCALAR("complex key",plain)
  412. * VALUE
  413. * SCALAR("complex value",plain)
  414. * BLOCK-END
  415. * BLOCK-END
  416. * STREAM-END
  417. *
  418. * 2. Collections in a mapping:
  419. *
  420. * ? a sequence
  421. * : - item 1
  422. * - item 2
  423. * ? a mapping
  424. * : key 1: value 1
  425. * key 2: value 2
  426. *
  427. * Tokens:
  428. *
  429. * STREAM-START(utf-8)
  430. * BLOCK-MAPPING-START
  431. * KEY
  432. * SCALAR("a sequence",plain)
  433. * VALUE
  434. * BLOCK-SEQUENCE-START
  435. * BLOCK-ENTRY
  436. * SCALAR("item 1",plain)
  437. * BLOCK-ENTRY
  438. * SCALAR("item 2",plain)
  439. * BLOCK-END
  440. * KEY
  441. * SCALAR("a mapping",plain)
  442. * VALUE
  443. * BLOCK-MAPPING-START
  444. * KEY
  445. * SCALAR("key 1",plain)
  446. * VALUE
  447. * SCALAR("value 1",plain)
  448. * KEY
  449. * SCALAR("key 2",plain)
  450. * VALUE
  451. * SCALAR("value 2",plain)
  452. * BLOCK-END
  453. * BLOCK-END
  454. * STREAM-END
  455. *
  456. * YAML also permits non-indented sequences if they are included into a block
  457. * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
  458. *
  459. * key:
  460. * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
  461. * - item 2
  462. *
  463. * Tokens:
  464. *
  465. * STREAM-START(utf-8)
  466. * BLOCK-MAPPING-START
  467. * KEY
  468. * SCALAR("key",plain)
  469. * VALUE
  470. * BLOCK-ENTRY
  471. * SCALAR("item 1",plain)
  472. * BLOCK-ENTRY
  473. * SCALAR("item 2",plain)
  474. * BLOCK-END
  475. */
  476. #include "yaml_private.h"
  477. /*
  478. * Ensure that the buffer contains the required number of characters.
  479. * Return 1 on success, 0 on failure (reader error or memory error).
  480. */
  481. #define CACHE(parser,length) \
  482. (parser->unread >= (length) \
  483. ? 1 \
  484. : yaml_parser_update_buffer(parser, (length)))
  485. /*
  486. * Advance the buffer pointer.
  487. */
  488. #define SKIP(parser) \
  489. (parser->mark.index ++, \
  490. parser->mark.column ++, \
  491. parser->unread --, \
  492. parser->buffer.pointer ++)
  493. #define SKIPN(parser,n) \
  494. (parser->mark.index ++, \
  495. parser->mark.column ++, \
  496. parser->unread --, \
  497. parser->buffer.pointer += (n))
  498. #define SKIP_LINE(parser) \
  499. (IS_CRLF(parser->buffer) ? \
  500. (parser->mark.index += 2, \
  501. parser->mark.column = 0, \
  502. parser->mark.line ++, \
  503. parser->unread -= 2, \
  504. parser->buffer.pointer += 2) : \
  505. IS_BREAK(parser->buffer) ? \
  506. (parser->mark.index ++, \
  507. parser->mark.column = 0, \
  508. parser->mark.line ++, \
  509. parser->unread --, \
  510. parser->buffer.pointer += WIDTH(parser->buffer)) : 0)
  511. /*
  512. * Copy a character to a string buffer and advance pointers.
  513. */
  514. #define READ(parser,string) \
  515. (STRING_EXTEND(parser,string) ? \
  516. (COPY(string,parser->buffer), \
  517. parser->mark.index ++, \
  518. parser->mark.column ++, \
  519. parser->unread --, \
  520. 1) : 0)
  521. #define READN(parser,string,n) \
  522. (STRING_EXTEND(parser,string) ? \
  523. (COPYN(string,parser->buffer,n), \
  524. parser->mark.index ++, \
  525. parser->mark.column ++, \
  526. parser->unread --, \
  527. 1) : 0)
  528. /*
  529. * Copy a line break character to a string buffer and advance pointers.
  530. */
  531. #define READ_LINE(parser,string) \
  532. (STRING_EXTEND(parser,string) ? \
  533. (((CHECK_AT(parser->buffer,'\r',0) \
  534. && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \
  535. (*((string).pointer++) = (yaml_char_t) '\n', \
  536. parser->buffer.pointer += 2, \
  537. parser->mark.index += 2, \
  538. parser->mark.column = 0, \
  539. parser->mark.line ++, \
  540. parser->unread -= 2) : \
  541. (CHECK_AT(parser->buffer,'\r',0) \
  542. || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \
  543. (*((string).pointer++) = (yaml_char_t) '\n', \
  544. parser->buffer.pointer ++, \
  545. parser->mark.index ++, \
  546. parser->mark.column = 0, \
  547. parser->mark.line ++, \
  548. parser->unread --) : \
  549. (CHECK_AT(parser->buffer,'\xC2',0) \
  550. && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \
  551. (*((string).pointer++) = (yaml_char_t) '\n', \
  552. parser->buffer.pointer += 2, \
  553. parser->mark.index ++, \
  554. parser->mark.column = 0, \
  555. parser->mark.line ++, \
  556. parser->unread --) : \
  557. (CHECK_AT(parser->buffer,'\xE2',0) && \
  558. CHECK_AT(parser->buffer,'\x80',1) && \
  559. (CHECK_AT(parser->buffer,'\xA8',2) || \
  560. CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \
  561. (*((string).pointer++) = *(parser->buffer.pointer++), \
  562. *((string).pointer++) = *(parser->buffer.pointer++), \
  563. *((string).pointer++) = *(parser->buffer.pointer++), \
  564. parser->mark.index ++, \
  565. parser->mark.column = 0, \
  566. parser->mark.line ++, \
  567. parser->unread --) : 0), \
  568. 1) : 0)
  569. /*
  570. * Public API declarations.
  571. */
  572. YAML_DECLARE(int)
  573. yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
  574. /*
  575. * Error handling.
  576. */
  577. static int
  578. yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
  579. yaml_mark_t context_mark, const char *problem);
  580. /*
  581. * High-level token API.
  582. */
  583. YAML_DECLARE(int)
  584. yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
  585. static int
  586. yaml_parser_fetch_next_token(yaml_parser_t *parser);
  587. /*
  588. * Potential simple keys.
  589. */
  590. static int
  591. yaml_parser_stale_simple_keys(yaml_parser_t *parser);
  592. static int
  593. yaml_parser_save_simple_key(yaml_parser_t *parser);
  594. static int
  595. yaml_parser_remove_simple_key(yaml_parser_t *parser);
  596. static int
  597. yaml_parser_increase_flow_level(yaml_parser_t *parser);
  598. static int
  599. yaml_parser_decrease_flow_level(yaml_parser_t *parser);
  600. /*
  601. * Indentation treatment.
  602. */
  603. static int
  604. yaml_parser_roll_indent(yaml_parser_t *parser, int column,
  605. int number, yaml_token_type_t type, yaml_mark_t mark);
  606. static int
  607. yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
  608. /*
  609. * Token fetchers.
  610. */
  611. static int
  612. yaml_parser_fetch_stream_start(yaml_parser_t *parser);
  613. static int
  614. yaml_parser_fetch_stream_end(yaml_parser_t *parser);
  615. static int
  616. yaml_parser_fetch_directive(yaml_parser_t *parser);
  617. static int
  618. yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
  619. yaml_token_type_t type);
  620. static int
  621. yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
  622. yaml_token_type_t type);
  623. static int
  624. yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
  625. yaml_token_type_t type);
  626. static int
  627. yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
  628. static int
  629. yaml_parser_fetch_block_entry(yaml_parser_t *parser);
  630. static int
  631. yaml_parser_fetch_key(yaml_parser_t *parser);
  632. static int
  633. yaml_parser_fetch_value(yaml_parser_t *parser);
  634. static int
  635. yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
  636. static int
  637. yaml_parser_fetch_tag(yaml_parser_t *parser);
  638. static int
  639. yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
  640. static int
  641. yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
  642. static int
  643. yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
  644. /*
  645. * Token scanners.
  646. */
  647. static int
  648. yaml_parser_scan_to_next_token(yaml_parser_t *parser);
  649. static int
  650. yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
  651. static int
  652. yaml_parser_scan_directive_name(yaml_parser_t *parser,
  653. yaml_mark_t start_mark, yaml_char_t **name);
  654. static int
  655. yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
  656. yaml_mark_t start_mark, int *major, int *minor);
  657. static int
  658. yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
  659. yaml_mark_t start_mark, int *number);
  660. static int
  661. yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
  662. yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
  663. static int
  664. yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
  665. yaml_token_type_t type);
  666. static int
  667. yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
  668. static int
  669. yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
  670. yaml_mark_t start_mark, yaml_char_t **handle);
  671. static int
  672. yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
  673. yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
  674. static int
  675. yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
  676. yaml_mark_t start_mark, yaml_string_t *string);
  677. static int
  678. yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
  679. int literal);
  680. static int
  681. yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
  682. int *indent, yaml_string_t *breaks,
  683. yaml_mark_t start_mark, yaml_mark_t *end_mark);
  684. static int
  685. yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
  686. int single);
  687. static int
  688. yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
  689. /*
  690. * Get the next token.
  691. */
  692. YAML_DECLARE(int)
  693. yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
  694. {
  695. assert(parser); /* Non-NULL parser object is expected. */
  696. assert(token); /* Non-NULL token object is expected. */
  697. /* Erase the token object. */
  698. memset(token, 0, sizeof(yaml_token_t));
  699. /* No tokens after STREAM-END or error. */
  700. if (parser->stream_end_produced || parser->error) {
  701. return 1;
  702. }
  703. /* Ensure that the tokens queue contains enough tokens. */
  704. if (!parser->token_available) {
  705. if (!yaml_parser_fetch_more_tokens(parser))
  706. return 0;
  707. }
  708. /* Fetch the next token from the queue. */
  709. *token = DEQUEUE(parser, parser->tokens);
  710. parser->token_available = 0;
  711. parser->tokens_parsed ++;
  712. if (token->type == YAML_STREAM_END_TOKEN) {
  713. parser->stream_end_produced = 1;
  714. }
  715. return 1;
  716. }
  717. /*
  718. * Set the scanner error and return 0.
  719. */
  720. static int
  721. yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
  722. yaml_mark_t context_mark, const char *problem)
  723. {
  724. parser->error = YAML_SCANNER_ERROR;
  725. parser->context = context;
  726. parser->context_mark = context_mark;
  727. parser->problem = problem;
  728. parser->problem_mark = parser->mark;
  729. return 0;
  730. }
  731. /*
  732. * Ensure that the tokens queue contains at least one token which can be
  733. * returned to the Parser.
  734. */
  735. YAML_DECLARE(int)
  736. yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
  737. {
  738. int need_more_tokens;
  739. /* While we need more tokens to fetch, do it. */
  740. while (1)
  741. {
  742. /*
  743. * Check if we really need to fetch more tokens.
  744. */
  745. need_more_tokens = 0;
  746. if (parser->tokens.head == parser->tokens.tail)
  747. {
  748. /* Queue is empty. */
  749. need_more_tokens = 1;
  750. }
  751. else
  752. {
  753. yaml_simple_key_t *simple_key;
  754. /* Check if any potential simple key may occupy the head position. */
  755. if (!yaml_parser_stale_simple_keys(parser))
  756. return 0;
  757. for (simple_key = parser->simple_keys.start;
  758. simple_key != parser->simple_keys.top; simple_key++) {
  759. if (simple_key->possible
  760. && simple_key->token_number == parser->tokens_parsed) {
  761. need_more_tokens = 1;
  762. break;
  763. }
  764. }
  765. }
  766. /* We are finished. */
  767. if (!need_more_tokens)
  768. break;
  769. /* Fetch the next token. */
  770. if (!yaml_parser_fetch_next_token(parser))
  771. return 0;
  772. }
  773. parser->token_available = 1;
  774. return 1;
  775. }
  776. /*
  777. * The dispatcher for token fetchers.
  778. */
  779. static int
  780. yaml_parser_fetch_next_token(yaml_parser_t *parser)
  781. {
  782. /* Ensure that the buffer is initialized. */
  783. if (!CACHE(parser, 1))
  784. return 0;
  785. /* Check if we just started scanning. Fetch STREAM-START then. */
  786. if (!parser->stream_start_produced)
  787. return yaml_parser_fetch_stream_start(parser);
  788. /* Eat whitespaces and comments until we reach the next token. */
  789. if (!yaml_parser_scan_to_next_token(parser))
  790. return 0;
  791. /* Remove obsolete potential simple keys. */
  792. if (!yaml_parser_stale_simple_keys(parser))
  793. return 0;
  794. /* Check the indentation level against the current column. */
  795. if (!yaml_parser_unroll_indent(parser, parser->mark.column))
  796. return 0;
  797. /*
  798. * Ensure that the buffer contains at least 4 characters. 4 is the length
  799. * of the longest indicators ('--- ' and '... ').
  800. */
  801. if (!CACHE(parser, 4))
  802. return 0;
  803. /* Is it the end of the stream? */
  804. if (IS_Z(parser->buffer))
  805. return yaml_parser_fetch_stream_end(parser);
  806. /* Is it a directive? */
  807. if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
  808. return yaml_parser_fetch_directive(parser);
  809. /* Is it the document start indicator? */
  810. if (parser->mark.column == 0
  811. && CHECK_AT(parser->buffer, '-', 0)
  812. && CHECK_AT(parser->buffer, '-', 1)
  813. && CHECK_AT(parser->buffer, '-', 2)
  814. && IS_BLANKZ_AT(parser->buffer, 3))
  815. return yaml_parser_fetch_document_indicator(parser,
  816. YAML_DOCUMENT_START_TOKEN);
  817. /* Is it the document end indicator? */
  818. if (parser->mark.column == 0
  819. && CHECK_AT(parser->buffer, '.', 0)
  820. && CHECK_AT(parser->buffer, '.', 1)
  821. && CHECK_AT(parser->buffer, '.', 2)
  822. && IS_BLANKZ_AT(parser->buffer, 3))
  823. return yaml_parser_fetch_document_indicator(parser,
  824. YAML_DOCUMENT_END_TOKEN);
  825. /* Is it the flow sequence start indicator? */
  826. if (CHECK(parser->buffer, '['))
  827. return yaml_parser_fetch_flow_collection_start(parser,
  828. YAML_FLOW_SEQUENCE_START_TOKEN);
  829. /* Is it the flow mapping start indicator? */
  830. if (CHECK(parser->buffer, '{'))
  831. return yaml_parser_fetch_flow_collection_start(parser,
  832. YAML_FLOW_MAPPING_START_TOKEN);
  833. /* Is it the flow sequence end indicator? */
  834. if (CHECK(parser->buffer, ']'))
  835. return yaml_parser_fetch_flow_collection_end(parser,
  836. YAML_FLOW_SEQUENCE_END_TOKEN);
  837. /* Is it the flow mapping end indicator? */
  838. if (CHECK(parser->buffer, '}'))
  839. return yaml_parser_fetch_flow_collection_end(parser,
  840. YAML_FLOW_MAPPING_END_TOKEN);
  841. /* Is it the flow entry indicator? */
  842. if (CHECK(parser->buffer, ','))
  843. return yaml_parser_fetch_flow_entry(parser);
  844. /* Is it the block entry indicator? */
  845. if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
  846. return yaml_parser_fetch_block_entry(parser);
  847. /* Is it the key indicator? */
  848. if (CHECK(parser->buffer, '?')
  849. && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
  850. return yaml_parser_fetch_key(parser);
  851. /* Is it the value indicator? */
  852. if (CHECK(parser->buffer, ':')
  853. && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
  854. return yaml_parser_fetch_value(parser);
  855. /* Is it an alias? */
  856. if (CHECK(parser->buffer, '*'))
  857. return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
  858. /* Is it an anchor? */
  859. if (CHECK(parser->buffer, '&'))
  860. return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
  861. /* Is it a tag? */
  862. if (CHECK(parser->buffer, '!'))
  863. return yaml_parser_fetch_tag(parser);
  864. /* Is it a literal scalar? */
  865. if (CHECK(parser->buffer, '|') && !parser->flow_level)
  866. return yaml_parser_fetch_block_scalar(parser, 1);
  867. /* Is it a folded scalar? */
  868. if (CHECK(parser->buffer, '>') && !parser->flow_level)
  869. return yaml_parser_fetch_block_scalar(parser, 0);
  870. /* Is it a single-quoted scalar? */
  871. if (CHECK(parser->buffer, '\''))
  872. return yaml_parser_fetch_flow_scalar(parser, 1);
  873. /* Is it a double-quoted scalar? */
  874. if (CHECK(parser->buffer, '"'))
  875. return yaml_parser_fetch_flow_scalar(parser, 0);
  876. /*
  877. * Is it a plain scalar?
  878. *
  879. * A plain scalar may start with any non-blank characters except
  880. *
  881. * '-', '?', ':', ',', '[', ']', '{', '}',
  882. * '#', '&', '*', '!', '|', '>', '\'', '\"',
  883. * '%', '@', '`'.
  884. *
  885. * In the block context (and, for the '-' indicator, in the flow context
  886. * too), it may also start with the characters
  887. *
  888. * '-', '?', ':'
  889. *
  890. * if it is followed by a non-space character.
  891. *
  892. * The last rule is more restrictive than the specification requires.
  893. */
  894. if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
  895. || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
  896. || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
  897. || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
  898. || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
  899. || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
  900. || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
  901. || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
  902. || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
  903. || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
  904. (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
  905. (!parser->flow_level &&
  906. (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
  907. && !IS_BLANKZ_AT(parser->buffer, 1)))
  908. return yaml_parser_fetch_plain_scalar(parser);
  909. /*
  910. * If we don't determine the token type so far, it is an error.
  911. */
  912. return yaml_parser_set_scanner_error(parser,
  913. "while scanning for the next token", parser->mark,
  914. "found character that cannot start any token");
  915. }
  916. /*
  917. * Check the list of potential simple keys and remove the positions that
  918. * cannot contain simple keys anymore.
  919. */
  920. static int
  921. yaml_parser_stale_simple_keys(yaml_parser_t *parser)
  922. {
  923. yaml_simple_key_t *simple_key;
  924. /* Check for a potential simple key for each flow level. */
  925. for (simple_key = parser->simple_keys.start;
  926. simple_key != parser->simple_keys.top; simple_key ++)
  927. {
  928. /*
  929. * The specification requires that a simple key
  930. *
  931. * - is limited to a single line,
  932. * - is shorter than 1024 characters.
  933. */
  934. if (simple_key->possible
  935. && (simple_key->mark.line < parser->mark.line
  936. || simple_key->mark.index+1024 < parser->mark.index)) {
  937. /* Check if the potential simple key to be removed is required. */
  938. if (simple_key->required) {
  939. return yaml_parser_set_scanner_error(parser,
  940. "while scanning a simple key", simple_key->mark,
  941. "could not find expected ':'");
  942. }
  943. simple_key->possible = 0;
  944. }
  945. }
  946. return 1;
  947. }
  948. /*
  949. * Check if a simple key may start at the current position and add it if
  950. * needed.
  951. */
  952. static int
  953. yaml_parser_save_simple_key(yaml_parser_t *parser)
  954. {
  955. /*
  956. * A simple key is required at the current position if the scanner is in
  957. * the block context and the current column coincides with the indentation
  958. * level.
  959. */
  960. int required = (!parser->flow_level
  961. && parser->indent == (int)parser->mark.column);
  962. /*
  963. * A simple key is required only when it is the first token in the current
  964. * line. Therefore it is always allowed. But we add a check anyway.
  965. */
  966. assert(parser->simple_key_allowed || !required); /* Impossible. */
  967. /*
  968. * If the current position may start a simple key, save it.
  969. */
  970. if (parser->simple_key_allowed)
  971. {
  972. yaml_simple_key_t simple_key;
  973. simple_key.possible = 1;
  974. simple_key.required = required;
  975. simple_key.token_number =
  976. parser->tokens_parsed + parser->tokens.tail - parser->tokens.head;
  977. simple_key.mark = parser->mark;
  978. if (!yaml_parser_remove_simple_key(parser)) return 0;
  979. *(parser->simple_keys.top-1) = simple_key;
  980. }
  981. return 1;
  982. }
  983. /*
  984. * Remove a potential simple key at the current flow level.
  985. */
  986. static int
  987. yaml_parser_remove_simple_key(yaml_parser_t *parser)
  988. {
  989. yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
  990. if (simple_key->possible)
  991. {
  992. /* If the key is required, it is an error. */
  993. if (simple_key->required) {
  994. return yaml_parser_set_scanner_error(parser,
  995. "while scanning a simple key", simple_key->mark,
  996. "could not find expected ':'");
  997. }
  998. }
  999. /* Remove the key from the stack. */
  1000. simple_key->possible = 0;
  1001. return 1;
  1002. }
  1003. /*
  1004. * Increase the flow level and resize the simple key list if needed.
  1005. */
  1006. static int
  1007. yaml_parser_increase_flow_level(yaml_parser_t *parser)
  1008. {
  1009. yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
  1010. /* Reset the simple key on the next level. */
  1011. if (!PUSH(parser, parser->simple_keys, empty_simple_key))
  1012. return 0;
  1013. /* Increase the flow level. */
  1014. parser->flow_level++;
  1015. return 1;
  1016. }
  1017. /*
  1018. * Decrease the flow level.
  1019. */
  1020. static int
  1021. yaml_parser_decrease_flow_level(yaml_parser_t *parser)
  1022. {
  1023. yaml_simple_key_t dummy_key; /* Used to eliminate a compiler warning. */
  1024. if (parser->flow_level) {
  1025. parser->flow_level --;
  1026. dummy_key = POP(parser, parser->simple_keys);
  1027. }
  1028. return 1;
  1029. }
  1030. /*
  1031. * Push the current indentation level to the stack and set the new level
  1032. * the current column is greater than the indentation level. In this case,
  1033. * append or insert the specified token into the token queue.
  1034. *
  1035. */
  1036. static int
  1037. yaml_parser_roll_indent(yaml_parser_t *parser, int column,
  1038. int number, yaml_token_type_t type, yaml_mark_t mark)
  1039. {
  1040. yaml_token_t token;
  1041. /* In the flow context, do nothing. */
  1042. if (parser->flow_level)
  1043. return 1;
  1044. if (parser->indent < column)
  1045. {
  1046. /*
  1047. * Push the current indentation level to the stack and set the new
  1048. * indentation level.
  1049. */
  1050. if (!PUSH(parser, parser->indents, parser->indent))
  1051. return 0;
  1052. parser->indent = column;
  1053. /* Create a token and insert it into the queue. */
  1054. TOKEN_INIT(token, type, mark, mark);
  1055. if (number == -1) {
  1056. if (!ENQUEUE(parser, parser->tokens, token))
  1057. return 0;
  1058. }
  1059. else {
  1060. if (!QUEUE_INSERT(parser,
  1061. parser->tokens, number - parser->tokens_parsed, token))
  1062. return 0;
  1063. }
  1064. }
  1065. return 1;
  1066. }
  1067. /*
  1068. * Pop indentation levels from the indents stack until the current level
  1069. * becomes less or equal to the column. For each intendation level, append
  1070. * the BLOCK-END token.
  1071. */
  1072. static int
  1073. yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
  1074. {
  1075. yaml_token_t token;
  1076. /* In the flow context, do nothing. */
  1077. if (parser->flow_level)
  1078. return 1;
  1079. /* Loop through the intendation levels in the stack. */
  1080. while (parser->indent > column)
  1081. {
  1082. /* Create a token and append it to the queue. */
  1083. TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
  1084. if (!ENQUEUE(parser, parser->tokens, token))
  1085. return 0;
  1086. /* Pop the indentation level. */
  1087. parser->indent = POP(parser, parser->indents);
  1088. }
  1089. return 1;
  1090. }
  1091. /*
  1092. * Initialize the scanner and produce the STREAM-START token.
  1093. */
  1094. static int
  1095. yaml_parser_fetch_stream_start(yaml_parser_t *parser)
  1096. {
  1097. yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
  1098. yaml_token_t token;
  1099. /* Set the initial indentation. */
  1100. parser->indent = -1;
  1101. /* Initialize the simple key stack. */
  1102. if (!PUSH(parser, parser->simple_keys, simple_key))
  1103. return 0;
  1104. /* A simple key is allowed at the beginning of the stream. */
  1105. parser->simple_key_allowed = 1;
  1106. /* We have started. */
  1107. parser->stream_start_produced = 1;
  1108. /* Create the STREAM-START token and append it to the queue. */
  1109. STREAM_START_TOKEN_INIT(token, parser->encoding,
  1110. parser->mark, parser->mark);
  1111. if (!ENQUEUE(parser, parser->tokens, token))
  1112. return 0;
  1113. return 1;
  1114. }
  1115. /*
  1116. * Produce the STREAM-END token and shut down the scanner.
  1117. */
  1118. static int
  1119. yaml_parser_fetch_stream_end(yaml_parser_t *parser)
  1120. {
  1121. yaml_token_t token;
  1122. /* Force new line. */
  1123. if (parser->mark.column != 0) {
  1124. parser->mark.column = 0;
  1125. parser->mark.line ++;
  1126. }
  1127. /* Reset the indentation level. */
  1128. if (!yaml_parser_unroll_indent(parser, -1))
  1129. return 0;
  1130. /* Reset simple keys. */
  1131. if (!yaml_parser_remove_simple_key(parser))
  1132. return 0;
  1133. parser->simple_key_allowed = 0;
  1134. /* Create the STREAM-END token and append it to the queue. */
  1135. STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
  1136. if (!ENQUEUE(parser, parser->tokens, token))
  1137. return 0;
  1138. return 1;
  1139. }
  1140. /*
  1141. * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
  1142. */
  1143. static int
  1144. yaml_parser_fetch_directive(yaml_parser_t *parser)
  1145. {
  1146. yaml_token_t token;
  1147. /* Reset the indentation level. */
  1148. if (!yaml_parser_unroll_indent(parser, -1))
  1149. return 0;
  1150. /* Reset simple keys. */
  1151. if (!yaml_parser_remove_simple_key(parser))
  1152. return 0;
  1153. parser->simple_key_allowed = 0;
  1154. /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
  1155. if (!yaml_parser_scan_directive(parser, &token))
  1156. return 0;
  1157. /* Append the token to the queue. */
  1158. if (!ENQUEUE(parser, parser->tokens, token)) {
  1159. yaml_token_delete(&token);
  1160. return 0;
  1161. }
  1162. return 1;
  1163. }
  1164. /*
  1165. * Produce the DOCUMENT-START or DOCUMENT-END token.
  1166. */
  1167. static int
  1168. yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
  1169. yaml_token_type_t type)
  1170. {
  1171. yaml_mark_t start_mark, end_mark;
  1172. yaml_token_t token;
  1173. /* Reset the indentation level. */
  1174. if (!yaml_parser_unroll_indent(parser, -1))
  1175. return 0;
  1176. /* Reset simple keys. */
  1177. if (!yaml_parser_remove_simple_key(parser))
  1178. return 0;
  1179. parser->simple_key_allowed = 0;
  1180. /* Consume the token. */
  1181. start_mark = parser->mark;
  1182. SKIP(parser);
  1183. SKIP(parser);
  1184. SKIP(parser);
  1185. end_mark = parser->mark;
  1186. /* Create the DOCUMENT-START or DOCUMENT-END token. */
  1187. TOKEN_INIT(token, type, start_mark, end_mark);
  1188. /* Append the token to the queue. */
  1189. if (!ENQUEUE(parser, parser->tokens, token))
  1190. return 0;
  1191. return 1;
  1192. }
  1193. /*
  1194. * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
  1195. */
  1196. static int
  1197. yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
  1198. yaml_token_type_t type)
  1199. {
  1200. yaml_mark_t start_mark, end_mark;
  1201. yaml_token_t token;
  1202. /* The indicators '[' and '{' may start a simple key. */
  1203. if (!yaml_parser_save_simple_key(parser))
  1204. return 0;
  1205. /* Increase the flow level. */
  1206. if (!yaml_parser_increase_flow_level(parser))
  1207. return 0;
  1208. /* A simple key may follow the indicators '[' and '{'. */
  1209. parser->simple_key_allowed = 1;
  1210. /* Consume the token. */
  1211. start_mark = parser->mark;
  1212. SKIP(parser);
  1213. end_mark = parser->mark;
  1214. /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
  1215. TOKEN_INIT(token, type, start_mark, end_mark);
  1216. /* Append the token to the queue. */
  1217. if (!ENQUEUE(parser, parser->tokens, token))
  1218. return 0;
  1219. return 1;
  1220. }
  1221. /*
  1222. * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
  1223. */
  1224. static int
  1225. yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
  1226. yaml_token_type_t type)
  1227. {
  1228. yaml_mark_t start_mark, end_mark;
  1229. yaml_token_t token;
  1230. /* Reset any potential simple key on the current flow level. */
  1231. if (!yaml_parser_remove_simple_key(parser))
  1232. return 0;
  1233. /* Decrease the flow level. */
  1234. if (!yaml_parser_decrease_flow_level(parser))
  1235. return 0;
  1236. /* No simple keys after the indicators ']' and '}'. */
  1237. parser->simple_key_allowed = 0;
  1238. /* Consume the token. */
  1239. start_mark = parser->mark;
  1240. SKIP(parser);
  1241. end_mark = parser->mark;
  1242. /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
  1243. TOKEN_INIT(token, type, start_mark, end_mark);
  1244. /* Append the token to the queue. */
  1245. if (!ENQUEUE(parser, parser->tokens, token))
  1246. return 0;
  1247. return 1;
  1248. }
  1249. /*
  1250. * Produce the FLOW-ENTRY token.
  1251. */
  1252. static int
  1253. yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
  1254. {
  1255. yaml_mark_t start_mark, end_mark;
  1256. yaml_token_t token;
  1257. /* Reset any potential simple keys on the current flow level. */
  1258. if (!yaml_parser_remove_simple_key(parser))
  1259. return 0;
  1260. /* Simple keys are allowed after ','. */
  1261. parser->simple_key_allowed = 1;
  1262. /* Consume the token. */
  1263. start_mark = parser->mark;
  1264. SKIP(parser);
  1265. end_mark = parser->mark;
  1266. /* Create the FLOW-ENTRY token and append it to the queue. */
  1267. TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
  1268. if (!ENQUEUE(parser, parser->tokens, token))
  1269. return 0;
  1270. return 1;
  1271. }
  1272. /*
  1273. * Produce the BLOCK-ENTRY token.
  1274. */
  1275. static int
  1276. yaml_parser_fetch_block_entry(yaml_parser_t *parser)
  1277. {
  1278. yaml_mark_t start_mark, end_mark;
  1279. yaml_token_t token;
  1280. /* Check if the scanner is in the block context. */
  1281. if (!parser->flow_level)
  1282. {
  1283. /* Check if we are allowed to start a new entry. */
  1284. if (!parser->simple_key_allowed) {
  1285. return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
  1286. "block sequence entries are not allowed in this context");
  1287. }
  1288. /* Add the BLOCK-SEQUENCE-START token if needed. */
  1289. if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
  1290. YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
  1291. return 0;
  1292. }
  1293. else
  1294. {
  1295. /*
  1296. * It is an error for the '-' indicator to occur in the flow context,
  1297. * but we let the Parser detect and report about it because the Parser
  1298. * is able to point to the context.
  1299. */
  1300. }
  1301. /* Reset any potential simple keys on the current flow level. */
  1302. if (!yaml_parser_remove_simple_key(parser))
  1303. return 0;
  1304. /* Simple keys are allowed after '-'. */
  1305. parser->simple_key_allowed = 1;
  1306. /* Consume the token. */
  1307. start_mark = parser->mark;
  1308. SKIP(parser);
  1309. end_mark = parser->mark;
  1310. /* Create the BLOCK-ENTRY token and append it to the queue. */
  1311. TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
  1312. if (!ENQUEUE(parser, parser->tokens, token))
  1313. return 0;
  1314. return 1;
  1315. }
  1316. /*
  1317. * Produce the KEY token.
  1318. */
  1319. static int
  1320. yaml_parser_fetch_key(yaml_parser_t *parser)
  1321. {
  1322. yaml_mark_t start_mark, end_mark;
  1323. yaml_token_t token;
  1324. /* In the block context, additional checks are required. */
  1325. if (!parser->flow_level)
  1326. {
  1327. /* Check if we are allowed to start a new key (not nessesary simple). */
  1328. if (!parser->simple_key_allowed) {
  1329. return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
  1330. "mapping keys are not allowed in this context");
  1331. }
  1332. /* Add the BLOCK-MAPPING-START token if needed. */
  1333. if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
  1334. YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
  1335. return 0;
  1336. }
  1337. /* Reset any potential simple keys on the current flow level. */
  1338. if (!yaml_parser_remove_simple_key(parser))
  1339. return 0;
  1340. /* Simple keys are allowed after '?' in the block context. */
  1341. parser->simple_key_allowed = (!parser->flow_level);
  1342. /* Consume the token. */
  1343. start_mark = parser->mark;
  1344. SKIP(parser);
  1345. end_mark = parser->mark;
  1346. /* Create the KEY token and append it to the queue. */
  1347. TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
  1348. if (!ENQUEUE(parser, parser->tokens, token))
  1349. return 0;
  1350. return 1;
  1351. }
  1352. /*
  1353. * Produce the VALUE token.
  1354. */
  1355. static int
  1356. yaml_parser_fetch_value(yaml_parser_t *parser)
  1357. {
  1358. yaml_mark_t start_mark, end_mark;
  1359. yaml_token_t token;
  1360. yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
  1361. /* Have we found a simple key? */
  1362. if (simple_key->possible)
  1363. {
  1364. /* Create the KEY token and insert it into the queue. */
  1365. TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
  1366. if (!QUEUE_INSERT(parser, parser->tokens,
  1367. simple_key->token_number - parser->tokens_parsed, token))
  1368. return 0;
  1369. /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
  1370. if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
  1371. simple_key->token_number,
  1372. YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
  1373. return 0;
  1374. /* Remove the simple key. */
  1375. simple_key->possible = 0;
  1376. /* A simple key cannot follow another simple key. */
  1377. parser->simple_key_allowed = 0;
  1378. }
  1379. else
  1380. {
  1381. /* The ':' indicator follows a complex key. */
  1382. /* In the block context, extra checks are required. */
  1383. if (!parser->flow_level)
  1384. {
  1385. /* Check if we are allowed to start a complex value. */
  1386. if (!parser->simple_key_allowed) {
  1387. return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
  1388. "mapping values are not allowed in this context");
  1389. }
  1390. /* Add the BLOCK-MAPPING-START token if needed. */
  1391. if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
  1392. YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
  1393. return 0;
  1394. }
  1395. /* Simple keys after ':' are allowed in the block context. */
  1396. parser->simple_key_allowed = (!parser->flow_level);
  1397. }
  1398. /* Consume the token. */
  1399. start_mark = parser->mark;
  1400. SKIP(parser);
  1401. end_mark = parser->mark;
  1402. /* Create the VALUE token and append it to the queue. */
  1403. TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
  1404. if (!ENQUEUE(parser, parser->tokens, token))
  1405. return 0;
  1406. return 1;
  1407. }
  1408. /*
  1409. * Produce the ALIAS or ANCHOR token.
  1410. */
  1411. static int
  1412. yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
  1413. {
  1414. yaml_token_t token;
  1415. /* An anchor or an alias could be a simple key. */
  1416. if (!yaml_parser_save_simple_key(parser))
  1417. return 0;
  1418. /* A simple key cannot follow an anchor or an alias. */
  1419. parser->simple_key_allowed = 0;
  1420. /* Create the ALIAS or ANCHOR token and append it to the queue. */
  1421. if (!yaml_parser_scan_anchor(parser, &token, type))
  1422. return 0;
  1423. if (!ENQUEUE(parser, parser->tokens, token)) {
  1424. yaml_token_delete(&token);
  1425. return 0;
  1426. }
  1427. return 1;
  1428. }
  1429. /*
  1430. * Produce the TAG token.
  1431. */
  1432. static int
  1433. yaml_parser_fetch_tag(yaml_parser_t *parser)
  1434. {
  1435. yaml_token_t token;
  1436. /* A tag could be a simple key. */
  1437. if (!yaml_parser_save_simple_key(parser))
  1438. return 0;
  1439. /* A simple key cannot follow a tag. */
  1440. parser->simple_key_allowed = 0;
  1441. /* Create the TAG token and append it to the queue. */
  1442. if (!yaml_parser_scan_tag(parser, &token))
  1443. return 0;
  1444. if (!ENQUEUE(parser, parser->tokens, token)) {
  1445. yaml_token_delete(&token);
  1446. return 0;
  1447. }
  1448. return 1;
  1449. }
  1450. /*
  1451. * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
  1452. */
  1453. static int
  1454. yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
  1455. {
  1456. yaml_token_t token;
  1457. /* Remove any potential simple keys. */
  1458. if (!yaml_parser_remove_simple_key(parser))
  1459. return 0;
  1460. /* A simple key may follow a block scalar. */
  1461. parser->simple_key_allowed = 1;
  1462. /* Create the SCALAR token and append it to the queue. */
  1463. if (!yaml_parser_scan_block_scalar(parser, &token, literal))
  1464. return 0;
  1465. if (!ENQUEUE(parser, parser->tokens, token)) {
  1466. yaml_token_delete(&token);
  1467. return 0;
  1468. }
  1469. return 1;
  1470. }
  1471. /*
  1472. * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
  1473. */
  1474. static int
  1475. yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
  1476. {
  1477. yaml_token_t token;
  1478. /* A plain scalar could be a simple key. */
  1479. if (!yaml_parser_save_simple_key(parser))
  1480. return 0;
  1481. /* A simple key cannot follow a flow scalar. */
  1482. parser->simple_key_allowed = 0;
  1483. /* Create the SCALAR token and append it to the queue. */
  1484. if (!yaml_parser_scan_flow_scalar(parser, &token, single))
  1485. return 0;
  1486. if (!ENQUEUE(parser, parser->tokens, token)) {
  1487. yaml_token_delete(&token);
  1488. return 0;
  1489. }
  1490. return 1;
  1491. }
  1492. /*
  1493. * Produce the SCALAR(...,plain) token.
  1494. */
  1495. static int
  1496. yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
  1497. {
  1498. yaml_token_t token;
  1499. /* A plain scalar could be a simple key. */
  1500. if (!yaml_parser_save_simple_key(parser))
  1501. return 0;
  1502. /* A simple key cannot follow a flow scalar. */
  1503. parser->simple_key_allowed = 0;
  1504. /* Create the SCALAR token and append it to the queue. */
  1505. if (!yaml_parser_scan_plain_scalar(parser, &token))
  1506. return 0;
  1507. if (!ENQUEUE(parser, parser->tokens, token)) {
  1508. yaml_token_delete(&token);
  1509. return 0;
  1510. }
  1511. return 1;
  1512. }
  1513. /*
  1514. * Eat whitespaces and comments until the next token is found.
  1515. */
  1516. static int
  1517. yaml_parser_scan_to_next_token(yaml_parser_t *parser)
  1518. {
  1519. /* Until the next token is not found. */
  1520. while (1)
  1521. {
  1522. /* Allow the BOM mark to start a line. */
  1523. if (!CACHE(parser, 1)) return 0;
  1524. if (parser->mark.column == 0 && IS_BOM(parser->buffer))
  1525. SKIPN(parser,3); /* UTF-8 BOM is 3 bytes */
  1526. /*
  1527. * Eat whitespaces.
  1528. *
  1529. * Tabs are allowed:
  1530. *
  1531. * - in the flow context;
  1532. * - in the block context, but not at the beginning of the line or
  1533. * after '-', '?', or ':' (complex value).
  1534. */
  1535. if (!CACHE(parser, 1)) return 0;
  1536. while (CHECK(parser->buffer,' ') ||
  1537. ((parser->flow_level || !parser->simple_key_allowed) &&
  1538. CHECK(parser->buffer, '\t'))) {
  1539. SKIP(parser);
  1540. if (!CACHE(parser, 1)) return 0;
  1541. }
  1542. /* Eat a comment until a line break. */
  1543. if (CHECK(parser->buffer, '#')) {
  1544. while (!IS_BREAKZ(parser->buffer)) {
  1545. SKIP(parser);
  1546. if (!CACHE(parser, 1)) return 0;
  1547. }
  1548. }
  1549. /* If it is a line break, eat it. */
  1550. if (IS_BREAK(parser->buffer))
  1551. {
  1552. if (!CACHE(parser, 2)) return 0;
  1553. SKIP_LINE(parser);
  1554. /* In the block context, a new line may start a simple key. */
  1555. if (!parser->flow_level) {
  1556. parser->simple_key_allowed = 1;
  1557. }
  1558. }
  1559. else
  1560. {
  1561. /* We have found a token. */
  1562. break;
  1563. }
  1564. }
  1565. return 1;
  1566. }
  1567. /*
  1568. * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1569. *
  1570. * Scope:
  1571. * %YAML 1.1 # a comment \n
  1572. * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1573. * %TAG !yaml! tag:yaml.org,2002: \n
  1574. * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1575. */
  1576. int
  1577. yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
  1578. {
  1579. yaml_mark_t start_mark, end_mark;
  1580. yaml_char_t *name = NULL;
  1581. int major, minor;
  1582. yaml_char_t *handle = NULL, *prefix = NULL;
  1583. /* Eat '%'. */
  1584. start_mark = parser->mark;
  1585. SKIP(parser);
  1586. /* Scan the directive name. */
  1587. if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
  1588. goto error;
  1589. /* Is it a YAML directive? */
  1590. if (strcmp((char *)name, "YAML") == 0)
  1591. {
  1592. /* Scan the VERSION directive value. */
  1593. if (!yaml_parser_scan_version_directive_value(parser, start_mark,
  1594. &major, &minor))
  1595. goto error;
  1596. end_mark = parser->mark;
  1597. /* Create a VERSION-DIRECTIVE token. */
  1598. VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
  1599. start_mark, end_mark);
  1600. }
  1601. /* Is it a TAG directive? */
  1602. else if (strcmp((char *)name, "TAG") == 0)
  1603. {
  1604. /* Scan the TAG directive value. */
  1605. if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
  1606. &handle, &prefix))
  1607. goto error;
  1608. end_mark = parser->mark;
  1609. /* Create a TAG-DIRECTIVE token. */
  1610. TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
  1611. start_mark, end_mark);
  1612. }
  1613. /* Unknown directive. */
  1614. else
  1615. {
  1616. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1617. start_mark, "found uknown directive name");
  1618. goto error;
  1619. }
  1620. /* Eat the rest of the line including any comments. */
  1621. if (!CACHE(parser, 1)) goto error;
  1622. while (IS_BLANK(parser->buffer)) {
  1623. SKIP(parser);
  1624. if (!CACHE(parser, 1)) goto error;
  1625. }
  1626. if (CHECK(parser->buffer, '#')) {
  1627. while (!IS_BREAKZ(parser->buffer)) {
  1628. SKIP(parser);
  1629. if (!CACHE(parser, 1)) goto error;
  1630. }
  1631. }
  1632. /* Check if we are at the end of the line. */
  1633. if (!IS_BREAKZ(parser->buffer)) {
  1634. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1635. start_mark, "did not find expected comment or line break");
  1636. goto error;
  1637. }
  1638. /* Eat a line break. */
  1639. if (IS_BREAK(parser->buffer)) {
  1640. if (!CACHE(parser, 2)) goto error;
  1641. SKIP_LINE(parser);
  1642. }
  1643. yaml_free(name);
  1644. return 1;
  1645. error:
  1646. yaml_free(prefix);
  1647. yaml_free(handle);
  1648. yaml_free(name);
  1649. return 0;
  1650. }
  1651. /*
  1652. * Scan the directive name.
  1653. *
  1654. * Scope:
  1655. * %YAML 1.1 # a comment \n
  1656. * ^^^^
  1657. * %TAG !yaml! tag:yaml.org,2002: \n
  1658. * ^^^
  1659. */
  1660. static int
  1661. yaml_parser_scan_directive_name(yaml_parser_t *parser,
  1662. yaml_mark_t start_mark, yaml_char_t **name)
  1663. {
  1664. yaml_string_t string = NULL_STRING;
  1665. if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
  1666. /* Consume the directive name. */
  1667. if (!CACHE(parser, 1)) goto error;
  1668. while (IS_ALPHA(parser->buffer))
  1669. {
  1670. if (!READ(parser, string)) goto error;
  1671. if (!CACHE(parser, 1)) goto error;
  1672. }
  1673. /* Check if the name is empty. */
  1674. if (string.start == string.pointer) {
  1675. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1676. start_mark, "could not find expected directive name");
  1677. goto error;
  1678. }
  1679. /* Check for an blank character after the name. */
  1680. if (!IS_BLANKZ(parser->buffer)) {
  1681. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1682. start_mark, "found unexpected non-alphabetical character");
  1683. goto error;
  1684. }
  1685. *name = string.start;
  1686. return 1;
  1687. error:
  1688. STRING_DEL(parser, string);
  1689. return 0;
  1690. }
  1691. /*
  1692. * Scan the value of VERSION-DIRECTIVE.
  1693. *
  1694. * Scope:
  1695. * %YAML 1.1 # a comment \n
  1696. * ^^^^^^
  1697. */
  1698. static int
  1699. yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
  1700. yaml_mark_t start_mark, int *major, int *minor)
  1701. {
  1702. /* Eat whitespaces. */
  1703. if (!CACHE(parser, 1)) return 0;
  1704. while (IS_BLANK(parser->buffer)) {
  1705. SKIP(parser);
  1706. if (!CACHE(parser, 1)) return 0;
  1707. }
  1708. /* Consume the major version number. */
  1709. if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
  1710. return 0;
  1711. /* Eat '.'. */
  1712. if (!CHECK(parser->buffer, '.')) {
  1713. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1714. start_mark, "did not find expected digit or '.' character");
  1715. }
  1716. SKIP(parser);
  1717. /* Consume the minor version number. */
  1718. if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
  1719. return 0;
  1720. return 1;
  1721. }
  1722. #define MAX_NUMBER_LENGTH 9
  1723. /*
  1724. * Scan the version number of VERSION-DIRECTIVE.
  1725. *
  1726. * Scope:
  1727. * %YAML 1.1 # a comment \n
  1728. * ^
  1729. * %YAML 1.1 # a comment \n
  1730. * ^
  1731. */
  1732. static int
  1733. yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
  1734. yaml_mark_t start_mark, int *number)
  1735. {
  1736. int value = 0;
  1737. size_t length = 0;
  1738. /* Repeat while the next character is digit. */
  1739. if (!CACHE(parser, 1)) return 0;
  1740. while (IS_DIGIT(parser->buffer))
  1741. {
  1742. /* Check if the number is too long. */
  1743. if (++length > MAX_NUMBER_LENGTH) {
  1744. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1745. start_mark, "found extremely long version number");
  1746. }
  1747. value = value*10 + AS_DIGIT(parser->buffer);
  1748. SKIP(parser);
  1749. if (!CACHE(parser, 1)) return 0;
  1750. }
  1751. /* Check if the number was present. */
  1752. if (!length) {
  1753. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1754. start_mark, "did not find expected version number");
  1755. }
  1756. *number = value;
  1757. return 1;
  1758. }
  1759. /*
  1760. * Scan the value of a TAG-DIRECTIVE token.
  1761. *
  1762. * Scope:
  1763. * %TAG !yaml! tag:yaml.org,2002: \n
  1764. * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1765. */
  1766. static int
  1767. yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
  1768. yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
  1769. {
  1770. yaml_char_t *handle_value = NULL;
  1771. yaml_char_t *prefix_value = NULL;
  1772. /* Eat whitespaces. */
  1773. if (!CACHE(parser, 1)) goto error;
  1774. while (IS_BLANK(parser->buffer)) {
  1775. SKIP(parser);
  1776. if (!CACHE(parser, 1)) goto error;
  1777. }
  1778. /* Scan a handle. */
  1779. if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
  1780. goto error;
  1781. /* Expect a whitespace. */
  1782. if (!CACHE(parser, 1)) goto error;
  1783. if (!IS_BLANK(parser->buffer)) {
  1784. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1785. start_mark, "did not find expected whitespace");
  1786. goto error;
  1787. }
  1788. /* Eat whitespaces. */
  1789. while (IS_BLANK(parser->buffer)) {
  1790. SKIP(parser);
  1791. if (!CACHE(parser, 1)) goto error;
  1792. }
  1793. /* Scan a prefix. */
  1794. if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
  1795. goto error;
  1796. /* Expect a whitespace or line break. */
  1797. if (!CACHE(parser, 1)) goto error;
  1798. if (!IS_BLANKZ(parser->buffer)) {
  1799. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1800. start_mark, "did not find expected whitespace or line break");
  1801. goto error;
  1802. }
  1803. *handle = handle_value;
  1804. *prefix = prefix_value;
  1805. return 1;
  1806. error:
  1807. yaml_free(handle_value);
  1808. yaml_free(prefix_value);
  1809. return 0;
  1810. }
  1811. static int
  1812. yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
  1813. yaml_token_type_t type)
  1814. {
  1815. int length = 0;
  1816. yaml_mark_t start_mark, end_mark;
  1817. yaml_string_t string = NULL_STRING;
  1818. if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
  1819. /* Eat the indicator character. */
  1820. start_mark = parser->mark;
  1821. SKIP(parser);
  1822. /* Consume the value. */
  1823. if (!CACHE(parser, 1)) goto error;
  1824. while (IS_ALPHA(parser->buffer)) {
  1825. if (!READ(parser, string)) goto error;
  1826. if (!CACHE(parser, 1)) goto error;
  1827. length ++;
  1828. }
  1829. end_mark = parser->mark;
  1830. /*
  1831. * Check if length of the anchor is greater than 0 and it is followed by
  1832. * a whitespace character or one of the indicators:
  1833. *
  1834. * '?', ':', ',', ']', '}', '%', '@', '`'.
  1835. */
  1836. if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
  1837. || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
  1838. || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
  1839. || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
  1840. || CHECK(parser->buffer, '`'))) {
  1841. yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
  1842. "while scanning an anchor" : "while scanning an alias", start_mark,
  1843. "did not find expected alphabetic or numeric character");
  1844. goto error;
  1845. }
  1846. /* Create a token. */
  1847. if (type == YAML_ANCHOR_TOKEN) {
  1848. ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
  1849. }
  1850. else {
  1851. ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
  1852. }
  1853. return 1;
  1854. error:
  1855. STRING_DEL(parser, string);
  1856. return 0;
  1857. }
  1858. /*
  1859. * Scan a TAG token.
  1860. */
  1861. static int
  1862. yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
  1863. {
  1864. yaml_char_t *handle = NULL;
  1865. yaml_char_t *suffix = NULL;
  1866. yaml_mark_t start_mark, end_mark;
  1867. start_mark = parser->mark;
  1868. /* Check if the tag is in the canonical form. */
  1869. if (!CACHE(parser, 2)) goto error;
  1870. if (CHECK_AT(parser->buffer, '<', 1))
  1871. {
  1872. /* Set the handle to '' */
  1873. handle = yaml_malloc(1);
  1874. if (!handle) goto error;
  1875. handle[0] = '\0';
  1876. /* Eat '!<' */
  1877. SKIP(parser);
  1878. SKIP(parser);
  1879. /* Consume the tag value. */
  1880. if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
  1881. goto error;
  1882. /* Check for '>' and eat it. */
  1883. if (!CHECK(parser->buffer, '>')) {
  1884. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1885. start_mark, "did not find the expected '>'");
  1886. goto error;
  1887. }
  1888. SKIP(parser);
  1889. }
  1890. else
  1891. {
  1892. /* The tag has either the '!suffix' or the '!handle!suffix' form. */
  1893. /* First, try to scan a handle. */
  1894. if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
  1895. goto error;
  1896. /* Check if it is, indeed, handle. */
  1897. if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
  1898. {
  1899. /* Scan the suffix now. */
  1900. if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
  1901. goto error;