/oxine-0.7.1/src/xmlparser.c

# · C · 680 lines · 550 code · 74 blank · 56 comment · 61 complexity · 47e68404bdf8b0409f9277880371ae3a MD5 · raw file

  1. /*
  2. * Copyright (C) 2002-2003,2007 the xine project
  3. *
  4. * This file is part of xine, a free video player.
  5. * This file is part of oxine a free media player.
  6. *
  7. * The xine-lib XML parser is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Library General Public License as
  9. * published by the Free Software Foundation; either version 2 of the
  10. * License, or (at your option) any later version.
  11. *
  12. * The xine-lib XML parser is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Library General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Library General Public
  18. * License along with the Gnome Library; see the file COPYING.LIB. If not,
  19. * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20. * Boston, MA 02111-1307, USA.
  21. *
  22. * $Id: xmlparser.c 2609 2007-07-25 10:35:51Z mschwerin $
  23. *
  24. */
  25. #include <unistd.h>
  26. #include <stdio.h>
  27. #include <stdlib.h>
  28. #include <string.h>
  29. #include <stdarg.h>
  30. #include <ctype.h>
  31. #define lprintf(...)
  32. #include "config.h"
  33. #include "heap.h"
  34. #include "logger.h"
  35. #include "xmllexer.h"
  36. #include "xmlparser.h"
  /* Size in bytes used for the token and name buffers of the parser.
   * The replacement list is parenthesized so that the macro expands
   * correctly inside larger expressions such as `x / TOKEN_SIZE`. */
  #define TOKEN_SIZE (4 * 1024)
  /* Nesting depth at which the recursive node parser gives up. */
  #define MAX_RECURSION 10
  /* private global variables */
  /* Parser mode stored by xml_parser_init(); it is compared against
   * XML_PARSER_CASE_INSENSITIVE whenever a node or property name is read. */
  static int xml_parser_mode;
  41. /* private functions */
  /*
   * Convert the NUL-terminated string `str` to upper case in place.
   *
   * Every byte is passed to toupper() as an unsigned char: handing a
   * negative plain-char value (any byte >= 0x80 where char is signed)
   * to a <ctype.h> function is undefined behaviour.
   *
   * Returns `str` so that the call can be chained.
   */
  static char *
  strtoupper (char *str)
  {
    char *p;

    for (p = str; *p != '\0'; p++) {
      *p = (char) toupper ((unsigned char) *p);
    }
    return str;
  }
  52. static xml_node_t *
  53. new_xml_node (void)
  54. {
  55. xml_node_t *new_node;
  56. new_node = (xml_node_t *) ho_malloc (sizeof (xml_node_t));
  57. new_node->name = NULL;
  58. new_node->data = NULL;
  59. new_node->props = NULL;
  60. new_node->child = NULL;
  61. new_node->next = NULL;
  62. return new_node;
  63. }
  64. static void
  65. free_xml_node (xml_node_t * node)
  66. {
  67. ho_free (node->name);
  68. ho_free (node->data);
  69. ho_free (node);
  70. }
  71. static xml_property_t *
  72. new_xml_property (void)
  73. {
  74. xml_property_t *new_property;
  75. new_property = (xml_property_t *) ho_malloc (sizeof (xml_property_t));
  76. new_property->name = NULL;
  77. new_property->value = NULL;
  78. new_property->next = NULL;
  79. return new_property;
  80. }
  81. static void
  82. free_xml_property (xml_property_t * property)
  83. {
  84. ho_free (property->name);
  85. ho_free (property->value);
  86. ho_free (property);
  87. }
  88. void
  89. xml_parser_init (const char *buf, int size, int mode)
  90. {
  91. lexer_init (buf, size);
  92. xml_parser_mode = mode;
  93. }
  94. static void
  95. xml_parser_free_props (xml_property_t * current_property)
  96. {
  97. if (current_property) {
  98. if (!current_property->next) {
  99. free_xml_property (current_property);
  100. }
  101. else {
  102. xml_parser_free_props (current_property->next);
  103. free_xml_property (current_property);
  104. }
  105. }
  106. }
  107. static void
  108. xml_parser_free_tree_rec (xml_node_t * current_node, int free_next)
  109. {
  110. lprintf ("xml_parser_free_tree_rec: %s\n", current_node->name);
  111. if (current_node) {
  112. /* properties */
  113. if (current_node->props) {
  114. xml_parser_free_props (current_node->props);
  115. }
  116. /* child nodes */
  117. if (current_node->child) {
  118. lprintf ("xml_parser_free_tree_rec: child\n");
  119. xml_parser_free_tree_rec (current_node->child, 1);
  120. }
  121. /* next nodes */
  122. if (free_next) {
  123. xml_node_t *next_node = current_node->next;
  124. xml_node_t *next_next_node;
  125. while (next_node) {
  126. next_next_node = next_node->next;
  127. lprintf ("xml_parser_free_tree_rec: next\n");
  128. xml_parser_free_tree_rec (next_node, 0);
  129. next_node = next_next_node;
  130. }
  131. }
  132. free_xml_node (current_node);
  133. }
  134. }
  135. void
  136. xml_parser_free_tree (xml_node_t * current_node)
  137. {
  138. lprintf ("xml_parser_free_tree\n");
  139. xml_parser_free_tree_rec (current_node, 1);
  140. }
  141. #define STATE_IDLE 0
  142. #define STATE_NODE 1
  143. #define STATE_COMMENT 7
  144. static int xml_parser_get_node (xml_node_t * current_node,
  145. char *root_name, int rec);
  146. static int
  147. _xml_parser_get_node (char **tok_buffer, int *tok_buffer_size,
  148. xml_node_t * current_node, char *root_name, int rec)
  149. {
  150. char *tok = *tok_buffer;
  151. char property_name[TOKEN_SIZE];
  152. char node_name[TOKEN_SIZE];
  153. int state = STATE_IDLE;
  154. int lexer_res = 0;
  155. int parse_res = 0;
  156. int bypass_get_token = 0;
  157. xml_node_t *subtree = NULL;
  158. xml_node_t *current_subtree = NULL;
  159. xml_property_t *current_property = NULL;
  160. xml_property_t *properties = NULL;
  161. if (rec >= MAX_RECURSION) {
  162. error ("The maximum recursion depth "
  163. "%d has been reached.", MAX_RECURSION);
  164. return -1;
  165. }
  166. while (1) {
  167. if (!bypass_get_token) {
  168. lexer_res = lexer_get_token (tok_buffer, tok_buffer_size);
  169. if (lexer_res == T_ERROR) {
  170. error ("Lexer error.");
  171. return -1;
  172. }
  173. tok = *tok_buffer;
  174. }
  175. bypass_get_token = 0;
  176. lprintf ("info: %d - %d : '%s'\n", state, lexer_res, tok);
  177. switch (state) {
  178. case STATE_IDLE:
  179. switch (lexer_res) {
  180. case (T_EOL):
  181. case (T_SEPAR):
  182. /* do nothing */
  183. break;
  184. case (T_EOF):
  185. /* normal end */
  186. return 0;
  187. break;
  188. case (T_M_START_1):
  189. state = STATE_NODE;
  190. break;
  191. case (T_M_START_2):
  192. state = 3;
  193. break;
  194. case (T_C_START):
  195. state = STATE_COMMENT;
  196. break;
  197. case (T_TI_START):
  198. state = 8;
  199. break;
  200. case (T_DOCTYPE_START):
  201. state = 9;
  202. break;
  203. case (T_DATA):
  204. /* current data */
  205. ho_free (current_node->data);
  206. current_node->data = lexer_decode_entities (tok);
  207. lprintf ("info: node data : %s\n", current_node->data);
  208. break;
  209. default:
  210. error ("Found unexpected token '%s' (state %d).", tok, state);
  211. return -1;
  212. break;
  213. }
  214. break;
  215. case STATE_NODE:
  216. switch (lexer_res) {
  217. case (T_IDENT):
  218. properties = NULL;
  219. current_property = NULL;
  220. /* save node name */
  221. if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
  222. strtoupper (tok);
  223. }
  224. strcpy (node_name, tok);
  225. state = 2;
  226. lprintf ("info: current node name \"%s\"\n", node_name);
  227. break;
  228. default:
  229. error ("Found unexpected token '%s' (state %d).", tok, state);
  230. return -1;
  231. break;
  232. }
  233. break;
  234. case 2:
  235. switch (lexer_res) {
  236. case (T_EOL):
  237. case (T_SEPAR):
  238. /* nothing */
  239. break;
  240. case (T_M_STOP_1):
  241. /* new subtree */
  242. subtree = new_xml_node ();
  243. /* set node name */
  244. subtree->name = ho_strdup (node_name);
  245. /* set node propertys */
  246. subtree->props = properties;
  247. lprintf ("info: rec %d new subtree %s\n", rec, node_name);
  248. parse_res = xml_parser_get_node (subtree, node_name, rec + 1);
  249. if (parse_res != 0) {
  250. return parse_res;
  251. }
  252. if (current_subtree == NULL) {
  253. current_node->child = subtree;
  254. current_subtree = subtree;
  255. }
  256. else {
  257. current_subtree->next = subtree;
  258. current_subtree = subtree;
  259. }
  260. state = STATE_IDLE;
  261. break;
  262. case (T_M_STOP_2):
  263. /* new leaf */
  264. /* new subtree */
  265. subtree = new_xml_node ();
  266. /* set node name */
  267. subtree->name = ho_strdup (node_name);
  268. /* set node propertys */
  269. subtree->props = properties;
  270. lprintf ("info: rec %d new subtree %s\n", rec, node_name);
  271. if (current_subtree == NULL) {
  272. current_node->child = subtree;
  273. current_subtree = subtree;
  274. }
  275. else {
  276. current_subtree->next = subtree;
  277. current_subtree = subtree;
  278. }
  279. state = STATE_IDLE;
  280. break;
  281. case (T_IDENT):
  282. /* save property name */
  283. if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
  284. strtoupper (tok);
  285. }
  286. strcpy (property_name, tok);
  287. state = 5;
  288. lprintf ("info: current property name \"%s\"\n",
  289. property_name);
  290. break;
  291. default:
  292. error ("Found unexpected token '%s' (state %d).", tok, state);
  293. return -1;
  294. break;
  295. }
  296. break;
  297. case 3:
  298. switch (lexer_res) {
  299. case (T_IDENT):
  300. /* must be equal to root_name */
  301. if (xml_parser_mode == XML_PARSER_CASE_INSENSITIVE) {
  302. strtoupper (tok);
  303. }
  304. if (strcmp (tok, root_name) == 0) {
  305. state = 4;
  306. }
  307. else {
  308. error ("Found unexpected token '%s'. "
  309. "Expected token '%s' (state %d).",
  310. tok, root_name, state);
  311. return -1;
  312. }
  313. break;
  314. default:
  315. error ("Found unexpected token '%s' (state %d).", tok, state);
  316. return -1;
  317. break;
  318. }
  319. break;
  320. case 4:
  321. /* > expected */
  322. switch (lexer_res) {
  323. case (T_M_STOP_1):
  324. return 0;
  325. break;
  326. default:
  327. error ("Found unexpected token '%s' (state %d).", tok, state);
  328. return -1;
  329. break;
  330. }
  331. break;
  332. case 5:
  333. /* = or > or ident or separator expected */
  334. switch (lexer_res) {
  335. case (T_EOL):
  336. case (T_SEPAR):
  337. /* do nothing */
  338. break;
  339. case (T_EQUAL):
  340. state = 6;
  341. break;
  342. case (T_IDENT):
  343. /* jump to state 2 without fetching a new token */
  344. bypass_get_token = 1;
  345. state = 2;
  346. break;
  347. case (T_M_STOP_1):
  348. /* add a new property without value */
  349. if (current_property == NULL) {
  350. properties = new_xml_property ();
  351. current_property = properties;
  352. }
  353. else {
  354. current_property->next = new_xml_property ();
  355. current_property = current_property->next;
  356. }
  357. current_property->name = ho_strdup (property_name);
  358. lprintf ("info: new property %s\n", current_property->name);
  359. /* jump to state 2 without fetching a new token */
  360. bypass_get_token = 1;
  361. state = 2;
  362. break;
  363. default:
  364. error ("Found unexpected token '%s' (state %d).", tok, state);
  365. return -1;
  366. break;
  367. }
  368. break;
  369. case 6:
  370. /* string, ident or separator expected */
  371. switch (lexer_res) {
  372. case (T_EOL):
  373. case (T_SEPAR):
  374. /* do nothing */
  375. break;
  376. case (T_STRING):
  377. case (T_IDENT):
  378. /* add a new property */
  379. if (current_property == NULL) {
  380. properties = new_xml_property ();
  381. current_property = properties;
  382. }
  383. else {
  384. current_property->next = new_xml_property ();
  385. current_property = current_property->next;
  386. }
  387. current_property->name = ho_strdup (property_name);
  388. current_property->value = lexer_decode_entities (tok);
  389. lprintf ("info: new property %s=%s\n",
  390. current_property->name, current_property->value);
  391. state = 2;
  392. break;
  393. default:
  394. error ("Found unexpected token '%s' (state %d).", tok, state);
  395. return -1;
  396. break;
  397. }
  398. break;
  399. case STATE_COMMENT:
  400. /* --> expected */
  401. switch (lexer_res) {
  402. case (T_C_STOP):
  403. state = STATE_IDLE;
  404. break;
  405. default:
  406. state = STATE_COMMENT;
  407. break;
  408. }
  409. break;
  410. case 8:
  411. /* ?> expected */
  412. switch (lexer_res) {
  413. case (T_TI_STOP):
  414. state = 0;
  415. break;
  416. default:
  417. state = 8;
  418. break;
  419. }
  420. break;
  421. case 9:
  422. /* > expected */
  423. switch (lexer_res) {
  424. case (T_M_STOP_1):
  425. state = 0;
  426. break;
  427. default:
  428. state = 9;
  429. break;
  430. }
  431. break;
  432. default:
  433. error ("We are in an unknown parser state (state=%d).", state);
  434. return -1;
  435. }
  436. }
  437. }
  438. static int
  439. xml_parser_get_node (xml_node_t * current_node, char *root_name, int rec)
  440. {
  441. int res = 0;
  442. int tok_buffer_size = TOKEN_SIZE;
  443. char *tok_buffer = ho_malloc (tok_buffer_size);
  444. res = _xml_parser_get_node (&tok_buffer, &tok_buffer_size,
  445. current_node, root_name, rec);
  446. ho_free (tok_buffer);
  447. return res;
  448. }
  449. int
  450. xml_parser_build_tree (xml_node_t ** root_node)
  451. {
  452. xml_node_t *tmp_node;
  453. int res;
  454. tmp_node = new_xml_node ();
  455. res = xml_parser_get_node (tmp_node, "", 0);
  456. if ((tmp_node->child) && (!tmp_node->child->next)) {
  457. *root_node = tmp_node->child;
  458. free_xml_node (tmp_node);
  459. res = 0;
  460. }
  461. else {
  462. *root_node = NULL;
  463. xml_parser_free_tree (tmp_node);
  464. res = -1;
  465. }
  466. return res;
  467. }
  468. const char *
  469. xml_parser_get_property (const xml_node_t * node, const char *name)
  470. {
  471. xml_property_t *prop;
  472. prop = node->props;
  473. while (prop) {
  474. lprintf ("looking for %s in %s\n", name, prop->name);
  475. if (!strcasecmp (prop->name, name)) {
  476. lprintf ("found it. value=%s\n", prop->value);
  477. return prop->value;
  478. }
  479. prop = prop->next;
  480. }
  481. return NULL;
  482. }
  483. int
  484. xml_parser_get_property_int (const xml_node_t * node, const char *name,
  485. int def_value)
  486. {
  487. const char *v;
  488. int ret;
  489. v = xml_parser_get_property (node, name);
  490. if (!v) {
  491. return def_value;
  492. }
  493. if (sscanf (v, "%d", &ret) != 1) {
  494. return def_value;
  495. }
  496. else {
  497. return ret;
  498. }
  499. }
  500. int
  501. xml_parser_get_property_bool (const xml_node_t * node, const char *name,
  502. int def_value)
  503. {
  504. const char *v;
  505. v = xml_parser_get_property (node, name);
  506. if (!v) {
  507. return def_value;
  508. }
  509. return !strcasecmp (v, "true");
  510. }
  511. static int
  512. xml_escape_string_internal (char *buf, const char *s,
  513. xml_escape_quote_t quote_type)
  514. {
  515. int c;
  516. int length = 0;
  517. int sl = buf ? 8 : 0;
  518. /* calculate max required buffer size */
  519. while ((c = *s++ & 0xFF))
  520. switch (c) {
  521. case '"':
  522. if (quote_type != XML_ESCAPE_DOUBLE_QUOTE)
  523. goto literal;
  524. length += snprintf (buf + length, sl, "&quot;");
  525. break;
  526. case '\'':
  527. if (quote_type != XML_ESCAPE_SINGLE_QUOTE)
  528. goto literal;
  529. length += snprintf (buf + length, sl, "&apos;");
  530. break;
  531. case '&':
  532. length += snprintf (buf + length, sl, "&amp;");
  533. break;
  534. case '<':
  535. length += snprintf (buf + length, sl, "&lt;");
  536. break;
  537. case '>':
  538. length += snprintf (buf + length, sl, "&gt;");
  539. break;
  540. case 127:
  541. length += snprintf (buf + length, sl, "&#127;");
  542. break;
  543. case '\t':
  544. case '\n':
  545. literal:if (buf)
  546. buf[length] = c;
  547. ++length;
  548. break;
  549. default:
  550. if (c >= ' ')
  551. goto literal;
  552. length += snprintf (buf + length, sl, "&#%d;", c);
  553. break;
  554. }
  555. if (buf)
  556. buf[length] = 0;
  557. return length + 1;
  558. }
  559. char *
  560. xml_escape_string (const char *s, xml_escape_quote_t quote_type)
  561. {
  562. char *buf = ho_malloc (xml_escape_string_internal (NULL, s, quote_type));
  563. return buf ? (xml_escape_string_internal (buf, s, quote_type),
  564. buf) : NULL;
  565. }
  566. static void
  567. xml_parser_dump_node (const xml_node_t * node, int indent)
  568. {
  569. xml_property_t *p;
  570. xml_node_t *n;
  571. int l;
  572. printf ("%*s<%s ", indent, "", node->name);
  573. l = strlen (node->name);
  574. p = node->props;
  575. while (p) {
  576. char *value = xml_escape_string (p->value, XML_ESCAPE_SINGLE_QUOTE);
  577. printf ("%s='%s'", p->name, value);
  578. ho_free (value);
  579. p = p->next;
  580. if (p) {
  581. printf ("\n%*s", indent + 2 + l, "");
  582. }
  583. }
  584. printf (">\n");
  585. n = node->child;
  586. while (n) {
  587. xml_parser_dump_node (n, indent + 2);
  588. n = n->next;
  589. }
  590. printf ("%*s</%s>\n", indent, "", node->name);
  591. }
  592. void
  593. xml_parser_dump_tree (const xml_node_t * node)
  594. {
  595. xml_parser_dump_node (node, 0);
  596. }