PageRenderTime 68ms CodeModel.GetById 39ms RepoModel.GetById 0ms app.codeStats 0ms

/libxml++/parsers/saxparser.cc

https://bitbucket.org/khurley/libxml
C++ | 536 lines | 420 code | 98 blank | 18 comment | 17 complexity | 4d1692912eeedee14478eb0b76fcbf43 MD5 | raw file
  1. /* xml++.cc
  2. * libxml++ and this file are copyright (C) 2000 by Ari Johnson, and
  3. * are covered by the GNU Lesser General Public License, which should be
  4. * included with libxml++ as the file COPYING.
  5. *
  6. * 2002/01/05 Valentin Rusu - fixed some potential buffer overruns
  7. * 2002/01/21 Valentin Rusu - added CDATA handlers
  8. */
  9. #ifdef USE_PRECOMPILED
  10. #include "libxml++/libxml++.h"
  11. #else
  12. #include "libxml++/parsers/saxparser.h"
  13. #include "libxml++/nodes/element.h"
  14. #include "libxml++/keepblanks.h"
  15. #include <libxml/parser.h>
  16. #include <libxml/parserInternals.h> // for xmlCreateFileParserCtxt
  17. #include <cstdarg> //For va_list.
  18. #include <cassert> // for assert()
  19. #include <iostream>
  20. #endif
  21. namespace xmlpp {
  22. struct SaxParserCallback
  23. {
  24. static xmlEntityPtr get_entity(void* context, const xmlChar* name);
  25. static void entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content);
  26. static void start_document(void* context);
  27. static void end_document(void* context);
  28. static void start_element(void* context, const xmlChar* name, const xmlChar** p);
  29. static void end_element(void* context, const xmlChar* name);
  30. static void characters(void* context, const xmlChar* ch, int len);
  31. static void comment(void* context, const xmlChar* value);
  32. static void warning(void* context, const char* fmt, ...);
  33. static void error(void* context, const char* fmt, ...);
  34. static void fatal_error(void* context, const char* fmt, ...);
  35. static void cdata_block(void* context, const xmlChar* value, int len);
  36. static void internal_subset(void* context, const xmlChar* name, const xmlChar*publicId, const xmlChar*systemId);
  37. };
  38. SaxParser::SaxParser(bool use_get_entity)
  39. : sax_handler_( new _xmlSAXHandler )
  40. {
  41. xmlSAXHandler temp = {
  42. SaxParserCallback::internal_subset,
  43. 0, // isStandalone
  44. 0, // hasInternalSubset
  45. 0, // hasExternalSubset
  46. 0, // resolveEntity
  47. use_get_entity ? SaxParserCallback::get_entity : 0, // getEntity
  48. SaxParserCallback::entity_decl, // entityDecl
  49. 0, // notationDecl
  50. 0, // attributeDecl
  51. 0, // elementDecl
  52. 0, // unparsedEntityDecl
  53. 0, // setDocumentLocator
  54. SaxParserCallback::start_document, // startDocument
  55. SaxParserCallback::end_document, // endDocument
  56. SaxParserCallback::start_element, // startElement
  57. SaxParserCallback::end_element, // endElement
  58. 0, // reference
  59. SaxParserCallback::characters, // characters
  60. 0, // ignorableWhitespace
  61. 0, // processingInstruction
  62. SaxParserCallback::comment, // comment
  63. SaxParserCallback::warning, // warning
  64. SaxParserCallback::error, // error
  65. SaxParserCallback::fatal_error, // fatalError
  66. 0, // getParameterEntity
  67. SaxParserCallback::cdata_block, // cdataBlock
  68. 0 // externalSubset
  69. };
  70. *sax_handler_ = temp;
  71. }
  72. SaxParser::~SaxParser()
  73. {
  74. release_underlying();
  75. }
  76. xmlEntityPtr SaxParser::on_get_entity(const uniStr::ustring& name)
  77. {
  78. return entity_resolver_doc_.get_entity(name);
  79. }
  80. void SaxParser::on_entity_declaration(const uniStr::ustring& name, XmlEntityType type, const uniStr::ustring& publicId, const uniStr::ustring& systemId, const uniStr::ustring& content)
  81. {
  82. entity_resolver_doc_.set_entity_declaration(name, type, publicId, systemId, content);
  83. }
  84. void SaxParser::on_start_document()
  85. {
  86. }
  87. void SaxParser::on_end_document()
  88. {
  89. }
  90. void SaxParser::on_start_element(const uniStr::ustring& name, const AttributeList& attributes)
  91. {
  92. }
  93. void SaxParser::on_end_element(const uniStr::ustring& name)
  94. {
  95. }
  96. void SaxParser::on_characters(const uniStr::ustring& text)
  97. {
  98. }
  99. void SaxParser::on_comment(const uniStr::ustring& text)
  100. {
  101. }
  102. void SaxParser::on_warning(const uniStr::ustring& text)
  103. {
  104. }
  105. void SaxParser::on_error(const uniStr::ustring& text)
  106. {
  107. }
  108. void SaxParser::on_fatal_error(const uniStr::ustring& text)
  109. {
  110. throw parse_error("Fatal error: " + text);
  111. }
  112. void SaxParser::on_cdata_block(const uniStr::ustring& text)
  113. {
  114. }
  115. void SaxParser::on_internal_subset(const uniStr::ustring& name,
  116. const uniStr::ustring& publicId,
  117. const uniStr::ustring& systemId)
  118. {
  119. entity_resolver_doc_.set_internal_subset(name, publicId, systemId);
  120. }
  121. // implementation of this function is inspired by the SAX documentation by James Henstridge.
  122. // (http://www.daa.com.au/~james/gnome/xml-sax/implementing.html)
  123. void SaxParser::parse()
  124. {
  125. if(!context_)
  126. throw internal_error("Parse context not created.");
  127. xmlSAXHandlerPtr old_sax = context_->sax;
  128. context_->sax = sax_handler_.get();
  129. initialize_context();
  130. xmlParseDocument(context_);
  131. context_->sax = old_sax;
  132. if( (! context_->wellFormed)
  133. && (! exception_) )
  134. exception_ = new parse_error("Document not well-formed");
  135. release_underlying();
  136. check_for_exception();
  137. }
  138. void SaxParser::parse_file(const uniStr::ustring& filename)
  139. {
  140. if(context_)
  141. throw parse_error("Attempt to start a second parse while a parse is in progress.");
  142. KeepBlanks k(KeepBlanks::Default);
  143. context_ = xmlCreateFileParserCtxt(filename.c_str());
  144. parse();
  145. }
  146. void SaxParser::parse_memory_raw(const unsigned char* contents, size_type bytes_count)
  147. {
  148. if(context_)
  149. throw parse_error("Attempt to start a second parse while a parse is in progress.");
  150. KeepBlanks k(KeepBlanks::Default);
  151. context_ = xmlCreateMemoryParserCtxt((const char*)contents, bytes_count);
  152. parse();
  153. }
  154. void SaxParser::parse_memory(const uniStr::ustring& contents)
  155. {
  156. parse_memory_raw((const unsigned char*)contents.c_str(), contents.bytes());
  157. }
  158. void SaxParser::parse_stream(std::istream& in)
  159. {
  160. if(context_)
  161. throw parse_error("Attempt to start a second parse while a parse is in progress.");
  162. KeepBlanks k(KeepBlanks::Default);
  163. context_ = xmlCreatePushParserCtxt(
  164. sax_handler_.get(),
  165. 0, // user_data
  166. 0,
  167. 0,
  168. ""); // This should be the filename. I don't know if it is a problem to leave it empty.
  169. initialize_context();
  170. //TODO: Shouldn't we use a uniStr::ustring here, and some alternative to std::getline()?
  171. std::string line;
  172. while( ( ! exception_ )
  173. && std::getline(in, line))
  174. {
  175. // since getline does not get the line separator, we have to add it since the parser care
  176. // about layout in certain cases.
  177. line += '\n';
  178. xmlParseChunk(context_, line.c_str(), line.size() /* This is a std::string, not a ustring, so this is the number of bytes. */, 0 /* don't terminate */);
  179. }
  180. if( ! exception_ )
  181. xmlParseChunk(context_, 0 /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */); //This seems to be called just to terminate parsing.
  182. release_underlying();
  183. check_for_exception();
  184. }
  185. void SaxParser::parse_chunk(const uniStr::ustring& chunk)
  186. {
  187. KeepBlanks k(KeepBlanks::Default);
  188. if(!context_)
  189. {
  190. context_ = xmlCreatePushParserCtxt(
  191. sax_handler_.get(),
  192. 0, // user_data
  193. 0,
  194. 0,
  195. ""); // This should be the filename. I don't know if it is a problem to let it empty
  196. initialize_context();
  197. }
  198. if(!exception_)
  199. xmlParseChunk(context_, chunk.c_str(), chunk.bytes(), 0 /* don't terminate */);
  200. check_for_exception();
  201. }
  202. void SaxParser::release_underlying()
  203. {
  204. Parser::release_underlying();
  205. }
  206. void SaxParser::finish_chunk_parsing()
  207. {
  208. if(!context_)
  209. {
  210. context_ = xmlCreatePushParserCtxt(
  211. sax_handler_.get(),
  212. 0, // this, // user_data
  213. 0,
  214. 0,
  215. ""); // This should be the filename. I don't know if it is a problem to leave it empty
  216. }
  217. if(!exception_)
  218. xmlParseChunk(context_, 0 /* chunk */, 0 /* size */, 1 /* terminate (1 or 0) */); //This seems to be called just to terminate parsing.
  219. release_underlying();
  220. check_for_exception();
  221. }
  222. xmlEntityPtr SaxParserCallback::get_entity(void* context, const xmlChar* name)
  223. {
  224. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  225. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  226. xmlEntityPtr result = 0;
  227. try
  228. {
  229. result = parser->on_get_entity((const char*)name);
  230. }
  231. catch(const exception& e)
  232. {
  233. parser->handleException(e);
  234. }
  235. return result;
  236. }
  237. void SaxParserCallback::entity_decl(void* context, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
  238. {
  239. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  240. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  241. try
  242. {
  243. parser->on_entity_declaration(
  244. ( name ? uniStr::ustring((const char*)name) : ""),
  245. static_cast<XmlEntityType>(type),
  246. ( publicId ? uniStr::ustring((const char*)publicId) : ""),
  247. ( systemId ? uniStr::ustring((const char*)systemId) : ""),
  248. ( content ? uniStr::ustring((const char*)content) : "") );
  249. }
  250. catch(const exception& e)
  251. {
  252. parser->handleException(e);
  253. }
  254. }
  255. void SaxParserCallback::start_document(void* context)
  256. {
  257. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  258. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  259. try
  260. {
  261. parser->on_start_document();
  262. }
  263. catch(const exception& e)
  264. {
  265. parser->handleException(e);
  266. }
  267. }
  268. void SaxParserCallback::end_document(void* context)
  269. {
  270. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  271. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  272. if(parser->exception_)
  273. return;
  274. try
  275. {
  276. parser->on_end_document();
  277. }
  278. catch(const exception& e)
  279. {
  280. parser->handleException(e);
  281. }
  282. }
  283. void SaxParserCallback::start_element(void* context,
  284. const xmlChar* name,
  285. const xmlChar** p)
  286. {
  287. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  288. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  289. SaxParser::AttributeList attributes;
  290. if(p)
  291. for(const xmlChar** cur = p; cur && *cur; cur += 2)
  292. attributes.push_back(
  293. SaxParser::Attribute( (char*)*cur, (char*)*(cur + 1) ));
  294. try
  295. {
  296. parser->on_start_element(uniStr::ustring((const char*) name), attributes);
  297. }
  298. catch(const exception& e)
  299. {
  300. parser->handleException(e);
  301. }
  302. }
  303. void SaxParserCallback::end_element(void* context, const xmlChar* name)
  304. {
  305. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  306. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  307. try
  308. {
  309. parser->on_end_element(uniStr::ustring((const char*) name));
  310. }
  311. catch(const exception& e)
  312. {
  313. parser->handleException(e);
  314. }
  315. }
  316. void SaxParserCallback::characters(void * context, const xmlChar* ch, int len)
  317. {
  318. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  319. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  320. try
  321. {
  322. // Here we force the use of uniStr::ustring::ustring( InputIterator begin, InputIterator end )
  323. // instead of uniStr::ustring::ustring( const char*, size_type ) because it
  324. // expects the length of the string in characters, not in bytes.
  325. parser->on_cdata_block(
  326. uniStr::ustring(
  327. reinterpret_cast<const char *>(ch),
  328. reinterpret_cast<const char *>(ch + len) ) );
  329. }
  330. catch(const exception& e)
  331. {
  332. parser->handleException(e);
  333. }
  334. }
  335. void SaxParserCallback::comment(void* context, const xmlChar* value)
  336. {
  337. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  338. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  339. try
  340. {
  341. parser->on_comment(uniStr::ustring((const char*) value));
  342. }
  343. catch(const exception& e)
  344. {
  345. parser->handleException(e);
  346. }
  347. }
  348. void SaxParserCallback::warning(void* context, const char* fmt, ...)
  349. {
  350. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  351. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  352. va_list arg;
  353. char buff[1024]; //TODO: Larger/Shared
  354. va_start(arg, fmt);
  355. vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
  356. va_end(arg);
  357. try
  358. {
  359. parser->on_warning(uniStr::ustring(buff));
  360. }
  361. catch(const exception& e)
  362. {
  363. parser->handleException(e);
  364. }
  365. }
  366. void SaxParserCallback::error(void* context, const char* fmt, ...)
  367. {
  368. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  369. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  370. va_list arg;
  371. char buff[1024]; //TODO: Larger/Shared
  372. if(parser->exception_)
  373. return;
  374. va_start(arg, fmt);
  375. vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
  376. va_end(arg);
  377. try
  378. {
  379. parser->on_error(uniStr::ustring(buff));
  380. }
  381. catch(const exception& e)
  382. {
  383. parser->handleException(e);
  384. }
  385. }
  386. void SaxParserCallback::fatal_error(void* context, const char* fmt, ...)
  387. {
  388. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  389. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  390. va_list arg;
  391. char buff[1024]; //TODO: Larger/Shared
  392. va_start(arg, fmt);
  393. vsnprintf(buff, sizeof(buff)/sizeof(buff[0]), fmt, arg);
  394. va_end(arg);
  395. try
  396. {
  397. parser->on_fatal_error(uniStr::ustring(buff));
  398. }
  399. catch(const exception& e)
  400. {
  401. parser->handleException(e);
  402. }
  403. }
  404. void SaxParserCallback::cdata_block(void* context, const xmlChar* value, int len)
  405. {
  406. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  407. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  408. try
  409. {
  410. // Here we force the use of uniStr::ustring::ustring( InputIterator begin, InputIterator end )
  411. // see comments in SaxParserCallback::characters
  412. parser->on_cdata_block(
  413. uniStr::ustring(
  414. reinterpret_cast<const char *>(value),
  415. reinterpret_cast<const char *>(value + len) ) );
  416. }
  417. catch(const exception& e)
  418. {
  419. parser->handleException(e);
  420. }
  421. }
  422. void SaxParserCallback::internal_subset(void* context, const xmlChar* name,
  423. const xmlChar* publicId, const xmlChar* systemId)
  424. {
  425. _xmlParserCtxt* the_context = static_cast<_xmlParserCtxt*>(context);
  426. SaxParser* parser = static_cast<SaxParser*>(the_context->_private);
  427. try
  428. {
  429. uniStr::ustring pid = publicId ? uniStr::ustring((const char*) publicId) : "";
  430. uniStr::ustring sid = systemId ? uniStr::ustring((const char*) systemId) : "";
  431. parser->on_internal_subset( uniStr::ustring((const char*) name), pid, sid);
  432. }
  433. catch(const exception& e)
  434. {
  435. parser->handleException(e);
  436. }
  437. }
  438. } // namespace xmlpp