PageRenderTime 60ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/ATF2/control-software/epics-3.14.8/extensions/src/ChannelArchiver/ThirdParty/xerces-c-src2_4_0/src/xercesc/internal/DGXMLScanner.cpp

http://atf2flightsim.googlecode.com/
C++ | 1901 lines | 1257 code | 208 blank | 436 comment | 216 complexity | afe27fa481f9f83afc4be0ce638918e9 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.0, IPL-1.0, BSD-3-Clause
  1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 2002, 2003 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution,
  20. * if any, must include the following acknowledgment:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgment may appear in the software itself,
  24. * if and wherever such third-party acknowledgments normally appear.
  25. *
  26. * 4. The names "Xerces" and "Apache Software Foundation" must
  27. * not be used to endorse or promote products derived from this
  28. * software without prior written permission. For written
  29. * permission, please contact apache\@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache",
  32. * nor may "Apache" appear in their name, without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation, and was
  51. * originally based on software copyright (c) 1999, International
  52. * Business Machines, Inc., http://www.ibm.com . For more information
  53. * on the Apache Software Foundation, please see
  54. * <http://www.apache.org/>.
  55. */
  56. /*
  57. * $Id: DGXMLScanner.cpp,v 1.1.1.1 2009/03/14 06:42:18 whitegr Exp $
  58. */
  59. // ---------------------------------------------------------------------------
  60. // Includes
  61. // ---------------------------------------------------------------------------
  62. #include <xercesc/internal/DGXMLScanner.hpp>
  63. #include <xercesc/util/Janitor.hpp>
  64. #include <xercesc/util/RuntimeException.hpp>
  65. #include <xercesc/util/UnexpectedEOFException.hpp>
  66. #include <xercesc/framework/URLInputSource.hpp>
  67. #include <xercesc/framework/LocalFileInputSource.hpp>
  68. #include <xercesc/framework/XMLDocumentHandler.hpp>
  69. #include <xercesc/framework/XMLEntityHandler.hpp>
  70. #include <xercesc/framework/XMLPScanToken.hpp>
  71. #include <xercesc/framework/XMLGrammarPool.hpp>
  72. #include <xercesc/framework/XMLDTDDescription.hpp>
  73. #include <xercesc/internal/EndOfEntityException.hpp>
  74. #include <xercesc/validators/common/GrammarResolver.hpp>
  75. #include <xercesc/validators/DTD/DocTypeHandler.hpp>
  76. #include <xercesc/validators/DTD/DTDScanner.hpp>
  77. #include <xercesc/validators/DTD/DTDValidator.hpp>
  78. #include <xercesc/util/OutOfMemoryException.hpp>
  79. #include <xercesc/util/XMLResourceIdentifier.hpp>
  80. #include <xercesc/util/HashPtr.hpp>
  81. XERCES_CPP_NAMESPACE_BEGIN
  82. // ---------------------------------------------------------------------------
  83. // DGXMLScanner: Constructors and Destructor
  84. // ---------------------------------------------------------------------------
  85. DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt
  86. , GrammarResolver* const grammarResolver
  87. , MemoryManager* const manager) :
  88. XMLScanner(valToAdopt, grammarResolver, manager)
  89. , fAttrNSList(0)
  90. , fDTDValidator(0)
  91. , fDTDGrammar(0)
  92. , fDTDElemNonDeclPool(0)
  93. , fElemCount(0)
  94. , fAttDefRegistry(0)
  95. , fUndeclaredAttrRegistry(0)
  96. {
  97. try
  98. {
  99. commonInit();
  100. if (valToAdopt)
  101. {
  102. if (!valToAdopt->handlesDTD())
  103. ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
  104. }
  105. else
  106. {
  107. fValidator = fDTDValidator;
  108. }
  109. }
  110. catch(const OutOfMemoryException&)
  111. {
  112. throw;
  113. }
  114. catch(...)
  115. {
  116. cleanUp();
  117. throw;
  118. }
  119. }
  120. DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler
  121. , DocTypeHandler* const docTypeHandler
  122. , XMLEntityHandler* const entityHandler
  123. , XMLErrorReporter* const errHandler
  124. , XMLValidator* const valToAdopt
  125. , GrammarResolver* const grammarResolver
  126. , MemoryManager* const manager) :
  127. XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
  128. , fAttrNSList(0)
  129. , fDTDValidator(0)
  130. , fDTDGrammar(0)
  131. , fDTDElemNonDeclPool(0)
  132. , fElemCount(0)
  133. , fAttDefRegistry(0)
  134. , fUndeclaredAttrRegistry(0)
  135. {
  136. try
  137. {
  138. commonInit();
  139. if (valToAdopt)
  140. {
  141. if (!valToAdopt->handlesDTD())
  142. ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
  143. }
  144. else
  145. {
  146. fValidator = fDTDValidator;
  147. }
  148. }
  149. catch(const OutOfMemoryException&)
  150. {
  151. throw;
  152. }
  153. catch(...)
  154. {
  155. cleanUp();
  156. throw;
  157. }
  158. }
  159. DGXMLScanner::~DGXMLScanner()
  160. {
  161. cleanUp();
  162. }
  163. // ---------------------------------------------------------------------------
  164. // DGXMLScanner: Getter methods
  165. // ---------------------------------------------------------------------------
  166. NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool()
  167. {
  168. if(!fGrammar)
  169. return 0;
  170. return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
  171. }
  172. const NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool() const
  173. {
  174. if(!fGrammar)
  175. return 0;
  176. return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
  177. }
  178. // ---------------------------------------------------------------------------
  179. // DGXMLScanner: Main entry point to scan a document
  180. // ---------------------------------------------------------------------------
  181. void DGXMLScanner::scanDocument(const InputSource& src)
  182. {
  183. // Bump up the sequence id for this parser instance. This will invalidate
  184. // any previous progressive scan tokens.
  185. fSequenceId++;
  186. try
  187. {
  188. // Reset the scanner and its plugged in stuff for a new run. This
  189. // resets all the data structures, creates the initial reader and
  190. // pushes it on the stack, and sets up the base document path.
  191. scanReset(src);
  192. // If we have a document handler, then call the start document
  193. if (fDocHandler)
  194. fDocHandler->startDocument();
  195. // Scan the prolog part, which is everything before the root element
  196. // including the DTD subsets.
  197. scanProlog();
  198. // If we got to the end of input, then its not a valid XML file.
  199. // Else, go on to scan the content.
  200. if (fReaderMgr.atEOF())
  201. {
  202. emitError(XMLErrs::EmptyMainEntity);
  203. }
  204. else
  205. {
  206. // Scan content, and tell it its not an external entity
  207. if (scanContent(false))
  208. {
  209. // Do post-parse validation if required
  210. if (fValidate)
  211. {
  212. // We handle ID reference semantics at this level since
  213. // its required by XML 1.0.
  214. checkIDRefs();
  215. // Then allow the validator to do any extra stuff it wants
  216. // fValidator->postParseValidation();
  217. }
  218. // That went ok, so scan for any miscellaneous stuff
  219. if (!fReaderMgr.atEOF())
  220. scanMiscellaneous();
  221. }
  222. }
  223. // If we have a document handler, then call the end document
  224. if (fDocHandler)
  225. fDocHandler->endDocument();
  226. // Reset the reader manager to close all files, sockets, etc...
  227. fReaderMgr.reset();
  228. }
  229. // NOTE:
  230. //
  231. // In all of the error processing below, the emitError() call MUST come
  232. // before the flush of the reader mgr, or it will fail because it tries
  233. // to find out the position in the XML source of the error.
  234. catch(const XMLErrs::Codes)
  235. {
  236. // This is a 'first fatal error' type exit, so reset and fall through
  237. fReaderMgr.reset();
  238. }
  239. catch(const XMLValid::Codes)
  240. {
  241. // This is a 'first fatal error' type exit, so reset and fall through
  242. fReaderMgr.reset();
  243. }
  244. catch(const XMLException& excToCatch)
  245. {
  246. // Emit the error and catch any user exception thrown from here. Make
  247. // sure in all cases we flush the reader manager.
  248. fInException = true;
  249. try
  250. {
  251. if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
  252. emitError
  253. (
  254. XMLErrs::XMLException_Warning
  255. , excToCatch.getType()
  256. , excToCatch.getMessage()
  257. );
  258. else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
  259. emitError
  260. (
  261. XMLErrs::XMLException_Fatal
  262. , excToCatch.getType()
  263. , excToCatch.getMessage()
  264. );
  265. else
  266. emitError
  267. (
  268. XMLErrs::XMLException_Error
  269. , excToCatch.getType()
  270. , excToCatch.getMessage()
  271. );
  272. }
  273. catch(const OutOfMemoryException&)
  274. {
  275. throw;
  276. }
  277. catch(...)
  278. {
  279. // Flush the reader manager and rethrow user's error
  280. fReaderMgr.reset();
  281. throw;
  282. }
  283. // If it returned, then reset the reader manager and fall through
  284. fReaderMgr.reset();
  285. }
  286. catch(const OutOfMemoryException&)
  287. {
  288. throw;
  289. }
  290. catch(...)
  291. {
  292. // Reset and rethrow
  293. fReaderMgr.reset();
  294. throw;
  295. }
  296. }
  297. bool DGXMLScanner::scanNext(XMLPScanToken& token)
  298. {
  299. // Make sure this token is still legal
  300. if (!isLegalToken(token))
  301. ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);
  302. // Find the next token and remember the reader id
  303. unsigned int orgReader;
  304. XMLTokens curToken;
  305. bool retVal = true;
  306. try
  307. {
  308. while (true)
  309. {
  310. // We have to handle any end of entity exceptions that happen here.
  311. // We could be at the end of X nested entities, each of which will
  312. // generate an end of entity exception as we try to move forward.
  313. try
  314. {
  315. curToken = senseNextToken(orgReader);
  316. break;
  317. }
  318. catch(const EndOfEntityException& toCatch)
  319. {
  320. // Send an end of entity reference event
  321. if (fDocHandler)
  322. fDocHandler->endEntityReference(toCatch.getEntity());
  323. }
  324. }
  325. if (curToken == Token_CharData)
  326. {
  327. scanCharData(fCDataBuf);
  328. }
  329. else if (curToken == Token_EOF)
  330. {
  331. if (!fElemStack.isEmpty())
  332. {
  333. const ElemStack::StackElem* topElem = fElemStack.popTop();
  334. emitError
  335. (
  336. XMLErrs::EndedWithTagsOnStack
  337. , topElem->fThisElement->getFullName()
  338. );
  339. }
  340. retVal = false;
  341. }
  342. else
  343. {
  344. // Its some sort of markup
  345. bool gotData = true;
  346. switch(curToken)
  347. {
  348. case Token_CData :
  349. // Make sure we are within content
  350. if (fElemStack.isEmpty())
  351. emitError(XMLErrs::CDATAOutsideOfContent);
  352. scanCDSection();
  353. break;
  354. case Token_Comment :
  355. scanComment();
  356. break;
  357. case Token_EndTag :
  358. scanEndTag(gotData);
  359. break;
  360. case Token_PI :
  361. scanPI();
  362. break;
  363. case Token_StartTag :
  364. scanStartTag(gotData);
  365. break;
  366. default :
  367. fReaderMgr.skipToChar(chOpenAngle);
  368. break;
  369. }
  370. if (orgReader != fReaderMgr.getCurrentReaderNum())
  371. emitError(XMLErrs::PartialMarkupInEntity);
  372. // If we hit the end, then do the miscellaneous part
  373. if (!gotData)
  374. {
  375. // Do post-parse validation if required
  376. if (fValidate)
  377. {
  378. // We handle ID reference semantics at this level since
  379. // its required by XML 1.0.
  380. checkIDRefs();
  381. // Then allow the validator to do any extra stuff it wants
  382. // fValidator->postParseValidation();
  383. }
  384. // That went ok, so scan for any miscellaneous stuff
  385. scanMiscellaneous();
  386. if (fDocHandler)
  387. fDocHandler->endDocument();
  388. }
  389. }
  390. }
  391. // NOTE:
  392. //
  393. // In all of the error processing below, the emitError() call MUST come
  394. // before the flush of the reader mgr, or it will fail because it tries
  395. // to find out the position in the XML source of the error.
  396. catch(const XMLErrs::Codes)
  397. {
  398. // This is a 'first failure' exception, so reset and return failure
  399. fReaderMgr.reset();
  400. return false;
  401. }
  402. catch(const XMLValid::Codes)
  403. {
  404. // This is a 'first fatal error' type exit, so reset and reuturn failure
  405. fReaderMgr.reset();
  406. return false;
  407. }
  408. catch(const XMLException& excToCatch)
  409. {
  410. // Emit the error and catch any user exception thrown from here. Make
  411. // sure in all cases we flush the reader manager.
  412. fInException = true;
  413. try
  414. {
  415. if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
  416. emitError
  417. (
  418. XMLErrs::XMLException_Warning
  419. , excToCatch.getType()
  420. , excToCatch.getMessage()
  421. );
  422. else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
  423. emitError
  424. (
  425. XMLErrs::XMLException_Fatal
  426. , excToCatch.getType()
  427. , excToCatch.getMessage()
  428. );
  429. else
  430. emitError
  431. (
  432. XMLErrs::XMLException_Error
  433. , excToCatch.getType()
  434. , excToCatch.getMessage()
  435. );
  436. }
  437. catch(const OutOfMemoryException&)
  438. {
  439. throw;
  440. }
  441. catch(...)
  442. {
  443. // Reset and rethrow user error
  444. fReaderMgr.reset();
  445. throw;
  446. }
  447. // Reset and return failure
  448. fReaderMgr.reset();
  449. return false;
  450. }
  451. catch(const OutOfMemoryException&)
  452. {
  453. throw;
  454. }
  455. catch(...)
  456. {
  457. // Reset and rethrow original error
  458. fReaderMgr.reset();
  459. throw;
  460. }
  461. // If we hit the end, then flush the reader manager
  462. if (!retVal)
  463. fReaderMgr.reset();
  464. return retVal;
  465. }
  466. // ---------------------------------------------------------------------------
  467. // DGXMLScanner: Private scanning methods
  468. // ---------------------------------------------------------------------------
  469. // This method will kick off the scanning of the primary content of the
  470. // document, i.e. the elements.
  471. bool DGXMLScanner::scanContent(const bool extEntity)
  472. {
  473. // Go into a loop until we hit the end of the root element, or we fall
  474. // out because there is no root element.
  475. //
  476. // We have to do kind of a deeply nested double loop here in order to
  477. // avoid doing the setup/teardown of the exception handler on each
  478. // round. Doing it this way we only do it when an exception actually
  479. // occurs.
  480. bool gotData = true;
  481. bool inMarkup = false;
  482. while (gotData)
  483. {
  484. try
  485. {
  486. while (gotData)
  487. {
  488. // Sense what the next top level token is. According to what
  489. // this tells us, we will call something to handle that kind
  490. // of thing.
  491. unsigned int orgReader;
  492. const XMLTokens curToken = senseNextToken(orgReader);
  493. // Handle character data and end of file specially. Char data
  494. // is not markup so we don't want to handle it in the loop
  495. // below.
  496. if (curToken == Token_CharData)
  497. {
  498. // Scan the character data and call appropriate events. Let
  499. // him use our local character data buffer for efficiency.
  500. scanCharData(fCDataBuf);
  501. continue;
  502. }
  503. else if (curToken == Token_EOF)
  504. {
  505. // The element stack better be empty at this point or we
  506. // ended prematurely before all elements were closed.
  507. if (!fElemStack.isEmpty())
  508. {
  509. const ElemStack::StackElem* topElem = fElemStack.popTop();
  510. emitError
  511. (
  512. XMLErrs::EndedWithTagsOnStack
  513. , topElem->fThisElement->getFullName()
  514. );
  515. }
  516. // Its the end of file, so clear the got data flag
  517. gotData = false;
  518. continue;
  519. }
  520. // We are in some sort of markup now
  521. inMarkup = true;
  522. // According to the token we got, call the appropriate
  523. // scanning method.
  524. switch(curToken)
  525. {
  526. case Token_CData :
  527. // Make sure we are within content
  528. if (fElemStack.isEmpty())
  529. emitError(XMLErrs::CDATAOutsideOfContent);
  530. scanCDSection();
  531. break;
  532. case Token_Comment :
  533. scanComment();
  534. break;
  535. case Token_EndTag :
  536. scanEndTag(gotData);
  537. break;
  538. case Token_PI :
  539. scanPI();
  540. break;
  541. case Token_StartTag :
  542. scanStartTag(gotData);
  543. break;
  544. default :
  545. fReaderMgr.skipToChar(chOpenAngle);
  546. break;
  547. }
  548. if (orgReader != fReaderMgr.getCurrentReaderNum())
  549. emitError(XMLErrs::PartialMarkupInEntity);
  550. // And we are back out of markup again
  551. inMarkup = false;
  552. }
  553. }
  554. catch(const EndOfEntityException& toCatch)
  555. {
  556. // If we were in some markup when this happened, then its a
  557. // partial markup error.
  558. if (inMarkup)
  559. emitError(XMLErrs::PartialMarkupInEntity);
  560. // Send an end of entity reference event
  561. if (fDocHandler)
  562. fDocHandler->endEntityReference(toCatch.getEntity());
  563. inMarkup = false;
  564. }
  565. }
  566. // It went ok, so return success
  567. return true;
  568. }
  569. void DGXMLScanner::scanEndTag(bool& gotData)
  570. {
  571. // Assume we will still have data until proven otherwise. It will only
  572. // ever be false if this is the end of the root element.
  573. gotData = true;
  574. // Check if the element stack is empty. If so, then this is an unbalanced
  575. // element (i.e. more ends than starts, perhaps because of bad text
  576. // causing one to be skipped.)
  577. if (fElemStack.isEmpty())
  578. {
  579. emitError(XMLErrs::MoreEndThanStartTags);
  580. fReaderMgr.skipPastChar(chCloseAngle);
  581. ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd);
  582. }
  583. // After the </ is the element QName, so get a name from the input
  584. if (!fReaderMgr.getName(fQNameBuf))
  585. {
  586. // It failed so we can't really do anything with it
  587. emitError(XMLErrs::ExpectedElementName);
  588. fReaderMgr.skipPastChar(chCloseAngle);
  589. return;
  590. }
  591. // Resolve element name uri if needed
  592. unsigned int uriId = fEmptyNamespaceId;
  593. const ElemStack::StackElem* topElem = fElemStack.topElement();
  594. if (fDoNamespaces)
  595. {
  596. uriId = resolvePrefix
  597. (
  598. topElem->fThisElement->getElementName()->getPrefix()
  599. , ElemStack::Mode_Element
  600. );
  601. }
  602. // Pop the stack of the element we are supposed to be ending. Remember
  603. // that we don't own this. The stack just keeps them and reuses them.
  604. fElemStack.popTop();
  605. // See if it was the root element, to avoid multiple calls below
  606. const bool isRoot = fElemStack.isEmpty();
  607. // Make sure that its the end of the element that we expect
  608. if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer()))
  609. {
  610. emitError
  611. (
  612. XMLErrs::ExpectedEndOfTagX
  613. , topElem->fThisElement->getFullName()
  614. );
  615. }
  616. // Make sure we are back on the same reader as where we started
  617. if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
  618. emitError(XMLErrs::PartialTagMarkupError);
  619. // Skip optional whitespace
  620. fReaderMgr.skipPastSpaces();
  621. // Make sure we find the closing bracket
  622. if (!fReaderMgr.skippedChar(chCloseAngle))
  623. {
  624. emitError
  625. (
  626. XMLErrs::UnterminatedEndTag
  627. , topElem->fThisElement->getFullName()
  628. );
  629. }
  630. // If validation is enabled, then lets pass him the list of children and
  631. // this element and let him validate it.
  632. if (fValidate)
  633. {
  634. int res = fValidator->checkContent
  635. (
  636. topElem->fThisElement
  637. , topElem->fChildren
  638. , topElem->fChildCount
  639. );
  640. if (res >= 0)
  641. {
  642. // One of the elements is not valid for the content. NOTE that
  643. // if no children were provided but the content model requires
  644. // them, it comes back with a zero value. But we cannot use that
  645. // to index the child array in this case, and have to put out a
  646. // special message.
  647. if (!topElem->fChildCount)
  648. {
  649. fValidator->emitError
  650. (
  651. XMLValid::EmptyNotValidForContent
  652. , topElem->fThisElement->getFormattedContentModel()
  653. );
  654. }
  655. else if ((unsigned int)res >= topElem->fChildCount)
  656. {
  657. fValidator->emitError
  658. (
  659. XMLValid::NotEnoughElemsForCM
  660. , topElem->fThisElement->getFormattedContentModel()
  661. );
  662. }
  663. else
  664. {
  665. fValidator->emitError
  666. (
  667. XMLValid::ElementNotValidForContent
  668. , topElem->fChildren[res]->getRawName()
  669. , topElem->fThisElement->getFormattedContentModel()
  670. );
  671. }
  672. }
  673. }
  674. // If we have a doc handler, tell it about the end tag
  675. if (fDocHandler)
  676. {
  677. fDocHandler->endElement
  678. (
  679. *topElem->fThisElement
  680. , uriId
  681. , isRoot
  682. , (fDoNamespaces)
  683. ? topElem->fThisElement->getElementName()->getPrefix()
  684. : XMLUni::fgZeroLenString
  685. );
  686. }
  687. // If this was the root, then done with content
  688. gotData = !isRoot;
  689. }
  690. // This method handles the high level logic of scanning the DOCType
  691. // declaration. This calls the DTDScanner and kicks off both the scanning of
  692. // the internal subset and the scanning of the external subset, if any.
  693. //
  694. // When we get here the '<!DOCTYPE' part has already been scanned, which is
  695. // what told us that we had a doc type decl to parse.
  696. void DGXMLScanner::scanDocTypeDecl()
  697. {
  698. if (fDocTypeHandler)
  699. fDocTypeHandler->resetDocType();
  700. // There must be some space after DOCTYPE
  701. if (!fReaderMgr.skipPastSpaces())
  702. {
  703. emitError(XMLErrs::ExpectedWhitespace);
  704. // Just skip the Doctype declaration and return
  705. fReaderMgr.skipPastChar(chCloseAngle);
  706. return;
  707. }
  708. // Get a buffer for the root element
  709. XMLBufBid bbRootName(&fBufMgr);
  710. // Get a name from the input, which should be the name of the root
  711. // element of the upcoming content.
  712. fReaderMgr.getName(bbRootName.getBuffer());
  713. if (bbRootName.isEmpty())
  714. {
  715. emitError(XMLErrs::NoRootElemInDOCTYPE);
  716. fReaderMgr.skipPastChar(chCloseAngle);
  717. return;
  718. }
  719. // Store the root element name for later check
  720. setRootElemName(bbRootName.getRawBuffer());
  721. // This element obviously is not going to exist in the element decl
  722. // pool yet, but we need to call docTypeDecl. So force it into
  723. // the element decl pool, marked as being there because it was in
  724. // the DOCTYPE. Later, when its declared, the status will be updated.
  725. //
  726. // Only do this if we are not reusing the validator! If we are reusing,
  727. // then look it up instead. It has to exist!
  728. DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
  729. (
  730. bbRootName.getRawBuffer()
  731. , fEmptyNamespaceId
  732. , DTDElementDecl::Any
  733. , fGrammarPoolMemoryManager
  734. );
  735. rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  736. rootDecl->setExternalElemDeclaration(true);
  737. if(!fUseCachedGrammar)
  738. {
  739. // this will break getRootElemId on DTDGrammar when
  740. // cached grammars are in use, but
  741. // why would one use this anyway???
  742. ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
  743. } else
  744. {
  745. // put this in the undeclared pool so it gets deleted...
  746. rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
  747. }
  748. // Skip any spaces after the name
  749. fReaderMgr.skipPastSpaces();
  750. // And now if we are looking at a >, then we are done. It is not
  751. // required to have an internal or external subset, though why you
  752. // would not escapes me.
  753. if (fReaderMgr.skippedChar(chCloseAngle)) {
  754. // If we have a doc type handler and advanced callbacks are enabled,
  755. // call the doctype event.
  756. if (fDocTypeHandler)
  757. fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
  758. return;
  759. }
  760. // either internal/external subset
  761. if (fValScheme == Val_Auto && !fValidate)
  762. fValidate = true;
  763. bool hasIntSubset = false;
  764. bool hasExtSubset = false;
  765. XMLCh* sysId = 0;
  766. XMLCh* pubId = 0;
  767. DTDScanner dtdScanner
  768. (
  769. (DTDGrammar*) fGrammar
  770. , fDocTypeHandler
  771. , fGrammarPoolMemoryManager
  772. , fMemoryManager
  773. );
  774. dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
  775. // If the next character is '[' then we have no external subset cause
  776. // there is no system id, just the opening character of the internal
  777. // subset. Else, has to be an id.
  778. //
  779. // Just look at the next char, don't eat it.
  780. if (fReaderMgr.peekNextChar() == chOpenSquare)
  781. {
  782. hasIntSubset = true;
  783. }
  784. else
  785. {
  786. // Indicate we have an external subset
  787. hasExtSubset = true;
  788. fHasNoDTD = false;
  789. // Get buffers for the ids
  790. XMLBufBid bbPubId(&fBufMgr);
  791. XMLBufBid bbSysId(&fBufMgr);
  792. // Get the external subset id
  793. if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
  794. {
  795. fReaderMgr.skipPastChar(chCloseAngle);
  796. return;
  797. }
  798. // Get copies of the ids we got
  799. pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
  800. sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
  801. // Skip spaces and check again for the opening of an internal subset
  802. fReaderMgr.skipPastSpaces();
  803. // Just look at the next char, don't eat it.
  804. if (fReaderMgr.peekNextChar() == chOpenSquare) {
  805. hasIntSubset = true;
  806. }
  807. }
  808. // Insure that the ids get cleaned up, if they got allocated
  809. ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
  810. ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
  811. // If we have a doc type handler and advanced callbacks are enabled,
  812. // call the doctype event.
  813. if (fDocTypeHandler)
  814. fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
  815. // Ok, if we had an internal subset, we are just past the [ character
  816. // and need to parse that first.
  817. if (hasIntSubset)
  818. {
  819. // Eat the opening square bracket
  820. fReaderMgr.getNextChar();
  821. // We can't have any internal subset if we are reusing the validator
  822. if (fUseCachedGrammar || fToCacheGrammar)
  823. ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
  824. // And try to scan the internal subset. If we fail, try to recover
  825. // by skipping forward tot he close angle and returning.
  826. if (!dtdScanner.scanInternalSubset())
  827. {
  828. fReaderMgr.skipPastChar(chCloseAngle);
  829. return;
  830. }
  831. // Do a sanity check that some expanded PE did not propogate out of
  832. // the doctype. This could happen if it was terminated early by bad
  833. // syntax.
  834. if (fReaderMgr.getReaderDepth() > 1)
  835. {
  836. emitError(XMLErrs::PEPropogated);
  837. // Ask the reader manager to pop back down to the main level
  838. fReaderMgr.cleanStackBackTo(1);
  839. }
  840. fReaderMgr.skipPastSpaces();
  841. }
  842. // And that should leave us at the closing > of the DOCTYPE line
  843. if (!fReaderMgr.skippedChar(chCloseAngle))
  844. {
  845. // Do a special check for the common scenario of an extra ] char at
  846. // the end. This is easy to recover from.
  847. if (fReaderMgr.skippedChar(chCloseSquare)
  848. && fReaderMgr.skippedChar(chCloseAngle))
  849. {
  850. emitError(XMLErrs::ExtraCloseSquare);
  851. }
  852. else
  853. {
  854. emitError(XMLErrs::UnterminatedDOCTYPE);
  855. fReaderMgr.skipPastChar(chCloseAngle);
  856. }
  857. }
  858. // If we had an external subset, then we need to deal with that one
  859. // next. If we are reusing the validator, then don't scan it.
  860. if (hasExtSubset) {
  861. if (fUseCachedGrammar)
  862. {
  863. InputSource* sysIdSrc = resolveSystemId(sysId);
  864. Janitor<InputSource> janSysIdSrc(sysIdSrc);
  865. Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());
  866. if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
  867. fDTDGrammar = (DTDGrammar*) grammar;
  868. fGrammar = fDTDGrammar;
  869. fValidator->setGrammar(fGrammar);
  870. // we *cannot* identify the root element on
  871. // cached grammars; else we risk breaking multithreaded
  872. // applications. - NG
  873. /*******
  874. rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
  875. if (rootDecl)
  876. ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
  877. else {
  878. rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
  879. (
  880. bbRootName.getRawBuffer()
  881. , fEmptyNamespaceId
  882. , DTDElementDecl::Any
  883. , fGrammarPoolMemoryManager
  884. );
  885. rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  886. rootDecl->setExternalElemDeclaration(true);
  887. ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
  888. }
  889. *********/
  890. return;
  891. }
  892. }
  893. if (fLoadExternalDTD || fValidate)
  894. {
  895. // And now create a reader to read this entity
  896. InputSource* srcUsed;
  897. XMLReader* reader = fReaderMgr.createReader
  898. (
  899. sysId
  900. , pubId
  901. , false
  902. , XMLReader::RefFrom_NonLiteral
  903. , XMLReader::Type_General
  904. , XMLReader::Source_External
  905. , srcUsed
  906. , fCalculateSrcOfs
  907. );
  908. // Put a janitor on the input source
  909. Janitor<InputSource> janSrc(srcUsed);
  910. // If it failed then throw an exception
  911. if (!reader)
  912. ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
  913. if (fToCacheGrammar) {
  914. unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
  915. const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
  916. fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
  917. ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
  918. fGrammarResolver->putGrammar(fGrammar);
  919. }
  920. // In order to make the processing work consistently, we have to
  921. // make this look like an external entity. So create an entity
  922. // decl and fill it in and push it with the reader, as happens
  923. // with an external entity. Put a janitor on it to insure it gets
  924. // cleaned up. The reader manager does not adopt them.
  925. const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
  926. DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
  927. declDTD->setSystemId(sysId);
  928. Janitor<DTDEntityDecl> janDecl(declDTD);
  929. // Mark this one as a throw at end
  930. reader->setThrowAtEnd(true);
  931. // And push it onto the stack, with its pseudo name
  932. fReaderMgr.pushReader(reader, declDTD);
  933. // Tell it its not in an include section
  934. dtdScanner.scanExtSubsetDecl(false, true);
  935. }
  936. }
  937. }
// Scans one start tag. The reader is expected to be positioned on the
// element name (NOTE(review): presumably the caller has already consumed the
// leading '<' -- confirm against the caller).
//
// The method:
//   1. reads the element name and finds its declaration in the grammar or in
//      the pool of undeclared elements, creating a new entry in that pool if
//      neither has one;
//   2. pushes the element on the element stack and, when validating, runs
//      the element/root checks;
//   3. loops over the attribute/value pairs until '>' or '/>' is seen,
//      detecting duplicates and storing each attribute in fAttrList;
//   4. completes the attribute list through buildAttList() and reports the
//      start tag (and, for an empty tag, the end tag) to the document handler.
//
//  gotData   Set to true on entry. It is set to false only when the tag that
//            was scanned is an empty root element.
//
// Returns true when the tag was processed, false when scanning was abandoned
// after an error that could not be recovered from inside this method.
// Throws UnexpectedEOFException when the character peeked inside the tag is
// the null character.
bool DGXMLScanner::scanStartTag(bool& gotData)
{
    // Assume we will still have data until proven otherwise. It will only
    // ever be false if this is the root and it's empty.
    gotData = true;
    // Get the QName. In this case, we are not doing namespaces, so we just
    // use it as is and don't have to break it into parts.
    if (!fReaderMgr.getName(fQNameBuf))
    {
        // No element name could be read: report it, ask the reader manager to
        // skip to an open angle bracket and give up on this tag.
        emitError(XMLErrs::ExpectedElementName);
        fReaderMgr.skipToChar(chOpenAngle);
        return false;
    }
    // Assume it won't be an empty tag
    bool isEmpty = false;
    // See if it's the root element: nothing has been pushed on the element
    // stack yet.
    const bool isRoot = fElemStack.isEmpty();
    // Let's try to look up the element in the validator's element decl pool.
    // We can pass bogus values for the URI id and the base name. We know that
    // this can only be called if we are doing a DTD style validator and that
    // he will only look at the QName.
    //
    // We *do not* tell him to fault in a decl if he does not find one - NG.
    bool wasAdded = false;
    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
    XMLElementDecl* elemDecl = fGrammar->getElemDecl
    (
        fEmptyNamespaceId
        , 0
        , qnameRawBuf
        , Grammar::TOP_LEVEL_SCOPE
    );
    // Not in the grammar: look in the pool of undeclared elements.
    if(!elemDecl)
    {
        elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
    }
    // Still not found: create a declaration with the 'Any' content model and
    // store it in the undeclared pool (not in the grammar). The id returned
    // by the pool becomes the declaration's id.
    if(!elemDecl)
    {
        wasAdded = true;
        elemDecl = new (fMemoryManager) DTDElementDecl
        (
            qnameRawBuf
            , fEmptyNamespaceId
            , DTDElementDecl::Any
            , fMemoryManager
        );
        elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
    }
    if (fValidate) {
        if (wasAdded)
        {
            // This is to tell the reuse Validator that this element was
            // faulted-in, was not an element in the validator pool originally
            elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
            fValidator->emitError
            (
                XMLValid::ElementNotDefined
                , qnameRawBuf
            );
        }
        // If it's not marked declared, then emit an error
        else if (!elemDecl->isDeclared())
        {
            fValidator->emitError
            (
                XMLValid::ElementNotDefined
                , qnameRawBuf
            );
        }
        // The element is handed to the validator in every case, including
        // the two error cases above.
        fValidator->validateElement(elemDecl);
    }
    // Expand the element stack and add the new element
    fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
    // If this is the first element and we are validating, check the root
    // element.
    if (isRoot)
    {
        // Remember the grammar in effect for the root, validating or not.
        fRootGrammar = fGrammar;
        if (fValidate)
        {
            // If a DocType exists, then check if it matches the root name there.
            if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
                fValidator->emitError(XMLValid::RootElemNotLikeDocType);
            // Some validators may also want to check the root, call the
            // XMLValidator::checkRootElement. This is only done for a
            // validator installed by the user (fValidatorFromUser).
            if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId()))
                fValidator->emitError(XMLValid::RootElemNotLikeDocType);
        }
    }
    else if (fValidate)
    {
        // If the element stack is not empty, then add this element as a
        // child of the previous top element. If it's empty, this is the root
        // elem and is not the child of anything.
        fElemStack.addChild(elemDecl->getElementName(), true);
    }
    // Skip any whitespace after the name
    fReaderMgr.skipPastSpaces();
    // We loop until we either see a /> or >, handling attribute/value
    // pairs until we get there.
    //
    // attCount        number of attributes stored in fAttrList for this tag
    // curAttListSize  number of XMLAttr objects fAttrList held on entry;
    //                 slots below this index are reused instead of allocated
    unsigned int attCount = 0;
    unsigned int curAttListSize = fAttrList->size();
    // NOTE(review): wasAdded is not read again after this reset anywhere in
    // this function.
    wasAdded = false;
    // fElemCount serves as a per-start-tag stamp for the duplicate attribute
    // detection below.
    fElemCount++;
    while (true)
    {
        // And get the next non-space character
        XMLCh nextCh = fReaderMgr.peekNextChar();
        // If the next character is not a slash or closed angle bracket,
        // then it must be whitespace, since whitespace is required
        // between the end of the last attribute and the name of the next
        // one. This check only applies once at least one attribute has been
        // stored.
        if (attCount)
        {
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
            {
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    // Ok, skip by them and peek another char
                    fReaderMgr.skipPastSpaces();
                    nextCh = fReaderMgr.peekNextChar();
                }
                else
                {
                    // Emit the error but keep on going
                    emitError(XMLErrs::ExpectedWhitespace);
                }
            }
        }
        // Ok, here we first check for any of the special case characters.
        // If it's not one, then we do the normal case processing, which
        // assumes that we've hit an attribute value. Otherwise, we do all
        // the special case checks.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            // Assume it's going to be an attribute, so get a name from
            // the input.
            if (!fReaderMgr.getName(fAttNameBuf))
            {
                // No attribute name: report it, skip past the next close
                // angle bracket and abandon the tag.
                emitError(XMLErrs::ExpectedAttrName);
                fReaderMgr.skipPastChar(chCloseAngle);
                return false;
            }
            // And next must be an equal sign
            if (!scanEq())
            {
                // Characters at which the resynchronizing skip stops (it
                // also stops at whitespace).
                static const XMLCh tmpList[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };
                emitError(XMLErrs::ExpectedEqSign);
                // Try to sync back up by skipping forward until we either
                // hit something meaningful.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                {
                    // Jump back to top for normal processing of these.
                    // The attribute name just read is discarded.
                    continue;
                }
                else if ((chFound == chSingleQuote)
                         || (chFound == chDoubleQuote)
                         || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through assuming that the value is to follow
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
                    return false;
                }
                else
                {
                    // Something went really wrong
                    return false;
                }
            }
            // Look up the attribute definition on the element declaration.
            // attDef is null when the lookup finds nothing.
            // NOTE(review): earlier wording of this comment said an
            // undeclared attribute is faulted into the pool; no such step is
            // visible in this function.
            XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
            // Now need to prepare for duplicate detection. Each registry maps
            // a key to a counter holding the fElemCount value of the last
            // start tag in which that key was seen:
            //   - no entry            : first sighting, record fElemCount
            //   - stored < fElemCount : first sighting in this tag, update
            //   - otherwise           : already seen in this tag -> duplicate
            if(attDef)
            {
                // Declared attribute: keyed by the attDef pointer.
                unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
                if(!curCountPtr)
                {
                    curCountPtr = getNewUIntPtr();
                    *curCountPtr = fElemCount;
                    fAttDefRegistry->put(attDef, curCountPtr);
                }
                else if(*curCountPtr < fElemCount)
                    *curCountPtr = fElemCount;
                else
                {
                    emitError
                    (
                        XMLErrs::AttrAlreadyUsedInSTag
                        , attDef->getFullName()
                        , elemDecl->getFullName()
                    );
                }
            }
            else
            {
                // Undeclared attribute: keyed by the attribute name.
                XMLCh * namePtr = fAttNameBuf.getRawBuffer();
                unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
                if(!curCountPtr)
                {
                    curCountPtr = getNewUIntPtr();
                    *curCountPtr = fElemCount;
                    // NOTE(review): the key handed over here is the raw
                    // buffer of fAttNameBuf, which getName() fills again for
                    // the next attribute. Confirm that the registry does not
                    // keep this pointer as its key.
                    fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
                }
                else if(*curCountPtr < fElemCount)
                    *curCountPtr = fElemCount;
                else
                {
                    emitError
                    (
                        XMLErrs::AttrAlreadyUsedInSTag
                        , namePtr
                        , elemDecl->getFullName()
                    );
                }
            }
            // When validating, an attribute without a definition is a
            // validity error; scanning continues regardless.
            if (fValidate)
            {
                if (!attDef)
                {
                    fValidator->emitError
                    (
                        XMLValid::AttNotDefinedForElement
                        , fAttNameBuf.getRawBuffer()
                        , qnameRawBuf
                    );
                }
            }
            // Skip any whitespace before the value and then scan the att
            // value. This will come back normalized with entity refs and
            // char refs expanded.
            fReaderMgr.skipPastSpaces();
            if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
            {
                // Characters at which the resynchronizing skip stops (it
                // also stops at whitespace).
                static const XMLCh tmpList[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };
                emitError(XMLErrs::ExpectedAttrValue);
                // It failed, so let's try to get synced back up. We skip
                // forward until we find some whitespace or one of the
                // chars in our list.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
                if ((chFound == chCloseAngle)
                    || (chFound == chForwardSlash)
                    || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through and process this attribute, though
                    // the value will be "".
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
                    return false;
                }
                else
                {
                    // Something went really wrong
                    return false;
                }
            }
            // Now that it's all stretched out, let's look at its type and
            // determine if it has a valid value. It will output any needed
            // errors, but we just keep going. We only need to do this if
            // we are validating (and only a declared attribute can be
            // checked).
            if (attDef)
            {
                // Let the validator pass judgement on the attribute value
                if (fValidate)
                {
                    fValidator->validateAttrValue
                    (
                        attDef
                        , fAttValueBuf.getRawBuffer()
                        , false
                        , elemDecl
                    );
                }
            }
            if (fDoNamespaces)
            {
                // Make sure that the name is basically well formed for namespace
                // enabled rules. It either has no colons, or it has one which
                // is neither the first nor the last char.
                //
                // NOTE(review): both error paths 'continue' without storing
                // the attribute or incrementing attCount, so the attribute is
                // dropped even though it was already entered in the duplicate
                // registry (and validated) above.
                const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
                if (colonFirst != -1)
                {
                    const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
                    if (colonFirst != colonLast)
                    {
                        emitError(XMLErrs::TooManyColonsInName);
                        continue;
                    }
                    else if ((colonFirst == 0)
                             || (colonLast == (int)fAttNameBuf.getLen() - 1))
                    {
                        emitError(XMLErrs::InvalidColonPos);
                        continue;
                    }
                }
            }
            // Add this attribute to the attribute list that we use to
            // pass them to the handler. We reuse its existing elements
            // but expand it as required.
            //
            // The attribute type is taken from attDef when there is one and
            // is XMLAttDef::CData otherwise.
            // NOTE(review): the two argument shapes below presumably map to
            // (uriId, qName, value, ...) and (uriId, localPart, prefix,
            // value, ...) -- confirm against XMLAttr.
            XMLAttr* curAtt;
            if (attCount >= curAttListSize)
            {
                // No reusable slot left: allocate a new XMLAttr and append it.
                if (fDoNamespaces) {
                    curAtt = new (fMemoryManager) XMLAttr
                    (
                        fEmptyNamespaceId
                        , fAttNameBuf.getRawBuffer()
                        , fAttValueBuf.getRawBuffer()
                        , (attDef)?attDef->getType():XMLAttDef::CData
                        , true
                        , fMemoryManager
                    );
                }
                else
                {
                    curAtt = new (fMemoryManager) XMLAttr
                    (
                        -1
                        , fAttNameBuf.getRawBuffer()
                        , XMLUni::fgZeroLenString
                        , fAttValueBuf.getRawBuffer()
                        , (attDef)?attDef->getType():XMLAttDef::CData
                        , true
                        , fMemoryManager
                    );
                }
                fAttrList->addElement(curAtt);
            }
            else
            {
                // Reuse the XMLAttr already sitting at this index.
                curAtt = fAttrList->elementAt(attCount);
                if (fDoNamespaces)
                {
                    curAtt->set
                    (
                        fEmptyNamespaceId
                        , fAttNameBuf.getRawBuffer()
                        , fAttValueBuf.getRawBuffer()
                        , (attDef)?attDef->getType():XMLAttDef::CData
                    );
                }
                else
                {
                    curAtt->set
                    (
                        -1
                        , fAttNameBuf.getRawBuffer()
                        , XMLUni::fgZeroLenString
                        , fAttValueBuf.getRawBuffer()
                        , (attDef)?attDef->getType():XMLAttDef::CData
                    );
                }
                // set() does not take the 'specified' flag, so set it here.
                curAtt->setSpecified(true);
            }
            attCount++;
            // And jump back to the top of the loop
            continue;
        }
        // It was some special case character so do all of the checks and
        // deal with it.
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        if (nextCh == chForwardSlash)
        {
            // '/': consume it and expect '>' to close the empty tag. A
            // missing '>' is reported, but the tag is still treated as empty.
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
            break;
        }
        else if (nextCh == chCloseAngle)
        {
            // '>': consume it; the start tag is complete.
            fReaderMgr.getNextChar();
            break;
        }
        else if (nextCh == chOpenAngle)
        {
            // Check for this one specially, since it's going to be common
            // and it is kind of auto-recovering since we've already hit the
            // next open bracket, which is what we would have seeked to (and
            // skipped this whole tag.) The '<' is left unconsumed.
            emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
            break;
        }
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            // Check for this one specially, which is probably a missing
            // attribute name, e.g. ="value". Just issue expected name
            // error and eat the quoted string, then jump back to the
            // top again.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
        // Any other special character matches none of the branches above;
        // the loop simply runs again.
    }
    // Make an initial pass through the list and find any xmlns attributes.
    if (fDoNamespaces && attCount)
        scanAttrListforNameSpaces(fAttrList, attCount, elemDecl);
    // Now let's get the fAttrList filled in. This involves faulting in any
    // defaulted and fixed attributes and normalizing the values of any that
    // we got explicitly.
    //
    // We update the attCount value with the total number of attributes, but
    // it goes in with the number of values we got during the raw scan of
    // explicitly provided attrs above.
    attCount = buildAttList(attCount, elemDecl, *fAttrList);
    // If we have a document handler, then tell it about this start tag.
    // uriId defaults to fEmptyNamespaceId; when namespaces are enabled (and a
    // handler is installed) it is replaced by the result of resolving the
    // element's prefix. The prefix itself is only passed along when
    // namespaces are enabled, otherwise 0 is passed.
    unsigned int uriId = fEmptyNamespaceId;
    if (fDocHandler)
    {
        if (fDoNamespaces)
        {
            uriId = resolvePrefix
            (
                elemDecl->getElementName()->getPrefix()
                , ElemStack::Mode_Element
            );
        }
        fDocHandler->startElement
        (
            *elemDecl
            , uriId
            , (fDoNamespaces) ? elemDecl->getElementName()->getPrefix() : 0
            , *fAttrList
            , attCount
            , false
            , isRoot
        );
    }
    // If empty, validate content right now if we are validating and then
    // pop the element stack top. Else, we have to update the current stack
    // top's namespace mapping elements.
    if (isEmpty)
    {
        // If validating, then ensure that it's legal to have no content
        if (fValidate)
        {
            // checkContent is called with no children; a non-negative result
            // is treated as a content model failure.
            const int res = fValidator->checkContent(elemDecl, 0, 0);
            if (res >= 0)
            {
                fValidator->emitError
                (
                    XMLValid::ElementNotValidForContent
                    , qnameRawBuf
                    , elemDecl->getFormattedContentModel()
                );
            }
        }
        // If we have a doc handler, tell it about the end tag. Unlike the
        // start event, the prefix passed without namespaces is the empty
        // string rather than 0.
        if (fDocHandler)
        {
            fDocHandler->endElement
            (
                *elemDecl
                , uriId
                , isRoot
                , (fDoNamespaces) ? elemDecl->getElementName()->getPrefix()
                                  : XMLUni::fgZeroLenString
            );
        }
        // Pop the element stack back off since it'll never be used now
        fElemStack.popTop();
        // If the elem stack is empty, then it was an empty root
        if (isRoot)
            gotData = false;
    }
    return true;
}
  1427. unsigned int
  1428. DGXMLScanner::resolveQName(const XMLCh* const qName
  1429. , XMLBuffer& prefixBuf
  1430. , const short mode
  1431. , int& prefixColonPos)
  1432. {
  1433. // Lets split out the qName into a URI and name buffer first. The URI
  1434. // can be empty.
  1435. prefixColonPos = XMLString::indexOf(qName, chColon);
  1436. if (prefixColonPos == -1)
  1437. {
  1438. // Its all name with no prefix, so put the whole thing into the name
  1439. // buffer. Then map the empty string to a URI, since the empty string
  1440. // represents the default namespace. This will either return some
  1441. // explicit URI which the default namespace is mapped to, or the
  1442. // the default global namespace.
  1443. bool unknown = false;
  1444. prefixBuf.reset();
  1445. return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
  1446. }
  1447. else
  1448. {
  1449. // Copy the chars up to but not including the colon into the prefix
  1450. // buffer.
  1451. prefixBuf.set(qName, prefixColonPos);
  1452. // Watch for the special namespace prefixes. We always map these to
  1453. // special URIs. 'xml' gets mapped to the official URI that its defined
  1454. // to map to by the NS spec. xmlns gets mapped to a special place holder
  1455. // URI that we define (so that it maps to something checkable.)
  1456. const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
  1457. if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
  1458. // if this is an element, it is an error to have xmlns as prefix
  1459. if (mode == ElemStack::Mode_Element)
  1460. emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
  1461. return fXMLNSNamespaceId;
  1462. }
  1463. else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
  1464. return fXMLNamespaceId;
  1465. }
  1466. else
  1467. {
  1468. bool unknown = false;
  1469. unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
  1470. if (unknown)
  1471. emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
  1472. return uriId;
  1473. }
  1474. }
  1475. }
  1476. // ---------------------------------------------------------------------------
  1477. // DGXMLScanner: Grammar preparsing
  1478. // ---------------------------------------------------------------------------
  1479. Grammar* DGXMLScanner::loadGrammar(const InputSource& src
  1480. , const short grammarType
  1481. , const bool toCache)
  1482. {
  1483. Grammar* loadedGrammar = 0;
  1484. try
  1485. {
  1486. fGrammarResolver->cacheGrammarFromParse(false);
  1487. fGrammarResolver->useCachedGrammarInParse(false);
  1488. fRootGrammar = 0;
  1489. if (fValScheme == Val_Auto) {
  1490. fValidate = true;
  1491. }
  1492. // Reset some status flags
  1493. fInException = false;
  1494. fStandalone = false;
  1495. fErrorCount = 0;
  1496. fHasNoDTD = true;
  1497. if (grammarType == Grammar::DTDGrammarType) {
  1498. loadedGrammar = loadDTDGrammar(src, toCache);
  1499. }
  1500. // Reset the reader manager to close all files, sockets, etc...
  1501. fReaderMgr.reset();
  1502. }
  1503. // NOTE:
  1504. //
  1505. // In all of the error processing below, the emitError() call MUST come
  1506. // before the flush of the reader mgr, or it will fail because it tries
  1507. // to find out the position in the XML source of the error.
  1508. catch(const XMLErrs::Codes)
  1509. {
  1510. // This is a 'first fatal error' type exit, so reset and fall through
  1511. fReaderMgr.reset();
  1512. }
  1513. catch(const XMLValid::Codes)
  1514. {
  1515. // This is a 'first fatal error' type exit, so reset and fall through
  1516. fReaderMgr.reset();
  1517. }
  1518. catch(const XMLException& excToCatch)
  1519. {
  1520. // Emit the error and catch any user exception thrown from here. Make
  1521. // sure in all cases we flush the reader manager.
  1522. fInException = true;
  1523. try
  1524. {
  1525. if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
  1526. emitError
  1527. (
  1528. XMLErrs::DisplayErrorMessage
  1529. , excToCatch.getMessage()
  1530. );
  1531. else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
  1532. emitError
  1533. (
  1534. XMLErrs::XMLException_Fatal
  1535. , excToCatch.getType()
  1536. , excToCatch.getMessage()
  1537. );
  1538. else
  1539. emitError
  1540. (
  1541. XMLErrs::XMLException_Error
  1542. , excToCatch.getType()
  1543. , excToCatch.getMessage()
  1544. );
  1545. }
  1546. catch(const OutOfMemoryException&)
  1547. {
  1548. throw;
  1549. }
  1550. catch(...)
  1551. {
  1552. // Flush the reader manager and rethrow user's error
  1553. fReaderMgr.reset();
  1554. throw;
  1555. }
  1556. // If it returned, then reset the reader manager and fall through
  1557. fReaderMgr.reset();
  1558. }
  1559. catch(const OutOfMemoryException&)
  1560. {
  1561. throw;
  1562. }
  1563. catch(...)
  1564. {
  1565. // Reset and rethrow
  1566. fReaderMgr.reset();
  1567. throw;
  1568. }
  1569. return loadedGrammar;
  1570. }
  1571. Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
  1572. const bool toCache)
  1573. {
  1574. // Reset the validators
  1575. fDTDValidator->reset();
  1576. if (fValidatorFromUser)
  1577. fValidator->reset();
  1578. fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
  1579. fGrammarResolver->putGrammar(fDTDGrammar);
  1580. fGrammar = fDTDGrammar;
  1581. fValidator->setGrammar(fGrammar);
  1582. // And for all installed handlers, send reset events. This gives them
  1583. // a chance to flush any cached data.
  1584. if (fDocHandler)
  1585. fDocHandler->resetDocument();
  1586. if (fEntityHandler)
  1587. fEntityHandler->resetEntities();
  1588. if (fErrorReporter)
  1589. fErrorReporter->resetErrors();
  1590. // Clear out the id reference list
  1591. resetValidationContext();
  1592. if (toCache) {
  1593. unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
  1594. const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
  1595. fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
  1596. ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
  1597. fGrammarResolver->putGrammar(fGrammar);
  1598. }
  1599. // Handle the creation of the XML reader object for this input source.
  1600. // This will provide us with transcoding and basic lexing services.
  1601. XMLReader* newReader = fReaderMgr.createReader
  1602. (
  1603. src
  1604. , false
  1605. , XMLReader::RefFrom_NonLiteral
  1606. , XMLReader::Type_General
  1607. , XMLReader::Source_External
  1608. , fCalculateSrcOfs
  1609. );
  1610. if (!newReader) {
  1611. if (src.getIssueFatalErrorIfNotFound())
  1612. ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId());
  1613. else
  1614. ThrowXML1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId());
  1615. }
  1616. // In order to make the processing work consistently, we have to
  1617. // make this look like an external entity. So create an entity
  1618. // decl and fill it in and push it with the reader, as happens
  1619. // with an external entity. Put a janitor on it to insure it gets
  1620. // cleaned up. The reader manager does not adopt them.
  1621. const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
  1622. DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
  1623. declDTD->setSystemId(src.getSystemId());
  1624. Janitor<DTDEntityDecl> janDecl(declDTD);
  1625. // Mark this one as a throw at end
  1626. newReader->setThrowAtEnd(true);
  1627. // And push it onto the stack, with its pseudo name
  1628. fReaderMgr.pushReader(newReader, declDTD);
  1629. // If we have a doc type handler and advanced callbacks are enabled,
  1630. // call the doctype event.
  1631. if (fDocTypeHandler) {
  1632. // Create a dummy root
  1633. DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
  1634. (
  1635. gDTDStr
  1636. , fEmptyNamespaceId
  1637. , DTDElementDecl::Any
  1638. , fGrammarPoolMemoryManager
  1639. );
  1640. rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  1641. rootDecl->setExternalElemDeclaration(true);
  1642. Janitor<DTDElementDecl> janSrc(rootDecl);
  1643. fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
  1644. }
  1645. // Create DTDScanner
  1646. DTDScanner dtdScanner
  1647. (
  1648. (DTDGrammar*)fGrammar
  1649. , fDocTypeHandler
  1650. , fGrammarPoolMemoryManager
  1651. , fMemoryManager
  1652. );
  1653. dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
  1654. // Tell it its not in an include section
  1655. dtdScanner.scanExtSubsetDecl(false, true);
  1656. if (fValidate) {
  1657. // validate the DTD scan so far
  1658. fValidator->preContentValidation(false, true);
  1659. }
  1660. if (toCache)
  1661. fGrammarResolver->cacheGrammars();
  1662. return fDTDGrammar;
  1663. }
  1664. // ---------------------------------------------------------------------------
  1665. // DGXMLScanner: Private helper methods
  1666. // ---------------------------------------------------------------------------
  1667. // This method handles the common initialization, to avoid having to do
  1668. // it redundantly in multiple constructors.
  1669. void DGXMLScanner::commonInit()
  1670. {
  1671. // And we need one for the raw attribute scan. This just stores key/
  1672. // value string pairs (prior to any processing.)
  1673. fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);
  1674. // Create the Validator and init them
  1675. fDTDValidator = new (fMemoryManager) DTDValidator();
  1676. initValidator(fDTDValidator);
  1677. fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
  1678. fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  1679. (
  1680. 509, false, new (fMemoryManager)HashPtr(), fMemoryManager
  1681. );
  1682. fUndeclaredAttrRegistry = new (fMemoryManager) RefHashTableOf<unsigned int>
  1683. (
  1684. 509, false, new (fMemoryManager)HashXMLCh(), fMemoryManager
  1685. );
  1686. }
  1687. void DGXMLScanner::cleanUp()
  1688. {
  1689. delete fAttrNSList;
  1690. delete fDTDValidator;
  1691. delete fDTDElemNonDeclPool;
  1692. delete fAttDefRegistr