/ATF2/control-software/epics-3.14.8/extensions/src/ChannelArchiver/ThirdParty/xerces-c-src2_4_0/src/xercesc/internal/DGXMLScanner.cpp
C++ | 1901 lines | 1257 code | 208 blank | 436 comment | 216 complexity | afe27fa481f9f83afc4be0ce638918e9 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.0, IPL-1.0, BSD-3-Clause
Large files are truncated, but you can click here to view the full file
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2002, 2003 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache\@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Id: DGXMLScanner.cpp,v 1.1.1.1 2009/03/14 06:42:18 whitegr Exp $
- */
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <xercesc/internal/DGXMLScanner.hpp>
- #include <xercesc/util/Janitor.hpp>
- #include <xercesc/util/RuntimeException.hpp>
- #include <xercesc/util/UnexpectedEOFException.hpp>
- #include <xercesc/framework/URLInputSource.hpp>
- #include <xercesc/framework/LocalFileInputSource.hpp>
- #include <xercesc/framework/XMLDocumentHandler.hpp>
- #include <xercesc/framework/XMLEntityHandler.hpp>
- #include <xercesc/framework/XMLPScanToken.hpp>
- #include <xercesc/framework/XMLGrammarPool.hpp>
- #include <xercesc/framework/XMLDTDDescription.hpp>
- #include <xercesc/internal/EndOfEntityException.hpp>
- #include <xercesc/validators/common/GrammarResolver.hpp>
- #include <xercesc/validators/DTD/DocTypeHandler.hpp>
- #include <xercesc/validators/DTD/DTDScanner.hpp>
- #include <xercesc/validators/DTD/DTDValidator.hpp>
- #include <xercesc/util/OutOfMemoryException.hpp>
- #include <xercesc/util/XMLResourceIdentifier.hpp>
- #include <xercesc/util/HashPtr.hpp>
- XERCES_CPP_NAMESPACE_BEGIN
- // ---------------------------------------------------------------------------
- // DGXMLScanner: Constructors and Destructor
- // ---------------------------------------------------------------------------
- DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt
- , GrammarResolver* const grammarResolver
- , MemoryManager* const manager) :
- XMLScanner(valToAdopt, grammarResolver, manager)
- , fAttrNSList(0)
- , fDTDValidator(0)
- , fDTDGrammar(0)
- , fDTDElemNonDeclPool(0)
- , fElemCount(0)
- , fAttDefRegistry(0)
- , fUndeclaredAttrRegistry(0)
- {
- try
- {
- commonInit();
- if (valToAdopt)
- {
- if (!valToAdopt->handlesDTD())
- ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
- }
- else
- {
- fValidator = fDTDValidator;
- }
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- cleanUp();
- throw;
- }
- }
- DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler
- , DocTypeHandler* const docTypeHandler
- , XMLEntityHandler* const entityHandler
- , XMLErrorReporter* const errHandler
- , XMLValidator* const valToAdopt
- , GrammarResolver* const grammarResolver
- , MemoryManager* const manager) :
- XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
- , fAttrNSList(0)
- , fDTDValidator(0)
- , fDTDGrammar(0)
- , fDTDElemNonDeclPool(0)
- , fElemCount(0)
- , fAttDefRegistry(0)
- , fUndeclaredAttrRegistry(0)
- {
- try
- {
- commonInit();
- if (valToAdopt)
- {
- if (!valToAdopt->handlesDTD())
- ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
- }
- else
- {
- fValidator = fDTDValidator;
- }
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- cleanUp();
- throw;
- }
- }
- DGXMLScanner::~DGXMLScanner()
- {
- cleanUp();
- }
- // ---------------------------------------------------------------------------
- // DGXMLScanner: Getter methods
- // ---------------------------------------------------------------------------
- NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool()
- {
- if(!fGrammar)
- return 0;
- return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
- }
- const NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool() const
- {
- if(!fGrammar)
- return 0;
- return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
- }
- // ---------------------------------------------------------------------------
- // DGXMLScanner: Main entry point to scan a document
- // ---------------------------------------------------------------------------
- void DGXMLScanner::scanDocument(const InputSource& src)
- {
- // Bump up the sequence id for this parser instance. This will invalidate
- // any previous progressive scan tokens.
- fSequenceId++;
- try
- {
- // Reset the scanner and its plugged in stuff for a new run. This
- // resets all the data structures, creates the initial reader and
- // pushes it on the stack, and sets up the base document path.
- scanReset(src);
- // If we have a document handler, then call the start document
- if (fDocHandler)
- fDocHandler->startDocument();
- // Scan the prolog part, which is everything before the root element
- // including the DTD subsets.
- scanProlog();
- // If we got to the end of input, then its not a valid XML file.
- // Else, go on to scan the content.
- if (fReaderMgr.atEOF())
- {
- emitError(XMLErrs::EmptyMainEntity);
- }
- else
- {
- // Scan content, and tell it its not an external entity
- if (scanContent(false))
- {
- // Do post-parse validation if required
- if (fValidate)
- {
- // We handle ID reference semantics at this level since
- // its required by XML 1.0.
- checkIDRefs();
- // Then allow the validator to do any extra stuff it wants
- // fValidator->postParseValidation();
- }
- // That went ok, so scan for any miscellaneous stuff
- if (!fReaderMgr.atEOF())
- scanMiscellaneous();
- }
- }
- // If we have a document handler, then call the end document
- if (fDocHandler)
- fDocHandler->endDocument();
- // Reset the reader manager to close all files, sockets, etc...
- fReaderMgr.reset();
- }
- // NOTE:
- //
- // In all of the error processing below, the emitError() call MUST come
- // before the flush of the reader mgr, or it will fail because it tries
- // to find out the position in the XML source of the error.
- catch(const XMLErrs::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLValid::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLException& excToCatch)
- {
- // Emit the error and catch any user exception thrown from here. Make
- // sure in all cases we flush the reader manager.
- fInException = true;
- try
- {
- if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
- emitError
- (
- XMLErrs::XMLException_Warning
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
- emitError
- (
- XMLErrs::XMLException_Fatal
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else
- emitError
- (
- XMLErrs::XMLException_Error
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- // Flush the reader manager and rethrow user's error
- fReaderMgr.reset();
- throw;
- }
- // If it returned, then reset the reader manager and fall through
- fReaderMgr.reset();
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- // Reset and rethrow
- fReaderMgr.reset();
- throw;
- }
- }
- bool DGXMLScanner::scanNext(XMLPScanToken& token)
- {
- // Make sure this token is still legal
- if (!isLegalToken(token))
- ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);
- // Find the next token and remember the reader id
- unsigned int orgReader;
- XMLTokens curToken;
- bool retVal = true;
- try
- {
- while (true)
- {
- // We have to handle any end of entity exceptions that happen here.
- // We could be at the end of X nested entities, each of which will
- // generate an end of entity exception as we try to move forward.
- try
- {
- curToken = senseNextToken(orgReader);
- break;
- }
- catch(const EndOfEntityException& toCatch)
- {
- // Send an end of entity reference event
- if (fDocHandler)
- fDocHandler->endEntityReference(toCatch.getEntity());
- }
- }
- if (curToken == Token_CharData)
- {
- scanCharData(fCDataBuf);
- }
- else if (curToken == Token_EOF)
- {
- if (!fElemStack.isEmpty())
- {
- const ElemStack::StackElem* topElem = fElemStack.popTop();
- emitError
- (
- XMLErrs::EndedWithTagsOnStack
- , topElem->fThisElement->getFullName()
- );
- }
- retVal = false;
- }
- else
- {
- // Its some sort of markup
- bool gotData = true;
- switch(curToken)
- {
- case Token_CData :
- // Make sure we are within content
- if (fElemStack.isEmpty())
- emitError(XMLErrs::CDATAOutsideOfContent);
- scanCDSection();
- break;
- case Token_Comment :
- scanComment();
- break;
- case Token_EndTag :
- scanEndTag(gotData);
- break;
- case Token_PI :
- scanPI();
- break;
- case Token_StartTag :
- scanStartTag(gotData);
- break;
- default :
- fReaderMgr.skipToChar(chOpenAngle);
- break;
- }
- if (orgReader != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialMarkupInEntity);
- // If we hit the end, then do the miscellaneous part
- if (!gotData)
- {
- // Do post-parse validation if required
- if (fValidate)
- {
- // We handle ID reference semantics at this level since
- // its required by XML 1.0.
- checkIDRefs();
- // Then allow the validator to do any extra stuff it wants
- // fValidator->postParseValidation();
- }
- // That went ok, so scan for any miscellaneous stuff
- scanMiscellaneous();
- if (fDocHandler)
- fDocHandler->endDocument();
- }
- }
- }
- // NOTE:
- //
- // In all of the error processing below, the emitError() call MUST come
- // before the flush of the reader mgr, or it will fail because it tries
- // to find out the position in the XML source of the error.
- catch(const XMLErrs::Codes)
- {
- // This is a 'first failure' exception, so reset and return failure
- fReaderMgr.reset();
- return false;
- }
- catch(const XMLValid::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and reuturn failure
- fReaderMgr.reset();
- return false;
- }
- catch(const XMLException& excToCatch)
- {
- // Emit the error and catch any user exception thrown from here. Make
- // sure in all cases we flush the reader manager.
- fInException = true;
- try
- {
- if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
- emitError
- (
- XMLErrs::XMLException_Warning
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
- emitError
- (
- XMLErrs::XMLException_Fatal
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else
- emitError
- (
- XMLErrs::XMLException_Error
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- // Reset and rethrow user error
- fReaderMgr.reset();
- throw;
- }
- // Reset and return failure
- fReaderMgr.reset();
- return false;
- }
- catch(const OutOfMemoryException&)
- {
- throw;
- }
- catch(...)
- {
- // Reset and rethrow original error
- fReaderMgr.reset();
- throw;
- }
- // If we hit the end, then flush the reader manager
- if (!retVal)
- fReaderMgr.reset();
- return retVal;
- }
- // ---------------------------------------------------------------------------
- // DGXMLScanner: Private scanning methods
- // ---------------------------------------------------------------------------
- // This method will kick off the scanning of the primary content of the
- // document, i.e. the elements.
- bool DGXMLScanner::scanContent(const bool extEntity)
- {
- // Go into a loop until we hit the end of the root element, or we fall
- // out because there is no root element.
- //
- // We have to do kind of a deeply nested double loop here in order to
- // avoid doing the setup/teardown of the exception handler on each
- // round. Doing it this way we only do it when an exception actually
- // occurs.
- bool gotData = true;
- bool inMarkup = false;
- while (gotData)
- {
- try
- {
- while (gotData)
- {
- // Sense what the next top level token is. According to what
- // this tells us, we will call something to handle that kind
- // of thing.
- unsigned int orgReader;
- const XMLTokens curToken = senseNextToken(orgReader);
- // Handle character data and end of file specially. Char data
- // is not markup so we don't want to handle it in the loop
- // below.
- if (curToken == Token_CharData)
- {
- // Scan the character data and call appropriate events. Let
- // him use our local character data buffer for efficiency.
- scanCharData(fCDataBuf);
- continue;
- }
- else if (curToken == Token_EOF)
- {
- // The element stack better be empty at this point or we
- // ended prematurely before all elements were closed.
- if (!fElemStack.isEmpty())
- {
- const ElemStack::StackElem* topElem = fElemStack.popTop();
- emitError
- (
- XMLErrs::EndedWithTagsOnStack
- , topElem->fThisElement->getFullName()
- );
- }
- // Its the end of file, so clear the got data flag
- gotData = false;
- continue;
- }
- // We are in some sort of markup now
- inMarkup = true;
- // According to the token we got, call the appropriate
- // scanning method.
- switch(curToken)
- {
- case Token_CData :
- // Make sure we are within content
- if (fElemStack.isEmpty())
- emitError(XMLErrs::CDATAOutsideOfContent);
- scanCDSection();
- break;
- case Token_Comment :
- scanComment();
- break;
- case Token_EndTag :
- scanEndTag(gotData);
- break;
- case Token_PI :
- scanPI();
- break;
- case Token_StartTag :
- scanStartTag(gotData);
- break;
- default :
- fReaderMgr.skipToChar(chOpenAngle);
- break;
- }
- if (orgReader != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialMarkupInEntity);
- // And we are back out of markup again
- inMarkup = false;
- }
- }
- catch(const EndOfEntityException& toCatch)
- {
- // If we were in some markup when this happened, then its a
- // partial markup error.
- if (inMarkup)
- emitError(XMLErrs::PartialMarkupInEntity);
- // Send an end of entity reference event
- if (fDocHandler)
- fDocHandler->endEntityReference(toCatch.getEntity());
- inMarkup = false;
- }
- }
- // It went ok, so return success
- return true;
- }
- void DGXMLScanner::scanEndTag(bool& gotData)
- {
- // Assume we will still have data until proven otherwise. It will only
- // ever be false if this is the end of the root element.
- gotData = true;
- // Check if the element stack is empty. If so, then this is an unbalanced
- // element (i.e. more ends than starts, perhaps because of bad text
- // causing one to be skipped.)
- if (fElemStack.isEmpty())
- {
- emitError(XMLErrs::MoreEndThanStartTags);
- fReaderMgr.skipPastChar(chCloseAngle);
- ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd);
- }
- // After the </ is the element QName, so get a name from the input
- if (!fReaderMgr.getName(fQNameBuf))
- {
- // It failed so we can't really do anything with it
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- // Resolve element name uri if needed
- unsigned int uriId = fEmptyNamespaceId;
- const ElemStack::StackElem* topElem = fElemStack.topElement();
- if (fDoNamespaces)
- {
- uriId = resolvePrefix
- (
- topElem->fThisElement->getElementName()->getPrefix()
- , ElemStack::Mode_Element
- );
- }
- // Pop the stack of the element we are supposed to be ending. Remember
- // that we don't own this. The stack just keeps them and reuses them.
- fElemStack.popTop();
- // See if it was the root element, to avoid multiple calls below
- const bool isRoot = fElemStack.isEmpty();
- // Make sure that its the end of the element that we expect
- if (!XMLString::equals(topElem->fThisElement->getFullName(), fQNameBuf.getRawBuffer()))
- {
- emitError
- (
- XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
- );
- }
- // Make sure we are back on the same reader as where we started
- if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialTagMarkupError);
- // Skip optional whitespace
- fReaderMgr.skipPastSpaces();
- // Make sure we find the closing bracket
- if (!fReaderMgr.skippedChar(chCloseAngle))
- {
- emitError
- (
- XMLErrs::UnterminatedEndTag
- , topElem->fThisElement->getFullName()
- );
- }
- // If validation is enabled, then lets pass him the list of children and
- // this element and let him validate it.
- if (fValidate)
- {
- int res = fValidator->checkContent
- (
- topElem->fThisElement
- , topElem->fChildren
- , topElem->fChildCount
- );
- if (res >= 0)
- {
- // One of the elements is not valid for the content. NOTE that
- // if no children were provided but the content model requires
- // them, it comes back with a zero value. But we cannot use that
- // to index the child array in this case, and have to put out a
- // special message.
- if (!topElem->fChildCount)
- {
- fValidator->emitError
- (
- XMLValid::EmptyNotValidForContent
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
- else if ((unsigned int)res >= topElem->fChildCount)
- {
- fValidator->emitError
- (
- XMLValid::NotEnoughElemsForCM
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
- else
- {
- fValidator->emitError
- (
- XMLValid::ElementNotValidForContent
- , topElem->fChildren[res]->getRawName()
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
- }
- }
- // If we have a doc handler, tell it about the end tag
- if (fDocHandler)
- {
- fDocHandler->endElement
- (
- *topElem->fThisElement
- , uriId
- , isRoot
- , (fDoNamespaces)
- ? topElem->fThisElement->getElementName()->getPrefix()
- : XMLUni::fgZeroLenString
- );
- }
- // If this was the root, then done with content
- gotData = !isRoot;
- }
- // This method handles the high level logic of scanning the DOCType
- // declaration. This calls the DTDScanner and kicks off both the scanning of
- // the internal subset and the scanning of the external subset, if any.
- //
- // When we get here the '<!DOCTYPE' part has already been scanned, which is
- // what told us that we had a doc type decl to parse.
- void DGXMLScanner::scanDocTypeDecl()
- {
- if (fDocTypeHandler)
- fDocTypeHandler->resetDocType();
- // There must be some space after DOCTYPE
- if (!fReaderMgr.skipPastSpaces())
- {
- emitError(XMLErrs::ExpectedWhitespace);
- // Just skip the Doctype declaration and return
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- // Get a buffer for the root element
- XMLBufBid bbRootName(&fBufMgr);
- // Get a name from the input, which should be the name of the root
- // element of the upcoming content.
- fReaderMgr.getName(bbRootName.getBuffer());
- if (bbRootName.isEmpty())
- {
- emitError(XMLErrs::NoRootElemInDOCTYPE);
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- // Store the root element name for later check
- setRootElemName(bbRootName.getRawBuffer());
- // This element obviously is not going to exist in the element decl
- // pool yet, but we need to call docTypeDecl. So force it into
- // the element decl pool, marked as being there because it was in
- // the DOCTYPE. Later, when its declared, the status will be updated.
- //
- // Only do this if we are not reusing the validator! If we are reusing,
- // then look it up instead. It has to exist!
- DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
- (
- bbRootName.getRawBuffer()
- , fEmptyNamespaceId
- , DTDElementDecl::Any
- , fGrammarPoolMemoryManager
- );
- rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
- rootDecl->setExternalElemDeclaration(true);
- if(!fUseCachedGrammar)
- {
- // this will break getRootElemId on DTDGrammar when
- // cached grammars are in use, but
- // why would one use this anyway???
- ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
- } else
- {
- // put this in the undeclared pool so it gets deleted...
- rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
- }
- // Skip any spaces after the name
- fReaderMgr.skipPastSpaces();
- // And now if we are looking at a >, then we are done. It is not
- // required to have an internal or external subset, though why you
- // would not escapes me.
- if (fReaderMgr.skippedChar(chCloseAngle)) {
- // If we have a doc type handler and advanced callbacks are enabled,
- // call the doctype event.
- if (fDocTypeHandler)
- fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
- return;
- }
- // either internal/external subset
- if (fValScheme == Val_Auto && !fValidate)
- fValidate = true;
- bool hasIntSubset = false;
- bool hasExtSubset = false;
- XMLCh* sysId = 0;
- XMLCh* pubId = 0;
- DTDScanner dtdScanner
- (
- (DTDGrammar*) fGrammar
- , fDocTypeHandler
- , fGrammarPoolMemoryManager
- , fMemoryManager
- );
- dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
- // If the next character is '[' then we have no external subset cause
- // there is no system id, just the opening character of the internal
- // subset. Else, has to be an id.
- //
- // Just look at the next char, don't eat it.
- if (fReaderMgr.peekNextChar() == chOpenSquare)
- {
- hasIntSubset = true;
- }
- else
- {
- // Indicate we have an external subset
- hasExtSubset = true;
- fHasNoDTD = false;
- // Get buffers for the ids
- XMLBufBid bbPubId(&fBufMgr);
- XMLBufBid bbSysId(&fBufMgr);
- // Get the external subset id
- if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
- {
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- // Get copies of the ids we got
- pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
- sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
- // Skip spaces and check again for the opening of an internal subset
- fReaderMgr.skipPastSpaces();
- // Just look at the next char, don't eat it.
- if (fReaderMgr.peekNextChar() == chOpenSquare) {
- hasIntSubset = true;
- }
- }
- // Insure that the ids get cleaned up, if they got allocated
- ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
- ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
- // If we have a doc type handler and advanced callbacks are enabled,
- // call the doctype event.
- if (fDocTypeHandler)
- fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
- // Ok, if we had an internal subset, we are just past the [ character
- // and need to parse that first.
- if (hasIntSubset)
- {
- // Eat the opening square bracket
- fReaderMgr.getNextChar();
- // We can't have any internal subset if we are reusing the validator
- if (fUseCachedGrammar || fToCacheGrammar)
- ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
- // And try to scan the internal subset. If we fail, try to recover
- // by skipping forward tot he close angle and returning.
- if (!dtdScanner.scanInternalSubset())
- {
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- // Do a sanity check that some expanded PE did not propogate out of
- // the doctype. This could happen if it was terminated early by bad
- // syntax.
- if (fReaderMgr.getReaderDepth() > 1)
- {
- emitError(XMLErrs::PEPropogated);
- // Ask the reader manager to pop back down to the main level
- fReaderMgr.cleanStackBackTo(1);
- }
- fReaderMgr.skipPastSpaces();
- }
- // And that should leave us at the closing > of the DOCTYPE line
- if (!fReaderMgr.skippedChar(chCloseAngle))
- {
- // Do a special check for the common scenario of an extra ] char at
- // the end. This is easy to recover from.
- if (fReaderMgr.skippedChar(chCloseSquare)
- && fReaderMgr.skippedChar(chCloseAngle))
- {
- emitError(XMLErrs::ExtraCloseSquare);
- }
- else
- {
- emitError(XMLErrs::UnterminatedDOCTYPE);
- fReaderMgr.skipPastChar(chCloseAngle);
- }
- }
- // If we had an external subset, then we need to deal with that one
- // next. If we are reusing the validator, then don't scan it.
- if (hasExtSubset) {
- if (fUseCachedGrammar)
- {
- InputSource* sysIdSrc = resolveSystemId(sysId);
- Janitor<InputSource> janSysIdSrc(sysIdSrc);
- Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());
- if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
- fDTDGrammar = (DTDGrammar*) grammar;
- fGrammar = fDTDGrammar;
- fValidator->setGrammar(fGrammar);
- // we *cannot* identify the root element on
- // cached grammars; else we risk breaking multithreaded
- // applications. - NG
- /*******
- rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
- if (rootDecl)
- ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
- else {
- rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
- (
- bbRootName.getRawBuffer()
- , fEmptyNamespaceId
- , DTDElementDecl::Any
- , fGrammarPoolMemoryManager
- );
- rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
- rootDecl->setExternalElemDeclaration(true);
- ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
- }
- *********/
- return;
- }
- }
- if (fLoadExternalDTD || fValidate)
- {
- // And now create a reader to read this entity
- InputSource* srcUsed;
- XMLReader* reader = fReaderMgr.createReader
- (
- sysId
- , pubId
- , false
- , XMLReader::RefFrom_NonLiteral
- , XMLReader::Type_General
- , XMLReader::Source_External
- , srcUsed
- , fCalculateSrcOfs
- );
- // Put a janitor on the input source
- Janitor<InputSource> janSrc(srcUsed);
- // If it failed then throw an exception
- if (!reader)
- ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
- if (fToCacheGrammar) {
- unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
- const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
- fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
- ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setRootName(sysIdStr);
- fGrammarResolver->putGrammar(fGrammar);
- }
- // In order to make the processing work consistently, we have to
- // make this look like an external entity. So create an entity
- // decl and fill it in and push it with the reader, as happens
- // with an external entity. Put a janitor on it to insure it gets
- // cleaned up. The reader manager does not adopt them.
- const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
- DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);
- declDTD->setSystemId(sysId);
- Janitor<DTDEntityDecl> janDecl(declDTD);
- // Mark this one as a throw at end
- reader->setThrowAtEnd(true);
- // And push it onto the stack, with its pseudo name
- fReaderMgr.pushReader(reader, declDTD);
- // Tell it its not in an include section
- dtdScanner.scanExtSubsetDecl(false, true);
- }
- }
- }
- bool DGXMLScanner::scanStartTag(bool& gotData)
- {
- // Assume we will still have data until proven otherwise. It will only
- // ever be false if this is the root and its empty.
- gotData = true;
- // Get the QName. In this case, we are not doing namespaces, so we just
- // use it as is and don't have to break it into parts.
- if (!fReaderMgr.getName(fQNameBuf))
- {
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipToChar(chOpenAngle);
- return false;
- }
- // Assume it won't be an empty tag
- bool isEmpty = false;
- // See if its the root element
- const bool isRoot = fElemStack.isEmpty();
- // Lets try to look up the element in the validator's element decl pool
- // We can pass bogus values for the URI id and the base name. We know that
- // this can only be called if we are doing a DTD style validator and that
- // he will only look at the QName.
- //
- // We *do not* tell him to fault in a decl if he does not find one - NG.
- bool wasAdded = false;
- const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
- XMLElementDecl* elemDecl = fGrammar->getElemDecl
- (
- fEmptyNamespaceId
- , 0
- , qnameRawBuf
- , Grammar::TOP_LEVEL_SCOPE
- );
- // look in the undeclared pool:
- if(!elemDecl)
- {
- elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
- }
- if(!elemDecl)
- {
- wasAdded = true;
- elemDecl = new (fMemoryManager) DTDElementDecl
- (
- qnameRawBuf
- , fEmptyNamespaceId
- , DTDElementDecl::Any
- , fMemoryManager
- );
- elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
- }
- if (fValidate) {
- if (wasAdded)
- {
- // This is to tell the reuse Validator that this element was
- // faulted-in, was not an element in the validator pool originally
- elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
- fValidator->emitError
- (
- XMLValid::ElementNotDefined
- , qnameRawBuf
- );
- }
- // If its not marked declared, then emit an error
- else if (!elemDecl->isDeclared())
- {
- fValidator->emitError
- (
- XMLValid::ElementNotDefined
- , qnameRawBuf
- );
- }
- fValidator->validateElement(elemDecl);
- }
- // Expand the element stack and add the new element
- fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
- // If this is the first element and we are validating, check the root
- // element.
- if (isRoot)
- {
- fRootGrammar = fGrammar;
- if (fValidate)
- {
- // If a DocType exists, then check if it matches the root name there.
- if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
- fValidator->emitError(XMLValid::RootElemNotLikeDocType);
- // Some validators may also want to check the root, call the
- // XMLValidator::checkRootElement
- if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId()))
- fValidator->emitError(XMLValid::RootElemNotLikeDocType);
- }
- }
- else if (fValidate)
- {
- // If the element stack is not empty, then add this element as a
- // child of the previous top element. If it's empty, this is the root
- // elem and is not the child of anything.
- fElemStack.addChild(elemDecl->getElementName(), true);
- }
- // Skip any whitespace after the name
- fReaderMgr.skipPastSpaces();
- // We loop until we either see a /> or >, handling attribute/value
- // pairs until we get there.
- unsigned int attCount = 0;
- unsigned int curAttListSize = fAttrList->size();
- wasAdded = false;
- fElemCount++;
- while (true)
- {
- // And get the next non-space character
- XMLCh nextCh = fReaderMgr.peekNextChar();
- // If the next character is not a slash or closed angle bracket,
- // then it must be whitespace, since whitespace is required
- // between the end of the last attribute and the name of the next
- // one.
- if (attCount)
- {
- if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
- {
- if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
- {
- // Ok, skip by them and peek another char
- fReaderMgr.skipPastSpaces();
- nextCh = fReaderMgr.peekNextChar();
- }
- else
- {
- // Emit the error but keep on going
- emitError(XMLErrs::ExpectedWhitespace);
- }
- }
- }
- // Ok, here we first check for any of the special case characters.
- // If it's not one, then we do the normal case processing, which
- // assumes that we've hit an attribute value. Otherwise, we do all
- // the special case checks.
- if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
- {
- // Assume it's going to be an attribute, so get a name from
- // the input.
- if (!fReaderMgr.getName(fAttNameBuf))
- {
- emitError(XMLErrs::ExpectedAttrName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return false;
- }
- // And next must be an equal sign
- if (!scanEq())
- {
- static const XMLCh tmpList[] =
- {
- chSingleQuote, chDoubleQuote, chCloseAngle
- , chOpenAngle, chForwardSlash, chNull
- };
- emitError(XMLErrs::ExpectedEqSign);
- // Try to sync back up by skipping forward until we hit
- // something meaningful.
- const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
- if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
- {
- // Jump back to top for normal processing of these
- continue;
- }
- else if ((chFound == chSingleQuote)
- || (chFound == chDoubleQuote)
- || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
- {
- // Just fall through assuming that the value is to follow
- }
- else if (chFound == chOpenAngle)
- {
- // Assume a malformed tag and that new one is starting
- emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
- return false;
- }
- else
- {
- // Something went really wrong
- return false;
- }
- }
- // See if this attribute is declared for this element. If we are
- // not validating of course it will not be at first, but we will
- // fault it into the pool (to avoid lots of redundant errors.)
- XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( fAttNameBuf.getRawBuffer());
- // now need to prepare for duplicate detection
- if(attDef)
- {
- unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
- if(!curCountPtr)
- {
- curCountPtr = getNewUIntPtr();
- *curCountPtr = fElemCount;
- fAttDefRegistry->put(attDef, curCountPtr);
- }
- else if(*curCountPtr < fElemCount)
- *curCountPtr = fElemCount;
- else
- {
- emitError
- (
- XMLErrs::AttrAlreadyUsedInSTag
- , attDef->getFullName()
- , elemDecl->getFullName()
- );
- }
- }
- else
- {
- XMLCh * namePtr = fAttNameBuf.getRawBuffer();
- unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr);
- if(!curCountPtr)
- {
- curCountPtr = getNewUIntPtr();
- *curCountPtr = fElemCount;
- fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr);
- }
- else if(*curCountPtr < fElemCount)
- *curCountPtr = fElemCount;
- else
- {
- emitError
- (
- XMLErrs::AttrAlreadyUsedInSTag
- , namePtr
- , elemDecl->getFullName()
- );
- }
- }
- if (fValidate)
- {
- if (!attDef)
- {
- fValidator->emitError
- (
- XMLValid::AttNotDefinedForElement
- , fAttNameBuf.getRawBuffer()
- , qnameRawBuf
- );
- }
- }
- // Skip any whitespace before the value and then scan the att
- // value. This will come back normalized with entity refs and
- // char refs expanded.
- fReaderMgr.skipPastSpaces();
- if (!scanAttValue(attDef, fAttNameBuf.getRawBuffer(), fAttValueBuf))
- {
- static const XMLCh tmpList[] =
- {
- chCloseAngle, chOpenAngle, chForwardSlash, chNull
- };
- emitError(XMLErrs::ExpectedAttrValue);
- // It failed, so let's try to get synced back up. We skip
- // forward until we find some whitespace or one of the
- // chars in our list.
- const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
- if ((chFound == chCloseAngle)
- || (chFound == chForwardSlash)
- || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
- {
- // Just fall through and process this attribute, though
- // the value will be "".
- }
- else if (chFound == chOpenAngle)
- {
- // Assume a malformed tag and that new one is starting
- emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
- return false;
- }
- else
- {
- // Something went really wrong
- return false;
- }
- }
- // Now that it's all stretched out, let's look at its type and
- // determine if it has a valid value. It will output any needed
- // errors, but we just keep going. We only need to do this if
- // we are validating.
- if (attDef)
- {
- // Let the validator pass judgement on the attribute value
- if (fValidate)
- {
- fValidator->validateAttrValue
- (
- attDef
- , fAttValueBuf.getRawBuffer()
- , false
- , elemDecl
- );
- }
- }
- if (fDoNamespaces)
- {
- // Make sure that the name is basically well formed for namespace
- // enabled rules. It either has no colons, or it has one which
- // is neither the first or last char.
- const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
- if (colonFirst != -1)
- {
- const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
- if (colonFirst != colonLast)
- {
- emitError(XMLErrs::TooManyColonsInName);
- continue;
- }
- else if ((colonFirst == 0)
- || (colonLast == (int)fAttNameBuf.getLen() - 1))
- {
- emitError(XMLErrs::InvalidColonPos);
- continue;
- }
- }
- }
- // Add this attribute to the attribute list that we use to
- // pass them to the handler. We reuse its existing elements
- // but expand it as required.
- XMLAttr* curAtt;
- if (attCount >= curAttListSize)
- {
- if (fDoNamespaces) {
- curAtt = new (fMemoryManager) XMLAttr
- (
- fEmptyNamespaceId
- , fAttNameBuf.getRawBuffer()
- , fAttValueBuf.getRawBuffer()
- , (attDef)?attDef->getType():XMLAttDef::CData
- , true
- , fMemoryManager
- );
- }
- else
- {
- curAtt = new (fMemoryManager) XMLAttr
- (
- -1
- , fAttNameBuf.getRawBuffer()
- , XMLUni::fgZeroLenString
- , fAttValueBuf.getRawBuffer()
- , (attDef)?attDef->getType():XMLAttDef::CData
- , true
- , fMemoryManager
- );
- }
- fAttrList->addElement(curAtt);
- }
- else
- {
- curAtt = fAttrList->elementAt(attCount);
- if (fDoNamespaces)
- {
- curAtt->set
- (
- fEmptyNamespaceId
- , fAttN…
Large files are truncated, but you can click here to view the full file