PageRenderTime 53ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/ATF2/control-software/epics-3.14.8/extensions/src/ChannelArchiver/ThirdParty/xerces-c-src2_4_0/src/xercesc/internal/XMLReader.cpp

http://atf2flightsim.googlecode.com/
C++ | 1612 lines | 923 code | 194 blank | 495 comment | 290 complexity | 67393329c8759eecb90374fb36ab46a7 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.0, IPL-1.0, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution,
  20. * if any, must include the following acknowledgment:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgment may appear in the software itself,
  24. * if and wherever such third-party acknowledgments normally appear.
  25. *
  26. * 4. The names "Xerces" and "Apache Software Foundation" must
  27. * not be used to endorse or promote products derived from this
  28. * software without prior written permission. For written
  29. * permission, please contact apache\@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache",
  32. * nor may "Apache" appear in their name, without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation, and was
  51. * originally based on software copyright (c) 1999, International
  52. * Business Machines, Inc., http://www.ibm.com . For more information
  53. * on the Apache Software Foundation, please see
  54. * <http://www.apache.org/>.
  55. */
  56. /*
  57. * $Id: XMLReader.cpp,v 1.1.1.1 2009/03/14 06:42:21 whitegr Exp $
  58. */
  59. // ---------------------------------------------------------------------------
  60. // Includes
  61. // ---------------------------------------------------------------------------
  62. #include <xercesc/internal/XMLReader.hpp>
  63. #include <xercesc/util/BitOps.hpp>
  64. #include <xercesc/util/BinInputStream.hpp>
  65. #include <xercesc/util/PlatformUtils.hpp>
  66. #include <xercesc/util/RuntimeException.hpp>
  67. #include <xercesc/util/TranscodingException.hpp>
  68. #include <xercesc/util/TransService.hpp>
  69. #include <xercesc/util/XMLEBCDICTranscoder.hpp>
  70. #include <xercesc/util/XMLString.hpp>
  71. XERCES_CPP_NAMESPACE_BEGIN
  72. // ---------------------------------------------------------------------------
  73. // XMLReader: Query Methods
  74. // ---------------------------------------------------------------------------
  75. // Checks whether all of the chars in the passed buffer are whitespace or
  76. // not. Breaks out on the first non-whitespace.
  77. //
  78. bool XMLReader::isAllSpaces(const XMLCh* const toCheck
  79. , const unsigned int count)
  80. {
  81. const XMLCh* curCh = toCheck;
  82. const XMLCh* endPtr = toCheck + count;
  83. while (curCh < endPtr)
  84. {
  85. if (!(fgCharCharsTable[*curCh++] & gWhitespaceCharMask))
  86. return false;
  87. }
  88. return true;
  89. }
  90. //
  91. // Checks whether at least one of the chars in the passed buffer are whitespace or
  92. // not.
  93. //
  94. bool XMLReader::containsWhiteSpace(const XMLCh* const toCheck
  95. , const unsigned int count)
  96. {
  97. const XMLCh* curCh = toCheck;
  98. const XMLCh* endPtr = toCheck + count;
  99. while (curCh < endPtr)
  100. {
  101. if (fgCharCharsTable[*curCh++] & gWhitespaceCharMask)
  102. return true;
  103. }
  104. return false;
  105. }
  106. //
  107. // This one is not called terribly often, so call the XMLChar utility
  108. //
  109. bool XMLReader::isPublicIdChar(const XMLCh toCheck)
  110. {
  111. if (fXMLVersion == XMLV1_1)
  112. return XMLChar1_1::isPublicIdChar(toCheck);
  113. else
  114. return XMLChar1_0::isPublicIdChar(toCheck);
  115. }
  116. // ---------------------------------------------------------------------------
  117. // XMLReader: Constructors and Destructor
  118. // ---------------------------------------------------------------------------
  119. XMLReader::XMLReader(const XMLCh* const pubId
  120. , const XMLCh* const sysId
  121. , BinInputStream* const streamToAdopt
  122. , const RefFrom from
  123. , const Types type
  124. , const Sources source
  125. , const bool throwAtEnd
  126. , const bool calculateSrcOfs
  127. , const XMLVersion version
  128. , MemoryManager* const manager) :
  129. fCharIndex(0)
  130. , fCharsAvail(0)
  131. , fCurCol(1)
  132. , fCurLine(1)
  133. , fEncodingStr(0)
  134. , fForcedEncoding(false)
  135. , fNoMore(false)
  136. , fPublicId(XMLString::replicate(pubId, manager))
  137. , fRawBufIndex(0)
  138. , fRawBytesAvail(0)
  139. , fReaderNum(0xFFFFFFFF)
  140. , fRefFrom(from)
  141. , fSentTrailingSpace(false)
  142. , fSource(source)
  143. , fSrcOfsBase(0)
  144. , fSrcOfsSupported(false)
  145. , fCalculateSrcOfs(calculateSrcOfs)
  146. , fStream(streamToAdopt)
  147. , fSystemId(XMLString::replicate(sysId, manager))
  148. , fSwapped(false)
  149. , fThrowAtEnd(throwAtEnd)
  150. , fTranscoder(0)
  151. , fType(type)
  152. , fMemoryManager(manager)
  153. {
  154. setXMLVersion(version);
  155. // Do an initial load of raw bytes
  156. refreshRawBuffer();
  157. // Ask the transcoding service if it supports src offset info
  158. fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();
  159. //
  160. // Use the recognizer class to get a basic sense of what family of
  161. // encodings this file is in. We'll start off with a reader of that
  162. // type, and update it later if needed when we read the XMLDecl line.
  163. //
  164. fEncoding = XMLRecognizer::basicEncodingProbe(fRawByteBuf, fRawBytesAvail);
  165. #if defined(XERCES_DEBUG)
  166. if ((fEncoding < XMLRecognizer::Encodings_Min)
  167. || (fEncoding > XMLRecognizer::Encodings_Max))
  168. {
  169. ThrowXML(RuntimeException, XMLExcepts::Reader_BadAutoEncoding);
  170. }
  171. #endif
  172. fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding), fMemoryManager);
  173. // Check whether the fSwapped flag should be set or not
  174. checkForSwapped();
  175. //
  176. // This will check to see if the first line is an XMLDecl and, if
  177. // so, decode that first line manually one character at a time. This
  178. // leaves enough characters in the buffer that the high level code
  179. // can get through the Decl and call us back with the real encoding.
  180. //
  181. doInitDecode();
  182. //
  183. // NOTE: We won't create a transcoder until we either get a call to
  184. // setEncoding() or we get a call to refreshCharBuffer() and no
  185. // transcoder has been set yet.
  186. //
  187. }
  188. XMLReader::XMLReader(const XMLCh* const pubId
  189. , const XMLCh* const sysId
  190. , BinInputStream* const streamToAdopt
  191. , const XMLCh* const encodingStr
  192. , const RefFrom from
  193. , const Types type
  194. , const Sources source
  195. , const bool throwAtEnd
  196. , const bool calculateSrcOfs
  197. , const XMLVersion version
  198. , MemoryManager* const manager) :
  199. fCharIndex(0)
  200. , fCharsAvail(0)
  201. , fCurCol(1)
  202. , fCurLine(1)
  203. , fEncoding(XMLRecognizer::UTF_8)
  204. , fEncodingStr(0)
  205. , fForcedEncoding(true)
  206. , fNoMore(false)
  207. , fPublicId(XMLString::replicate(pubId, manager))
  208. , fRawBufIndex(0)
  209. , fRawBytesAvail(0)
  210. , fReaderNum(0xFFFFFFFF)
  211. , fRefFrom(from)
  212. , fSentTrailingSpace(false)
  213. , fSource(source)
  214. , fSrcOfsBase(0)
  215. , fSrcOfsSupported(false)
  216. , fCalculateSrcOfs(calculateSrcOfs)
  217. , fStream(streamToAdopt)
  218. , fSystemId(XMLString::replicate(sysId, manager))
  219. , fSwapped(false)
  220. , fThrowAtEnd(throwAtEnd)
  221. , fTranscoder(0)
  222. , fType(type)
  223. , fMemoryManager(manager)
  224. {
  225. setXMLVersion(version);
  226. // Do an initial load of raw bytes
  227. refreshRawBuffer();
  228. // Copy the encoding string to our member
  229. fEncodingStr = XMLString::replicate(encodingStr, fMemoryManager);
  230. XMLString::upperCase(fEncodingStr);
  231. // Ask the transcoding service if it supports src offset info
  232. fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();
  233. //
  234. // Map the passed encoding name to one of our enums. If it does not
  235. // match one of the intrinsic encodings, it will come back 'other',
  236. // which tells us to create a transcoder based reader.
  237. //
  238. fEncoding = XMLRecognizer::encodingForName(fEncodingStr);
  239. // Check whether the fSwapped flag should be set or not
  240. checkForSwapped();
  241. //
  242. // Create a transcoder for the encoding. Since the encoding has been
  243. // forced, this will be the one we will use, period.
  244. //
  245. XMLTransService::Codes failReason;
  246. if (fEncoding == XMLRecognizer::OtherEncoding)
  247. {
  248. //
  249. // fEncodingStr not pre-recognized, use it
  250. // directly for transcoder
  251. //
  252. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  253. (
  254. fEncodingStr
  255. , failReason
  256. , kCharBufSize
  257. , fMemoryManager
  258. );
  259. }
  260. else
  261. {
  262. //
  263. // Use the recognized fEncoding to create the transcoder
  264. //
  265. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  266. (
  267. fEncoding
  268. , failReason
  269. , kCharBufSize
  270. , fMemoryManager
  271. );
  272. }
  273. if (!fTranscoder)
  274. {
  275. ThrowXML1
  276. (
  277. TranscodingException
  278. , XMLExcepts::Trans_CantCreateCvtrFor
  279. , fEncodingStr
  280. );
  281. }
  282. //
  283. // Note that, unlike above, we do not do an initial decode of the
  284. // first line. We take the caller's word that the encoding is correct
  285. // and just assume that the first bulk decode (kicked off by the first
  286. // get of a character) will work.
  287. //
  288. // So we do here the slipping in of the leading space if required.
  289. //
  290. if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
  291. {
  292. // This represents no data from the source
  293. fCharSizeBuf[fCharsAvail] = 0;
  294. fCharBuf[fCharsAvail++] = chSpace;
  295. }
  296. }
  297. XMLReader::XMLReader(const XMLCh* const pubId
  298. , const XMLCh* const sysId
  299. , BinInputStream* const streamToAdopt
  300. , XMLRecognizer::Encodings encodingEnum
  301. , const RefFrom from
  302. , const Types type
  303. , const Sources source
  304. , const bool throwAtEnd
  305. , const bool calculateSrcOfs
  306. , const XMLVersion version
  307. , MemoryManager* const manager) :
  308. fCharIndex(0)
  309. , fCharsAvail(0)
  310. , fCurCol(1)
  311. , fCurLine(1)
  312. , fEncoding(XMLRecognizer::UTF_8)
  313. , fEncodingStr(0)
  314. , fForcedEncoding(true)
  315. , fNoMore(false)
  316. , fPublicId(XMLString::replicate(pubId, manager))
  317. , fRawBufIndex(0)
  318. , fRawBytesAvail(0)
  319. , fReaderNum(0xFFFFFFFF)
  320. , fRefFrom(from)
  321. , fSentTrailingSpace(false)
  322. , fSource(source)
  323. , fSrcOfsBase(0)
  324. , fSrcOfsSupported(false)
  325. , fCalculateSrcOfs(calculateSrcOfs)
  326. , fStream(streamToAdopt)
  327. , fSystemId(XMLString::replicate(sysId, manager))
  328. , fSwapped(false)
  329. , fThrowAtEnd(throwAtEnd)
  330. , fTranscoder(0)
  331. , fType(type)
  332. , fMemoryManager(manager)
  333. {
  334. setXMLVersion(version);
  335. // Do an initial load of raw bytes
  336. refreshRawBuffer();
  337. // Ask the transcoding service if it supports src offset info
  338. fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();
  339. //
  340. // Use the passed encoding code
  341. //
  342. fEncoding = encodingEnum;
  343. fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding), fMemoryManager);
  344. // Check whether the fSwapped flag should be set or not
  345. checkForSwapped();
  346. //
  347. // Create a transcoder for the encoding. Since the encoding has been
  348. // forced, this will be the one we will use, period.
  349. //
  350. XMLTransService::Codes failReason;
  351. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  352. (
  353. fEncoding
  354. , failReason
  355. , kCharBufSize
  356. , fMemoryManager
  357. );
  358. if (!fTranscoder)
  359. {
  360. ThrowXML1
  361. (
  362. TranscodingException
  363. , XMLExcepts::Trans_CantCreateCvtrFor
  364. , fEncodingStr
  365. );
  366. }
  367. //
  368. // Note that, unlike above, we do not do an initial decode of the
  369. // first line. We take the caller's word that the encoding is correct
  370. // and just assume that the first bulk decode (kicked off by the first
  371. // get of a character) will work.
  372. //
  373. // So we do here the slipping in of the leading space if required.
  374. //
  375. if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
  376. {
  377. // This represents no data from the source
  378. fCharSizeBuf[fCharsAvail] = 0;
  379. fCharBuf[fCharsAvail++] = chSpace;
  380. }
  381. }
  382. XMLReader::~XMLReader()
  383. {
  384. fMemoryManager->deallocate(fEncodingStr);
  385. fMemoryManager->deallocate(fPublicId);
  386. fMemoryManager->deallocate(fSystemId);
  387. delete fStream;
  388. delete fTranscoder;
  389. }
  390. // ---------------------------------------------------------------------------
  391. // XMLReader: Character buffer management methods
  392. // ---------------------------------------------------------------------------
  393. unsigned int XMLReader::getSrcOffset() const
  394. {
  395. if (!fSrcOfsSupported || !fCalculateSrcOfs)
  396. ThrowXML(RuntimeException, XMLExcepts::Reader_SrcOfsNotSupported);
  397. //
  398. // Take the current source offset and add in the sizes that we've
  399. // eaten from the source so far.
  400. //
  401. unsigned int offset = fSrcOfsBase;
  402. for (unsigned int index = 0; index < fCharIndex; index++)
  403. offset += fCharSizeBuf[index];
  404. return offset;
  405. }
  406. bool XMLReader::refreshCharBuffer()
  407. {
  408. // If the no more flag is set, then don't both doing anything
  409. if (fNoMore)
  410. return false;
  411. unsigned int startInd;
  412. // See if we have any existing chars.
  413. const unsigned int spareChars = fCharsAvail - fCharIndex;
  414. // If we are full, then don't do anything.
  415. if (spareChars == kCharBufSize)
  416. return true;
  417. //
  418. // If no transcoder has been created yet, then we never saw the
  419. // any encoding="" string and the encoding was not forced, so lets
  420. // create one now. We know that it won't change now.
  421. //
  422. // However, note that if we autosensed EBCDIC, then we have to
  423. // consider it an error if we never got an encoding since we don't
  424. // know what variant of EBCDIC it is.
  425. //
  426. if (!fTranscoder)
  427. {
  428. if (fEncoding == XMLRecognizer::EBCDIC)
  429. ThrowXML(RuntimeException, XMLExcepts::Reader_EncodingStrRequired);
  430. // Ask the transcoding service to make use a transcoder
  431. XMLTransService::Codes failReason;
  432. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  433. (
  434. fEncodingStr
  435. , failReason
  436. , kCharBufSize
  437. , fMemoryManager
  438. );
  439. if (!fTranscoder)
  440. {
  441. ThrowXML1
  442. (
  443. TranscodingException
  444. , XMLExcepts::Trans_CantCreateCvtrFor
  445. , fEncodingStr
  446. );
  447. }
  448. }
  449. //
  450. // Add the number of source bytes eaten so far to the base src
  451. // offset member.
  452. //
  453. if (fCalculateSrcOfs) {
  454. for (startInd = 0; startInd < fCharIndex; startInd++)
  455. fSrcOfsBase += fCharSizeBuf[startInd];
  456. }
  457. //
  458. // If there are spare chars, then move then down to the bottom. We
  459. // have to move the char sizes down also.
  460. //
  461. startInd = 0;
  462. if (spareChars)
  463. {
  464. for (unsigned int index = fCharIndex; index < fCharsAvail; index++)
  465. {
  466. fCharBuf[startInd] = fCharBuf[index];
  467. fCharSizeBuf[startInd] = fCharSizeBuf[index];
  468. startInd++;
  469. }
  470. }
  471. //
  472. // And then get more chars, starting after any spare chars that were
  473. // left over from the last time.
  474. //
  475. fCharsAvail = xcodeMoreChars
  476. (
  477. &fCharBuf[startInd]
  478. , &fCharSizeBuf[startInd]
  479. , kCharBufSize - spareChars
  480. );
  481. // Add back in the spare chars
  482. fCharsAvail += spareChars;
  483. // Reset the buffer index to zero, so we start from the 0th char again
  484. fCharIndex = 0;
  485. //
  486. // If no chars available, then we have to check for one last thing. If
  487. // this is reader for a PE and its not being expanded inside a literal,
  488. // then unget a trailing space. We use a boolean to avoid triggering
  489. // this more than once.
  490. //
  491. if (!fCharsAvail
  492. && (fType == Type_PE)
  493. && (fRefFrom == RefFrom_NonLiteral)
  494. && !fSentTrailingSpace)
  495. {
  496. fCharBuf[0] = chSpace;
  497. fCharsAvail = 1;
  498. fSentTrailingSpace = true;
  499. }
  500. //
  501. // If we are on our first block of chars and the encoding is one of the
  502. // UTF-16 formats, then check the first char for the BOM and skip over
  503. // it manually.
  504. //
  505. if (fCharsAvail)
  506. {
  507. if ((fCurLine == 1) && (fCurCol == 1))
  508. {
  509. if (((fEncoding == XMLRecognizer::UTF_16L)
  510. || (fEncoding == XMLRecognizer::UTF_16B))
  511. && !startInd)
  512. {
  513. if ((fCharBuf[startInd] == chUnicodeMarker)
  514. || (fCharBuf[startInd] == chSwappedUnicodeMarker))
  515. {
  516. fCharIndex++;
  517. }
  518. }
  519. }
  520. }
  521. //
  522. // If we get here with no more chars, then set the fNoMore flag which
  523. // lets us optimize and know without checking that no more chars are
  524. // available.
  525. //
  526. if (!fCharsAvail)
  527. fNoMore = true;
  528. return (fCharsAvail != 0);
  529. }
  530. // ---------------------------------------------------------------------------
  531. // XMLReader: Scanning methods
  532. // ---------------------------------------------------------------------------
  533. bool XMLReader::getName(XMLBuffer& toFill, const bool token)
  534. {
  535. // Ok, first lets see if we have chars in the buffer. If not, then lets
  536. // reload.
  537. if (fCharIndex == fCharsAvail)
  538. {
  539. if (!refreshCharBuffer())
  540. return false;
  541. }
  542. // Lets check the first char for being a first name char. If not, then
  543. // what's the point in living mannnn? Just give up now. We only do this
  544. // if its a name and not a name token that they want.
  545. if (!token)
  546. {
  547. if (fXMLVersion == XMLV1_1 && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) {
  548. if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
  549. return false;
  550. // Looks ok, so lets eat it and put it in our buffer
  551. toFill.append(fCharBuf[fCharIndex++]);
  552. fCurCol++;
  553. toFill.append(fCharBuf[fCharIndex++]);
  554. fCurCol++;
  555. }
  556. else {
  557. if (!isFirstNameChar(fCharBuf[fCharIndex]))
  558. return false;
  559. // Looks ok, so lets eat it and put it in our buffer. Update column also!
  560. toFill.append(fCharBuf[fCharIndex++]);
  561. fCurCol++;
  562. }
  563. }
  564. // And now we loop until we run out of data in this reader or we hit
  565. // a non-name char.
  566. do {
  567. while (fCharIndex < fCharsAvail)
  568. {
  569. // Check the current char and take it if its a name char. Else
  570. // break out.
  571. if (fXMLVersion == XMLV1_1 && ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))) {
  572. if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
  573. return !toFill.isEmpty();
  574. toFill.append(fCharBuf[fCharIndex++]);
  575. fCurCol++;
  576. toFill.append(fCharBuf[fCharIndex++]);
  577. fCurCol++;
  578. }
  579. else {
  580. if (!isNameChar(fCharBuf[fCharIndex]))
  581. {
  582. return !toFill.isEmpty();
  583. }
  584. toFill.append(fCharBuf[fCharIndex++]);
  585. fCurCol++;
  586. }
  587. }
  588. // If we don't get no more, then break out.
  589. } while (refreshCharBuffer());
  590. return !toFill.isEmpty();
  591. }
  592. bool XMLReader::getSpaces(XMLBuffer& toFill)
  593. {
  594. //
  595. // We just loop until we either hit a non-space or the end of this
  596. // entity. We return true if we returned because of a non-space and
  597. // false if because of end of entity.
  598. //
  599. // NOTE: We have to maintain line/col info here and we have to do
  600. // whitespace normalization if we are not already internalized.
  601. //
  602. while (true)
  603. {
  604. // Loop through the current chars in the buffer
  605. while (fCharIndex < fCharsAvail)
  606. {
  607. // Get the current char out of the buffer
  608. XMLCh curCh = fCharBuf[fCharIndex];
  609. //
  610. // See if its a white space char. If so, then process it. Else
  611. // we've hit a non-space and need to return.
  612. //
  613. if (isWhitespace(curCh))
  614. {
  615. // Eat this char
  616. fCharIndex++;
  617. //
  618. // Ok, we've got some whitespace here. So we have to store
  619. // it. But we have to normalize it and update the line and
  620. // column info along the way.
  621. //
  622. if (curCh == chCR)
  623. {
  624. fCurCol = 1;
  625. fCurLine++;
  626. //
  627. // If not already internalized, then convert it to an
  628. // LF and eat any following LF.
  629. //
  630. if (fSource == Source_External)
  631. {
  632. if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  633. {
  634. if (fCharBuf[fCharIndex] == chLF
  635. || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  636. fCharIndex++;
  637. }
  638. curCh = chLF;
  639. }
  640. }
  641. else if (curCh == chLF
  642. || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  643. {
  644. curCh = chLF;
  645. fCurCol = 1;
  646. fCurLine++;
  647. }
  648. else
  649. {
  650. fCurCol++;
  651. }
  652. // Ok we can add this guy to our buffer
  653. toFill.append(curCh);
  654. }
  655. else
  656. {
  657. // Return true to indicate we broke out due to a whitespace
  658. return true;
  659. }
  660. }
  661. //
  662. // We've eaten up the current buffer, so lets try to reload it. If
  663. // we don't get anything new, then break out. If we do, then we go
  664. // back to the top to keep getting spaces.
  665. //
  666. if (!refreshCharBuffer())
  667. break;
  668. }
  669. return false;
  670. }
  671. bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck)
  672. {
  673. while (true)
  674. {
  675. // Loop through the current chars in the buffer
  676. while (fCharIndex < fCharsAvail)
  677. {
  678. // Get the current char out of the buffer
  679. XMLCh curCh = fCharBuf[fCharIndex];
  680. //
  681. // See if its not a white space or our target char, then process
  682. // it. Else, we need to return.
  683. //
  684. if (!isWhitespace(curCh) && (curCh != toCheck))
  685. {
  686. // Eat this char
  687. fCharIndex++;
  688. //
  689. // Ok, we've got some whitespace here. So we have to store
  690. // it. But we have to normalize it and update the line and
  691. // column info along the way.
  692. //
  693. if (curCh == chCR)
  694. {
  695. fCurCol = 1;
  696. fCurLine++;
  697. //
  698. // If not already internalized, then convert it to an
  699. // LF and eat any following LF.
  700. //
  701. if (fSource == Source_External)
  702. {
  703. if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  704. {
  705. if (fCharBuf[fCharIndex] == chLF
  706. || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  707. fCharIndex++;
  708. }
  709. curCh = chLF;
  710. }
  711. }
  712. else if (curCh == chLF
  713. || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  714. {
  715. curCh = chLF;
  716. fCurCol = 1;
  717. fCurLine++;
  718. }
  719. else
  720. {
  721. fCurCol++;
  722. }
  723. // Add it to our buffer
  724. toFill.append(curCh);
  725. }
  726. else
  727. {
  728. return true;
  729. }
  730. }
  731. //
  732. // We've eaten up the current buffer, so lets try to reload it. If
  733. // we don't get anything new, then break out. If we do, then we go
  734. // back to the top to keep getting spaces.
  735. //
  736. if (!refreshCharBuffer())
  737. break;
  738. }
  739. // We never hit any non-space and ate up the whole reader
  740. return false;
  741. }
  742. bool XMLReader::skipIfQuote(XMLCh& chGotten)
  743. {
  744. if (fCharIndex == fCharsAvail)
  745. {
  746. if (!refreshCharBuffer())
  747. return false;
  748. }
  749. const XMLCh curCh = fCharBuf[fCharIndex];
  750. if ((curCh == chDoubleQuote) || (curCh == chSingleQuote))
  751. {
  752. chGotten = curCh;
  753. fCharIndex++;
  754. fCurCol++;
  755. return true;
  756. }
  757. return false;
  758. }
  759. bool XMLReader::skipSpaces(bool& skippedSomething)
  760. {
  761. // Remember the current line and column
  762. XMLSSize_t orgLine = fCurLine;
  763. XMLSSize_t orgCol = fCurCol;
  764. // We enter a loop where we skip over spaces until we hit the end of
  765. // this reader or a non-space value. The return indicates whether we
  766. // hit the non-space (true) or the end (false).
  767. while (true)
  768. {
  769. // Loop through the current chars in the buffer
  770. while (fCharIndex < fCharsAvail)
  771. {
  772. // See if its a white space char. If so, then process it. Else
  773. // we've hit a non-space and need to return.
  774. if (isWhitespace(fCharBuf[fCharIndex]))
  775. {
  776. // Get the current char out of the buffer and eat it
  777. XMLCh curCh = fCharBuf[fCharIndex++];
  778. // Ok, we've got some whitespace here. So we have to store
  779. // it. But we have to normalize it and update the line and
  780. // column info along the way.
  781. if (curCh == chCR)
  782. {
  783. fCurCol = 1;
  784. fCurLine++;
  785. // If not already internalized, then convert it to an
  786. // LF and eat any following LF.
  787. if (fSource == Source_External)
  788. {
  789. if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  790. {
  791. if (fCharBuf[fCharIndex] == chLF
  792. || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  793. fCharIndex++;
  794. }
  795. }
  796. }
  797. else if (curCh == chLF
  798. || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  799. {
  800. fCurCol = 1;
  801. fCurLine++;
  802. }
  803. else
  804. {
  805. fCurCol++;
  806. }
  807. }
  808. else
  809. {
  810. skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
  811. return true;
  812. }
  813. }
  814. // We've eaten up the current buffer, so lets try to reload it. If
  815. // we don't get anything new, then break out. If we do, then we go
  816. // back to the top to keep getting spaces.
  817. if (!refreshCharBuffer())
  818. break;
  819. }
  820. // We never hit any non-space and ate up the whole reader
  821. skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
  822. return false;
  823. }
  824. bool XMLReader::skippedChar(const XMLCh toSkip)
  825. {
  826. //
  827. // If the buffer is empty, then try to reload it. If we still get
  828. // nothing, then return false.
  829. //
  830. if (fCharIndex == fCharsAvail)
  831. {
  832. if (!refreshCharBuffer())
  833. return false;
  834. }
  835. //
  836. // See if the current char is the one we want. If so, then we need
  837. // to eat it and return true.
  838. //
  839. if (fCharBuf[fCharIndex] == toSkip)
  840. {
  841. fCharIndex++;
  842. fCurCol++;
  843. return true;
  844. }
  845. return false;
  846. }
  847. bool XMLReader::skippedSpace()
  848. {
  849. //
  850. // If the buffer is empty, then try to reload it. If we still get
  851. // nothing, then return false.
  852. //
  853. if (fCharIndex == fCharsAvail)
  854. {
  855. if (!refreshCharBuffer())
  856. return false;
  857. }
  858. //
  859. // See if the current char is a whitespace. If so, then we need to eat
  860. // it and return true.
  861. //
  862. const XMLCh curCh = fCharBuf[fCharIndex];
  863. if (isWhitespace(curCh))
  864. {
  865. // Eat the character
  866. fCharIndex++;
  867. if (curCh == chCR)
  868. {
  869. fCurLine++;
  870. fCurCol = 1;
  871. if (fSource == Source_External)
  872. {
  873. if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
  874. {
  875. if (fCharBuf[fCharIndex] == chLF
  876. || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
  877. fCharIndex++;
  878. }
  879. }
  880. }
  881. else if (curCh == chLF
  882. || ((curCh == chNEL || curCh == chLineSeparator) && fNEL))
  883. {
  884. fCurLine++;
  885. fCurCol = 1;
  886. }
  887. else
  888. {
  889. fCurCol++;
  890. }
  891. return true;
  892. }
  893. return false;
  894. }
  895. bool XMLReader::skippedString(const XMLCh* const toSkip)
  896. {
  897. // Get the length of the string to skip
  898. const unsigned int srcLen = XMLString::stringLen(toSkip);
  899. //
  900. // See if the current reader has enough chars to test against this
  901. // string. If not, then ask it to reload its buffer. If that does not
  902. // get us enough, then it cannot match.
  903. //
  904. // NOTE: This works because strings never have to cross a reader! And
  905. // a string to skip will never have a new line in it, so we will never
  906. // miss adjusting the current line.
  907. //
  908. unsigned int charsLeft = charsLeftInBuffer();
  909. while (charsLeft < srcLen)
  910. {
  911. refreshCharBuffer();
  912. unsigned int t = charsLeftInBuffer();
  913. if (t == charsLeft) // if the refreshCharBuf() did not add anything new
  914. return false; // give up and return.
  915. charsLeft = t;
  916. }
  917. //
  918. // Ok, now we now that the current reader has enough chars in its
  919. // buffer and that its index is back at zero. So we can do a quick and
  920. // dirty comparison straight to its buffer with no requirement to unget
  921. // if it fails.
  922. //
  923. if (XMLString::compareNString(&fCharBuf[fCharIndex], toSkip, srcLen))
  924. return false;
  925. // Add the source length to the current column to get it back right
  926. fCurCol += srcLen;
  927. //
  928. // And get the character buffer index back right by just adding the
  929. // source len to it.
  930. //
  931. fCharIndex += srcLen;
  932. return true;
  933. }
  934. //
  935. // This is just to peek if the next coming buffer
  936. // matches the string toPeek.
  937. // Similar to skippedString, but just the fCharIndex and fCurCol are not updated
  938. //
  939. bool XMLReader::peekString(const XMLCh* const toPeek)
  940. {
  941. // Get the length of the string to skip
  942. const unsigned int srcLen = XMLString::stringLen(toPeek);
  943. //
  944. // See if the current reader has enough chars to test against this
  945. // string. If not, then ask it to reload its buffer. If that does not
  946. // get us enough, then it cannot match.
  947. //
  948. // NOTE: This works because strings never have to cross a reader! And
  949. // a string to skip will never have a new line in it, so we will never
  950. // miss adjusting the current line.
  951. //
  952. unsigned int charsLeft = charsLeftInBuffer();
  953. while (charsLeft < srcLen)
  954. {
  955. refreshCharBuffer();
  956. unsigned int t = charsLeftInBuffer();
  957. if (t == charsLeft) // if the refreshCharBuf() did not add anything new
  958. return false; // give up and return.
  959. charsLeft = t;
  960. }
  961. //
  962. // Ok, now we now that the current reader has enough chars in its
  963. // buffer and that its index is back at zero. So we can do a quick and
  964. // dirty comparison straight to its buffer with no requirement to unget
  965. // if it fails.
  966. //
  967. if (XMLString::compareNString(&fCharBuf[fCharIndex], toPeek, srcLen))
  968. return false;
  969. return true;
  970. }
  971. // ---------------------------------------------------------------------------
  972. // XMLReader: Setter methods (most are inlined)
  973. // ---------------------------------------------------------------------------
  974. bool XMLReader::setEncoding(const XMLCh* const newEncoding)
  975. {
  976. //
  977. // If the encoding was forced, then we ignore the new value and just
  978. // return with success. If it was forced, then we are to use that
  979. // encoding without question. Note that, if we are forced, we created
  980. // a transcoder up front so there is no need to do one here in that
  981. // case.
  982. //
  983. if (fForcedEncoding)
  984. return true;
  985. //
  986. // upperCase the newEncoding first for better performance
  987. //
  988. XMLCh* inputEncoding = XMLString::replicate(newEncoding, fMemoryManager);
  989. XMLString::upperCase(inputEncoding);
  990. //
  991. // Try to map the string to one of our standard encodings. If its not
  992. // one of them, then it has to be one of the non-intrinsic encodings,
  993. // in which case we have to delete our intrinsic encoder and create a
  994. // new one.
  995. //
  996. XMLRecognizer::Encodings newBaseEncoding = XMLRecognizer::encodingForName
  997. (
  998. inputEncoding
  999. );
  1000. //
  1001. // If it does not come back as one of the auto-sensed encodings, then we
  1002. // have to possibly replace it and at least check a few things.
  1003. //
  1004. if (newBaseEncoding == XMLRecognizer::OtherEncoding)
  1005. {
  1006. //
  1007. // Check for non-endian specific UTF-16 or UCS-4. If so, and if we
  1008. // are already in one of the endian versions of those encodings,
  1009. // then just keep it and go on. Otherwise, its not valid.
  1010. //
  1011. if (!XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString)
  1012. || !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString2)
  1013. || !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString3)
  1014. || !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString4)
  1015. || !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString5))
  1016. {
  1017. fMemoryManager->deallocate(inputEncoding);
  1018. if ((fEncoding != XMLRecognizer::UTF_16L)
  1019. && (fEncoding != XMLRecognizer::UTF_16B))
  1020. {
  1021. return false;
  1022. }
  1023. // Override with the original endian specific encoding
  1024. newBaseEncoding = fEncoding;
  1025. if (fEncoding == XMLRecognizer::UTF_16L) {
  1026. fMemoryManager->deallocate(fEncodingStr);
  1027. fEncodingStr = XMLString::replicate(XMLUni::fgUTF16LEncodingString, fMemoryManager);
  1028. }
  1029. else {
  1030. fMemoryManager->deallocate(fEncodingStr);
  1031. fEncodingStr = XMLString::replicate(XMLUni::fgUTF16BEncodingString, fMemoryManager);
  1032. }
  1033. }
  1034. else if (!XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString)
  1035. || !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString2)
  1036. || !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString3))
  1037. {
  1038. fMemoryManager->deallocate(inputEncoding);
  1039. if ((fEncoding != XMLRecognizer::UCS_4L)
  1040. && (fEncoding != XMLRecognizer::UCS_4B))
  1041. {
  1042. return false;
  1043. }
  1044. // Override with the original endian specific encoding
  1045. newBaseEncoding = fEncoding;
  1046. if (fEncoding == XMLRecognizer::UCS_4L) {
  1047. fMemoryManager->deallocate(fEncodingStr);
  1048. fEncodingStr = XMLString::replicate(XMLUni::fgUCS4LEncodingString, fMemoryManager);
  1049. }
  1050. else {
  1051. fMemoryManager->deallocate(fEncodingStr);
  1052. fEncodingStr = XMLString::replicate(XMLUni::fgUCS4BEncodingString, fMemoryManager);
  1053. }
  1054. }
  1055. else
  1056. {
  1057. //
  1058. // None of those special cases, so just replicate the new name
  1059. // and use it directly to create the transcoder
  1060. //
  1061. fMemoryManager->deallocate(fEncodingStr);
  1062. fEncodingStr = inputEncoding;
  1063. XMLTransService::Codes failReason;
  1064. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  1065. (
  1066. fEncodingStr
  1067. , failReason
  1068. , kCharBufSize
  1069. , fMemoryManager
  1070. );
  1071. }
  1072. }
  1073. else
  1074. {
  1075. // Store the new encoding string since it is just an intrinsic
  1076. fMemoryManager->deallocate(fEncodingStr);
  1077. fEncodingStr = inputEncoding;
  1078. }
  1079. if (!fTranscoder) {
  1080. //
  1081. // Now we can create a transcoder using the recognized fEncoding. We
  1082. // might get back a transcoder for an intrinsically supported encoding,
  1083. // or we might get one from the underlying transcoding service.
  1084. //
  1085. XMLTransService::Codes failReason;
  1086. fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  1087. (
  1088. newBaseEncoding
  1089. , failReason
  1090. , kCharBufSize
  1091. , fMemoryManager
  1092. );
  1093. if (!fTranscoder)
  1094. ThrowXML1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr);
  1095. }
  1096. // Update the base encoding member with the new base encoding found
  1097. fEncoding = newBaseEncoding;
  1098. // Looks ok to us
  1099. return true;
  1100. }
  1101. // ---------------------------------------------------------------------------
  1102. // XMLReader: Private helper methods
  1103. // ---------------------------------------------------------------------------
  1104. //
  1105. // This is called when the encoding flag is set and just sets the fSwapped
  1106. // flag appropriately.
  1107. //
  1108. void XMLReader::checkForSwapped()
  1109. {
  1110. // Assume not swapped
  1111. fSwapped = false;
  1112. #if defined(ENDIANMODE_LITTLE)
  1113. if ((fEncoding == XMLRecognizer::UTF_16B)
  1114. || (fEncoding == XMLRecognizer::UCS_4B))
  1115. {
  1116. fSwapped = true;
  1117. }
  1118. #elif defined(ENDIANMODE_BIG)
  1119. if ((fEncoding == XMLRecognizer::UTF_16L)
  1120. || (fEncoding == XMLRecognizer::UCS_4L))
  1121. {
  1122. fSwapped = true;
  1123. }
  1124. #endif
  1125. }
  1126. //
  1127. // This is called from the constructor when the encoding is not forced.
  1128. // We assume that the encoding has been auto-sensed at this point and that
  1129. // fSwapped is set correctly.
  1130. //
  1131. // In the case of UCS-4 and EBCDIC, we don't have to check for a decl.
  1132. // The fact that we got here, means that there is one, because that's the
  1133. // only way we can autosense those.
  1134. //
  1135. void XMLReader::doInitDecode()
  1136. {
  1137. switch(fEncoding)
  1138. {
  1139. case XMLRecognizer::UCS_4B :
  1140. case XMLRecognizer::UCS_4L :
  1141. {
  1142. // Remove bom if any
  1143. if (((fRawByteBuf[0] == 0x00) && (fRawByteBuf[1] == 0x00) && (fRawByteBuf[2] == 0xFE) && (fRawByteBuf[3] == 0xFF)) ||
  1144. ((fRawByteBuf[0] == 0xFF) && (fRawByteBuf[1] == 0xFE) && (fRawByteBuf[2] == 0x00) && (fRawByteBuf[3] == 0x00)) )
  1145. {
  1146. for (unsigned int i = 0; i < fRawBytesAvail; i++)
  1147. fRawByteBuf[i] = fRawByteBuf[i+4];
  1148. fRawBytesAvail -=4;
  1149. }
  1150. // Look at the raw buffer as UCS4 chars
  1151. const UCS4Ch* asUCS = (const UCS4Ch*)fRawByteBuf;
  1152. while (fRawBufIndex < fRawBytesAvail)
  1153. {
  1154. // Get out the current 4 byte value and inc our raw buf index
  1155. UCS4Ch curVal = *asUCS++;
  1156. fRawBufIndex += sizeof(UCS4Ch);
  1157. // Swap if that is required for this machine
  1158. if (fSwapped)
  1159. curVal = BitOps::swapBytes(curVal);
  1160. // Make sure its at least semi legal. If not, undo and throw
  1161. if (curVal > 0xFFFF)
  1162. {
  1163. fCharsAvail = 0;
  1164. fRawBufIndex = 0;
  1165. ThrowXML1
  1166. (
  1167. TranscodingException
  1168. , XMLExcepts::Reader_CouldNotDecodeFirstLine
  1169. , fSystemId
  1170. );
  1171. }
  1172. // Convert the value to an XML char and store it
  1173. fCharSizeBuf[fCharsAvail] = 4;
  1174. fCharBuf[fCharsAvail++] = XMLCh(curVal);
  1175. // Break out on the > character
  1176. if (curVal == chCloseAngle)
  1177. break;
  1178. }
  1179. break;
  1180. }
  1181. case XMLRecognizer::UTF_8 :
  1182. {
  1183. // If there's a utf-8 BOM (0xEF 0xBB 0xBF), skip past it.
  1184. // Don't move to char buf - no one wants to see it.
  1185. // Note: this causes any encoding= declaration to override
  1186. // the BOM's attempt to say that the encoding is utf-8.
  1187. // Look at the raw buffer as short chars
  1188. const char* asChars = (const char*)fRawByteBuf;
  1189. if (fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen &&
  1190. XMLString::compareNString( asChars
  1191. , XMLRecognizer::fgUTF8BOM
  1192. , XMLRecognizer::fgUTF8BOMLen) == 0)
  1193. {
  1194. fRawBufIndex += XMLRecognizer::fgUTF8BOMLen;
  1195. asChars += XMLRecognizer::fgUTF8BOMLen;
  1196. }
  1197. //
  1198. // First check that there are enough bytes to even see the
  1199. // decl indentifier. If not, get out now with no action since
  1200. // there is no decl.
  1201. //
  1202. if (fRawBytesAvail < XMLRecognizer::fgASCIIPreLen)
  1203. break;
  1204. // Check for the opening sequence. If not, then no decl
  1205. if (XMLString::compareNString( asChars
  1206. , XMLRecognizer::fgASCIIPre
  1207. , XMLRecognizer::fgASCIIPreLen))
  1208. {
  1209. break;
  1210. }
  1211. while (fRawBufIndex < fRawBytesAvail)
  1212. {
  1213. const char curCh = *asChars++;
  1214. fRawBufIndex++;
  1215. // Looks ok, so store it
  1216. fCharSizeBuf[fCharsAvail] = 1;
  1217. fCharBuf[fCharsAvail++] = XMLCh(curCh);
  1218. // Break out on a > character
  1219. if (curCh == chCloseAngle)
  1220. break;
  1221. //
  1222. // A char greater than 0x7F is not allowed in this case. If
  1223. // so, undo and throw.
  1224. //
  1225. if (curCh & 0x80)
  1226. {
  1227. fCharsAvail = 0;
  1228. fRawBufIndex = 0;
  1229. ThrowXML1
  1230. (
  1231. TranscodingException
  1232. , XMLExcepts::Reader_CouldNotDecodeFirstLine
  1233. , fSystemId
  1234. );
  1235. }
  1236. }
  1237. break;
  1238. }
  1239. case XMLRecognizer::UTF_16B :
  1240. case XMLRecognizer::UTF_16L :
  1241. {
  1242. //
  1243. // If there is a decl here, we just truncate back the characters
  1244. // as we go. No surrogate creation would be allowed here in legal
  1245. // XML, so we consider it a transoding error if we find one.
  1246. //
  1247. if (fRawBytesAvail < 2)
  1248. break;
  1249. const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
  1250. if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
  1251. {
  1252. fRawBufIndex += sizeof(UTF16Ch);
  1253. asUTF16++;
  1254. }
  1255. // First check that there are enough raw bytes for there to even
  1256. // be a decl indentifier. If not, then nothing to do.
  1257. //
  1258. if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen)
  1259. {
  1260. fRawBufIndex = 0;
  1261. break;
  1262. }
  1263. //
  1264. // See we get a match on the prefix. If not, then reset and
  1265. // break out.
  1266. //
  1267. if (fEncoding == XMLRecognizer::UTF_16B)
  1268. {
  1269. if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen))
  1270. {
  1271. fRawBufIndex = 0;
  1272. break;
  1273. }
  1274. }
  1275. else
  1276. {
  1277. if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen))
  1278. {
  1279. fRawBufIndex = 0;
  1280. break;
  1281. }
  1282. }
  1283. while (fRawBufIndex < fRawBytesAvail)
  1284. {
  1285. // Get out the current 2 byte value
  1286. UTF16Ch curVal = *asUTF16++;
  1287. fRawBufIndex += sizeof(UTF16Ch);
  1288. // Swap if that is required for this machine
  1289. if (fSwapped)
  1290. curVal = BitOps::swapBytes(curVal);
  1291. //
  1292. // Store it and bump the target index, implicitly converting
  1293. // if UTF16Ch and XMLCh are not the same size.
  1294. //
  1295. fCharSizeBuf[fCharsAvail] = 2;
  1296. fCharBuf[fCharsAvail++] = curVal;
  1297. // Break out on a > char
  1298. if (curVal == chCloseAngle)
  1299. break;
  1300. }
  1301. break;
  1302. }
  1303. case XMLRecognizer::EBCDIC :
  1304. {
  1305. //
  1306. // We use special support in the intrinsic EBCDIC-US transcoder
  1307. // to go through one char at a time.
  1308. //
  1309. const XMLByte* srcPtr = fRawByteBuf;
  1310. while (1)
  1311. {
  1312. // Transcode one char from the source
  1313. const XMLCh chCur = XMLEBCDICTranscoder::xlatThisOne(*srcPtr++);
  1314. fRawBufIndex++;
  1315. //
  1316. // And put it into the character buffer. This stuff has to
  1317. // look like it was normally transcoded.
  1318. //
  1319. fCharSizeBuf[fCharsAvail] = 1;
  1320. fCharBuf[fCharsAvail++] = chCur;
  1321. // If its a > char, then break out
  1322. if (chCur == chCloseAngle)
  1323. break;
  1324. // Watch for using up all input and get out
  1325. if (fRawBufIndex == fRawBytesAvail)
  1326. break;
  1327. }
  1328. break;
  1329. }
  1330. default :
  1331. // It should never be anything else here
  1332. ThrowXML(TranscodingException, XMLExcepts::Reader_BadAutoEncoding);
  1333. break;
  1334. }
  1335. //
  1336. // Ok, by the time we get here, if its a legal XML file we have eaten
  1337. // the XML/TextDecl. So, if we are a PE and are being referenced from
  1338. // outside a literal, then we need to throw in an arbitrary space that
  1339. // is required by XML.
  1340. //
  1341. if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
  1342. fCharBuf[fCharsAvail++] = chSpace;
  1343. }
  1344. //
  1345. // This method is called internally when we run out of bytes in the raw
  1346. // buffer. We just read as many bytes as we can into the raw buffer again
  1347. // and store the number of bytes we got.
  1348. //
  1349. void XMLReader::refres

Large files files are truncated, but you can click here to view the full file