PageRenderTime 57ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/ATF2/control-software/epics-3.14.8/extensions/src/ChannelArchiver/ThirdParty/xerces-c-src2_4_0/src/xercesc/util/Transcoders/ICU/ICUTransService.cpp

http://atf2flightsim.googlecode.com/
C++ | 1359 lines | 835 code | 181 blank | 343 comment | 118 complexity | d17220c3d2091161fd35e6fba0533fa4 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.0, IPL-1.0, BSD-3-Clause
  1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 1999-2000 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution,
  20. * if any, must include the following acknowledgment:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgment may appear in the software itself,
  24. * if and wherever such third-party acknowledgments normally appear.
  25. *
  26. * 4. The names "Xerces" and "Apache Software Foundation" must
  27. * not be used to endorse or promote products derived from this
  28. * software without prior written permission. For written
  29. * permission, please contact apache\@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache",
  32. * nor may "Apache" appear in their name, without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation, and was
  51. * originally based on software copyright (c) 1999, International
  52. * Business Machines, Inc., http://www.ibm.com . For more information
  53. * on the Apache Software Foundation, please see
  54. * <http://www.apache.org/>.
  55. */
  56. /*
  57. * $Id: ICUTransService.cpp,v 1.1.1.1 2009/03/14 06:42:36 whitegr Exp $
  58. */
  59. // ---------------------------------------------------------------------------
  60. // Includes
  61. // ---------------------------------------------------------------------------
  62. #include <xercesc/util/Janitor.hpp>
  63. #include <xercesc/util/TranscodingException.hpp>
  64. #include <xercesc/util/XMLString.hpp>
  65. #include <xercesc/util/XMLUniDefs.hpp>
  66. #include "ICUTransService.hpp"
  67. #include <string.h>
  68. #include <unicode/uloc.h>
  69. #include <unicode/uchar.h>
  70. #include <unicode/ucnv.h>
  71. #include <unicode/ucnv_err.h>
  72. #include <unicode/ustring.h>
  73. #include <unicode/udata.h>
  74. #if (U_ICU_VERSION_MAJOR_NUM >= 2)
  75. #include <unicode/uclean.h>
  76. #endif
  77. #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
  78. // Forward reference the symbol which points to the ICU converter data.
  79. #if (U_ICU_VERSION_MAJOR_NUM < 2)
  80. extern "C" const uint8_t U_IMPORT icudata_dat[];
  81. #endif
  82. #endif
  83. XERCES_CPP_NAMESPACE_BEGIN
  84. // ---------------------------------------------------------------------------
  85. // Local, const data
  86. // ---------------------------------------------------------------------------
  87. static const XMLCh gMyServiceId[] =
  88. {
  89. chLatin_I, chLatin_C, chLatin_U, chNull
  90. };
  91. // ---------------------------------------------------------------------------
  92. // Local functions
  93. // ---------------------------------------------------------------------------
  94. //
  95. // When XMLCh and ICU's UChar are not the same size, we have to do a temp
  96. // conversion of all strings. These local helper methods make that easier.
  97. //
  98. static UChar* convertToUChar( const XMLCh* const toConvert
  99. , const unsigned int srcLen = 0
  100. , MemoryManager* const manager = 0)
  101. {
  102. const unsigned int actualLen = srcLen
  103. ? srcLen : XMLString::stringLen(toConvert);
  104. UChar* tmpBuf = (manager)
  105. ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar))
  106. : new UChar[actualLen + 1];
  107. const XMLCh* srcPtr = toConvert;
  108. UChar* outPtr = tmpBuf;
  109. while (*srcPtr)
  110. *outPtr++ = UChar(*srcPtr++);
  111. *outPtr = 0;
  112. return tmpBuf;
  113. }
  114. static XMLCh* convertToXMLCh( const UChar* const toConvert,
  115. MemoryManager* const manager = 0)
  116. {
  117. const unsigned int srcLen = u_strlen(toConvert);
  118. XMLCh* retBuf = (manager)
  119. ? (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh))
  120. : new XMLCh[srcLen + 1];
  121. XMLCh* outPtr = retBuf;
  122. const UChar* srcPtr = toConvert;
  123. while (*srcPtr)
  124. *outPtr++ = XMLCh(*srcPtr++);
  125. *outPtr = 0;
  126. return retBuf;
  127. }
  128. // ---------------------------------------------------------------------------
  129. // ICUTransService: Constructors and Destructor
  130. // ---------------------------------------------------------------------------
  131. ICUTransService::ICUTransService()
  132. {
  133. #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
  134. #if (U_ICU_VERSION_MAJOR_NUM < 2)
  135. // Starting with ICU 2.0, ICU itself includes a static reference to the data
  136. // entrypoint symbol.
  137. //
  138. // ICU 1.8 (and previous) did not include a static reference, but would
  139. // dynamically load the data dll when it was first needed, however this dynamic
  140. // loading proved unreliable in some of the odd environments that Xerces needed
  141. // to run in. Hence, the static reference.
  142. // Pass the location of the converter data to ICU. By doing so, we are
  143. // forcing the load of ICU converter data DLL, after the Xerces-C DLL is
  144. // loaded. This implies that Xerces-C, now has to explicitly link with the
  145. // ICU converter dll. However, the advantage is that we no longer depend
  146. // on the code which does demand dynamic loading of DLL's. The demand
  147. // loading is highly system dependent and was a constant source of support
  148. // calls.
  149. UErrorCode uerr = U_ZERO_ERROR;
  150. udata_setCommonData((void *) icudata_dat, &uerr);
  151. #endif
  152. #endif
  153. }
  154. ICUTransService::~ICUTransService()
  155. {
  156. /*
  157. * commented out the following clean up code
  158. * in case users use ICU outside of the parser
  159. * if we clean up here, users' code may crash
  160. *
  161. #if (U_ICU_VERSION_MAJOR_NUM >= 2)
  162. // release all lasily allocated data
  163. u_cleanup();
  164. #endif
  165. */
  166. }
  167. // ---------------------------------------------------------------------------
  168. // ICUTransService: The virtual transcoding service API
  169. // ---------------------------------------------------------------------------
  170. int ICUTransService::compareIString(const XMLCh* const comp1
  171. , const XMLCh* const comp2)
  172. {
  173. const XMLCh* psz1 = comp1;
  174. const XMLCh* psz2 = comp2;
  175. unsigned int curCount = 0;
  176. while (true)
  177. {
  178. //
  179. // If an inequality, then return the difference. Note that the XMLCh
  180. // might be bigger physically than UChar, but it won't hold anything
  181. // larger than 0xFFFF, so our cast here will work for both possible
  182. // sizes of XMLCh.
  183. //
  184. if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2)))
  185. return int(*psz1) - int(*psz2);
  186. // If either has ended, then they both ended, so equal
  187. if (!*psz1 || !*psz2)
  188. break;
  189. // Move upwards for the next round
  190. psz1++;
  191. psz2++;
  192. }
  193. return 0;
  194. }
  195. int ICUTransService::compareNIString(const XMLCh* const comp1
  196. , const XMLCh* const comp2
  197. , const unsigned int maxChars)
  198. {
  199. const XMLCh* psz1 = comp1;
  200. const XMLCh* psz2 = comp2;
  201. unsigned int curCount = 0;
  202. while (true)
  203. {
  204. //
  205. // If an inequality, then return the difference. Note that the XMLCh
  206. // might be bigger physically than UChar, but it won't hold anything
  207. // larger than 0xFFFF, so our cast here will work for both possible
  208. // sizes of XMLCh.
  209. //
  210. if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2)))
  211. return int(*psz1) - int(*psz2);
  212. // If either ended, then both ended, so equal
  213. if (!*psz1 || !*psz2)
  214. break;
  215. // Move upwards to next chars
  216. psz1++;
  217. psz2++;
  218. //
  219. // Bump the count of chars done. If it equals the count then we
  220. // are equal for the requested count, so break out and return
  221. // equal.
  222. //
  223. curCount++;
  224. if (maxChars == curCount)
  225. break;
  226. }
  227. return 0;
  228. }
  229. const XMLCh* ICUTransService::getId() const
  230. {
  231. return gMyServiceId;
  232. }
  233. bool ICUTransService::isSpace(const XMLCh toCheck) const
  234. {
  235. //
  236. // <TBD>
  237. // For now, we short circuit some of the control chars because ICU
  238. // is not correctly reporting them as space. Later, when they change
  239. // this, we can get rid of this special case.
  240. //
  241. if ((toCheck == 0x09)
  242. || (toCheck == 0x0A)
  243. || (toCheck == 0x0D))
  244. {
  245. return true;
  246. }
  247. return (u_isspace(UChar(toCheck)) != 0);
  248. }
  249. XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder()
  250. {
  251. //
  252. // Try to create a default converter. If it fails, return a null
  253. // pointer which will basically cause the system to give up because
  254. // we really can't do anything without one.
  255. //
  256. UErrorCode uerr = U_ZERO_ERROR;
  257. UConverter* converter = ucnv_open(NULL, &uerr);
  258. if (!converter)
  259. return 0;
  260. // That went ok, so create an ICU LCP transcoder wrapper and return it
  261. return new ICULCPTranscoder(converter);
  262. }
  263. bool ICUTransService::supportsSrcOfs() const
  264. {
  265. // This implementation supports source offset information
  266. return true;
  267. }
  268. void ICUTransService::upperCase(XMLCh* const toUpperCase) const
  269. {
  270. XMLCh* outPtr = toUpperCase;
  271. while (*outPtr)
  272. {
  273. *outPtr = XMLCh(u_toupper(UChar(*outPtr)));
  274. outPtr++;
  275. }
  276. }
  277. void ICUTransService::lowerCase(XMLCh* const toLowerCase) const
  278. {
  279. XMLCh* outPtr = toLowerCase;
  280. while (*outPtr)
  281. {
  282. *outPtr = XMLCh(u_tolower(UChar(*outPtr)));
  283. outPtr++;
  284. }
  285. }
  286. // ---------------------------------------------------------------------------
  287. // ICUTransService: The protected virtual transcoding service API
  288. // ---------------------------------------------------------------------------
  289. XMLTranscoder* ICUTransService::
  290. makeNewXMLTranscoder(const XMLCh* const encodingName
  291. , XMLTransService::Codes& resValue
  292. , const unsigned int blockSize
  293. , MemoryManager* const manager)
  294. {
  295. //
  296. // If UChar and XMLCh are not the same size, then we have premassage the
  297. // encoding name into a UChar type string.
  298. //
  299. const UChar* actualName;
  300. UChar* tmpName = 0;
  301. if (sizeof(UChar) == sizeof(XMLCh))
  302. {
  303. actualName = (const UChar*)encodingName;
  304. }
  305. else
  306. {
  307. tmpName = convertToUChar(encodingName, 0, XMLPlatformUtils::fgMemoryManager);
  308. actualName = tmpName;
  309. }
  310. ArrayJanitor<UChar> janTmp(tmpName, XMLPlatformUtils::fgMemoryManager);
  311. UErrorCode uerr = U_ZERO_ERROR;
  312. UConverter* converter = ucnv_openU(actualName, &uerr);
  313. if (!converter)
  314. {
  315. resValue = XMLTransService::UnsupportedEncoding;
  316. return 0;
  317. }
  318. return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager);
  319. }
  320. // ---------------------------------------------------------------------------
  321. // ICUTranscoder: Constructors and Destructor
  322. // ---------------------------------------------------------------------------
  323. ICUTranscoder::ICUTranscoder(const XMLCh* const encodingName
  324. , UConverter* const toAdopt
  325. , const unsigned int blockSize
  326. , MemoryManager* const manager) :
  327. XMLTranscoder(encodingName, blockSize, manager)
  328. , fConverter(toAdopt)
  329. , fFixed(false)
  330. , fSrcOffsets(0)
  331. {
  332. // If there is a block size, then allocate our source offset array
  333. if (blockSize)
  334. fSrcOffsets = (XMLUInt32*) manager->allocate
  335. (
  336. blockSize * sizeof(XMLUInt32)
  337. );//new XMLUInt32[blockSize];
  338. // Remember if its a fixed size encoding
  339. fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));
  340. }
  341. ICUTranscoder::~ICUTranscoder()
  342. {
  343. getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets;
  344. // If there is a converter, ask ICU to clean it up
  345. if (fConverter)
  346. {
  347. // <TBD> Does this actually delete the structure???
  348. ucnv_close(fConverter);
  349. fConverter = 0;
  350. }
  351. }
  352. // ---------------------------------------------------------------------------
  353. // ICUTranscoder: The virtual transcoder API
  354. // ---------------------------------------------------------------------------
  355. unsigned int
  356. ICUTranscoder::transcodeFrom(const XMLByte* const srcData
  357. , const unsigned int srcCount
  358. , XMLCh* const toFill
  359. , const unsigned int maxChars
  360. , unsigned int& bytesEaten
  361. , unsigned char* const charSizes)
  362. {
  363. // If debugging, insure the block size is legal
  364. #if defined(XERCES_DEBUG)
  365. checkBlockSize(maxChars);
  366. #endif
  367. // Set up pointers to the start and end of the source buffer
  368. const XMLByte* startSrc = srcData;
  369. const XMLByte* endSrc = srcData + srcCount;
  370. //
  371. // And now do the target buffer. This works differently according to
  372. // whether XMLCh and UChar are the same size or not.
  373. //
  374. UChar* startTarget;
  375. if (sizeof(XMLCh) == sizeof(UChar))
  376. startTarget = (UChar*)toFill;
  377. else
  378. startTarget = (UChar*) getMemoryManager()->allocate
  379. (
  380. maxChars * sizeof(UChar)
  381. );//new UChar[maxChars];
  382. UChar* orgTarget = startTarget;
  383. //
  384. // Transoode the buffer. Buffer overflow errors are normal, occuring
  385. // when the raw input buffer holds more characters than will fit in
  386. // the Unicode output buffer.
  387. //
  388. UErrorCode err = U_ZERO_ERROR;
  389. ucnv_toUnicode
  390. (
  391. fConverter
  392. , &startTarget
  393. , startTarget + maxChars
  394. , (const char**)&startSrc
  395. , (const char*)endSrc
  396. , (fFixed ? 0 : (int32_t*)fSrcOffsets)
  397. , false
  398. , &err
  399. );
  400. if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
  401. {
  402. if (orgTarget != (UChar*)toFill)
  403. getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
  404. if (fFixed)
  405. {
  406. XMLCh tmpBuf[16];
  407. XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16);
  408. ThrowXML2
  409. (
  410. TranscodingException
  411. , XMLExcepts::Trans_BadSrcCP
  412. , tmpBuf
  413. , getEncodingName()
  414. );
  415. }
  416. else
  417. {
  418. ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
  419. }
  420. }
  421. // Calculate the bytes eaten and store in caller's param
  422. bytesEaten = startSrc - srcData;
  423. // And the characters decoded
  424. const unsigned int charsDecoded = startTarget - orgTarget;
  425. //
  426. // Translate the array of char offsets into an array of character
  427. // sizes, which is what the transcoder interface semantics requires.
  428. // If its fixed, then we can optimize it.
  429. //
  430. if (fFixed)
  431. {
  432. const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);;
  433. memset(charSizes, fillSize, maxChars);
  434. }
  435. else
  436. {
  437. //
  438. // We have to convert the series of offsets into a series of
  439. // sizes. If just one char was decoded, then its the total bytes
  440. // eaten. Otherwise, do a loop and subtract out each element from
  441. // its previous element.
  442. //
  443. if (charsDecoded == 1)
  444. {
  445. charSizes[0] = (unsigned char)bytesEaten;
  446. }
  447. else
  448. {
  449. // ICU does not return an extra element to allow us to figure
  450. // out the last char size, so we have to compute it from the
  451. // total bytes used.
  452. unsigned int index;
  453. for (index = 0; index < charsDecoded - 1; index++)
  454. {
  455. charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
  456. - fSrcOffsets[index]);
  457. }
  458. if( charsDecoded > 0 ) {
  459. charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten
  460. - fSrcOffsets[charsDecoded - 1]);
  461. }
  462. }
  463. }
  464. //
  465. // If XMLCh and UChar are not the same size, then we need to copy over
  466. // the temp buffer to the new one.
  467. //
  468. if (sizeof(UChar) != sizeof(XMLCh))
  469. {
  470. XMLCh* outPtr = toFill;
  471. startTarget = orgTarget;
  472. for (unsigned int index = 0; index < charsDecoded; index++)
  473. *outPtr++ = XMLCh(*startTarget++);
  474. // And delete the temp buffer
  475. getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
  476. }
  477. // Return the chars we put into the target buffer
  478. return charsDecoded;
  479. }
  480. unsigned int
  481. ICUTranscoder::transcodeTo( const XMLCh* const srcData
  482. , const unsigned int srcCount
  483. , XMLByte* const toFill
  484. , const unsigned int maxBytes
  485. , unsigned int& charsEaten
  486. , const UnRepOpts options)
  487. {
  488. //
  489. // Get a pointer to the buffer to transcode. If UChar and XMLCh are
  490. // the same size here, then use the original. Else, create a temp
  491. // one and put a janitor on it.
  492. //
  493. const UChar* srcPtr;
  494. UChar* tmpBufPtr = 0;
  495. if (sizeof(XMLCh) == sizeof(UChar))
  496. {
  497. srcPtr = (const UChar*)srcData;
  498. }
  499. else
  500. {
  501. tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());
  502. srcPtr = tmpBufPtr;
  503. }
  504. ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());
  505. //
  506. // Set the appropriate callback so that it will either fail or use
  507. // the rep char. Remember the old one so we can put it back.
  508. //
  509. UErrorCode err = U_ZERO_ERROR;
  510. UConverterFromUCallback oldCB = NULL;
  511. #if (U_ICU_VERSION_MAJOR_NUM < 2)
  512. void* orgContent;
  513. #else
  514. const void* orgContent;
  515. #endif
  516. ucnv_setFromUCallBack
  517. (
  518. fConverter
  519. , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
  520. : UCNV_FROM_U_CALLBACK_SUBSTITUTE
  521. , NULL
  522. , &oldCB
  523. , &orgContent
  524. , &err
  525. );
  526. //
  527. // Ok, lets transcode as many chars as we we can in one shot. The
  528. // ICU API gives enough info not to have to do this one char by char.
  529. //
  530. XMLByte* startTarget = toFill;
  531. const UChar* startSrc = srcPtr;
  532. err = U_ZERO_ERROR;
  533. ucnv_fromUnicode
  534. (
  535. fConverter
  536. , (char**)&startTarget
  537. , (char*)(startTarget + maxBytes)
  538. , &startSrc
  539. , srcPtr + srcCount
  540. , 0
  541. , false
  542. , &err
  543. );
  544. // Rememember the status before we possibly overite the error code
  545. const bool res = (err == U_ZERO_ERROR);
  546. // Put the old handler back
  547. err = U_ZERO_ERROR;
  548. UConverterFromUCallback orgAction = NULL;
  549. ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);
  550. if (!res)
  551. {
  552. XMLCh tmpBuf[16];
  553. XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16);
  554. ThrowXML2
  555. (
  556. TranscodingException
  557. , XMLExcepts::Trans_Unrepresentable
  558. , tmpBuf
  559. , getEncodingName()
  560. );
  561. }
  562. // Fill in the chars we ate from the input
  563. charsEaten = startSrc - srcPtr;
  564. // Return the chars we stored
  565. return startTarget - toFill;
  566. }
  567. bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
  568. {
  569. //
  570. // If the passed value is really a surrogate embedded together, then
  571. // we need to break it out into its two chars. Else just one. While
  572. // we are ate it, convert them to UChar format if required.
  573. //
  574. UChar srcBuf[2];
  575. unsigned int srcCount = 1;
  576. if (toCheck & 0xFFFF0000)
  577. {
  578. srcBuf[0] = UChar((toCheck >> 10) + 0xD800);
  579. srcBuf[1] = UChar(toCheck & 0x3FF) + 0xDC00;
  580. srcCount++;
  581. }
  582. else
  583. {
  584. srcBuf[0] = UChar(toCheck);
  585. }
  586. //
  587. // Set the callback so that it will fail instead of using the rep char.
  588. // Remember the old one so we can put it back.
  589. //
  590. UErrorCode err = U_ZERO_ERROR;
  591. UConverterFromUCallback oldCB = NULL;
  592. #if (U_ICU_VERSION_MAJOR_NUM < 2)
  593. void* orgContent;
  594. #else
  595. const void* orgContent;
  596. #endif
  597. ucnv_setFromUCallBack
  598. (
  599. fConverter
  600. , UCNV_FROM_U_CALLBACK_STOP
  601. , NULL
  602. , &oldCB
  603. , &orgContent
  604. , &err
  605. );
  606. // Set upa temp buffer to format into. Make it more than big enough
  607. char tmpBuf[64];
  608. char* startTarget = tmpBuf;
  609. const UChar* startSrc = srcBuf;
  610. err = U_ZERO_ERROR;
  611. ucnv_fromUnicode
  612. (
  613. fConverter
  614. , &startTarget
  615. , startTarget + 64
  616. , &startSrc
  617. , srcBuf + srcCount
  618. , 0
  619. , false
  620. , &err
  621. );
  622. // Save the result before we overight the error code
  623. const bool res = (err == U_ZERO_ERROR);
  624. // Put the old handler back
  625. err = U_ZERO_ERROR;
  626. UConverterFromUCallback orgAction = NULL;
  627. ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);
  628. return res;
  629. }
  630. // ---------------------------------------------------------------------------
  631. // ICULCPTranscoder: Constructors and Destructor
  632. // ---------------------------------------------------------------------------
  633. ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :
  634. fConverter(toAdopt)
  635. {
  636. }
  637. ICULCPTranscoder::~ICULCPTranscoder()
  638. {
  639. // If there is a converter, ask ICU to clean it up
  640. if (fConverter)
  641. {
  642. // <TBD> Does this actually delete the structure???
  643. ucnv_close(fConverter);
  644. fConverter = 0;
  645. }
  646. }
  647. // ---------------------------------------------------------------------------
  648. // ICULCPTranscoder: The virtual transcoder API
  649. // ---------------------------------------------------------------------------
  650. unsigned int ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText)
  651. {
  652. if (!srcText)
  653. return 0;
  654. //
  655. // We do two different versions of this, according to whether XMLCh
  656. // is the same size as UChar or not.
  657. //
  658. UErrorCode err = U_ZERO_ERROR;
  659. int32_t targetCap;
  660. if (sizeof(XMLCh) == sizeof(UChar))
  661. {
  662. // Use a faux scope to synchronize while we do this
  663. {
  664. XMLMutexLock lockConverter(&fMutex);
  665. targetCap = ucnv_fromUChars
  666. (
  667. fConverter
  668. , 0
  669. , 0
  670. , (const UChar*)srcText
  671. , -1
  672. , &err
  673. );
  674. }
  675. }
  676. else
  677. {
  678. // Copy the source to a local temp
  679. UChar* tmpBuf = convertToUChar(srcText, 0, XMLPlatformUtils::fgMemoryManager);
  680. ArrayJanitor<UChar> janTmp(tmpBuf, XMLPlatformUtils::fgMemoryManager);
  681. // Use a faux scope to synchronize while we do this
  682. {
  683. XMLMutexLock lockConverter(&fMutex);
  684. targetCap = ucnv_fromUChars
  685. (
  686. fConverter
  687. , 0
  688. , 0
  689. , tmpBuf
  690. , -1
  691. , &err
  692. );
  693. }
  694. }
  695. if (err != U_BUFFER_OVERFLOW_ERROR)
  696. return 0;
  697. return (unsigned int)targetCap;
  698. }
  699. unsigned int ICULCPTranscoder::calcRequiredSize(const char* const srcText)
  700. {
  701. if (!srcText)
  702. return 0;
  703. int32_t targetCap;
  704. UErrorCode err = U_ZERO_ERROR;
  705. // Use a faux scope to synchronize while we do this
  706. {
  707. XMLMutexLock lockConverter(&fMutex);
  708. targetCap = ucnv_toUChars
  709. (
  710. fConverter
  711. , 0
  712. , 0
  713. , srcText
  714. , strlen(srcText)
  715. , &err
  716. );
  717. }
  718. if (err != U_BUFFER_OVERFLOW_ERROR)
  719. return 0;
  720. #if (U_ICU_VERSION_MAJOR_NUM < 2)
  721. // Subtract one since it includes the terminator space
  722. return (unsigned int)(targetCap - 1);
  723. #else
  724. // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior.
  725. // The returned length is always the number of output UChar's, not counting an additional, terminating NUL.
  726. return (unsigned int)(targetCap);
  727. #endif
  728. }
  729. char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode)
  730. {
  731. char* retBuf = 0;
  732. // Check for a couple of special cases
  733. if (!toTranscode)
  734. return retBuf;
  735. if (!*toTranscode)
  736. {
  737. retBuf = new char[1];
  738. retBuf[0] = 0;
  739. return retBuf;
  740. }
  741. //
  742. // Get the length of the source string since we'll have to use it in
  743. // a couple places below.
  744. //
  745. const unsigned int srcLen = XMLString::stringLen(toTranscode);
  746. //
  747. // If XMLCh and UChar are not the same size, then we have to make a
  748. // temp copy of the text to pass to ICU.
  749. //
  750. const UChar* actualSrc;
  751. UChar* ncActual = 0;
  752. if (sizeof(XMLCh) == sizeof(UChar))
  753. {
  754. actualSrc = (const UChar*)toTranscode;
  755. }
  756. else
  757. {
  758. // Allocate a non-const temp buf, but store it also in the actual
  759. ncActual = convertToUChar(toTranscode, 0, XMLPlatformUtils::fgMemoryManager);
  760. actualSrc = ncActual;
  761. }
  762. // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
  763. ArrayJanitor<UChar> janTmp(ncActual, XMLPlatformUtils::fgMemoryManager);
  764. // Caculate a return buffer size not too big, but less likely to overflow
  765. int32_t targetLen = (int32_t)(srcLen * 1.25);
  766. // Allocate the return buffer
  767. retBuf = new char[targetLen + 1];
  768. //
  769. // Lock now while we call the converter. Use a faux block to do the
  770. // lock so that it unlocks immediately afterwards.
  771. //
  772. UErrorCode err = U_ZERO_ERROR;
  773. int32_t targetCap;
  774. {
  775. XMLMutexLock lockConverter(&fMutex);
  776. targetCap = ucnv_fromUChars
  777. (
  778. fConverter
  779. , retBuf
  780. , targetLen + 1
  781. , actualSrc
  782. , -1
  783. , &err
  784. );
  785. }
  786. // If targetLen is not enough then buffer overflow might occur
  787. if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))
  788. {
  789. //
  790. // Reset the error, delete the old buffer, allocate a new one,
  791. // and try again.
  792. //
  793. err = U_ZERO_ERROR;
  794. delete [] retBuf;
  795. retBuf = new char[targetCap + 1];
  796. // Lock again before we retry
  797. XMLMutexLock lockConverter(&fMutex);
  798. targetCap = ucnv_fromUChars
  799. (
  800. fConverter
  801. , retBuf
  802. , targetCap
  803. , actualSrc
  804. , -1
  805. , &err
  806. );
  807. }
  808. if (U_FAILURE(err))
  809. {
  810. delete [] retBuf;
  811. return 0;
  812. }
  813. return retBuf;
  814. }
  815. char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode,
  816. MemoryManager* const manager)
  817. {
  818. char* retBuf = 0;
  819. // Check for a couple of special cases
  820. if (!toTranscode)
  821. return retBuf;
  822. if (!*toTranscode)
  823. {
  824. retBuf = (char*) manager->allocate(sizeof(char));//new char[1];
  825. retBuf[0] = 0;
  826. return retBuf;
  827. }
  828. //
  829. // Get the length of the source string since we'll have to use it in
  830. // a couple places below.
  831. //
  832. const unsigned int srcLen = XMLString::stringLen(toTranscode);
  833. //
  834. // If XMLCh and UChar are not the same size, then we have to make a
  835. // temp copy of the text to pass to ICU.
  836. //
  837. const UChar* actualSrc;
  838. UChar* ncActual = 0;
  839. if (sizeof(XMLCh) == sizeof(UChar))
  840. {
  841. actualSrc = (const UChar*)toTranscode;
  842. }
  843. else
  844. {
  845. // Allocate a non-const temp buf, but store it also in the actual
  846. ncActual = convertToUChar(toTranscode, 0, manager);
  847. actualSrc = ncActual;
  848. }
  849. // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
  850. ArrayJanitor<UChar> janTmp(ncActual, manager);
  851. // Caculate a return buffer size not too big, but less likely to overflow
  852. int32_t targetLen = (int32_t)(srcLen * 1.25);
  853. // Allocate the return buffer
  854. retBuf = (char*) manager->allocate((targetLen + 1) * sizeof(char));//new char[targetLen + 1];
  855. //
  856. // Lock now while we call the converter. Use a faux block to do the
  857. // lock so that it unlocks immediately afterwards.
  858. //
  859. UErrorCode err = U_ZERO_ERROR;
  860. int32_t targetCap;
  861. {
  862. XMLMutexLock lockConverter(&fMutex);
  863. targetCap = ucnv_fromUChars
  864. (
  865. fConverter
  866. , retBuf
  867. , targetLen + 1
  868. , actualSrc
  869. , -1
  870. , &err
  871. );
  872. }
  873. // If targetLen is not enough then buffer overflow might occur
  874. if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))
  875. {
  876. //
  877. // Reset the error, delete the old buffer, allocate a new one,
  878. // and try again.
  879. //
  880. err = U_ZERO_ERROR;
  881. manager->deallocate(retBuf);//delete [] retBuf;
  882. retBuf = (char*) manager->allocate((targetCap + 1) * sizeof(char));//new char[targetCap + 1];
  883. // Lock again before we retry
  884. XMLMutexLock lockConverter(&fMutex);
  885. targetCap = ucnv_fromUChars
  886. (
  887. fConverter
  888. , retBuf
  889. , targetCap
  890. , actualSrc
  891. , -1
  892. , &err
  893. );
  894. }
  895. if (U_FAILURE(err))
  896. {
  897. manager->deallocate(retBuf);//delete [] retBuf;
  898. return 0;
  899. }
  900. return retBuf;
  901. }
  902. XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode)
  903. {
  904. // Watch for a few pyscho corner cases
  905. if (!toTranscode)
  906. return 0;
  907. if (!*toTranscode)
  908. {
  909. XMLCh* retVal = new XMLCh[1];
  910. retVal[0] = 0;
  911. return retVal;
  912. }
  913. //
  914. // Get the length of the string to transcode. The Unicode string will
  915. // almost always be no more chars than were in the source, so this is
  916. // the best guess as to the storage needed.
  917. //
  918. const int32_t srcLen = (int32_t)strlen(toTranscode);
  919. // We need a target buffer of UChars to fill in
  920. UChar* targetBuf = 0;
  921. // Now lock while we do these calculations
  922. UErrorCode err = U_ZERO_ERROR;
  923. int32_t targetCap;
  924. {
  925. XMLMutexLock lockConverter(&fMutex);
  926. //
  927. // Here we don't know what the target length will be so use 0 and
  928. // expect an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved
  929. // by the correct capacity value.
  930. //
  931. targetCap = ucnv_toUChars
  932. (
  933. fConverter
  934. , 0
  935. , 0
  936. , toTranscode
  937. , srcLen
  938. , &err
  939. );
  940. if (err != U_BUFFER_OVERFLOW_ERROR)
  941. return 0;
  942. err = U_ZERO_ERROR;
  943. targetBuf = new UChar[targetCap + 1];
  944. ucnv_toUChars
  945. (
  946. fConverter
  947. , targetBuf
  948. , targetCap
  949. , toTranscode
  950. , srcLen
  951. , &err
  952. );
  953. }
  954. if (U_FAILURE(err))
  955. {
  956. // Clean up if we got anything allocated
  957. delete [] targetBuf;
  958. return 0;
  959. }
  960. // Cap it off to make sure
  961. targetBuf[targetCap] = 0;
  962. //
  963. // If XMLCh and UChar are the same size, then we can return retVal
  964. // as is. Else, we have to allocate another buffer and copy the data
  965. // over to it.
  966. //
  967. XMLCh* actualRet;
  968. if (sizeof(XMLCh) == sizeof(UChar))
  969. {
  970. actualRet = (XMLCh*)targetBuf;
  971. }
  972. else
  973. {
  974. actualRet = convertToXMLCh(targetBuf);
  975. delete [] targetBuf;
  976. }
  977. return actualRet;
  978. }
  979. XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode,
  980. MemoryManager* const manager)
  981. {
  982. // Watch for a few pyscho corner cases
  983. if (!toTranscode)
  984. return 0;
  985. if (!*toTranscode)
  986. {
  987. XMLCh* retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
  988. retVal[0] = 0;
  989. return retVal;
  990. }
  991. //
  992. // Get the length of the string to transcode. The Unicode string will
  993. // almost always be no more chars than were in the source, so this is
  994. // the best guess as to the storage needed.
  995. //
  996. const int32_t srcLen = (int32_t)strlen(toTranscode);
  997. // We need a target buffer of UChars to fill in
  998. UChar* targetBuf = 0;
  999. // Now lock while we do these calculations
  1000. UErrorCode err = U_ZERO_ERROR;
  1001. int32_t targetCap;
  1002. {
  1003. XMLMutexLock lockConverter(&fMutex);
  1004. //
  1005. // Here we don't know what the target length will be so use 0 and
  1006. // expect an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved
  1007. // by the correct capacity value.
  1008. //
  1009. targetCap = ucnv_toUChars
  1010. (
  1011. fConverter
  1012. , 0
  1013. , 0
  1014. , toTranscode
  1015. , srcLen
  1016. , &err
  1017. );
  1018. if (err != U_BUFFER_OVERFLOW_ERROR)
  1019. return 0;
  1020. err = U_ZERO_ERROR;
  1021. targetBuf = (UChar*) manager->allocate((targetCap+1) * sizeof(UChar));//new UChar[targetCap + 1];
  1022. ucnv_toUChars
  1023. (
  1024. fConverter
  1025. , targetBuf
  1026. , targetCap
  1027. , toTranscode
  1028. , srcLen
  1029. , &err
  1030. );
  1031. }
  1032. if (U_FAILURE(err))
  1033. {
  1034. // Clean up if we got anything allocated
  1035. manager->deallocate(targetBuf);//delete [] targetBuf;
  1036. return 0;
  1037. }
  1038. // Cap it off to make sure
  1039. targetBuf[targetCap] = 0;
  1040. //
  1041. // If XMLCh and UChar are the same size, then we can return retVal
  1042. // as is. Else, we have to allocate another buffer and copy the data
  1043. // over to it.
  1044. //
  1045. XMLCh* actualRet;
  1046. if (sizeof(XMLCh) == sizeof(UChar))
  1047. {
  1048. actualRet = (XMLCh*)targetBuf;
  1049. }
  1050. else
  1051. {
  1052. actualRet = convertToXMLCh(targetBuf, manager);
  1053. manager->deallocate(targetBuf);//delete [] targetBuf;
  1054. }
  1055. return actualRet;
  1056. }
  1057. bool ICULCPTranscoder::transcode(const char* const toTranscode
  1058. , XMLCh* const toFill
  1059. , const unsigned int maxChars)
  1060. {
  1061. // Check for a couple of psycho corner cases
  1062. if (!toTranscode || !maxChars)
  1063. {
  1064. toFill[0] = 0;
  1065. return true;
  1066. }
  1067. if (!*toTranscode)
  1068. {
  1069. toFill[0] = 0;
  1070. return true;
  1071. }
  1072. // We'll need this in a couple of places below
  1073. const unsigned int srcLen = strlen(toTranscode);
  1074. //
  1075. // Set up the target buffer. If XMLCh and UChar are not the same size
  1076. // then we have to use a temp buffer and convert over.
  1077. //
  1078. UChar* targetBuf;
  1079. if (sizeof(XMLCh) == sizeof(UChar))
  1080. targetBuf = (UChar*)toFill;
  1081. else
  1082. targetBuf = (UChar*) XMLPlatformUtils::fgMemoryManager->allocate
  1083. (
  1084. (maxChars + 1) * sizeof(UChar)
  1085. );//new UChar[maxChars + 1];
  1086. //
  1087. // Use a faux block to enforce a lock on the converter, which will
  1088. // unlock immediately after its completed.
  1089. //
  1090. UErrorCode err = U_ZERO_ERROR;
  1091. {
  1092. XMLMutexLock lockConverter(&fMutex);
  1093. ucnv_toUChars
  1094. (
  1095. fConverter
  1096. , targetBuf
  1097. , maxChars + 1
  1098. , toTranscode
  1099. , srcLen
  1100. , &err
  1101. );
  1102. }
  1103. if (U_FAILURE(err))
  1104. {
  1105. if (targetBuf != (UChar*)toFill)
  1106. XMLPlatformUtils::fgMemoryManager->deallocate(targetBuf);//delete [] targetBuf;
  1107. return false;
  1108. }
  1109. // If the sizes are not the same, then copy the data over
  1110. if (sizeof(XMLCh) != sizeof(UChar))
  1111. {
  1112. UChar* srcPtr = targetBuf;
  1113. XMLCh* outPtr = toFill;
  1114. while (*srcPtr)
  1115. *outPtr++ = XMLCh(*srcPtr++);
  1116. *outPtr = 0;
  1117. // And delete the temp buffer
  1118. XMLPlatformUtils::fgMemoryManager->deallocate(targetBuf);//delete [] targetBuf;
  1119. }
  1120. return true;
  1121. }
  1122. bool ICULCPTranscoder::transcode( const XMLCh* const toTranscode
  1123. , char* const toFill
  1124. , const unsigned int maxChars)
  1125. {
  1126. // Watch for a few psycho corner cases
  1127. if (!toTranscode || !maxChars)
  1128. {
  1129. toFill[0] = 0;
  1130. return true;
  1131. }
  1132. if (!*toTranscode)
  1133. {
  1134. toFill[0] = 0;
  1135. return true;
  1136. }
  1137. //
  1138. // If XMLCh and UChar are not the same size, then we have to make a
  1139. // temp copy of the text to pass to ICU.
  1140. //
  1141. const UChar* actualSrc;
  1142. UChar* ncActual = 0;
  1143. if (sizeof(XMLCh) == sizeof(UChar))
  1144. {
  1145. actualSrc = (const UChar*)toTranscode;
  1146. }
  1147. else
  1148. {
  1149. // Allocate a non-const temp buf, but store it also in the actual
  1150. ncActual = convertToUChar(toTranscode, 0, XMLPlatformUtils::fgMemoryManager);
  1151. actualSrc = ncActual;
  1152. }
  1153. // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
  1154. ArrayJanitor<UChar> janTmp(ncActual, XMLPlatformUtils::fgMemoryManager);
  1155. //
  1156. // Use a faux block to enforce a lock on the converter while we do this.
  1157. // It will be released immediately after its done.
  1158. //
  1159. UErrorCode err = U_ZERO_ERROR;
  1160. int32_t targetCap;
  1161. {
  1162. XMLMutexLock lockConverter(&fMutex);
  1163. targetCap = ucnv_fromUChars
  1164. (
  1165. fConverter
  1166. , toFill
  1167. , maxChars
  1168. , actualSrc
  1169. , -1
  1170. , &err
  1171. );
  1172. }
  1173. if (U_FAILURE(err))
  1174. return false;
  1175. toFill[targetCap] = 0;
  1176. return true;
  1177. }
  1178. XERCES_CPP_NAMESPACE_END