PageRenderTime 54ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/XQilla-2.2.4/src/utils/XPath2Utils.cpp

#
C++ | 354 lines | 265 code | 52 blank | 37 comment | 78 complexity | 575dbcd563f8bbb0e244c2ab7ea4c9e3 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Copyright (c) 2001-2008
  3. * DecisionSoft Limited. All rights reserved.
  4. * Copyright (c) 2004-2008
  5. * Oracle. All rights reserved.
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * $Id$
  20. */
  21. #include "../config/xqilla_config.h"
  22. #include <assert.h>
  23. #include <xercesc/util/XMLString.hpp>
  24. #include <xercesc/util/XMLUniDefs.hpp>
  25. #include <xercesc/util/XMLUri.hpp>
  26. #include <xercesc/framework/XMLBuffer.hpp>
  27. #include <xercesc/util/TranscodingException.hpp>
  28. #include <xercesc/util/BinInputStream.hpp>
  29. #include <xercesc/util/TransService.hpp>
  30. #include <xercesc/framework/XMLRecognizer.hpp>
  31. #include <xercesc/sax/InputSource.hpp>
  32. #include <xercesc/util/regx/RegularExpression.hpp>
  33. #include <xercesc/util/regx/Match.hpp>
  34. #include <xqilla/framework/XPath2MemoryManager.hpp>
  35. #include <xqilla/utils/XPath2Utils.hpp>
  36. #include <xqilla/utils/XPath2NSUtils.hpp>
  37. #include "../exceptions/MiscException.hpp"
  38. #include <xqilla/runtime/Sequence.hpp>
  39. #include <xqilla/items/Node.hpp>
  40. #include <xqilla/mapm/m_apm.h>
  41. #if defined(WIN32) && !defined(__CYGWIN__)
  42. #define snprintf _snprintf
  43. #endif
  44. XERCES_CPP_NAMESPACE_USE;
  45. const XMLCh* XPath2Utils::escapeURI(const XMLCh* const str, bool escapeRes, XPath2MemoryManager* memMgr)
  46. {
  47. if(XMLString::stringLen(str) == 0) {
  48. return 0;
  49. }
  50. XMLBuffer buf(1023, memMgr);
  51. const XMLCh *cptr;
  52. for (cptr = str; *cptr; cptr++) {
  53. // here we compare 16-bit XMLCh's to zero-padded 8-bit chars, so it
  54. // magically works
  55. if ((*cptr >= 'A' && *cptr <= 'Z') ||
  56. (*cptr >= 'a' && *cptr <= 'z') ||
  57. (*cptr >= '0' && *cptr <= '9') ||
  58. *cptr == '%' || *cptr == '#' ||
  59. *cptr == '-' || *cptr == '_' || *cptr == '.' ||
  60. *cptr == '!' || *cptr == '~' || *cptr == '*' ||
  61. *cptr == '\''|| *cptr == '(' || *cptr == ')' ||
  62. (!escapeRes &&
  63. (*cptr == ';' || *cptr == '/' || *cptr == '?' ||
  64. *cptr == ':' || *cptr == '@' || *cptr == '&' ||
  65. *cptr == '=' || *cptr == '+' || *cptr == '$' ||
  66. *cptr == ',' || *cptr == '[' || *cptr == ']' ))) {
  67. buf.append(*cptr);
  68. } else {
  69. buf.append(X("%"));
  70. // if the upper 8 bits of the XMLCh are less than 10 (covers all of 8-bit ascii)
  71. if ((*cptr >> 4) < 10)
  72. buf.append((XMLCh)('0' + (*cptr >> 4)));
  73. else // covers extended ascii
  74. buf.append((XMLCh)('A' - 10 + (*cptr >> 4)));
  75. // bitwise-and the 16-bit XMLCh with octal F, 11111111
  76. // we are checking the lower 8-bits
  77. if ((*cptr & 0xF) < 10)
  78. buf.append((XMLCh)('0' + (*cptr & 0xF)));
  79. else
  80. buf.append((XMLCh)('A' - 10 + (*cptr & 0xF)));
  81. }
  82. }//for
  83. return memMgr->getPooledString(buf.getRawBuffer());
  84. }
  85. bool XPath2Utils::isValidURI(const XMLCh* const str, MemoryManager* memMgr)
  86. {
  87. // XMLSchema specs say: "Spaces are, in principle, allowed in the lexical space of anyURI, however,
  88. // their use is highly discouraged (unless they are encoded by %20)"
  89. // Xerces complains if a space is found, so let's encode it
  90. const XMLCh escSpace[]={ chPercent, chDigit_2, chDigit_0, chNull };
  91. XMLBuffer buff(1023, memMgr);
  92. const XMLCh* pCursor=str;
  93. while(*pCursor)
  94. {
  95. if(*pCursor==chSpace)
  96. buff.append(escSpace);
  97. else
  98. buff.append(*pCursor);
  99. pCursor++;
  100. }
  101. return XMLUri::isValidURI(true, buff.getRawBuffer());
  102. }
  103. const XMLCh* XPath2Utils::concatStrings(const XMLCh* src1, const XMLCh src, XPath2MemoryManager* memMgr) {
  104. XMLCh dummy[2] = {src, chNull};
  105. return XPath2Utils::concatStrings(src1, dummy, memMgr);
  106. }
  107. const XMLCh* XPath2Utils::concatStrings(const XMLCh* src1, const XMLCh* src2, XPath2MemoryManager* memMgr) {
  108. XMLBuffer buffer(1023, memMgr);
  109. buffer.set(src1);
  110. buffer.append(src2);
  111. return memMgr->getPooledString(buffer.getRawBuffer());
  112. }
  113. const XMLCh* XPath2Utils::concatStrings(const XMLCh* src1, const XMLCh* src2, const XMLCh* src3, XPath2MemoryManager* memMgr) {
  114. XMLBuffer buffer(1023, memMgr);
  115. buffer.set(src1);
  116. buffer.append(src2);
  117. buffer.append(src3);
  118. return memMgr->getPooledString(buffer.getRawBuffer());
  119. }
  120. const XMLCh* XPath2Utils::asStr(const XMLCh ch, XPath2MemoryManager* memMgr) {
  121. XMLCh newStr[2] = {ch, chNull};
  122. return memMgr->getPooledString(newStr);
  123. }
  124. const XMLCh* XPath2Utils::subString(const XMLCh* src, unsigned int offset, unsigned int count, XPath2MemoryManager* memMgr) {
  125. if (src == 0) {
  126. XQThrow2(MiscException,X("XPath2Utils:subString"),X("Cannot take substring of null string"));
  127. }
  128. AutoDeleteArray<XMLCh> newStr(new XMLCh [ count + 1 ]);
  129. XMLString::subString(newStr, src, offset, offset + count);
  130. return memMgr->getPooledString(newStr);
  131. }
  132. const XMLCh* XPath2Utils::deleteData( const XMLCh* const target, unsigned int offset, unsigned int count, XPath2MemoryManager* memMgr) {
  133. if (target == 0) {
  134. return 0;
  135. }
  136. unsigned int targetSize = uintStrlen(target);
  137. unsigned int newSize = targetSize - count;
  138. AutoDeleteArray<XMLCh> stringGuard(new XMLCh [newSize + 1]);
  139. XMLCh *newString = stringGuard;
  140. XMLString::copyNString(newString, target, offset);// * sizeof(XMLCh));
  141. XMLString::copyNString(newString + offset, target + offset + count, ( targetSize - offset - count));//*sizeof(XMLCh));
  142. newString[newSize] = 0;
  143. const XMLCh* retval = memMgr->getPooledString(newString);
  144. return retval;
  145. }
  146. const XMLCh* XPath2Utils::toLower( const XMLCh* const target, XPath2MemoryManager* memMgr) {
  147. XMLCh *newStr = XMLString::replicate(target);
  148. XMLString::lowerCase(newStr);
  149. const XMLCh* retval=memMgr->getPooledString(newStr);
  150. XMLString::release(&newStr);
  151. return retval;
  152. }
  153. const XMLCh* XPath2Utils::toUpper( const XMLCh* const target, XPath2MemoryManager* memMgr) {
  154. XMLCh *newStr = XMLString::replicate(target);
  155. XMLString::upperCase(newStr);
  156. const XMLCh* retval=memMgr->getPooledString(newStr);
  157. XMLString::release(&newStr);
  158. return retval;
  159. }
  160. const XMLCh* XPath2Utils::toCollapsedWS(const XMLCh* const target, XPath2MemoryManager* memMgr) {
  161. XMLCh *newStr = XMLString::replicate(target);
  162. XMLString::collapseWS(newStr);
  163. const XMLCh* retval=memMgr->getPooledString(newStr);
  164. XMLString::release(&newStr);
  165. return retval;
  166. }
  167. const XMLCh* XPath2Utils::normalizeEOL(const XMLCh* const src, XPath2MemoryManager* memMgr) {
  168. int len=intStrlen(src);
  169. int j=0;
  170. XMLCh* dst=(XMLCh*)memMgr->allocate((len+1)*sizeof(XMLCh*));
  171. // A.2.3 End-of-Line Handling
  172. // For [XML 1.0] processing, all of the following must be translated to a single #xA character:
  173. // 1. the two-character sequence #xD #xA
  174. // 2. any #xD character that is not immediately followed by #xA.
  175. for(int i=0;i<len;i++)
  176. {
  177. if (src[i]== chCR && i<len && src[i+1]== chLF)
  178. {
  179. dst[j++]=chLF;
  180. i++;
  181. }
  182. else if(src[i]== chCR)
  183. dst[j++]=chLF;
  184. else
  185. dst[j++]=src[i];
  186. }
  187. dst[j++]=0;
  188. return dst;
  189. }
  190. std::vector<const XMLCh*> XPath2Utils::getVal(const XMLCh* values, XPath2MemoryManager* memMgr){
  191. std::vector<const XMLCh*> valList;
  192. bool munchWS = true; // munch initial WS
  193. unsigned int start = 0;
  194. /* XPath requires this bizarre WS separated splitting of the string, as the
  195. string can hold many id's. */
  196. int valuesLen = intStrlen(values);
  197. for(int i = 0; i < valuesLen; i++) {
  198. switch(values[i]) {
  199. case 0x0020:
  200. case 0x0009:
  201. case 0x000d:
  202. case 0x000a: {
  203. if(!munchWS) {
  204. munchWS = true;
  205. // Found the end of a value, so add it to a vector.
  206. valList.push_back(subString(values, start, i-start, memMgr));
  207. }
  208. break;
  209. }
  210. default: {
  211. if(munchWS) {
  212. //found the start of an ID
  213. start = i;
  214. munchWS = false;
  215. }
  216. }
  217. }
  218. }
  219. if(!munchWS) {
  220. // Found the end of a value, so add it to the list of values.
  221. valList.push_back(subString(values, start, valuesLen-start, memMgr));
  222. }
  223. return valList;
  224. }
  225. bool XPath2Utils::containsString(std::vector<const XMLCh*> values, const XMLCh* val) {
  226. for (std::vector<const XMLCh*>::const_iterator it=values.begin();it!=values.end();it++)
  227. if (XPath2Utils::equals(val, *it))
  228. return true;
  229. return false;
  230. }
  231. #define BUFFER_SIZE 1024
  232. bool XPath2Utils::readSource(const InputSource &src, MemoryManager *mm, XMLBuffer &result, bool sniffEncoding)
  233. {
  234. BinInputStream *stream = src.makeStream();
  235. if(stream == NULL) return false;
  236. Janitor<BinInputStream> janStream(stream);
  237. readSource(stream, mm, result, src.getEncoding(), sniffEncoding);
  238. return true;
  239. }
  240. void XPath2Utils::readSource(BinInputStream *stream, MemoryManager *mm, XMLBuffer &result,
  241. const XMLCh *encoding, bool sniffEncoding)
  242. {
  243. XMLByte buffer[BUFFER_SIZE];
  244. XercesSizeUint nRead = 0;
  245. Janitor<XMLTranscoder> transcoder(NULL);
  246. XMLTransService::Codes retCode;
  247. if(encoding) {
  248. transcoder.reset(XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding, retCode, BUFFER_SIZE, mm));
  249. if(transcoder.get() == 0) {
  250. ThrowXMLwithMemMgr1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, encoding, mm);
  251. }
  252. }
  253. else {
  254. #ifdef HAVE_GETCONTENTTYPE
  255. RegularExpression charsetRE(".*; *charset=([^ ;]*|\"[^\"]*\").*", "iH", mm);
  256. if(charsetRE.matches(stream->getContentType(), mm)) {
  257. XMLCh *charset = charsetRE.replace(stream->getContentType(), X("$1"), mm);
  258. AutoDeallocate<XMLCh> charsetGuard(charset, mm);
  259. if(*charset == '"') {
  260. // Trim the quotes
  261. charset += 1;
  262. *(charset + XMLString::stringLen(charset) - 1) = 0;
  263. }
  264. transcoder.reset(XMLPlatformUtils::fgTransService->makeNewTranscoderFor(charset, retCode, BUFFER_SIZE, mm));
  265. if(transcoder.get() == 0) {
  266. ThrowXMLwithMemMgr1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, charset, mm);
  267. }
  268. }
  269. else
  270. #endif
  271. if(sniffEncoding) {
  272. // TBD make this better by using an XQuery specific encoding sniffer - jpcs
  273. nRead = stream->readBytes(buffer, BUFFER_SIZE);
  274. XMLRecognizer::Encodings encoding = XMLRecognizer::basicEncodingProbe(buffer, BUFFER_SIZE);
  275. transcoder.reset(XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding, retCode, BUFFER_SIZE, mm));
  276. }
  277. else {
  278. transcoder.reset(XMLPlatformUtils::fgTransService->makeNewTranscoderFor("UTF-8", retCode, BUFFER_SIZE, mm));
  279. }
  280. }
  281. XMLCh tempBuff[BUFFER_SIZE];
  282. unsigned char charSizes[BUFFER_SIZE];
  283. XercesSizeUint bytesEaten = 0, nOffset = 0;
  284. XercesSizeUint nCount;
  285. do {
  286. nCount = transcoder->transcodeFrom(buffer, nRead, tempBuff, BUFFER_SIZE, bytesEaten, charSizes);
  287. if(nCount) result.append(tempBuff, nCount);
  288. if(bytesEaten < nRead){
  289. nOffset = nRead - bytesEaten;
  290. memmove(buffer, buffer + bytesEaten, nOffset);
  291. }
  292. nRead = stream->readBytes(buffer + nOffset, BUFFER_SIZE - nOffset);
  293. if(nRead == 0 && nCount == 0) break;
  294. nRead += nOffset;
  295. } while(nRead > 0);
  296. }