/Firebird-2.5.1.26351-0/src/jrd/Collation.cpp

# · C++ · 948 lines · 683 code · 129 blank · 136 comment · 170 complexity · c3c342d9a2a41de19f1dbd10be298735 MD5 · raw file

  1. /*
  2. * PROGRAM: JRD Intl
  3. * MODULE: Collation.cpp
  4. * DESCRIPTION: International text support routines
  5. *
  6. * copyright (c) 1992, 1993 by Borland International
  7. */
  8. /************* history ************
  9. *
  10. * COMPONENT: JRD MODULE: INTL.CPP
  11. * generated by Marion V2.5 2/6/90
  12. * from dev db on 4-JAN-1995
  13. *****************************************************************
  14. *
  15. * PR 2002-06-02 Added ugly c hack in
  16. * intl_back_compat_alloc_func_lookup.
  17. * When someone has time we need to change the references to
  18. * return (void*) function to something more C++ like
  19. *
  20. * 42 4711 3 11 17 tamlin 2001
  21. * Added silly numbers before my name, and converted it to C++.
  22. *
  23. * 18850 daves 4-JAN-1995
  24. * Fix gds__alloc usage
  25. *
  26. * 18837 deej 31-DEC-1994
  27. * fixing up HARBOR_MERGE
  28. *
  29. * 18821 deej 27-DEC-1994
  30. * HARBOR MERGE
  31. *
  32. * 18789 jdavid 19-DEC-1994
  33. * Cast some functions
  34. *
  35. * 17508 jdavid 15-JUL-1994
  36. * Bring it up to date
  37. *
  38. * 17500 daves 13-JUL-1994
  39. * Bug 6645: Different calculation of partial keys
  40. *
  41. * 17202 katz 24-MAY-1994
  42. * PC_PLATFORM requires the .dll extension
  43. *
  44. * 17191 katz 23-MAY-1994
  45. * OS/2 requires the .dll extension
  46. *
  47. * 17180 katz 23-MAY-1994
  48. * Define location of DLL on OS/2
  49. *
  50. * 17149 katz 20-MAY-1994
  51. * In JRD, isc_arg_number arguments are SLONG's not int's
  52. *
  53. * 16633 daves 19-APR-1994
  54. * Bug 6202: International licensing uses INTERNATIONAL product code
  55. *
  56. * 16555 katz 17-APR-1994
  57. * The last argument of calls to ERR_post should be 0
  58. *
  59. * 16521 katz 14-APR-1994
  60. * Borland C needs a decorated symbol to lookup
  61. *
  62. * 16403 daves 8-APR-1994
  63. * Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
  64. *
  65. * 16141 katz 28-MAR-1994
  66. * Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
  67. *
  68. * The contents of this file are subject to the Interbase Public
  69. * License Version 1.0 (the "License"); you may not use this file
  70. * except in compliance with the License. You may obtain a copy
  71. * of the License at http://www.Inprise.com/IPL.html
  72. *
  73. * Software distributed under the License is distributed on an
  74. * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
  75. * or implied. See the License for the specific language governing
  76. * rights and limitations under the License.
  77. *
  78. * The Original Code was created by Inprise Corporation
  79. * and its predecessors. Portions created by Inprise Corporation are
  80. * Copyright (C) Inprise Corporation.
  81. *
  82. * All Rights Reserved.
  83. * Contributor(s): ______________________________________.
  84. *
  85. * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
  86. *
  87. * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
  88. *
  89. * 2006.10.10 Adriano dos Santos Fernandes - refactored from intl.cpp
  90. *
  91. */
  92. #include "firebird.h"
  93. #include "gen/iberror.h"
  94. #include "../jrd/jrd.h"
  95. #include "../jrd/err_proto.h"
  96. #include "../jrd/evl_string.h"
  97. #include "../jrd/intl_classes.h"
  98. #include "../jrd/lck_proto.h"
  99. #include "../jrd/intl_classes.h"
  100. #include "../jrd/TextType.h"
  101. #include "../jrd/SimilarToMatcher.h"
  102. using namespace Jrd;
  103. namespace {
  104. // constants used in matches and sleuth
  105. const int CHAR_GDML_MATCH_ONE = TextType::CHAR_QUESTION_MARK;
  106. const int CHAR_GDML_MATCH_ANY = TextType::CHAR_ASTERISK;
  107. const int CHAR_GDML_QUOTE = TextType::CHAR_AT;
  108. const int CHAR_GDML_NOT = TextType::CHAR_TILDE;
  109. const int CHAR_GDML_RANGE = TextType::CHAR_MINUS;
  110. const int CHAR_GDML_CLASS_START = TextType::CHAR_OPEN_BRACKET;
  111. const int CHAR_GDML_CLASS_END = TextType::CHAR_CLOSE_BRACKET;
  112. const int CHAR_GDML_SUBSTITUTE = TextType::CHAR_EQUAL;
  113. const int CHAR_GDML_FLAG_SET = TextType::CHAR_PLUS;
  114. const int CHAR_GDML_FLAG_CLEAR = TextType::CHAR_MINUS;
  115. const int CHAR_GDML_COMMA = TextType::CHAR_COMMA;
  116. const int CHAR_GDML_LPAREN = TextType::CHAR_OPEN_PAREN;
  117. const int CHAR_GDML_RPAREN = TextType::CHAR_CLOSE_PAREN;
  118. static const UCHAR SLEUTH_SPECIAL[128] =
  119. {
  120. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  121. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  122. 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, // $%*+- (dollar, percent, star, plus, minus)
  123. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // ? (question)
  124. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ (at-sign)
  125. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, // [ (open square)
  126. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  127. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 // ~ (tilde)
  128. };
  129. // Below are templates for functions used in Collation implementation
  130. template <typename StrConverter, typename CharType>
  131. class LikeMatcher : public PatternMatcher
  132. {
  133. public:
  134. LikeMatcher(MemoryPool& pool, TextType* ttype, const CharType* str, SLONG str_len,
  135. CharType escape, bool use_escape, CharType sql_match_any, CharType sql_match_one)
  136. : PatternMatcher(pool, ttype),
  137. evaluator(pool, str, str_len, escape, use_escape, sql_match_any, sql_match_one)
  138. {
  139. }
  140. void reset()
  141. {
  142. evaluator.reset();
  143. }
  144. bool result()
  145. {
  146. return evaluator.getResult();
  147. }
  148. bool process(const UCHAR* str, SLONG length)
  149. {
  150. StrConverter cvt(pool, textType, str, length);
  151. fb_assert(length % sizeof(CharType) == 0);
  152. return evaluator.processNextChunk(
  153. reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
  154. }
  155. static LikeMatcher* create(MemoryPool& pool, TextType* ttype, const UCHAR* str,
  156. SLONG length, const UCHAR* escape, SLONG escape_length,
  157. const UCHAR* sql_match_any, SLONG match_any_length,
  158. const UCHAR* sql_match_one, SLONG match_one_length)
  159. {
  160. StrConverter cvt(pool, ttype, str, length),
  161. cvt_escape(pool, ttype, escape, escape_length),
  162. cvt_match_any(pool, ttype, sql_match_any, match_any_length),
  163. cvt_match_one(pool, ttype, sql_match_one, match_one_length);
  164. fb_assert(length % sizeof(CharType) == 0);
  165. return FB_NEW(pool) LikeMatcher(pool, ttype,
  166. reinterpret_cast<const CharType*>(str), length / sizeof(CharType),
  167. (escape ? *reinterpret_cast<const CharType*>(escape) : 0), escape_length != 0,
  168. *reinterpret_cast<const CharType*>(sql_match_any),
  169. *reinterpret_cast<const CharType*>(sql_match_one));
  170. }
  171. static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl,
  172. const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length,
  173. const UCHAR* sql_match_any, SLONG match_any_length,
  174. const UCHAR* sql_match_one, SLONG match_one_length)
  175. {
  176. StrConverter cvt1(pool, ttype, p, pl),
  177. cvt2(pool, ttype, s, sl),
  178. cvt_escape(pool, ttype, escape, escape_length),
  179. cvt_match_any(pool, ttype, sql_match_any, match_any_length),
  180. cvt_match_one(pool, ttype, sql_match_one, match_one_length);
  181. fb_assert(pl % sizeof(CharType) == 0);
  182. fb_assert(sl % sizeof(CharType) == 0);
  183. Firebird::LikeEvaluator<CharType> evaluator(pool,
  184. reinterpret_cast<const CharType*>(p), pl / sizeof(CharType),
  185. (escape ? *reinterpret_cast<const CharType*>(escape) : 0), escape_length != 0,
  186. *reinterpret_cast<const CharType*>(sql_match_any),
  187. *reinterpret_cast<const CharType*>(sql_match_one));
  188. evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
  189. return evaluator.getResult();
  190. }
  191. private:
  192. Firebird::LikeEvaluator<CharType> evaluator;
  193. };
  194. template <typename StrConverter, typename CharType>
  195. class StartsMatcher : public PatternMatcher
  196. {
  197. public:
  198. StartsMatcher(MemoryPool& pool, TextType* ttype, const CharType* str, SLONG str_len)
  199. : PatternMatcher(pool, ttype),
  200. evaluator(pool, str, str_len)
  201. {
  202. }
  203. void reset()
  204. {
  205. evaluator.reset();
  206. }
  207. bool result()
  208. {
  209. return evaluator.getResult();
  210. }
  211. bool process(const UCHAR* str, SLONG length)
  212. {
  213. StrConverter cvt(pool, textType, str, length);
  214. fb_assert(length % sizeof(CharType) == 0);
  215. return evaluator.processNextChunk(
  216. reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
  217. }
  218. static StartsMatcher* create(MemoryPool& pool, TextType* ttype,
  219. const UCHAR* str, SLONG length)
  220. {
  221. StrConverter cvt(pool, ttype, str, length);
  222. fb_assert(length % sizeof(CharType) == 0);
  223. return FB_NEW(pool) StartsMatcher(pool, ttype,
  224. reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
  225. }
  226. static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl,
  227. const UCHAR* p, SLONG pl)
  228. {
  229. StrConverter cvt1(pool, ttype, p, pl);
  230. StrConverter cvt2(pool, ttype, s, sl);
  231. fb_assert(pl % sizeof(CharType) == 0);
  232. fb_assert(sl % sizeof(CharType) == 0);
  233. Firebird::StartsEvaluator<CharType> evaluator(pool,
  234. reinterpret_cast<const CharType*>(p), pl / sizeof(CharType));
  235. evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
  236. return evaluator.getResult();
  237. }
  238. private:
  239. Firebird::StartsEvaluator<CharType> evaluator;
  240. };
  241. template <typename StrConverter, typename CharType>
  242. class ContainsMatcher : public PatternMatcher
  243. {
  244. public:
  245. ContainsMatcher(MemoryPool& pool, TextType* ttype, const CharType* str, SLONG str_len)
  246. : PatternMatcher(pool, ttype),
  247. evaluator(pool, str, str_len)
  248. {
  249. }
  250. void reset()
  251. {
  252. evaluator.reset();
  253. }
  254. bool result()
  255. {
  256. return evaluator.getResult();
  257. }
  258. bool process(const UCHAR* str, SLONG length)
  259. {
  260. StrConverter cvt(pool, textType, str, length);
  261. fb_assert(length % sizeof(CharType) == 0);
  262. return evaluator.processNextChunk(
  263. reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
  264. }
  265. static ContainsMatcher* create(MemoryPool& pool, TextType* ttype, const UCHAR* str, SLONG length)
  266. {
  267. StrConverter cvt(pool, ttype, str, length);
  268. fb_assert(length % sizeof(CharType) == 0);
  269. return FB_NEW(pool) ContainsMatcher(pool, ttype,
  270. reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
  271. }
  272. static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl,
  273. const UCHAR* p, SLONG pl)
  274. {
  275. StrConverter cvt1(pool, ttype, p, pl);
  276. StrConverter cvt2(pool, ttype, s, sl);
  277. fb_assert(pl % sizeof(CharType) == 0);
  278. fb_assert(sl % sizeof(CharType) == 0);
  279. Firebird::ContainsEvaluator<CharType> evaluator(pool,
  280. reinterpret_cast<const CharType*>(p), pl / sizeof(CharType));
  281. evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
  282. return evaluator.getResult();
  283. }
  284. private:
  285. Firebird::ContainsEvaluator<CharType> evaluator;
  286. };
  287. template <typename StrConverter, typename CharType>
  288. class MatchesMatcher
  289. {
  290. public:
  291. static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl,
  292. const UCHAR* p, SLONG pl)
  293. {
  294. StrConverter cvt1(pool, ttype, p, pl);
  295. StrConverter cvt2(pool, ttype, s, sl);
  296. fb_assert(pl % sizeof(CharType) == 0);
  297. fb_assert(sl % sizeof(CharType) == 0);
  298. return matches(pool, ttype, reinterpret_cast<const CharType*>(s), sl,
  299. reinterpret_cast<const CharType*>(p), pl);
  300. }
  301. private:
  302. // Return true if a string (p1, l1) matches a given pattern (p2, l2).
  303. // The character '?' in the pattern may match any single character
  304. // in the the string, and the character '*' may match any sequence
  305. // of characters.
  306. //
  307. // Wide SCHAR version operates on short-based buffer,
  308. // instead of SCHAR-based.
  309. //
  310. // Matches is not a case-sensitive operation, thus it has no
  311. // 8-bit international impact.
  312. static bool matches(MemoryPool& pool, Jrd::TextType* obj, const CharType* p1,
  313. SLONG l1_bytes, const CharType* p2, SLONG l2_bytes)
  314. {
  315. fb_assert(p1 != NULL);
  316. fb_assert(p2 != NULL);
  317. fb_assert((l1_bytes % sizeof(CharType)) == 0);
  318. fb_assert((l2_bytes % sizeof(CharType)) == 0);
  319. fb_assert((obj->getCanonicalWidth() == sizeof(CharType)));
  320. SLONG l1 = l1_bytes / sizeof(CharType);
  321. SLONG l2 = l2_bytes / sizeof(CharType);
  322. while (l2-- > 0)
  323. {
  324. const CharType c = *p2++;
  325. if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ANY))
  326. {
  327. while ((l2 > 0) && (*p2 == *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ANY)))
  328. {
  329. l2--;
  330. p2++;
  331. }
  332. if (l2 == 0)
  333. return true;
  334. while (l1)
  335. {
  336. if (matches(pool, obj, p1++, l1-- * sizeof(CharType), p2, l2 * sizeof(CharType)))
  337. return true;
  338. }
  339. return false;
  340. }
  341. if (l1-- == 0)
  342. return false;
  343. if (c != *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ONE) && c != *p1)
  344. return false;
  345. p1++;
  346. }
  347. return !l1;
  348. }
  349. };
  350. template <typename StrConverter, typename CharType>
  351. class SleuthMatcher
  352. {
  353. public:
  354. // Evaluate the "sleuth" search operator.
  355. // Turn the (pointer, byte length) input parameters into
  356. // (pointer, end_pointer) for use in aux function
  357. static bool check(MemoryPool& pool, TextType* ttype, USHORT flags,
  358. const UCHAR* search, SLONG search_len, const UCHAR* match, SLONG match_len)
  359. {
  360. StrConverter cvt1(pool, ttype, search, search_len);//, cvt2(pool, ttype, match, match_len);
  361. fb_assert((match_len % sizeof(CharType)) == 0);
  362. fb_assert((search_len % sizeof(CharType)) == 0);
  363. fb_assert(ttype->getCanonicalWidth() == sizeof(CharType));
  364. const CharType* const end_match =
  365. reinterpret_cast<const CharType*>(match) + (match_len / sizeof(CharType));
  366. const CharType* const end_search =
  367. reinterpret_cast<const CharType*>(search) + (search_len / sizeof(CharType));
  368. return aux(ttype, flags, reinterpret_cast<const CharType*>(search),
  369. end_search, reinterpret_cast<const CharType*>(match), end_match);
  370. }
  371. static ULONG merge(MemoryPool& pool, TextType* ttype,
  372. const UCHAR* match, SLONG match_bytes,
  373. const UCHAR* control, SLONG control_bytes,
  374. UCHAR* combined) //, SLONG combined_bytes)
  375. {
  376. StrConverter cvt1(pool, ttype, match, match_bytes);
  377. StrConverter cvt2(pool, ttype, control, control_bytes);
  378. fb_assert(match_bytes % sizeof(CharType) == 0);
  379. fb_assert(control_bytes % sizeof(CharType) == 0);
  380. return actualMerge(/*pool,*/ ttype,
  381. reinterpret_cast<const CharType*>(match), match_bytes,
  382. reinterpret_cast<const CharType*>(control), control_bytes,
  383. reinterpret_cast<CharType*>(combined)); //, combined_bytes);
  384. }
  385. private:
  386. // Evaluate the "sleuth" search operator.
  387. static bool aux(Jrd::TextType* obj, USHORT flags,
  388. const CharType* search, const CharType* end_search,
  389. const CharType* match, const CharType* end_match)
  390. {
  391. fb_assert(search != NULL);
  392. fb_assert(end_search != NULL);
  393. fb_assert(match != NULL);
  394. fb_assert(end_match != NULL);
  395. fb_assert(search <= end_search);
  396. fb_assert(match <= end_match);
  397. fb_assert(obj->getCanonicalWidth() == sizeof(CharType));
  398. while (match < end_match)
  399. {
  400. CharType c = *match++;
  401. if ((c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE) && (c = *match++)) ||
  402. (size_t(c) < FB_NELEM(SLEUTH_SPECIAL) && !SLEUTH_SPECIAL[c]))
  403. {
  404. if (match >= end_match || *match != *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ANY))
  405. {
  406. if (search >= end_search)
  407. return false;
  408. const CharType d = *search++;
  409. if (c != d)
  410. return false;
  411. }
  412. else
  413. {
  414. ++match;
  415. for (;;)
  416. {
  417. if (aux(obj, flags, search, end_search, match, end_match))
  418. return true;
  419. if (search < end_search)
  420. {
  421. const CharType d = *search++;
  422. if (c != d)
  423. return false;
  424. }
  425. else
  426. return false;
  427. }
  428. }
  429. }
  430. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ONE))
  431. {
  432. if (match >= end_match || *match != *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ANY))
  433. {
  434. if (search >= end_search)
  435. return false;
  436. search++;
  437. }
  438. else
  439. {
  440. if (++match >= end_match)
  441. return true;
  442. for (;;)
  443. {
  444. if (aux(obj, flags, search, end_search, match, end_match))
  445. return true;
  446. if (++search >= end_search)
  447. return false;
  448. }
  449. }
  450. }
  451. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_CLASS_START))
  452. {
  453. const CharType* const char_class = match;
  454. while (*match++ != *(CharType*) obj->getCanonicalChar(CHAR_GDML_CLASS_END))
  455. {
  456. if (match >= end_match)
  457. return false;
  458. }
  459. const CharType* const end_class = match - 1;
  460. if (match >= end_match || *match != *(CharType*) obj->getCanonicalChar(CHAR_GDML_MATCH_ANY))
  461. {
  462. if (!className(obj, /*flags,*/ char_class, end_class, *search++))
  463. return false;
  464. }
  465. else
  466. {
  467. ++match;
  468. for (;;)
  469. {
  470. if (aux(obj, flags, search, end_search, match, end_match))
  471. return true;
  472. if (search < end_search)
  473. {
  474. if (!className(obj, /*flags,*/ char_class, end_class, *search++))
  475. return false;
  476. }
  477. else
  478. return false;
  479. }
  480. }
  481. }
  482. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_FLAG_SET))
  483. {
  484. c = *match++;
  485. if (c == *(CharType*) obj->getCanonicalChar(TextType::CHAR_LOWER_S) ||
  486. c == *(CharType*) obj->getCanonicalChar(TextType::CHAR_UPPER_S))
  487. {
  488. flags &= ~SLEUTH_INSENSITIVE;
  489. }
  490. }
  491. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_FLAG_CLEAR))
  492. {
  493. c = *match++;
  494. if (c == *(CharType*) obj->getCanonicalChar(TextType::CHAR_LOWER_S) ||
  495. c == *(CharType*) obj->getCanonicalChar(TextType::CHAR_UPPER_S))
  496. {
  497. flags |= SLEUTH_INSENSITIVE;
  498. }
  499. }
  500. }
  501. if (search < end_search)
  502. return false;
  503. return true;
  504. }
  505. // See if a character is a member of a class.
  506. // Japanese version operates on short-based buffer,
  507. // instead of SCHAR-based.
  508. static bool className(Jrd::TextType* obj, // USHORT flags,
  509. const CharType* char_class, const CharType* const end_class, CharType character)
  510. {
  511. fb_assert(char_class != NULL);
  512. fb_assert(end_class != NULL);
  513. fb_assert(char_class <= end_class);
  514. fb_assert(obj->getCanonicalWidth() == sizeof(CharType));
  515. bool result = true;
  516. if (*char_class == *(CharType*) obj->getCanonicalChar(CHAR_GDML_NOT))
  517. {
  518. ++char_class;
  519. result = false;
  520. }
  521. while (char_class < end_class)
  522. {
  523. const CharType c = *char_class++;
  524. if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE))
  525. {
  526. if (*char_class++ == character)
  527. return true;
  528. }
  529. else if (*char_class == *(CharType*) obj->getCanonicalChar(CHAR_GDML_RANGE))
  530. {
  531. char_class += 2;
  532. if (character >= c && character <= char_class[-1])
  533. return result;
  534. }
  535. else if (character == c)
  536. return result;
  537. }
  538. return !result;
  539. }
  540. // Merge the matching pattern and control strings to give a cannonical
  541. // matching pattern. Return the length of the combined string.
  542. //
  543. // What this routine does is to take the language template, strip off
  544. // the prefix and put it in the output string, then parse the definitions
  545. // into an array of character pointers. The index array is the defined
  546. // character. The routine then takes the actual match pattern and uses
  547. // the characters in it to index into the definitions to produce an equivalent
  548. // pattern in the cannonical language.
  549. //
  550. // The silly loop setting *v++ to zero initializes the array up to the
  551. // highest character defined (also max_op). Believe it or not, that part
  552. // is not a bug.
  553. static ULONG actualMerge(/*MemoryPool& pool,*/ Jrd::TextType* obj,
  554. const CharType* match, SLONG match_bytes,
  555. const CharType* control, SLONG control_bytes,
  556. CharType* combined) //, SLONG combined_bytes)
  557. {
  558. fb_assert(match != NULL);
  559. fb_assert(control != NULL);
  560. fb_assert(combined != NULL);
  561. fb_assert((match_bytes % sizeof(CharType)) == 0);
  562. fb_assert((control_bytes % sizeof(CharType)) == 0);
  563. fb_assert(obj->getCanonicalWidth() == sizeof(CharType));
  564. const CharType* const end_match = match + (match_bytes / sizeof(CharType));
  565. const CharType* const end_control = control + (control_bytes / sizeof(CharType));
  566. CharType max_op = 0;
  567. CharType* comb = combined;
  568. CharType* vector[256];
  569. CharType** v = vector;
  570. CharType temp[256];
  571. CharType* t = temp;
  572. // Parse control string into substitution strings and initializing string
  573. while (control < end_control)
  574. {
  575. CharType c = *control++;
  576. if (*control == *(CharType*) obj->getCanonicalChar(CHAR_GDML_SUBSTITUTE))
  577. {
  578. /* Note: don't allow substitution characters larger than vector */
  579. CharType** const end_vector = vector + (((int) c < FB_NELEM(vector)) ? c : 0);
  580. while (v <= end_vector)
  581. *v++ = 0;
  582. *end_vector = t;
  583. ++control;
  584. while (control < end_control)
  585. {
  586. c = *control++;
  587. if ((t > temp && t[-1] == *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE)) ||
  588. ((c != *(CharType*) obj->getCanonicalChar(CHAR_GDML_COMMA)) &&
  589. (c != *(CharType*) obj->getCanonicalChar(CHAR_GDML_RPAREN))))
  590. {
  591. *t++ = c;
  592. }
  593. else
  594. break;
  595. }
  596. *t++ = 0;
  597. }
  598. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE) && control < end_control)
  599. *comb++ = *control++;
  600. else if (c == *(CharType*) obj->getCanonicalChar(CHAR_GDML_RPAREN))
  601. break;
  602. else if (c != *(CharType*) obj->getCanonicalChar(CHAR_GDML_LPAREN))
  603. *comb++ = c;
  604. }
  605. max_op = v - vector;
  606. // Interpret matching string, substituting where appropriate
  607. while (match < end_match)
  608. {
  609. const CharType c = *match++;
  610. // if we've got a defined character, slurp the definition
  611. CharType* p;
  612. if (c <= max_op && (p = vector[c]))
  613. {
  614. while (*p)
  615. *comb++ = *p++;
  616. // if we've got the definition of a quote character,
  617. // slurp the next character too
  618. if (comb > combined &&
  619. comb[-1] == *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE) && *match)
  620. {
  621. *comb++ = *match++;
  622. }
  623. }
  624. else
  625. {
  626. // at this point we've got a non-match, but as it might be one of ours, quote it
  627. if (size_t(c) < FB_NELEM(SLEUTH_SPECIAL) && SLEUTH_SPECIAL[c] &&
  628. comb > combined && comb[-1] != *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE))
  629. {
  630. *comb++ = *(CharType*) obj->getCanonicalChar(CHAR_GDML_QUOTE);
  631. }
  632. *comb++ = c;
  633. }
  634. }
  635. // Put in trailing stuff
  636. while (control < end_control)
  637. *comb++ = *control++;
  638. // YYY - need to add code watching for overflow of combined
  639. return (comb - combined) * sizeof(CharType);
  640. }
  641. private:
  642. static const int SLEUTH_INSENSITIVE;
  643. };
  644. template <typename StrConverter, typename CharType>
  645. const int SleuthMatcher<StrConverter, CharType>::SLEUTH_INSENSITIVE = 1;
  646. template <typename pStartsMatcher, typename pContainsMatcher, typename pLikeMatcher,
  647. typename pSimilarToMatcher, typename pMatchesMatcher, typename pSleuthMatcher>
  648. class CollationImpl : public Collation
  649. {
  650. public:
  651. CollationImpl(TTYPE_ID a_type, texttype* a_tt, CharSet* a_cs)
  652. : Collation(a_type, a_tt, a_cs)
  653. {
  654. }
  655. virtual bool matches(MemoryPool& pool, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d)
  656. {
  657. return pMatchesMatcher::evaluate(pool, this, a, b, c, d);
  658. }
  659. virtual bool sleuthCheck(MemoryPool& pool, USHORT a, const UCHAR* b,
  660. SLONG c, const UCHAR* d, SLONG e)
  661. {
  662. return pSleuthMatcher::check(pool, this, a, b, c, d, e);
  663. }
  664. virtual ULONG sleuthMerge(MemoryPool& pool, const UCHAR* a, SLONG b,
  665. const UCHAR* c, SLONG d, UCHAR* e) //, SLONG f)
  666. {
  667. return pSleuthMatcher::merge(pool, this, a, b, c, d, e); //, f);
  668. }
  669. virtual bool starts(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
  670. {
  671. return pStartsMatcher::evaluate(pool, this, s, sl, p, pl);
  672. }
  673. virtual PatternMatcher* createStartsMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl)
  674. {
  675. return pStartsMatcher::create(pool, this, p, pl);
  676. }
  677. virtual bool like(MemoryPool& pool, const UCHAR* s, SLONG sl,
  678. const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
  679. {
  680. return pLikeMatcher::evaluate(pool, this, s, sl, p, pl, escape, escape_length,
  681. getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(),
  682. getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
  683. }
  684. virtual PatternMatcher* createLikeMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
  685. const UCHAR* escape, SLONG escape_length)
  686. {
  687. return pLikeMatcher::create(pool, this, p, pl, escape, escape_length,
  688. getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(),
  689. getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
  690. }
  691. virtual bool similarTo(MemoryPool& pool, const UCHAR* s, SLONG sl,
  692. const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
  693. {
  694. return pSimilarToMatcher::evaluate(pool, this, s, sl, p, pl, escape, escape_length);
  695. }
  696. virtual PatternMatcher* createSimilarToMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl,
  697. const UCHAR* escape, SLONG escape_length)
  698. {
  699. return pSimilarToMatcher::create(pool, this, p, pl, escape, escape_length);
  700. }
  701. virtual bool contains(MemoryPool& pool, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
  702. {
  703. return pContainsMatcher::evaluate(pool, this, s, sl, p, pl);
  704. }
  705. virtual PatternMatcher* createContainsMatcher(MemoryPool& pool, const UCHAR* p, SLONG pl)
  706. {
  707. return pContainsMatcher::create(pool, this, p, pl);
  708. }
  709. };
  710. using namespace Firebird;
  711. typedef StartsMatcher<NullStrConverter, UCHAR> StartsMatcherUCharDirect;
  712. typedef StartsMatcher<CanonicalConverter<NullStrConverter>, UCHAR> StartsMatcherUCharCanonical;
  713. typedef ContainsMatcher<UpcaseConverter<NullStrConverter>, UCHAR> ContainsMatcherUCharDirect;
  714. //typedef ContainsMatcher<UpcaseConverter<NullStrConverter>, USHORT> ContainsMatcherUShortDirect;
  715. //typedef ContainsMatcher<UpcaseConverter<NullStrConverter>, ULONG> ContainsMatcherULongDirect;
  716. typedef MatchesMatcher<CanonicalConverter<NullStrConverter>, UCHAR> MatchesMatcherUCharCanonical;
  717. typedef SleuthMatcher<CanonicalConverter<NullStrConverter>, UCHAR> SleuthMatcherUCharCanonical;
  718. typedef LikeMatcher<CanonicalConverter<NullStrConverter>, UCHAR> LikeMatcherUCharCanonical;
  719. typedef SimilarToMatcher<CanonicalConverter<NullStrConverter>, UCHAR> SimilarToMatcherUCharCanonical;
  720. typedef ContainsMatcher<CanonicalConverter<UpcaseConverter<NullStrConverter> >, UCHAR> ContainsMatcherUCharCanonical;
  721. typedef MatchesMatcher<CanonicalConverter<NullStrConverter>, USHORT> MatchesMatcherUShortCanonical;
  722. typedef SleuthMatcher<CanonicalConverter<NullStrConverter>, USHORT> SleuthMatcherUShortCanonical;
  723. typedef LikeMatcher<CanonicalConverter<NullStrConverter>, USHORT> LikeMatcherUShortCanonical;
  724. typedef SimilarToMatcher<CanonicalConverter<NullStrConverter>, USHORT> SimilarToMatcherUShortCanonical;
  725. typedef ContainsMatcher<CanonicalConverter<UpcaseConverter<NullStrConverter> >, USHORT> ContainsMatcherUShortCanonical;
  726. typedef MatchesMatcher<CanonicalConverter<NullStrConverter>, ULONG> MatchesMatcherULongCanonical;
  727. typedef SleuthMatcher<CanonicalConverter<NullStrConverter>, ULONG> SleuthMatcherULongCanonical;
  728. typedef LikeMatcher<CanonicalConverter<NullStrConverter>, ULONG> LikeMatcherULongCanonical;
  729. typedef SimilarToMatcher<CanonicalConverter<NullStrConverter>, ULONG> SimilarToMatcherULongCanonical;
  730. typedef ContainsMatcher<CanonicalConverter<UpcaseConverter<NullStrConverter> >, ULONG> ContainsMatcherULongCanonical;
  731. } // namespace
  732. //-------------
  733. namespace Jrd {
  734. Collation* Collation::createInstance(MemoryPool& pool, TTYPE_ID id, texttype* tt, CharSet* cs)
  735. {
  736. fb_assert(tt->texttype_canonical_width == 1 ||
  737. tt->texttype_canonical_width == 2 ||
  738. tt->texttype_canonical_width == 4);
  739. switch (tt->texttype_canonical_width)
  740. {
  741. case 1:
  742. if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
  743. {
  744. return FB_NEW(pool) CollationImpl<StartsMatcherUCharDirect, ContainsMatcherUCharDirect,
  745. LikeMatcherUCharCanonical, SimilarToMatcherUCharCanonical,
  746. MatchesMatcherUCharCanonical, SleuthMatcherUCharCanonical>(id, tt, cs);
  747. }
  748. return FB_NEW(pool) CollationImpl<StartsMatcherUCharCanonical, ContainsMatcherUCharCanonical,
  749. LikeMatcherUCharCanonical, SimilarToMatcherUCharCanonical,
  750. MatchesMatcherUCharCanonical, SleuthMatcherUCharCanonical>(id, tt, cs);
  751. case 2:
  752. if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
  753. {
  754. return FB_NEW(pool) CollationImpl<StartsMatcherUCharDirect, ContainsMatcherUCharDirect,
  755. LikeMatcherUShortCanonical, SimilarToMatcherUShortCanonical,
  756. MatchesMatcherUShortCanonical, SleuthMatcherUShortCanonical>(id, tt, cs);
  757. }
  758. return FB_NEW(pool) CollationImpl<StartsMatcherUCharCanonical, ContainsMatcherUShortCanonical,
  759. LikeMatcherUShortCanonical, SimilarToMatcherUShortCanonical,
  760. MatchesMatcherUShortCanonical, SleuthMatcherUShortCanonical>(id, tt, cs);
  761. case 4:
  762. if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
  763. {
  764. return FB_NEW(pool) CollationImpl<StartsMatcherUCharDirect, ContainsMatcherUCharDirect,
  765. LikeMatcherULongCanonical, SimilarToMatcherULongCanonical,
  766. MatchesMatcherULongCanonical, SleuthMatcherULongCanonical>(id, tt, cs);
  767. }
  768. return FB_NEW(pool) CollationImpl<StartsMatcherUCharCanonical, ContainsMatcherULongCanonical,
  769. LikeMatcherULongCanonical, SimilarToMatcherULongCanonical,
  770. MatchesMatcherULongCanonical, SleuthMatcherULongCanonical>(id, tt, cs);
  771. }
  772. fb_assert(false);
  773. return NULL; // compiler silencer
  774. }
  775. void Collation::release()
  776. {
  777. fb_assert(useCount >= 0);
  778. if (existenceLock)
  779. {
  780. // Establish a thread context
  781. ThreadContextHolder tdbb;
  782. tdbb->setDatabase(existenceLock->lck_dbb);
  783. tdbb->setAttachment(existenceLock->lck_attachment);
  784. Jrd::ContextPoolHolder context(tdbb, 0);
  785. LCK_release(tdbb, existenceLock);
  786. useCount = 0;
  787. }
  788. }
  789. void Collation::destroy()
  790. {
  791. fb_assert(useCount == 0);
  792. if (tt->texttype_fn_destroy)
  793. tt->texttype_fn_destroy(tt);
  794. delete tt;
  795. release();
  796. delete existenceLock;
  797. existenceLock = NULL;
  798. }
  799. void Collation::incUseCount(thread_db* tdbb)
  800. {
  801. fb_assert(!obsolete);
  802. fb_assert(useCount >= 0);
  803. ++useCount;
  804. }
  805. void Collation::decUseCount(thread_db* tdbb)
  806. {
  807. fb_assert(useCount >= 0);
  808. if (useCount > 0)
  809. {
  810. useCount--;
  811. if (!useCount)
  812. {
  813. fb_assert(existenceLock);
  814. if (obsolete)
  815. LCK_re_post(tdbb, existenceLock);
  816. }
  817. }
  818. }
  819. } // namespace Jrd