PageRenderTime 39ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/trunk/harbour/src/3rd/pcre/pcreget.c

#
C | 587 lines | 291 code | 74 blank | 222 comment | 68 complexity | 7b7c8c636f8abb5484feb23538851117 MD5 | raw file
Possible License(s): AGPL-1.0, BSD-3-Clause, CC-BY-SA-3.0, LGPL-3.0, GPL-2.0, LGPL-2.0, LGPL-2.1
  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Written by Philip Hazel
  7. Copyright (c) 1997-2012 University of Cambridge
  8. -----------------------------------------------------------------------------
  9. Redistribution and use in source and binary forms, with or without
  10. modification, are permitted provided that the following conditions are met:
  11. * Redistributions of source code must retain the above copyright notice,
  12. this list of conditions and the following disclaimer.
  13. * Redistributions in binary form must reproduce the above copyright
  14. notice, this list of conditions and the following disclaimer in the
  15. documentation and/or other materials provided with the distribution.
  16. * Neither the name of the University of Cambridge nor the names of its
  17. contributors may be used to endorse or promote products derived from
  18. this software without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. POSSIBILITY OF SUCH DAMAGE.
  30. -----------------------------------------------------------------------------
  31. */
  32. /* This module contains some convenience functions for extracting substrings
  33. from the subject string after a regex match has succeeded. The original idea
  34. for these functions came from Scott Wimer. */
  35. #ifdef HAVE_CONFIG_H
  36. #include "config.h"
  37. #endif
  38. #include "pcreinal.h"
  39. /*************************************************
  40. * Find number for named string *
  41. *************************************************/
  42. /* This function is used by the get_first_set() function below, as well
  43. as being generally available. It assumes that names are unique.
  44. Arguments:
  45. code the compiled regex
  46. stringname the name whose number is required
  47. Returns: the number of the named parentheses, or a negative number
  48. (PCRE_ERROR_NOSUBSTRING) if not found
  49. */
  50. #ifdef COMPILE_PCRE8
  51. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  52. pcre_get_stringnumber(const pcre *code, const char *stringname)
  53. #else
  54. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  55. pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
  56. #endif
  57. {
  58. int rc;
  59. int entrysize;
  60. int top, bot;
  61. pcre_uchar *nametable;
  62. #ifdef COMPILE_PCRE8
  63. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  64. return rc;
  65. if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
  66. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  67. return rc;
  68. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  69. return rc;
  70. #endif
  71. #ifdef COMPILE_PCRE16
  72. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  73. return rc;
  74. if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
  75. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  76. return rc;
  77. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  78. return rc;
  79. #endif
  80. bot = 0;
  81. while (top > bot)
  82. {
  83. int mid = (top + bot) / 2;
  84. pcre_uchar *entry = nametable + entrysize*mid;
  85. int c = STRCMP_UC_UC((pcre_uchar *)stringname,
  86. (pcre_uchar *)(entry + IMM2_SIZE));
  87. if (c == 0) return GET2(entry, 0);
  88. if (c > 0) bot = mid + 1; else top = mid;
  89. }
  90. return PCRE_ERROR_NOSUBSTRING;
  91. }
  92. /*************************************************
  93. * Find (multiple) entries for named string *
  94. *************************************************/
  95. /* This is used by the get_first_set() function below, as well as being
  96. generally available. It is used when duplicated names are permitted.
  97. Arguments:
  98. code the compiled regex
  99. stringname the name whose entries required
  100. firstptr where to put the pointer to the first entry
  101. lastptr where to put the pointer to the last entry
  102. Returns: the length of each entry, or a negative number
  103. (PCRE_ERROR_NOSUBSTRING) if not found
  104. */
  105. #ifdef COMPILE_PCRE8
  106. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  107. pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  108. char **firstptr, char **lastptr)
  109. #else
  110. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  111. pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
  112. PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
  113. #endif
  114. {
  115. int rc;
  116. int entrysize;
  117. int top, bot;
  118. pcre_uchar *nametable, *lastentry;
  119. #ifdef COMPILE_PCRE8
  120. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  121. return rc;
  122. if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
  123. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  124. return rc;
  125. if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  126. return rc;
  127. #endif
  128. #ifdef COMPILE_PCRE16
  129. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  130. return rc;
  131. if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
  132. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  133. return rc;
  134. if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  135. return rc;
  136. #endif
  137. lastentry = nametable + entrysize * (top - 1);
  138. bot = 0;
  139. while (top > bot)
  140. {
  141. int mid = (top + bot) / 2;
  142. pcre_uchar *entry = nametable + entrysize*mid;
  143. int c = STRCMP_UC_UC((pcre_uchar *)stringname,
  144. (pcre_uchar *)(entry + IMM2_SIZE));
  145. if (c == 0)
  146. {
  147. pcre_uchar *first = entry;
  148. pcre_uchar *last = entry;
  149. while (first > nametable)
  150. {
  151. if (STRCMP_UC_UC((pcre_uchar *)stringname,
  152. (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
  153. first -= entrysize;
  154. }
  155. while (last < lastentry)
  156. {
  157. if (STRCMP_UC_UC((pcre_uchar *)stringname,
  158. (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
  159. last += entrysize;
  160. }
  161. #ifdef COMPILE_PCRE8
  162. *firstptr = (char *)first;
  163. *lastptr = (char *)last;
  164. #else
  165. *firstptr = (PCRE_UCHAR16 *)first;
  166. *lastptr = (PCRE_UCHAR16 *)last;
  167. #endif
  168. return entrysize;
  169. }
  170. if (c > 0) bot = mid + 1; else top = mid;
  171. }
  172. return PCRE_ERROR_NOSUBSTRING;
  173. }
  174. /*************************************************
  175. * Find first set of multiple named strings *
  176. *************************************************/
  177. /* This function allows for duplicate names in the table of named substrings.
  178. It returns the number of the first one that was set in a pattern match.
  179. Arguments:
  180. code the compiled regex
  181. stringname the name of the capturing substring
  182. ovector the vector of matched substrings
  183. Returns: the number of the first that is set,
  184. or the number of the last one if none are set,
  185. or a negative number on error
  186. */
  187. #ifdef COMPILE_PCRE8
  188. static int
  189. get_first_set(const pcre *code, const char *stringname, int *ovector)
  190. #else
  191. static int
  192. get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
  193. #endif
  194. {
  195. const REAL_PCRE *re = (const REAL_PCRE *)code;
  196. int entrysize;
  197. pcre_uchar *entry;
  198. #ifdef COMPILE_PCRE8
  199. char *first, *last;
  200. #else
  201. PCRE_UCHAR16 *first, *last;
  202. #endif
  203. #ifdef COMPILE_PCRE8
  204. if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
  205. return pcre_get_stringnumber(code, stringname);
  206. entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
  207. #else
  208. if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
  209. return pcre16_get_stringnumber(code, stringname);
  210. entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
  211. #endif
  212. if (entrysize <= 0) return entrysize;
  213. for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
  214. {
  215. int n = GET2(entry, 0);
  216. if (ovector[n*2] >= 0) return n;
  217. }
  218. return GET2(entry, 0);
  219. }
  220. /*************************************************
  221. * Copy captured string to given buffer *
  222. *************************************************/
  223. /* This function copies a single captured substring into a given buffer.
  224. Note that we use memcpy() rather than strncpy() in case there are binary zeros
  225. in the string.
  226. Arguments:
  227. subject the subject string that was matched
  228. ovector pointer to the offsets table
  229. stringcount the number of substrings that were captured
  230. (i.e. the yield of the pcre_exec call, unless
  231. that was zero, in which case it should be 1/3
  232. of the offset table size)
  233. stringnumber the number of the required substring
  234. buffer where to put the substring
  235. size the size of the buffer
  236. Returns: if successful:
  237. the length of the copied string, not including the zero
  238. that is put on the end; can be zero
  239. if not successful:
  240. PCRE_ERROR_NOMEMORY (-6) buffer too small
  241. PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  242. */
  243. #ifdef COMPILE_PCRE8
  244. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  245. pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  246. int stringnumber, char *buffer, int size)
  247. #else
  248. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  249. pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
  250. int stringnumber, PCRE_UCHAR16 *buffer, int size)
  251. #endif
  252. {
  253. int yield;
  254. if (stringnumber < 0 || stringnumber >= stringcount)
  255. return PCRE_ERROR_NOSUBSTRING;
  256. stringnumber *= 2;
  257. yield = ovector[stringnumber+1] - ovector[stringnumber];
  258. if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
  259. memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
  260. buffer[yield] = 0;
  261. return yield;
  262. }
  263. /*************************************************
  264. * Copy named captured string to given buffer *
  265. *************************************************/
  266. /* This function copies a single captured substring into a given buffer,
  267. identifying it by name. If the regex permits duplicate names, the first
  268. substring that is set is chosen.
  269. Arguments:
  270. code the compiled regex
  271. subject the subject string that was matched
  272. ovector pointer to the offsets table
  273. stringcount the number of substrings that were captured
  274. (i.e. the yield of the pcre_exec call, unless
  275. that was zero, in which case it should be 1/3
  276. of the offset table size)
  277. stringname the name of the required substring
  278. buffer where to put the substring
  279. size the size of the buffer
  280. Returns: if successful:
  281. the length of the copied string, not including the zero
  282. that is put on the end; can be zero
  283. if not successful:
  284. PCRE_ERROR_NOMEMORY (-6) buffer too small
  285. PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  286. */
  287. #ifdef COMPILE_PCRE8
  288. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  289. pcre_copy_named_substring(const pcre *code, const char *subject,
  290. int *ovector, int stringcount, const char *stringname,
  291. char *buffer, int size)
  292. #else
  293. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  294. pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
  295. int *ovector, int stringcount, PCRE_SPTR16 stringname,
  296. PCRE_UCHAR16 *buffer, int size)
  297. #endif
  298. {
  299. int n = get_first_set(code, stringname, ovector);
  300. if (n <= 0) return n;
  301. #ifdef COMPILE_PCRE8
  302. return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
  303. #else
  304. return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
  305. #endif
  306. }
  307. /*************************************************
  308. * Copy all captured strings to new store *
  309. *************************************************/
  310. /* This function gets one chunk of store and builds a list of pointers and all
  311. of the captured substrings in it. A NULL pointer is put on the end of the list.
  312. Arguments:
  313. subject the subject string that was matched
  314. ovector pointer to the offsets table
  315. stringcount the number of substrings that were captured
  316. (i.e. the yield of the pcre_exec call, unless
  317. that was zero, in which case it should be 1/3
  318. of the offset table size)
  319. listptr set to point to the list of pointers
  320. Returns: if successful: 0
  321. if not successful:
  322. PCRE_ERROR_NOMEMORY (-6) failed to get store
  323. */
  324. #ifdef COMPILE_PCRE8
  325. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  326. pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  327. const char ***listptr)
  328. #else
  329. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  330. pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
  331. PCRE_SPTR16 **listptr)
  332. #endif
  333. {
  334. int i;
  335. int size = sizeof(pcre_uchar *);
  336. int double_count = stringcount * 2;
  337. pcre_uchar **stringlist;
  338. pcre_uchar *p;
  339. for (i = 0; i < double_count; i += 2)
  340. size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
  341. stringlist = (pcre_uchar **)(PUBL(malloc))(size);
  342. if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
  343. #ifdef COMPILE_PCRE8
  344. *listptr = (const char **)stringlist;
  345. #else
  346. *listptr = (PCRE_SPTR16 *)stringlist;
  347. #endif
  348. p = (pcre_uchar *)(stringlist + stringcount + 1);
  349. for (i = 0; i < double_count; i += 2)
  350. {
  351. int len = ovector[i+1] - ovector[i];
  352. memcpy(p, subject + ovector[i], IN_UCHARS(len));
  353. *stringlist++ = p;
  354. p += len;
  355. *p++ = 0;
  356. }
  357. *stringlist = NULL;
  358. return 0;
  359. }
  360. /*************************************************
  361. * Free store obtained by get_substring_list *
  362. *************************************************/
  363. /* This function exists for the benefit of people calling PCRE from non-C
  364. programs that can call its functions, but not free() or (PUBL(free))()
  365. directly.
  366. Argument: the result of a previous pcre_get_substring_list()
  367. Returns: nothing
  368. */
  369. #ifdef COMPILE_PCRE8
  370. PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  371. pcre_free_substring_list(const char **pointer)
  372. #else
  373. PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  374. pcre16_free_substring_list(PCRE_SPTR16 *pointer)
  375. #endif
  376. {
  377. (PUBL(free))((void *)pointer);
  378. }
  379. /*************************************************
  380. * Copy captured string to new store *
  381. *************************************************/
  382. /* This function copies a single captured substring into a piece of new
  383. store
  384. Arguments:
  385. subject the subject string that was matched
  386. ovector pointer to the offsets table
  387. stringcount the number of substrings that were captured
  388. (i.e. the yield of the pcre_exec call, unless
  389. that was zero, in which case it should be 1/3
  390. of the offset table size)
  391. stringnumber the number of the required substring
  392. stringptr where to put a pointer to the substring
  393. Returns: if successful:
  394. the length of the string, not including the zero that
  395. is put on the end; can be zero
  396. if not successful:
  397. PCRE_ERROR_NOMEMORY (-6) failed to get store
  398. PCRE_ERROR_NOSUBSTRING (-7) substring not present
  399. */
  400. #ifdef COMPILE_PCRE8
  401. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  402. pcre_get_substring(const char *subject, int *ovector, int stringcount,
  403. int stringnumber, const char **stringptr)
  404. #else
  405. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  406. pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
  407. int stringnumber, PCRE_SPTR16 *stringptr)
  408. #endif
  409. {
  410. int yield;
  411. pcre_uchar *substring;
  412. if (stringnumber < 0 || stringnumber >= stringcount)
  413. return PCRE_ERROR_NOSUBSTRING;
  414. stringnumber *= 2;
  415. yield = ovector[stringnumber+1] - ovector[stringnumber];
  416. substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
  417. if (substring == NULL) return PCRE_ERROR_NOMEMORY;
  418. memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
  419. substring[yield] = 0;
  420. #ifdef COMPILE_PCRE8
  421. *stringptr = (const char *)substring;
  422. #else
  423. *stringptr = (PCRE_SPTR16)substring;
  424. #endif
  425. return yield;
  426. }
  427. /*************************************************
  428. * Copy named captured string to new store *
  429. *************************************************/
  430. /* This function copies a single captured substring, identified by name, into
  431. new store. If the regex permits duplicate names, the first substring that is
  432. set is chosen.
  433. Arguments:
  434. code the compiled regex
  435. subject the subject string that was matched
  436. ovector pointer to the offsets table
  437. stringcount the number of substrings that were captured
  438. (i.e. the yield of the pcre_exec call, unless
  439. that was zero, in which case it should be 1/3
  440. of the offset table size)
  441. stringname the name of the required substring
  442. stringptr where to put the pointer
  443. Returns: if successful:
  444. the length of the copied string, not including the zero
  445. that is put on the end; can be zero
  446. if not successful:
  447. PCRE_ERROR_NOMEMORY (-6) couldn't get memory
  448. PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
  449. */
  450. #ifdef COMPILE_PCRE8
  451. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  452. pcre_get_named_substring(const pcre *code, const char *subject,
  453. int *ovector, int stringcount, const char *stringname,
  454. const char **stringptr)
  455. #else
  456. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  457. pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
  458. int *ovector, int stringcount, PCRE_SPTR16 stringname,
  459. PCRE_SPTR16 *stringptr)
  460. #endif
  461. {
  462. int n = get_first_set(code, stringname, ovector);
  463. if (n <= 0) return n;
  464. #ifdef COMPILE_PCRE8
  465. return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
  466. #else
  467. return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
  468. #endif
  469. }
  470. /*************************************************
  471. * Free store obtained by get_substring *
  472. *************************************************/
  473. /* This function exists for the benefit of people calling PCRE from non-C
  474. programs that can call its functions, but not free() or (PUBL(free))()
  475. directly.
  476. Argument: the result of a previous pcre_get_substring()
  477. Returns: nothing
  478. */
  479. #ifdef COMPILE_PCRE8
  480. PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  481. pcre_free_substring(const char *pointer)
  482. #else
  483. PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
  484. pcre16_free_substring(PCRE_SPTR16 pointer)
  485. #endif
  486. {
  487. (PUBL(free))((void *)pointer);
  488. }
  489. /* End of pcre_get.c */