PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/testsuite/tst-regex.c

#
C | 271 lines | 192 code | 49 blank | 30 comment | 50 complexity | 5e1015087235ae7e1369a1903a73736f MD5 | raw file
Possible License(s): GPL-3.0, CC-BY-SA-4.0
  1. /* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
  2. This file is part of the GNU C Library.
  3. The GNU C Library is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU Lesser General Public
  5. License as published by the Free Software Foundation; either
  6. version 2.1 of the License, or (at your option) any later version.
  7. The GNU C Library is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public
  12. License along with the GNU C Library; if not, write to the Free
  13. Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  14. 02110-1301 USA. */
  15. #ifdef HAVE_CONFIG_H
  16. #include "config.h"
  17. #endif
  18. #include <alloca.h>
  19. #include <assert.h>
  20. #include <errno.h>
  21. #include <fcntl.h>
  22. #include <iconv.h>
  23. #include <locale.h>
  24. #ifdef HAVE_MCHECK_H
  25. #include <mcheck.h>
  26. #endif
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  30. #include <time.h>
  31. #include <unistd.h>
  32. #include <sys/stat.h>
  33. #include <sys/types.h>
  34. #include <regex.h>
  /* Conversion descriptor used by main to turn Latin-1 into UTF-8.  */
  static iconv_t cd;

  /* Contents of the input file as read from disk, and their length in
     bytes.  */
  static char *mem;
  static size_t memlen;

  /* UTF-8 conversion of MEM produced by main, and its length in
     bytes.  */
  static char *umem;
  static size_t umemlen;

  /* Helpers defined after main.  */
  static int test_expr (const char *expr, int expected, int expectedicase);
  static int run_test (const char *expr, const char *mem, size_t memlen,
                       int icase, int expected);
  static int run_test_backwards (const char *expr, const char *mem,
                                 size_t memlen, int icase, int expected);
  45. int
  46. main (int argc, char *argv[])
  47. {
  48. int fd;
  49. struct stat st;
  50. int result;
  51. char *inmem;
  52. char *outmem;
  53. size_t inlen;
  54. size_t outlen;
  55. #ifdef HAVE_MCHECK_H
  56. mtrace ();
  57. #endif
  58. if (!argv[1])
  59. exit (1);
  60. /* Make the content of the file available in memory. */
  61. fd = open (argv[1], O_RDONLY);
  62. if (fd == -1)
  63. error (EXIT_FAILURE, errno, "cannot open %s", basename (argv[1]));
  64. if (fstat (fd, &st) != 0)
  65. error (EXIT_FAILURE, errno, "cannot stat %s", basename (argv[1]));
  66. memlen = st.st_size;
  67. mem = (char *) malloc (memlen + 1);
  68. if (mem == NULL)
  69. error (EXIT_FAILURE, errno, "while allocating buffer");
  70. if ((size_t) read (fd, mem, memlen) != memlen)
  71. error (EXIT_FAILURE, 0, "cannot read entire file");
  72. mem[memlen] = '\0';
  73. close (fd);
  74. /* We have to convert a few things from Latin-1 to UTF-8. */
  75. cd = iconv_open ("UTF-8", "ISO-8859-1");
  76. if (cd == (iconv_t) -1)
  77. error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
  78. /* For the second test we have to convert the file content to UTF-8.
  79. Since the text is mostly ASCII it should be enough to allocate
  80. twice as much memory for the UTF-8 text than for the Latin-1
  81. text. */
  82. umem = (char *) calloc (2, memlen);
  83. if (umem == NULL)
  84. error (EXIT_FAILURE, errno, "while allocating buffer");
  85. inmem = mem;
  86. inlen = memlen;
  87. outmem = umem;
  88. outlen = 2 * memlen - 1;
  89. iconv (cd, &inmem, &inlen, &outmem, &outlen);
  90. umemlen = outmem - umem;
  91. if (inlen != 0)
  92. error (EXIT_FAILURE, errno, "cannot convert buffer");
  93. #ifdef DEBUG
  94. re_set_syntax (RE_DEBUG);
  95. #endif
  96. /* Run the actual tests. All tests are run in a single-byte and a
  97. multi-byte locale. */
  98. result = test_expr ("[äáŕâéčęíěîńöóňôüúůű]", 2, 2);
  99. result |= test_expr ("G.ran", 2, 3);
  100. result |= test_expr ("G.\\{1\\}ran", 2, 3);
  101. result |= test_expr ("G.*ran", 3, 44);
  102. result |= test_expr ("[äáŕâ]", 0, 0);
  103. result |= test_expr ("Uddeborg", 2, 2);
  104. result |= test_expr (".Uddeborg", 2, 2);
  105. /* Free the resources. */
  106. free (umem);
  107. iconv_close (cd);
  108. free (mem);
  109. return result;
  110. }
  111. static int
  112. test_expr (const char *expr, int expected, int expectedicase)
  113. {
  114. int result;
  115. printf ("\nTest \"%s\" with 8-bit locale\n", expr);
  116. result = run_test (expr, mem, memlen, 0, expected);
  117. printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
  118. result |= run_test (expr, mem, memlen, 1, expectedicase);
  119. printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
  120. result |= run_test_backwards (expr, mem, memlen, 0, expected);
  121. printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
  122. expr);
  123. result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
  124. return result;
  125. }
  /* Search MEM (MEMLEN bytes, followed by a NUL) front to back with the
     POSIX regcomp/regexec interface.

     EXPR is compiled with REG_NEWLINE, plus REG_ICASE when ICASE is
     nonzero.  After each hit the complete line containing the start of
     the match is printed and the scan resumes right behind that line,
     so every line is counted at most once.

     Returns nonzero if the number of matches found does not match
     EXPECTED, zero otherwise.  Compilation or execution errors are
     reported through error (EXIT_FAILURE, ...).  */
  static int
  run_test (const char *expr, const char *mem, size_t memlen, int icase,
            int expected)
  {
    regex_t pattern;
    int cflags = REG_NEWLINE;
    int status;
    int matches = 0;
    size_t pos;

    if (icase)
      cflags |= REG_ICASE;

    status = regcomp (&pattern, expr, cflags);
    if (status != REG_NOERROR)
      {
        char msg[200];

        regerror (status, &pattern, msg, sizeof msg);
        error (EXIT_FAILURE, 0, "cannot compile expression: %s", msg);
      }

    /* regexec needs a NUL-terminated subject.  */
    assert (mem[memlen] == '\0');

    for (pos = 0; pos < memlen; )
      {
        regmatch_t hit[1];
        const char *line_start;
        const char *line_end;

        status = regexec (&pattern, mem + pos, 1, hit, 0);
        if (status == REG_NOMATCH)
          break;
        if (status != REG_NOERROR)
          {
            char msg[200];

            regerror (status, &pattern, msg, sizeof msg);
            error (EXIT_FAILURE, 0, "cannot use expression: %s", msg);
          }

        assert (hit[0].rm_so >= 0);

        /* Widen the start of the match to the whole line around it.  */
        line_start = line_end = mem + pos + hit[0].rm_so;
        while (line_start > mem && line_start[-1] != '\n')
          --line_start;
        while (*line_end != '\0' && *line_end != '\n')
          ++line_end;

        ++matches;
        printf ("match %d: \"%.*s\"\n", matches,
                (int) (line_end - line_start), line_start);

        /* Continue with the first byte after this line's terminator.  */
        pos = (line_end - mem) + 1;
      }

    regfree (&pattern);

    return matches != expected;
  }
  /* Search MEM (MEMLEN bytes, followed by a NUL) from the end towards
     the beginning with the GNU re_search interface.

     EXPR is compiled with POSIX basic syntax, with RE_DOT_NEWLINE
     cleared and RE_HAT_LISTS_NOT_NEWLINE set; RE_ICASE is added when
     ICASE is nonzero.  Note that re_set_syntax changes the process-wide
     syntax setting.  After each hit the complete line containing the
     start of the match is printed and the next search starts at the
     byte in front of that line, so every line is counted at most once.

     Returns nonzero if the number of matches found does not match
     EXPECTED, zero otherwise.  Failures while preparing or running the
     search are reported through error (EXIT_FAILURE, ...).  */
  static int
  run_test_backwards (const char *expr, const char *mem, size_t memlen,
                      int icase, int expected)
  {
    regex_t re;
    const char *err;
    size_t offset;
    int cnt;

    re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
                   | RE_HAT_LISTS_NOT_NEWLINE
                   | (icase ? RE_ICASE : 0));

    memset (&re, 0, sizeof (re));
    /* The fastmap is owned by RE from here on; regfree releases it.  */
    re.fastmap = malloc (256);
    if (re.fastmap == NULL)
      error (EXIT_FAILURE, errno, "cannot allocate fastmap");

    err = re_compile_pattern (expr, strlen (expr), &re);
    if (err != NULL)
      error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);

    if (re_compile_fastmap (&re))
      error (EXIT_FAILURE, 0, "couldn't compile fastmap");

    cnt = 0;
    offset = memlen;
    assert (mem[memlen] == '\0');

    for (;;)
      {
        regoff_t start;
        const char *sp;
        const char *ep;

        /* A negative range makes re_search walk backwards from OFFSET
           down to the start of the buffer.  Convert to the signed
           offset type first so the negation is done on a signed
           value.  */
        start = re_search (&re, mem, (regoff_t) memlen, (regoff_t) offset,
                           -(regoff_t) offset, NULL);
        if (start == -1)
          break;
        if (start == -2)
          error (EXIT_FAILURE, 0, "internal error in re_search");

        /* Widen the start of the match to the whole line around it.  */
        sp = mem + start;
        while (sp > mem && sp[-1] != '\n')
          --sp;
        ep = mem + start;
        while (*ep != '\0' && *ep != '\n')
          ++ep;

        printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);

        /* The first line has nothing in front of it.  Stop here rather
           than computing a position before the start of the buffer.  */
        if (sp == mem)
          break;

        /* Resume at the newline that terminates the previous line.  */
        offset = (size_t) (sp - mem) - 1;
      }

    regfree (&re);

    /* Return an error if the number of matches found does not match
       what we expect.  */
    return cnt != expected;
  }