PageRenderTime 24ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/contrib/netbsd-tests/lib/libc/regex/split.c

https://bitbucket.org/freebsd/freebsd-base
C | 344 lines | 264 code | 28 blank | 52 comment | 107 complexity | 15617519baa209349ced4e89a0231c0f MD5 | raw file
  1. /* $NetBSD: split.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $ */
  2. /*-
  3. * Copyright (c) 1993 The NetBSD Foundation, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  16. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  17. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  18. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  19. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. * POSSIBILITY OF SUCH DAMAGE.
  26. */
  #include <regex.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #include "test_regex.h"
  /*
   * split - divide a string into fields, like awk split()
   *
   * The string is split in place: separators that end a stored field are
   * overwritten with '\0' and fields[] receives pointers into string.
   *
   * string   NUL-terminated text to split; modified in place
   * fields   receives pointers to the fields; the list is not NULL-terminated
   * nfields  number of entries available in fields[]; if it is <= 0 the
   *          fields are only counted and fields[] is never touched
   * sep      ""   white space: runs of blanks/tabs separate fields, leading
   *               white space is skipped and trailing white space does not
   *               create an empty field
   *          "c"  single char: every occurrence separates two fields, so
   *               adjacent separators yield empty fields
   *          "ab" two or more chars: any run of those chars is one separator
   *
   * Returns the number of fields found, including those that did not fit.
   * On overflow the last slot of fields[] points at the unsplit remainder
   * of the string.
   */
  int
  split(char *string, char *fields[], int nfields, const char *sep)
  {
      char *p = string;
      char c;                 /* latest character */
      char sepc = *sep;
      char sepc2;
      int fn;
      char *scratch[1];       /* stand-in vector when the caller has no room */
      char **fp;
      const char *sepp;
      int trimtrail;

      /*
       * The one- and two-separator paths store the first field pointer
       * before looking at the remaining room, and count room down to
       * exactly zero.  With no room at all that would write past the
       * caller's vector, so count against a private one-slot vector
       * instead; the returned total does not depend on nfields.
       */
      if (nfields <= 0) {
          fields = scratch;
          nfields = 1;
      }
      fp = fields;

      /* white space */
      if (sepc == '\0') {
          while ((c = *p++) == ' ' || c == '\t')
              continue;
          p--;
          trimtrail = 1;
          sep = " \t";        /* note, code below knows this is 2 long */
          sepc = ' ';
      } else
          trimtrail = 0;
      sepc2 = sep[1];         /* now we can safely pick this up */

      /* catch empties */
      if (*p == '\0')
          return(0);

      /* single separator */
      if (sepc2 == '\0') {
          fn = nfields;       /* fn counts the free slots still left */
          for (;;) {
              *fp++ = p;
              fn--;
              if (fn == 0)
                  break;      /* last slot keeps the rest unsplit */
              while ((c = *p++) != sepc)
                  if (c == '\0')
                      return(nfields - fn);
              *(p-1) = '\0';
          }
          /* we have overflowed the fields vector -- just count them */
          fn = nfields;
          for (;;) {
              while ((c = *p++) != sepc)
                  if (c == '\0')
                      return(fn);
              fn++;
          }
          /* not reached */
      }

      /* two separators */
      if (sep[2] == '\0') {
          fn = nfields;
          for (;;) {
              *fp++ = p;
              fn--;
              while ((c = *p++) != sepc && c != sepc2)
                  if (c == '\0') {
                      /*
                       * In white-space mode an empty final field only
                       * exists because of trailing white space: do not
                       * count it.
                       */
                      if (trimtrail && **(fp-1) == '\0')
                          fn++;
                      return(nfields - fn);
                  }
              if (fn == 0)
                  break;
              *(p-1) = '\0';
              /* swallow the rest of this run of separators */
              while ((c = *p++) == sepc || c == sepc2)
                  continue;
              p--;
          }
          /* we have overflowed the fields vector -- just count them */
          fn = nfields;
          while (c != '\0') {
              while ((c = *p++) == sepc || c == sepc2)
                  continue;
              p--;
              fn++;
              while ((c = *p++) != '\0' && c != sepc && c != sepc2)
                  continue;
          }
          /* might have to trim trailing white space */
          if (trimtrail) {
              p--;
              while ((c = *--p) == sepc || c == sepc2)
                  continue;
              p++;
              if (*p != '\0') {
                  /*
                   * The string ends in white space, so the last field
                   * counted above was empty.  If it was the only overflow
                   * field, the last stored field really ends here.
                   */
                  if (fn == nfields+1)
                      *p = '\0';
                  fn--;
              }
          }
          return(fn);
      }

      /* n separators */
      fn = 0;
      for (;;) {
          if (fn < nfields)
              *fp++ = p;
          fn++;
          /* scan to the end of this field */
          for (;;) {
              c = *p++;
              if (c == '\0')
                  return(fn);
              sepp = sep;
              while ((sepc = *sepp++) != '\0' && sepc != c)
                  continue;
              if (sepc != '\0')   /* it was a separator */
                  break;
          }
          if (fn < nfields)
              *(p-1) = '\0';
          /* skip the rest of the separator run */
          for (;;) {
              c = *p++;
              sepp = sep;
              while ((sepc = *sepp++) != '\0' && sepc != c)
                  continue;
              if (sepc == '\0')   /* it wasn't a separator */
                  break;
          }
          p--;
      }

      /* not reached */
  }
  161. #ifdef TEST_SPLIT
  /* defined further down in this file; declared here because main uses them */
  void dosplit(char *string, char *seps);
  void regress(void);

  /*
   * test program
   * pgm               runs regression
   * pgm sep           splits stdin lines by sep
   * pgm str sep       splits str by sep
   * pgm str sep n     splits str by sep n times
   * pgm str sep n x   copies str n times without calling split()
   *                   (presumably a timing baseline -- confirm)
   */
  int
  main(int argc, char *argv[])
  {
      char buf[512];
      long n;
  #   define MNF 10
      char *fields[MNF];

      if (argc > 4) {
          for (n = strtol(argv[3], NULL, 10); n > 0; n--) {
              /* bounded copy: an over-long argument is truncated */
              (void) snprintf(buf, sizeof(buf), "%s", argv[1]);
          }
      } else if (argc > 3) {
          for (n = strtol(argv[3], NULL, 10); n > 0; n--) {
              /* split() writes into buf, so start from a fresh copy */
              (void) snprintf(buf, sizeof(buf), "%s", argv[1]);
              (void) split(buf, fields, MNF, argv[2]);
          }
      } else if (argc > 2) {
          dosplit(argv[1], argv[2]);
      } else if (argc > 1) {
          while (fgets(buf, sizeof(buf), stdin) != NULL) {
              /*
               * Stomp the newline if there is one; a final line without
               * a newline, or an over-long line, is left intact.
               */
              buf[strcspn(buf, "\n")] = '\0';
              dosplit(buf, argv[1]);
          }
      } else {
          regress();
      }
      exit(0);
  }
  /*
   * dosplit - split one string and print what came out
   *
   * string  text to split; split() modifies it in place
   * seps    separator specification handed to split() unchanged
   *
   * At most NF field pointers are kept; print() is told both the count
   * split() returned and that capacity.
   */
  void
  dosplit(char *string, char *seps)
  {
  #   define NF 5
      char *pieces[NF];
      int count = split(string, pieces, NF, seps);

      print(count, NF, pieces);
  }
  /*
   * print - show the outcome of a split() call on stdout
   *
   * nf      field count reported by split(); may exceed nfp
   * nfp     number of entries that were available in fields[]
   * fields  the field pointers filled in by split()
   *
   * Writes the count, a tab, then the stored fields (at most nfp of them)
   * quoted and comma-separated.  The line is always newline-terminated,
   * including when there are no fields or when some did not fit.
   */
  void
  print(int nf, int nfp, char *fields[])
  {
      int fn;
      int bound;

      /* only the first nfp entries of fields[] were ever filled in */
      bound = (nf > nfp) ? nfp : nf;
      printf("%d:\t", nf);
      for (fn = 0; fn < bound; fn++)
          printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
      printf("\n");
  }
  #define RNF 5   /* some table entries know this */

  /*
   * Regression vectors for split().
   *
   * str   input string
   * seps  separator specification passed to split()
   * nf    expected return value (number of fields, including overflow)
   * fi    expected contents of the first min(nf, RNF) stored fields
   *
   * The table ends with an entry whose str is NULL.
   */
  struct {
      const char *str;
      const char *seps;
      int nf;
      const char *fi[RNF];
  } tests[] = {
      /* one separator: every blank delimits, so empty fields appear */
      { "",              " ",    0, { "" } },
      { " ",             " ",    2, { "", "" } },
      { "x",             " ",    1, { "x" } },
      { "xy",            " ",    1, { "xy" } },
      { "x y",           " ",    2, { "x", "y" } },
      { "abc def  g ",   " ",    5, { "abc", "def", "", "g", "" } },
      { "  a bcd",       " ",    4, { "", "", "a", "bcd" } },
      { "a b c d e f",   " ",    6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",     " ",    6, { "", "a", "b", "c", "d " } },

      /* two separators: a run of either character is one delimiter */
      { "",              " _",   0, { "" } },
      { " ",             " _",   2, { "", "" } },
      { "x",             " _",   1, { "x" } },
      { "x y",           " _",   2, { "x", "y" } },
      { "ab _ cd",       " _",   2, { "ab", "cd" } },
      { " a_b c ",       " _",   5, { "", "a", "b", "c", "" } },
      { "a b c_d e f",   " _",   6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",     " _",   6, { "", "a", "b", "c", "d " } },

      /* three separators: general n-separator path */
      { "",              " _~",  0, { "" } },
      { " ",             " _~",  2, { "", "" } },
      { "x",             " _~",  1, { "x" } },
      { "x y",           " _~",  2, { "x", "y" } },
      { "ab _~ cd",      " _~",  2, { "ab", "cd" } },
      { " a_b c~",       " _~",  5, { "", "a", "b", "c", "" } },
      { "a b_c d~e f",   " _~",  6, { "a", "b", "c", "d", "e f" } },
      { "~a b c d ",     " _~",  6, { "", "a", "b", "c", "d " } },

      /* four separators */
      { "",              " _~-", 0, { "" } },
      { " ",             " _~-", 2, { "", "" } },
      { "x",             " _~-", 1, { "x" } },
      { "x y",           " _~-", 2, { "x", "y" } },
      { "ab _~- cd",     " _~-", 2, { "ab", "cd" } },
      { " a_b c~",       " _~-", 5, { "", "a", "b", "c", "" } },
      { "a b_c-d~e f",   " _~-", 6, { "a", "b", "c", "d", "e f" } },
      { "~a-b c d ",     " _~-", 6, { "", "a", "b", "c", "d " } },

      /*
       * two separators that are both a blank: takes the two-separator
       * path, so runs of blanks collapse (compare the first section)
       */
      { "",              "  ",   0, { "" } },
      { " ",             "  ",   2, { "", "" } },
      { "x",             "  ",   1, { "x" } },
      { "xy",            "  ",   1, { "xy" } },
      { "x y",           "  ",   2, { "x", "y" } },
      { "abc def  g ",   "  ",   4, { "abc", "def", "g", "" } },
      { "  a bcd",       "  ",   3, { "", "a", "bcd" } },
      { "a b c d e f",   "  ",   6, { "a", "b", "c", "d", "e f" } },
      { " a b c d ",     "  ",   6, { "", "a", "b", "c", "d " } },

      /* empty separator: white-space mode, leading/trailing blanks ignored */
      { "",              "",     0, { "" } },
      { " ",             "",     0, { "" } },
      { "x",             "",     1, { "x" } },
      { "xy",            "",     1, { "xy" } },
      { "x y",           "",     2, { "x", "y" } },
      { "abc def g ",    "",     3, { "abc", "def", "g" } },
      { "\t a bcd",      "",     2, { "a", "bcd" } },
      { " a \tb\t c ",   "",     3, { "a", "b", "c" } },
      { "a b c d e ",    "",     5, { "a", "b", "c", "d", "e" } },
      { "a b\tc d e f",  "",     6, { "a", "b", "c", "d", "e f" } },
      { " a b c d e f ", "",     6, { "a", "b", "c", "d", "e f " } },

      { NULL,            NULL,   0, { NULL } },
  };
  277. void
  278. regress(void)
  279. {
  280. char buf[512];
  281. int n;
  282. char *fields[RNF+1];
  283. int nf;
  284. int i;
  285. int printit;
  286. char *f;
  287. for (n = 0; tests[n].str != NULL; n++) {
  288. (void) strcpy(buf, tests[n].str);
  289. fields[RNF] = NULL;
  290. nf = split(buf, fields, RNF, tests[n].seps);
  291. printit = 0;
  292. if (nf != tests[n].nf) {
  293. printf("split `%s' by `%s' gave %d fields, not %d\n",
  294. tests[n].str, tests[n].seps, nf, tests[n].nf);
  295. printit = 1;
  296. } else if (fields[RNF] != NULL) {
  297. printf("split() went beyond array end\n");
  298. printit = 1;
  299. } else {
  300. for (i = 0; i < nf && i < RNF; i++) {
  301. f = fields[i];
  302. if (f == NULL)
  303. f = "(NULL)";
  304. if (strcmp(f, tests[n].fi[i]) != 0) {
  305. printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
  306. tests[n].str, tests[n].seps,
  307. i, fields[i], tests[n].fi[i]);
  308. printit = 1;
  309. }
  310. }
  311. }
  312. if (printit)
  313. print(nf, RNF, fields);
  314. }
  315. }
  316. #endif