PageRenderTime 51ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/src/freebsd/usr.bin/csplit/csplit.c

https://bitbucket.org/killerpenguinassassins/open_distrib_devel
C | 467 lines | 322 code | 56 blank | 89 comment | 142 complexity | 4e94821cf2c73c578c56443623834806 MD5 | raw file
Possible License(s): CC0-1.0, MIT, LGPL-2.0, LGPL-3.0, WTFPL, GPL-2.0, BSD-2-Clause, AGPL-3.0, CC-BY-SA-3.0, MPL-2.0, JSON, BSD-3-Clause-No-Nuclear-License-2014, LGPL-2.1, CPL-1.0, AGPL-1.0, 0BSD, ISC, Apache-2.0, GPL-3.0, IPL-1.0, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /*-
  2. * Copyright (c) 2002 Tim J. Robbins.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24. * SUCH DAMAGE.
  25. */
  26. /*
  27. * csplit -- split files based on context
  28. *
  29. * This utility splits its input into numbered output files by line number
  30. * or by a regular expression. Regular expression matches have an optional
  31. * offset with them, allowing the split to occur a specified number of
  32. * lines before or after the match.
  33. *
  34. * To handle negative offsets, we stop reading when the match occurs and
  35. * store the offset that the file should have been split at, then use
  36. * this output file as input until all the "overflowed" lines have been read.
  37. * The file is then closed and truncated to the correct length.
  38. *
 * We assume that the output files are seekable (i.e. they cannot be
 * symlinks to named pipes or character devices), but make no such
 * assumption about the input.
  42. */
  43. #include <sys/cdefs.h>
  44. __FBSDID("$FreeBSD$");
  45. #include <sys/types.h>
  46. #include <ctype.h>
  47. #include <err.h>
  48. #include <errno.h>
  49. #include <limits.h>
  50. #include <locale.h>
  51. #include <regex.h>
  52. #include <signal.h>
  53. #include <stdint.h>
  54. #include <stdio.h>
  55. #include <stdlib.h>
  56. #include <string.h>
  57. #include <unistd.h>
  58. static void cleanup(void);
  59. static void do_lineno(const char *);
  60. static void do_rexp(const char *);
  61. static char *getline(void);
  62. static void handlesig(int);
  63. static FILE *newfile(void);
  64. static void toomuch(FILE *, long);
  65. static void usage(void);
/*
 * Command line options
 */
static const char *prefix;	/* File name prefix (-f); main() defaults it to "xx" */
static long sufflen;		/* Number of decimal digits for suffix (-n); default 2 */
static int sflag;		/* Suppress output of file sizes (-s) */
static int kflag;		/* Keep output if error occurs (-k) */

/*
 * Other miscellaneous globals (XXX too many)
 */
static long lineno;		/* Current line number in input file; bumped by getline() */
static long reps;		/* Number of repetitions for this pattern ({n} operand) */
static long nfiles;		/* Number of files output so far; also the next suffix */
static long maxfiles;		/* Maximum number of files we can create (10^sufflen) */
static char currfile[PATH_MAX];	/* Current output file name, set by newfile() */
static const char *infn;	/* Name of the input file ("stdin" for "-") */
static FILE *infile;		/* Input file handle */
static FILE *overfile;		/* Overflow file for toomuch(); NULL when none pending */
static off_t truncofs;		/* Offset the overflow file should be truncated at */
static int doclean;		/* Should cleanup() remove output? */
  86. int
  87. main(int argc, char *argv[])
  88. {
  89. struct sigaction sa;
  90. long i;
  91. int ch;
  92. const char *expr;
  93. char *ep, *p;
  94. FILE *ofp;
  95. setlocale(LC_ALL, "");
  96. kflag = sflag = 0;
  97. prefix = "xx";
  98. sufflen = 2;
  99. while ((ch = getopt(argc, argv, "ksf:n:")) > 0) {
  100. switch (ch) {
  101. case 'f':
  102. prefix = optarg;
  103. break;
  104. case 'k':
  105. kflag = 1;
  106. break;
  107. case 'n':
  108. errno = 0;
  109. sufflen = strtol(optarg, &ep, 10);
  110. if (sufflen <= 0 || *ep != '\0' || errno != 0)
  111. errx(1, "%s: bad suffix length", optarg);
  112. break;
  113. case 's':
  114. sflag = 1;
  115. break;
  116. default:
  117. usage();
  118. /*NOTREACHED*/
  119. }
  120. }
  121. if (sufflen + strlen(prefix) >= PATH_MAX)
  122. errx(1, "name too long");
  123. argc -= optind;
  124. argv += optind;
  125. if ((infn = *argv++) == NULL)
  126. usage();
  127. if (strcmp(infn, "-") == 0) {
  128. infile = stdin;
  129. infn = "stdin";
  130. } else if ((infile = fopen(infn, "r")) == NULL)
  131. err(1, "%s", infn);
  132. if (!kflag) {
  133. doclean = 1;
  134. atexit(cleanup);
  135. sa.sa_flags = 0;
  136. sa.sa_handler = handlesig;
  137. sigemptyset(&sa.sa_mask);
  138. sigaddset(&sa.sa_mask, SIGHUP);
  139. sigaddset(&sa.sa_mask, SIGINT);
  140. sigaddset(&sa.sa_mask, SIGTERM);
  141. sigaction(SIGHUP, &sa, NULL);
  142. sigaction(SIGINT, &sa, NULL);
  143. sigaction(SIGTERM, &sa, NULL);
  144. }
  145. lineno = 0;
  146. nfiles = 0;
  147. truncofs = 0;
  148. overfile = NULL;
  149. /* Ensure 10^sufflen < LONG_MAX. */
  150. for (maxfiles = 1, i = 0; i < sufflen; i++) {
  151. if (maxfiles > LONG_MAX / 10)
  152. errx(1, "%ld: suffix too long (limit %ld)",
  153. sufflen, i);
  154. maxfiles *= 10;
  155. }
  156. /* Create files based on supplied patterns. */
  157. while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
  158. /* Look ahead & see if this pattern has any repetitions. */
  159. if (*argv != NULL && **argv == '{') {
  160. errno = 0;
  161. reps = strtol(*argv + 1, &ep, 10);
  162. if (reps < 0 || *ep != '}' || errno != 0)
  163. errx(1, "%s: bad repetition count", *argv + 1);
  164. argv++;
  165. } else
  166. reps = 0;
  167. if (*expr == '/' || *expr == '%') {
  168. do
  169. do_rexp(expr);
  170. while (reps-- != 0 && nfiles < maxfiles - 1);
  171. } else if (isdigit((unsigned char)*expr))
  172. do_lineno(expr);
  173. else
  174. errx(1, "%s: unrecognised pattern", expr);
  175. }
  176. /* Copy the rest into a new file. */
  177. if (!feof(infile)) {
  178. ofp = newfile();
  179. while ((p = getline()) != NULL && fputs(p, ofp) == 0)
  180. ;
  181. if (!sflag)
  182. printf("%jd\n", (intmax_t)ftello(ofp));
  183. if (fclose(ofp) != 0)
  184. err(1, "%s", currfile);
  185. }
  186. toomuch(NULL, 0);
  187. doclean = 0;
  188. return (0);
  189. }
  190. static void
  191. usage(void)
  192. {
  193. fprintf(stderr,
  194. "usage: csplit [-ks] [-f prefix] [-n number] file args ...\n");
  195. exit(1);
  196. }
  197. static void
  198. handlesig(int sig __unused)
  199. {
  200. const char msg[] = "csplit: caught signal, cleaning up\n";
  201. write(STDERR_FILENO, msg, sizeof(msg) - 1);
  202. cleanup();
  203. _exit(2);
  204. }
  205. /* Create a new output file. */
  206. static FILE *
  207. newfile(void)
  208. {
  209. FILE *fp;
  210. if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
  211. (int)sufflen, nfiles) >= sizeof(currfile))
  212. errc(1, ENAMETOOLONG, NULL);
  213. if ((fp = fopen(currfile, "w+")) == NULL)
  214. err(1, "%s", currfile);
  215. nfiles++;
  216. return (fp);
  217. }
  218. /* Remove partial output, called before exiting. */
  219. static void
  220. cleanup(void)
  221. {
  222. char fnbuf[PATH_MAX];
  223. long i;
  224. if (!doclean)
  225. return;
  226. /*
  227. * NOTE: One cannot portably assume to be able to call snprintf()
  228. * from inside a signal handler. It does, however, appear to be safe
  229. * to do on FreeBSD. The solution to this problem is worse than the
  230. * problem itself.
  231. */
  232. for (i = 0; i < nfiles; i++) {
  233. snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
  234. (int)sufflen, i);
  235. unlink(fnbuf);
  236. }
  237. }
  238. /* Read a line from the input into a static buffer. */
  239. static char *
  240. getline(void)
  241. {
  242. static char lbuf[LINE_MAX];
  243. FILE *src;
  244. src = overfile != NULL ? overfile : infile;
  245. again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
  246. if (src == overfile) {
  247. src = infile;
  248. goto again;
  249. }
  250. return (NULL);
  251. }
  252. if (ferror(src))
  253. err(1, "%s", infn);
  254. lineno++;
  255. return (lbuf);
  256. }
  257. /* Conceptually rewind the input (as obtained by getline()) back `n' lines. */
  258. static void
  259. toomuch(FILE *ofp, long n)
  260. {
  261. char buf[BUFSIZ];
  262. size_t i, nread;
  263. if (overfile != NULL) {
  264. /*
  265. * Truncate the previous file we overflowed into back to
  266. * the correct length, close it.
  267. */
  268. if (fflush(overfile) != 0)
  269. err(1, "overflow");
  270. if (ftruncate(fileno(overfile), truncofs) != 0)
  271. err(1, "overflow");
  272. if (fclose(overfile) != 0)
  273. err(1, "overflow");
  274. overfile = NULL;
  275. }
  276. if (n == 0)
  277. /* Just tidying up */
  278. return;
  279. lineno -= n;
  280. /*
  281. * Wind the overflow file backwards to `n' lines before the
  282. * current one.
  283. */
  284. do {
  285. if (ftello(ofp) < (off_t)sizeof(buf))
  286. rewind(ofp);
  287. else
  288. fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
  289. if (ferror(ofp))
  290. errx(1, "%s: can't seek", currfile);
  291. if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
  292. errx(1, "can't read overflowed output");
  293. if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
  294. err(1, "%s", currfile);
  295. for (i = 1; i <= nread; i++)
  296. if (buf[nread - i] == '\n' && n-- == 0)
  297. break;
  298. if (ftello(ofp) == 0)
  299. break;
  300. } while (n > 0);
  301. if (fseeko(ofp, nread - i + 1, SEEK_CUR) != 0)
  302. err(1, "%s", currfile);
  303. /*
  304. * getline() will read from here. Next call will truncate to
  305. * truncofs in this file.
  306. */
  307. overfile = ofp;
  308. truncofs = ftello(overfile);
  309. }
  310. /* Handle splits for /regexp/ and %regexp% patterns. */
  311. static void
  312. do_rexp(const char *expr)
  313. {
  314. regex_t cre;
  315. intmax_t nwritten;
  316. long ofs;
  317. int first;
  318. char *ecopy, *ep, *p, *pofs, *re;
  319. FILE *ofp;
  320. if ((ecopy = strdup(expr)) == NULL)
  321. err(1, "strdup");
  322. re = ecopy + 1;
  323. if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
  324. errx(1, "%s: missing trailing %c", expr, *expr);
  325. *pofs++ = '\0';
  326. if (*pofs != '\0') {
  327. errno = 0;
  328. ofs = strtol(pofs, &ep, 10);
  329. if (*ep != '\0' || errno != 0)
  330. errx(1, "%s: bad offset", pofs);
  331. } else
  332. ofs = 0;
  333. if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
  334. errx(1, "%s: bad regular expression", re);
  335. if (*expr == '/')
  336. /* /regexp/: Save results to a file. */
  337. ofp = newfile();
  338. else {
  339. /* %regexp%: Make a temporary file for overflow. */
  340. if ((ofp = tmpfile()) == NULL)
  341. err(1, "tmpfile");
  342. }
  343. /* Read and output lines until we get a match. */
  344. first = 1;
  345. while ((p = getline()) != NULL) {
  346. if (fputs(p, ofp) != 0)
  347. break;
  348. if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
  349. break;
  350. first = 0;
  351. }
  352. if (p == NULL)
  353. errx(1, "%s: no match", re);
  354. if (ofs <= 0) {
  355. /*
  356. * Negative (or zero) offset: throw back any lines we should
  357. * not have read yet.
  358. */
  359. if (p != NULL) {
  360. toomuch(ofp, -ofs + 1);
  361. nwritten = (intmax_t)truncofs;
  362. } else
  363. nwritten = (intmax_t)ftello(ofp);
  364. } else {
  365. /*
  366. * Positive offset: copy the requested number of lines
  367. * after the match.
  368. */
  369. while (--ofs > 0 && (p = getline()) != NULL)
  370. fputs(p, ofp);
  371. toomuch(NULL, 0);
  372. nwritten = (intmax_t)ftello(ofp);
  373. if (fclose(ofp) != 0)
  374. err(1, "%s", currfile);
  375. }
  376. if (!sflag && *expr == '/')
  377. printf("%jd\n", nwritten);
  378. regfree(&cre);
  379. free(ecopy);
  380. }
  381. /* Handle splits based on line number. */
  382. static void
  383. do_lineno(const char *expr)
  384. {
  385. long lastline, tgtline;
  386. char *ep, *p;
  387. FILE *ofp;
  388. errno = 0;
  389. tgtline = strtol(expr, &ep, 10);
  390. if (tgtline <= 0 || errno != 0 || *ep != '\0')
  391. errx(1, "%s: bad line number", expr);
  392. lastline = tgtline;
  393. if (lastline <= lineno)
  394. errx(1, "%s: can't go backwards", expr);
  395. while (nfiles < maxfiles - 1) {
  396. ofp = newfile();
  397. while (lineno + 1 != lastline) {
  398. if ((p = getline()) == NULL)
  399. errx(1, "%ld: out of range", lastline);
  400. if (fputs(p, ofp) != 0)
  401. break;
  402. }
  403. if (!sflag)
  404. printf("%jd\n", (intmax_t)ftello(ofp));
  405. if (fclose(ofp) != 0)
  406. err(1, "%s", currfile);
  407. if (reps-- == 0)
  408. break;
  409. lastline += tgtline;
  410. }
  411. }