PageRenderTime 40ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/src/freebsd/usr.bin/split/split.c

https://bitbucket.org/killerpenguinassassins/open_distrib_devel
C | 394 lines | 292 code | 42 blank | 60 comment | 128 complexity | 5e8e63f8522455152b11327b1ee2c352 MD5 | raw file
Possible License(s): CC0-1.0, MIT, LGPL-2.0, LGPL-3.0, WTFPL, GPL-2.0, BSD-2-Clause, AGPL-3.0, CC-BY-SA-3.0, MPL-2.0, JSON, BSD-3-Clause-No-Nuclear-License-2014, LGPL-2.1, CPL-1.0, AGPL-1.0, 0BSD, ISC, Apache-2.0, GPL-3.0, IPL-1.0, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /*
  2. * Copyright (c) 1987, 1993, 1994
  3. * The Regents of the University of California. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 4. Neither the name of the University nor the names of its contributors
  14. * may be used to endorse or promote products derived from this software
  15. * without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. #include <sys/cdefs.h>
  30. __FBSDID("$FreeBSD$");
  31. #ifndef lint
  32. static const char copyright[] =
  33. "@(#) Copyright (c) 1987, 1993, 1994\n\
  34. The Regents of the University of California. All rights reserved.\n";
  35. #endif
  36. #ifndef lint
  37. static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
  38. #endif
  39. #include <sys/param.h>
  40. #include <sys/types.h>
  41. #include <sys/stat.h>
  42. #include <ctype.h>
  43. #include <err.h>
  44. #include <errno.h>
  45. #include <fcntl.h>
  46. #include <inttypes.h>
  47. #include <limits.h>
  48. #include <locale.h>
  49. #include <stdint.h>
  50. #include <stdio.h>
  51. #include <stdlib.h>
  52. #include <string.h>
  53. #include <unistd.h>
  54. #include <regex.h>
  55. #include <sysexits.h>
  56. #define DEFLINE 1000 /* Default num lines per file. */
  57. static off_t bytecnt; /* Byte count to split on. */
  58. static off_t chunks = 0; /* Chunks count to split into. */
  59. static long numlines; /* Line count to split on. */
  60. static int file_open; /* Nonzero while an output file is open. */
  61. static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
  62. static char bfr[MAXBSIZE]; /* I/O buffer. */
  63. static char fname[MAXPATHLEN]; /* File name prefix. */
  64. static regex_t rgx; /* Pattern compiled from the -p argument. */
  65. static int pflag; /* Set to 1 when -p was given. */
  66. static long sufflen = 2; /* File name suffix length. */
  67. static void newfile(void); /* Open the next output file. */
  68. static void split1(void); /* Split the input by bytes. */
  69. static void split2(void); /* Split the input by lines (or by -p pattern). */
  70. static void split3(void); /* Split the input into a number of chunks. */
  71. static void usage(void); /* Print the usage message and exit. */
  72. int
  73. main(int argc, char **argv)
  74. {
  75. intmax_t bytecnti;
  76. long scale;
  77. int ch;
  78. char *ep, *p;
  79. setlocale(LC_ALL, "");
  80. while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1)
  81. switch (ch) {
  82. case '0': case '1': case '2': case '3': case '4':
  83. case '5': case '6': case '7': case '8': case '9':
  84. /*
  85. * Undocumented kludge: split was originally designed
  86. * to take a number after a dash.
  87. */
  88. if (numlines == 0) {
  89. p = argv[optind - 1];
  90. if (p[0] == '-' && p[1] == ch && !p[2])
  91. numlines = strtol(++p, &ep, 10);
  92. else
  93. numlines =
  94. strtol(argv[optind] + 1, &ep, 10);
  95. if (numlines <= 0 || *ep)
  96. errx(EX_USAGE,
  97. "%s: illegal line count", optarg);
  98. }
  99. break;
  100. case 'a': /* Suffix length */
  101. if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
  102. errx(EX_USAGE,
  103. "%s: illegal suffix length", optarg);
  104. break;
  105. case 'b': /* Byte count. */
  106. errno = 0;
  107. if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 ||
  108. strchr("kKmMgG", *ep) == NULL || errno != 0)
  109. errx(EX_USAGE,
  110. "%s: illegal byte count", optarg);
  111. if (*ep == 'k' || *ep == 'K')
  112. scale = 1024;
  113. else if (*ep == 'm' || *ep == 'M')
  114. scale = 1024 * 1024;
  115. else if (*ep == 'g' || *ep == 'G')
  116. scale = 1024 * 1024 * 1024;
  117. else
  118. scale = 1;
  119. if (bytecnti > OFF_MAX / scale)
  120. errx(EX_USAGE, "%s: offset too large", optarg);
  121. bytecnt = (off_t)(bytecnti * scale);
  122. break;
  123. case 'l': /* Line count. */
  124. if (numlines != 0)
  125. usage();
  126. if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
  127. errx(EX_USAGE,
  128. "%s: illegal line count", optarg);
  129. break;
  130. case 'n': /* Chunks. */
  131. if (!isdigit((unsigned char)optarg[0]) ||
  132. (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
  133. *ep != '\0') {
  134. errx(EX_USAGE, "%s: illegal number of chunks",
  135. optarg);
  136. }
  137. break;
  138. case 'p': /* pattern matching. */
  139. if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
  140. errx(EX_USAGE, "%s: illegal regexp", optarg);
  141. pflag = 1;
  142. break;
  143. default:
  144. usage();
  145. }
  146. argv += optind;
  147. argc -= optind;
  148. if (*argv != NULL) { /* Input file. */
  149. if (strcmp(*argv, "-") == 0)
  150. ifd = STDIN_FILENO;
  151. else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
  152. err(EX_NOINPUT, "%s", *argv);
  153. ++argv;
  154. }
  155. if (*argv != NULL) /* File name prefix. */
  156. if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
  157. errx(EX_USAGE, "file name prefix is too long");
  158. if (*argv != NULL)
  159. usage();
  160. if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
  161. errx(EX_USAGE, "suffix is too long");
  162. if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
  163. usage();
  164. if (numlines == 0)
  165. numlines = DEFLINE;
  166. else if (bytecnt != 0 || chunks != 0)
  167. usage();
  168. if (bytecnt && chunks)
  169. usage();
  170. if (ifd == -1) /* Stdin by default. */
  171. ifd = 0;
  172. if (bytecnt) {
  173. split1();
  174. exit (0);
  175. } else if (chunks) {
  176. split3();
  177. exit (0);
  178. }
  179. split2();
  180. if (pflag)
  181. regfree(&rgx);
  182. exit(0);
  183. }
  184. /*
  185. * split1 --
  186. * Split the input by bytes.
  187. */
  188. static void
  189. split1(void)
  190. {
  191. off_t bcnt;
  192. char *C;
  193. ssize_t dist, len;
  194. int nfiles;
  195. nfiles = 0;
  196. for (bcnt = 0;;)
  197. switch ((len = read(ifd, bfr, MAXBSIZE))) {
  198. case 0:
  199. exit(0);
  200. case -1:
  201. err(EX_IOERR, "read");
  202. /* NOTREACHED */
  203. default:
  204. if (!file_open) {
  205. if (!chunks || (nfiles < chunks)) {
  206. newfile();
  207. nfiles++;
  208. }
  209. }
  210. if (bcnt + len >= bytecnt) {
  211. dist = bytecnt - bcnt;
  212. if (write(ofd, bfr, dist) != dist)
  213. err(EX_IOERR, "write");
  214. len -= dist;
  215. for (C = bfr + dist; len >= bytecnt;
  216. len -= bytecnt, C += bytecnt) {
  217. if (!chunks || (nfiles < chunks)) {
  218. newfile();
  219. nfiles++;
  220. }
  221. if (write(ofd,
  222. C, bytecnt) != bytecnt)
  223. err(EX_IOERR, "write");
  224. }
  225. if (len != 0) {
  226. if (!chunks || (nfiles < chunks)) {
  227. newfile();
  228. nfiles++;
  229. }
  230. if (write(ofd, C, len) != len)
  231. err(EX_IOERR, "write");
  232. } else
  233. file_open = 0;
  234. bcnt = len;
  235. } else {
  236. bcnt += len;
  237. if (write(ofd, bfr, len) != len)
  238. err(EX_IOERR, "write");
  239. }
  240. }
  241. }
  242. /*
  243. * split2 --
  244. * Split the input by lines.
  245. */
  246. static void
  247. split2(void)
  248. {
  249. long lcnt = 0;
  250. FILE *infp;
  251. /* Stick a stream on top of input file descriptor */
  252. if ((infp = fdopen(ifd, "r")) == NULL)
  253. err(EX_NOINPUT, "fdopen");
  254. /* Process input one line at a time */
  255. while (fgets(bfr, sizeof(bfr), infp) != NULL) {
  256. const int len = strlen(bfr);
  257. /* If line is too long to deal with, just write it out */
  258. if (bfr[len - 1] != '\n')
  259. goto writeit;
  260. /* Check if we need to start a new file */
  261. if (pflag) {
  262. regmatch_t pmatch;
  263. pmatch.rm_so = 0;
  264. pmatch.rm_eo = len - 1;
  265. if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
  266. newfile();
  267. } else if (lcnt++ == numlines) {
  268. newfile();
  269. lcnt = 1;
  270. }
  271. writeit:
  272. /* Open output file if needed */
  273. if (!file_open)
  274. newfile();
  275. /* Write out line */
  276. if (write(ofd, bfr, len) != len)
  277. err(EX_IOERR, "write");
  278. }
  279. /* EOF or error? */
  280. if (ferror(infp))
  281. err(EX_IOERR, "read");
  282. else
  283. exit(0);
  284. }
  285. /*
  286. * split3 --
  287. * Split the input into specified number of chunks
  288. */
  289. static void
  290. split3(void)
  291. {
  292. struct stat sb;
  293. if (fstat(ifd, &sb) == -1) {
  294. err(1, "stat");
  295. /* NOTREACHED */
  296. }
  297. if (chunks > sb.st_size) {
  298. errx(1, "can't split into more than %d files",
  299. (int)sb.st_size);
  300. /* NOTREACHED */
  301. }
  302. bytecnt = sb.st_size / chunks;
  303. split1();
  304. }
  305. /*
  306. * newfile --
  307. * Open a new output file.
  308. */
  309. static void
  310. newfile(void)
  311. {
  312. long i, maxfiles, tfnum;
  313. static long fnum;
  314. static char *fpnt;
  315. if (ofd == -1) {
  316. if (fname[0] == '\0') {
  317. fname[0] = 'x';
  318. fpnt = fname + 1;
  319. } else {
  320. fpnt = fname + strlen(fname);
  321. }
  322. ofd = fileno(stdout);
  323. }
  324. /* maxfiles = 26^sufflen, but don't use libm. */
  325. for (maxfiles = 1, i = 0; i < sufflen; i++)
  326. if ((maxfiles *= 26) <= 0)
  327. errx(EX_USAGE, "suffix is too long (max %ld)", i);
  328. if (fnum == maxfiles)
  329. errx(EX_DATAERR, "too many files");
  330. /* Generate suffix of sufflen letters */
  331. tfnum = fnum;
  332. i = sufflen - 1;
  333. do {
  334. fpnt[i] = tfnum % 26 + 'a';
  335. tfnum /= 26;
  336. } while (i-- > 0);
  337. fpnt[sufflen] = '\0';
  338. ++fnum;
  339. if (!freopen(fname, "w", stdout))
  340. err(EX_IOERR, "%s", fname);
  341. file_open = 1;
  342. }
  /*
   * usage --
   *	Write the synopsis of all four invocation forms to stderr and
   *	exit with EX_USAGE.
   */
  static void
  usage(void)
  {
      static const char synopsis[] =
          "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
          " split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
          " split -n chunk_count [-a suffix_length] [file [prefix]]\n"
          " split -p pattern [-a suffix_length] [file [prefix]]\n";

      (void)fputs(synopsis, stderr);
      exit(EX_USAGE);
  }
  352. }