/usr.bin/col/col.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 548 lines · 429 code · 51 blank · 68 comment · 97 complexity · 1ca122d2b724f4f9c9737c2e86e8f261 MD5 · raw file

  1. /*-
  2. * Copyright (c) 1990, 1993, 1994
  3. * The Regents of the University of California. All rights reserved.
  4. *
  5. * This code is derived from software contributed to Berkeley by
  6. * Michael Rendell of the Memorial University of Newfoundland.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. * 4. Neither the name of the University nor the names of its contributors
  17. * may be used to endorse or promote products derived from this software
  18. * without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  21. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  24. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30. * SUCH DAMAGE.
  31. */
  32. #ifndef lint
  33. static const char copyright[] =
  34. "@(#) Copyright (c) 1990, 1993, 1994\n\
  35. The Regents of the University of California. All rights reserved.\n";
  36. #endif
  37. #if 0
  38. #ifndef lint
  39. static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95";
  40. #endif
  41. #endif
  42. #include <sys/cdefs.h>
  43. __FBSDID("$FreeBSD$");
  #include <err.h>
  #include <limits.h>
  #include <locale.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <wchar.h>
  #include <wctype.h>
  /*
   * Control characters recognised on input.  The last three are only
   * meaningful as the character that follows ESC (see the ESC case in main()).
   *
   * NOTE(review): flush_blanks() emits ESC followed by the digit '9' for a
   * forward half-line feed, whereas FHLF below is the control character \011.
   * Output produced with -f is therefore not matched by the FHLF case on
   * input -- confirm which encoding is intended.
   */
  #define BS '\b' /* backspace */
  #define TAB '\t' /* tab */
  #define SPACE ' ' /* space */
  #define NL '\n' /* newline */
  #define CR '\r' /* carriage return */
  #define ESC '\033' /* escape */
  #define SI '\017' /* shift in to normal character set */
  #define SO '\016' /* shift out to alternate character set */
  #define VT '\013' /* vertical tab (aka reverse line feed) */
  #define RLF '\007' /* ESC-07 reverse line feed */
  #define RHLF '\010' /* ESC-010 reverse half-line feed */
  #define FHLF '\011' /* ESC-011 forward half-line feed */
  /*
   * build up at least this many lines before flushing them out; main() only
   * flushes once max_bufd_lines + BUFFER_MARGIN half lines are buffered
   */
  #define BUFFER_MARGIN 32
  typedef char CSET;

  /*
   * One buffered character cell: the character itself plus where it goes
   * (column), which character set was active when it was read, and its
   * display width as reported by wcwidth().
   *
   * c_column is an int because main() stores its int column counter here
   * unchanged; a narrower type would wrap to a negative value on very long
   * lines, and flush_line() uses the column as an array index.
   */
  typedef struct char_str {
  #define CS_NORMAL    1
  #define CS_ALTERNATE 2
      int     c_column;   /* column character is in */
      CSET    c_set;      /* character set (currently only 2) */
      wchar_t c_char;     /* character in question */
      int     c_width;    /* character width */
  } CHAR;
  /*
   * One buffered output line (a half line: main() advances two of these per
   * newline).  Lines are kept in a doubly linked list headed by `lines';
   * l_next is also used to chain unused entries on the free list.
   */
  typedef struct line_str LINE;
  struct line_str {
  CHAR *l_line; /* characters on the line (NULL until first character) */
  LINE *l_prev; /* previous line */
  LINE *l_next; /* next line */
  int l_lsize; /* number of CHAR elements allocated for l_line */
  int l_line_len; /* number of CHAR elements in use in l_line */
  int l_needs_sort; /* set if chars went in out of order */
  int l_max_col; /* max column in the line */
  };
  /* Helpers private to this file. */
  static LINE *alloc_line(void);
  static void dowarn(int);
  static void flush_line(LINE *);
  static void flush_lines(int);
  static void flush_blanks(void);
  static void free_line(LINE *);
  static void usage(void);
  /* Output state shared between main() and the flush routines. */
  static CSET last_set; /* char_set of last char printed */
  static LINE *lines; /* head (oldest entry) of the buffered line list */
  static int compress_spaces; /* if doing space -> tab conversion */
  static int fine; /* if `fine' resolution (half lines) */
  static int max_bufd_lines; /* max # lines to keep in memory (half lines once main() doubles it) */
  static int nblank_lines; /* # blanks after last flushed line (flush_blanks() halves it) */
  static int no_backspaces; /* if not to output any backspaces */
  static int pass_unknown_seqs; /* pass unknown control sequences */
  /*
   * Write one wide character to stdout with putwchar(); terminate the
   * program via errx(1, ...) if putwchar() reports WEOF.
   */
  #define PUTC(ch) \
  do { \
  if (putwchar(ch) == WEOF) \
  errx(1, "write error"); \
  } while (0)
  105. int
  106. main(int argc, char **argv)
  107. {
  108. wint_t ch;
  109. CHAR *c;
  110. CSET cur_set; /* current character set */
  111. LINE *l; /* current line */
  112. int extra_lines; /* # of lines above first line */
  113. int cur_col; /* current column */
  114. int cur_line; /* line number of current position */
  115. int max_line; /* max value of cur_line */
  116. int this_line; /* line l points to */
  117. int nflushd_lines; /* number of lines that were flushed */
  118. int adjust, opt, warned, width;
  119. (void)setlocale(LC_CTYPE, "");
  120. max_bufd_lines = 128;
  121. compress_spaces = 1; /* compress spaces into tabs */
  122. while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
  123. switch (opt) {
  124. case 'b': /* do not output backspaces */
  125. no_backspaces = 1;
  126. break;
  127. case 'f': /* allow half forward line feeds */
  128. fine = 1;
  129. break;
  130. case 'h': /* compress spaces into tabs */
  131. compress_spaces = 1;
  132. break;
  133. case 'l': /* buffered line count */
  134. if ((max_bufd_lines = atoi(optarg)) <= 0)
  135. errx(1, "bad -l argument %s", optarg);
  136. break;
  137. case 'p': /* pass unknown control sequences */
  138. pass_unknown_seqs = 1;
  139. break;
  140. case 'x': /* do not compress spaces into tabs */
  141. compress_spaces = 0;
  142. break;
  143. case '?':
  144. default:
  145. usage();
  146. }
  147. if (optind != argc)
  148. usage();
  149. /* this value is in half lines */
  150. max_bufd_lines *= 2;
  151. adjust = cur_col = extra_lines = warned = 0;
  152. cur_line = max_line = nflushd_lines = this_line = 0;
  153. cur_set = last_set = CS_NORMAL;
  154. lines = l = alloc_line();
  155. while ((ch = getwchar()) != WEOF) {
  156. if (!iswgraph(ch)) {
  157. switch (ch) {
  158. case BS: /* can't go back further */
  159. if (cur_col == 0)
  160. continue;
  161. --cur_col;
  162. continue;
  163. case CR:
  164. cur_col = 0;
  165. continue;
  166. case ESC: /* just ignore EOF */
  167. switch(getwchar()) {
  168. case RLF:
  169. cur_line -= 2;
  170. break;
  171. case RHLF:
  172. cur_line--;
  173. break;
  174. case FHLF:
  175. cur_line++;
  176. if (cur_line > max_line)
  177. max_line = cur_line;
  178. }
  179. continue;
  180. case NL:
  181. cur_line += 2;
  182. if (cur_line > max_line)
  183. max_line = cur_line;
  184. cur_col = 0;
  185. continue;
  186. case SPACE:
  187. ++cur_col;
  188. continue;
  189. case SI:
  190. cur_set = CS_NORMAL;
  191. continue;
  192. case SO:
  193. cur_set = CS_ALTERNATE;
  194. continue;
  195. case TAB: /* adjust column */
  196. cur_col |= 7;
  197. ++cur_col;
  198. continue;
  199. case VT:
  200. cur_line -= 2;
  201. continue;
  202. }
  203. if (iswspace(ch)) {
  204. if ((width = wcwidth(ch)) > 0)
  205. cur_col += width;
  206. continue;
  207. }
  208. if (!pass_unknown_seqs)
  209. continue;
  210. }
  211. /* Must stuff ch in a line - are we at the right one? */
  212. if (cur_line != this_line - adjust) {
  213. LINE *lnew;
  214. int nmove;
  215. adjust = 0;
  216. nmove = cur_line - this_line;
  217. if (!fine) {
  218. /* round up to next line */
  219. if (cur_line & 1) {
  220. adjust = 1;
  221. nmove++;
  222. }
  223. }
  224. if (nmove < 0) {
  225. for (; nmove < 0 && l->l_prev; nmove++)
  226. l = l->l_prev;
  227. if (nmove) {
  228. if (nflushd_lines == 0) {
  229. /*
  230. * Allow backup past first
  231. * line if nothing has been
  232. * flushed yet.
  233. */
  234. for (; nmove < 0; nmove++) {
  235. lnew = alloc_line();
  236. l->l_prev = lnew;
  237. lnew->l_next = l;
  238. l = lines = lnew;
  239. extra_lines++;
  240. }
  241. } else {
  242. if (!warned++)
  243. dowarn(cur_line);
  244. cur_line -= nmove;
  245. }
  246. }
  247. } else {
  248. /* may need to allocate here */
  249. for (; nmove > 0 && l->l_next; nmove--)
  250. l = l->l_next;
  251. for (; nmove > 0; nmove--) {
  252. lnew = alloc_line();
  253. lnew->l_prev = l;
  254. l->l_next = lnew;
  255. l = lnew;
  256. }
  257. }
  258. this_line = cur_line + adjust;
  259. nmove = this_line - nflushd_lines;
  260. if (nmove >= max_bufd_lines + BUFFER_MARGIN) {
  261. nflushd_lines += nmove - max_bufd_lines;
  262. flush_lines(nmove - max_bufd_lines);
  263. }
  264. }
  265. /* grow line's buffer? */
  266. if (l->l_line_len + 1 >= l->l_lsize) {
  267. int need;
  268. need = l->l_lsize ? l->l_lsize * 2 : 90;
  269. if ((l->l_line = realloc(l->l_line,
  270. (unsigned)need * sizeof(CHAR))) == NULL)
  271. err(1, (char *)NULL);
  272. l->l_lsize = need;
  273. }
  274. c = &l->l_line[l->l_line_len++];
  275. c->c_char = ch;
  276. c->c_set = cur_set;
  277. c->c_column = cur_col;
  278. c->c_width = wcwidth(ch);
  279. /*
  280. * If things are put in out of order, they will need sorting
  281. * when it is flushed.
  282. */
  283. if (cur_col < l->l_max_col)
  284. l->l_needs_sort = 1;
  285. else
  286. l->l_max_col = cur_col;
  287. if (c->c_width > 0)
  288. cur_col += c->c_width;
  289. }
  290. if (ferror(stdin))
  291. err(1, NULL);
  292. if (max_line == 0)
  293. exit(0); /* no lines, so just exit */
  294. /* goto the last line that had a character on it */
  295. for (; l->l_next; l = l->l_next)
  296. this_line++;
  297. flush_lines(this_line - nflushd_lines + extra_lines + 1);
  298. /* make sure we leave things in a sane state */
  299. if (last_set != CS_NORMAL)
  300. PUTC('\017');
  301. /* flush out the last few blank lines */
  302. nblank_lines = max_line - this_line;
  303. if (max_line & 1)
  304. nblank_lines++;
  305. else if (!nblank_lines)
  306. /* missing a \n on the last line? */
  307. nblank_lines = 2;
  308. flush_blanks();
  309. exit(0);
  310. }
  311. static void
  312. flush_lines(int nflush)
  313. {
  314. LINE *l;
  315. while (--nflush >= 0) {
  316. l = lines;
  317. lines = l->l_next;
  318. if (l->l_line) {
  319. flush_blanks();
  320. flush_line(l);
  321. }
  322. nblank_lines++;
  323. if (l->l_line)
  324. (void)free(l->l_line);
  325. free_line(l);
  326. }
  327. if (lines)
  328. lines->l_prev = NULL;
  329. }
  330. /*
  331. * Print a number of newline/half newlines. If fine flag is set, nblank_lines
  332. * is the number of half line feeds, otherwise it is the number of whole line
  333. * feeds.
  334. */
  335. static void
  336. flush_blanks(void)
  337. {
  338. int half, i, nb;
  339. half = 0;
  340. nb = nblank_lines;
  341. if (nb & 1) {
  342. if (fine)
  343. half = 1;
  344. else
  345. nb++;
  346. }
  347. nb /= 2;
  348. for (i = nb; --i >= 0;)
  349. PUTC('\n');
  350. if (half) {
  351. PUTC('\033');
  352. PUTC('9');
  353. if (!nb)
  354. PUTC('\r');
  355. }
  356. nblank_lines = 0;
  357. }
  358. /*
  359. * Write a line to stdout taking care of space to tab conversion (-h flag)
  360. * and character set shifts.
  361. */
  362. static void
  363. flush_line(LINE *l)
  364. {
  365. CHAR *c, *endc;
  366. int i, j, nchars, last_col, save, this_col, tot;
  367. last_col = 0;
  368. nchars = l->l_line_len;
  369. if (l->l_needs_sort) {
  370. static CHAR *sorted;
  371. static int count_size, *count, sorted_size;
  372. /*
  373. * Do an O(n) sort on l->l_line by column being careful to
  374. * preserve the order of characters in the same column.
  375. */
  376. if (l->l_lsize > sorted_size) {
  377. sorted_size = l->l_lsize;
  378. if ((sorted = realloc(sorted,
  379. (unsigned)sizeof(CHAR) * sorted_size)) == NULL)
  380. err(1, (char *)NULL);
  381. }
  382. if (l->l_max_col >= count_size) {
  383. count_size = l->l_max_col + 1;
  384. if ((count = realloc(count,
  385. (unsigned)sizeof(int) * count_size)) == NULL)
  386. err(1, (char *)NULL);
  387. }
  388. memset(count, 0, sizeof(int) * l->l_max_col + 1);
  389. for (i = nchars, c = l->l_line; --i >= 0; c++)
  390. count[c->c_column]++;
  391. /*
  392. * calculate running total (shifted down by 1) to use as
  393. * indices into new line.
  394. */
  395. for (tot = 0, i = 0; i <= l->l_max_col; i++) {
  396. save = count[i];
  397. count[i] = tot;
  398. tot += save;
  399. }
  400. for (i = nchars, c = l->l_line; --i >= 0; c++)
  401. sorted[count[c->c_column]++] = *c;
  402. c = sorted;
  403. } else
  404. c = l->l_line;
  405. while (nchars > 0) {
  406. this_col = c->c_column;
  407. endc = c;
  408. do {
  409. ++endc;
  410. } while (--nchars > 0 && this_col == endc->c_column);
  411. /* if -b only print last character */
  412. if (no_backspaces) {
  413. c = endc - 1;
  414. if (nchars > 0 &&
  415. this_col + c->c_width > endc->c_column)
  416. continue;
  417. }
  418. if (this_col > last_col) {
  419. int nspace = this_col - last_col;
  420. if (compress_spaces && nspace > 1) {
  421. while (1) {
  422. int tab_col, tab_size;
  423. tab_col = (last_col + 8) & ~7;
  424. if (tab_col > this_col)
  425. break;
  426. tab_size = tab_col - last_col;
  427. if (tab_size == 1)
  428. PUTC(' ');
  429. else
  430. PUTC('\t');
  431. nspace -= tab_size;
  432. last_col = tab_col;
  433. }
  434. }
  435. while (--nspace >= 0)
  436. PUTC(' ');
  437. last_col = this_col;
  438. }
  439. for (;;) {
  440. if (c->c_set != last_set) {
  441. switch (c->c_set) {
  442. case CS_NORMAL:
  443. PUTC('\017');
  444. break;
  445. case CS_ALTERNATE:
  446. PUTC('\016');
  447. }
  448. last_set = c->c_set;
  449. }
  450. PUTC(c->c_char);
  451. if ((c + 1) < endc)
  452. for (j = 0; j < c->c_width; j++)
  453. PUTC('\b');
  454. if (++c >= endc)
  455. break;
  456. }
  457. last_col += (c - 1)->c_width;
  458. }
  459. }
  460. #define NALLOC 64
  461. static LINE *line_freelist;
  462. static LINE *
  463. alloc_line(void)
  464. {
  465. LINE *l;
  466. int i;
  467. if (!line_freelist) {
  468. if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
  469. err(1, (char *)NULL);
  470. line_freelist = l;
  471. for (i = 1; i < NALLOC; i++, l++)
  472. l->l_next = l + 1;
  473. l->l_next = NULL;
  474. }
  475. l = line_freelist;
  476. line_freelist = l->l_next;
  477. memset(l, 0, sizeof(LINE));
  478. return (l);
  479. }
  /*
   * Put l back at the head of the free list for reuse by alloc_line().
   * Only l_next is overwritten; the caller (flush_lines()) frees l->l_line
   * itself before calling this.
   */
  static void
  free_line(LINE *l)
  {
  l->l_next = line_freelist;
  line_freelist = l;
  }
  /*
   * Print the command synopsis on stderr and exit with status 1.
   */
  static void
  usage(void)
  {
      (void)fputs("usage: col [-bfhpx] [-l nline]\n", stderr);
      exit(1);
  }
  /*
   * Warn (via warnx) that the cursor could not be moved back as requested.
   * A negative line number selects the "past first line" wording, anything
   * else the "line already flushed" wording.
   */
  static void
  dowarn(int line)
  {
      const char *reason;

      if (line < 0)
          reason = "past first line";
      else
          reason = "-- line already flushed";
      warnx("warning: can't back up %s", reason);
  }