PageRenderTime 36ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/junkcode/tinkertim@gmail.com-grawk/grawk.c

http://github.com/rustyrussell/ccan
C | 600 lines | 438 code | 59 blank | 103 comment | 98 complexity | 1bae13728bf6d7cf68fc1ca01658f1d6 MD5 | raw file
Possible License(s): Apache-2.0, GPL-3.0, BSD-3-Clause, LGPL-3.0, GPL-2.0, LGPL-2.1, CC0-1.0
  1. /* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
  2. * All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. *
  7. * Redistributions of source code must retain the above copyright notice, this
  8. * list of conditions and the following disclaimer.
  9. *
  10. * Redistributions in binary form must reproduce the above copyright notice,
  11. * this list of conditions and the following disclaimer in the documentation
  12. * and/or other materials provided with the distribution.
  13. *
  14. * Neither the name of the original program's authors nor the names of its
  15. * contributors may be used to endorse or promote products derived from this
  16. * software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. /* Some example usages:
  31. * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
  32. * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
  33. * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
  34. * cat syslog | grawk dhclient '$0'
  35. * cat myservice.log | grawk -F , error '$3'
  36. *
  37. * Contributors:
  38. * Tim Post, Nicholas Clements, Alex Karlov
  39. * We hope that you find this useful! */
  40. /* FIXME:
  41. * readline() should probably be renamed
  42. */
  43. /* TODO:
  44. * Add a tail -f like behavior that applies expressions and fields
  45. * Recursive (like grep -r) or at least honor symlinks ? */
  46. #include <stdio.h>
  47. #include <stdlib.h>
  48. #include <string.h>
  49. #include <getopt.h>
  50. #include <sys/types.h>
  51. #include <sys/stat.h>
  52. #include <regex.h>
  /* Release identifier printed by usage() and by the version option */
  #define VERSION "1.0.7"

  /* Contact printed at the end of the help text for bug reports */
  #define MAINTAINER "Tim Post <echo@echoreply.us>"
  /* Compiled form of an awk-style output pattern, as produced by awkcomp()
   * and released by awkfree(). */
  typedef struct awk_pattern {
      int maxfield;   /* Largest $N field number referenced by the pattern */
      int numfields;  /* Number of entries stored in fields[] */
      char **fields;  /* The pattern pieces, one allocated string per entry */
  } awk_pat_t;
  /* Long command-line options understood by getopt_long(); each entry maps
   * to the short option character given in .val. The all-zero entry
   * terminates the table. */
  static const struct option long_options[] = {
      { .name = "ignore-case",     .has_arg = no_argument,       .flag = NULL, .val = 'i' },
      { .name = "with-filename",   .has_arg = no_argument,       .flag = NULL, .val = 'W' },
      { .name = "no-filename",     .has_arg = no_argument,       .flag = NULL, .val = 'w' },
      { .name = "line-number",     .has_arg = no_argument,       .flag = NULL, .val = 'n' },
      { .name = "field-separator", .has_arg = required_argument, .flag = NULL, .val = 'F' },
      { .name = "help",            .has_arg = no_argument,       .flag = NULL, .val = 'h' },
      { .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
      { .name = NULL,              .has_arg = 0,                 .flag = NULL, .val = 0 }
  };
  74. /* The official name of the program */
  75. const char *progname = "grawk";
  76. /* Global for delimiters used in tokenizing strings */
  77. char *tokdelim = NULL;
  78. /* Prototypes */
  79. static void usage(void);
  80. static int process(FILE *, regex_t, awk_pat_t, char *, int);
  81. static int process_line(char *, awk_pat_t, char *, char *);
  82. static int process_files(int, char **, regex_t, awk_pat_t, int, int);
  83. static int process_pipe(regex_t, awk_pat_t, int);
  84. static int awkcomp(awk_pat_t *, char *);
  85. static void awkfree(awk_pat_t *);
  86. static char *readline(FILE *);
  87. static void usage(void)
  88. {
  89. printf("%s %s\n", progname, VERSION);
  90. printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
  91. progname);
  92. printf("Options:\n");
  93. printf(" --help "
  94. "show help and examples\n");
  95. printf(" -i, --ignore-case "
  96. "ignore case distinctions\n");
  97. printf(" -W, --with-filename "
  98. "Print filename for each match\n");
  99. printf(" -w, --no-filename "
  100. "Never print filename for each match\n");
  101. printf(" -n, --line-number "
  102. "Prefix each line of output with line number.\n");
  103. printf(" -F fs, --field-separator=fs "
  104. "Use fs as the field separator\n");
  105. printf(" -h, --help "
  106. "Print a brief help summary\n");
  107. printf(" -v, --version "
  108. "Print version information and exit normally\n");
  109. printf(" PATTERN "
  110. "a basic regular expression\n");
  111. printf(" OUTPUT_PATTERN "
  112. "awk-style print statement; defines "
  113. "output fields\n");
  114. printf("\nExamples:\n");
  115. printf(" Retrieve joe123's home directory from /etc/passwd:\n");
  116. printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
  117. printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
  118. printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
  119. printf("\n Use as a simple grep:\n");
  120. printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
  121. printf("\nReport bugs to %s\n", MAINTAINER);
  122. }
  /* readline() - read one line from fp into a freshly allocated buffer.
   *
   * A line ends at "\n", "\r\n" or a lone "\r"; the terminator is consumed
   * but not stored. The returned string is always NUL-terminated and must be
   * released with free().
   *
   * End-of-input protocol (callers test feof(fp) after the call):
   *   - If the input ends in the middle of a line, that partial line is
   *     returned and the stream's EOF indicator is cleared, so the caller
   *     still processes it. The following call then reports end of input.
   *   - If the input ends with nothing left to return, an empty string is
   *     returned and feof(fp) is left set.
   *
   * Returns NULL on allocation failure or on a read error. The stream is
   * never closed here; it stays owned by the caller. */
  static char *readline(FILE *fp)
  {
      const size_t step = 256;
      size_t cap = 256, len = 0;
      char *str = malloc(cap);

      if (str == NULL)
          return NULL;

      for (;;) {
          int ch = fgetc(fp);

          if (ch == EOF) {
              if (ferror(fp)) {
                  free(str);
                  return NULL;
              }
              /* Partial last line: hide EOF from the caller for now so the
               * line is not thrown away; the next call hits EOF again. */
              if (len > 0)
                  clearerr(fp);
              break;
          }

          if (ch == '\n')
              break;

          if (ch == '\r') {
              /* Treat "\r\n" as a single terminator */
              int next = fgetc(fp);

              if (next == EOF) {
                  if (feof(fp))
                      clearerr(fp); /* this line is complete; deliver it */
              } else if (next != '\n') {
                  ungetc(next, fp);
              }
              break;
          }

          /* Keep room for this character plus the terminating NUL */
          if (len + 1 >= cap) {
              char *grown = realloc(str, cap + step);

              if (grown == NULL) {
                  free(str);
                  return NULL;
              }
              str = grown;
              cap += step;
          }
          str[len++] = (char)ch;
      }

      str[len] = '\0';
      return str;
  }
  152. /* process() - this is the actual processing where we compare against a
  153. * previously compiled grep pattern and output based on the awk pattern.
  154. * The file is opened by the calling function. We pass in an empty string
  155. * if we don't want to show the filename. If we want to show the line number,
  156. * the value of show_lineno is 1. If we find a line, return 1. If no line is
  157. * found, return 0. If an error occurs, return -1. */
  158. static int process(FILE *fp, regex_t re, awk_pat_t awk,
  159. char *filename, int show_lineno)
  160. {
  161. char *inbuf = NULL;
  162. char slineno[32];
  163. memset(slineno, 0, sizeof(slineno));
  164. long lineno = 0;
  165. int found = 0;
  166. while (1) {
  167. inbuf = readline(fp);
  168. if (!inbuf)
  169. break;
  170. if (feof(fp))
  171. break;
  172. lineno++;
  173. if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) {
  174. found = 1; // Found a line.
  175. if (show_lineno)
  176. sprintf(slineno, "%ld:", lineno);
  177. if (process_line(inbuf, awk, filename, slineno)) {
  178. fprintf (stderr, "Error processing line [%s]\n", inbuf);
  179. free (inbuf);
  180. return -1;
  181. }
  182. }
  183. free (inbuf);
  184. }
  185. if (inbuf)
  186. free(inbuf);
  187. return found;
  188. }
  189. /* process_files() - process one or more files from the command-line.
  190. * If at least one line is found, return 1, else return 0 if no lines
  191. * were found or an error occurs. */
  192. static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
  193. int show_filename, int show_lineno)
  194. {
  195. int i, found = 0;
  196. FILE *fp = NULL;
  197. struct stat fstat;
  198. char filename[1024];
  199. memset(filename, 0, sizeof(filename));
  200. for(i = 0; i < numfiles; i++) {
  201. if (stat(files[i], &fstat) == -1) {
  202. /* Did a file get deleted from the time we started running? */
  203. fprintf (stderr,
  204. "Error accessing file %s. No such file\n", files[i]);
  205. continue;
  206. }
  207. if (show_filename)
  208. sprintf( filename, "%s:", files[i] );
  209. /* For now, we aren't recursive. Perhaps allow symlinks? */
  210. if ((fstat.st_mode & S_IFMT) != S_IFREG)
  211. continue;
  212. if (NULL == (fp = fopen(files[i], "r"))) {
  213. fprintf(stderr,
  214. "Error opening file %s. Permission denied\n", files[i]);
  215. continue;
  216. }
  217. if (process(fp, re, awk, filename, show_lineno) == 1)
  218. found = 1;
  219. fclose(fp);
  220. }
  221. return found;
  222. }
  223. /* process_pipe() - process input from stdin */
  224. static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
  225. {
  226. if (process(stdin, re, awk, "", show_lineno) == 1)
  227. return 1;
  228. return 0;
  229. }
  230. /* process_line() - process the line based on the awk-style pattern and output
  231. * the results. */
  232. static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno)
  233. {
  234. char full_line[3] = { '\1', '0', '\0' };
  235. if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
  236. /* If the caller only wants the whole string, oblige, quickly. */
  237. fprintf (stdout, "%s%s%s\n", filename, lineno, inbuf);
  238. return 0;
  239. }
  240. /* Build an array of fields from the line using strtok()
  241. * TODO: make this re-entrant so that grawk can be spawned as a thread */
  242. char **linefields = (char **)malloc((awk.maxfield + 1) * sizeof(char *));
  243. char *wrkbuf = strdup(inbuf), *tbuf;
  244. int count = 0, n = 1, i;
  245. for (i = 0; i < (awk.maxfield + 1); i++) {
  246. linefields[i] = NULL;
  247. }
  248. tbuf = strtok(wrkbuf, tokdelim);
  249. if(tbuf)
  250. linefields[0] = strdup(tbuf);
  251. while (tbuf != NULL) {
  252. tbuf = strtok(NULL, tokdelim);
  253. if (!tbuf)
  254. break;
  255. count++;
  256. if (count > awk.maxfield)
  257. break;
  258. linefields[count] = strdup(tbuf);
  259. if (!linefields[count]) {
  260. fprintf(stderr, "Could not allocate memory to process file %s\n",
  261. filename);
  262. return -1;
  263. }
  264. }
  265. /* For each field in the awk structure,
  266. * find the field and print it to stdout.*/
  267. fprintf(stdout, "%s%s", filename, lineno); /* if needed */
  268. for (i = 0; i < awk.numfields; i++) {
  269. if (awk.fields[i][0] == '\1') {
  270. n = atoi(&awk.fields[i][1]);
  271. if (n == 0) {
  272. fprintf(stdout, "%s", inbuf);
  273. continue;
  274. }
  275. if (linefields[n-1])
  276. fprintf(stdout, "%s", linefields[n-1]);
  277. continue;
  278. } else
  279. fprintf(stdout, "%s", awk.fields[i]);
  280. }
  281. fprintf(stdout, "\n");
  282. /* Cleanup */
  283. if (wrkbuf)
  284. free(wrkbuf);
  285. for (i = 0; i < count; i++) {
  286. free(linefields[i]);
  287. linefields[i] = (char *) NULL;
  288. }
  289. free(linefields);
  290. linefields = (char **)NULL;
  291. return 0;
  292. }
  293. /* awkcomp() - little awk-style print format compilation routine.
  294. * Returns structure with the apattern broken down into an array for easier
  295. * comparison and printing. Handles string literals as well as fields and
  296. * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
  297. * Returns -1 on error, else 0. */
  298. static int awkcomp(awk_pat_t *awk, char *apattern)
  299. {
  300. awk->maxfield = 0;
  301. awk->numfields = 0;
  302. awk->fields = NULL;
  303. awk->fields = (char **)malloc(sizeof(char *));
  304. int i, num = 0;
  305. char *wrkbuf;
  306. wrkbuf = (char *)malloc(strlen(apattern) + 1);
  307. if (wrkbuf == NULL) {
  308. free(awk);
  309. fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
  310. return -1;
  311. }
  312. int inString = 0, offs = 0;
  313. char ch;
  314. for (i = 0; i < strlen( apattern ); i++) {
  315. ch = apattern[i];
  316. if (inString && ch != '"' && ch != '\\') {
  317. wrkbuf[offs++] = ch;
  318. continue;
  319. }
  320. if (ch == ' ')
  321. continue;
  322. switch (ch) {
  323. /* Handle delimited strings inside of literal strings */
  324. case '\\':
  325. if (inString) {
  326. wrkbuf[offs++] = apattern[++i];
  327. continue;
  328. } else {
  329. /* Unexpected and unconventional escape (can get these
  330. * from improper invocations of sed in a pipe with grawk),
  331. * if sed is used to build the field delimiters */
  332. fprintf(stderr,
  333. "Unexpected character \'\\\' in output format\n");
  334. return -1;
  335. }
  336. break;
  337. /* Beginning or ending of a literal string */
  338. case '"':
  339. inString = !inString;
  340. if (inString)
  341. continue;
  342. break;
  343. /* Handle the awk-like $# field variables */
  344. case '$':
  345. /* We use a non-printable ASCII character to
  346. * delimit the string field values.*/
  347. wrkbuf[offs++] = '\1';
  348. /* We also need the max. field number */
  349. num = 0;
  350. while (1) {
  351. ch = apattern[++i];
  352. /* Not a number, exit this loop */
  353. if (ch < 48 || ch > 57) {
  354. i--;
  355. break;
  356. }
  357. num = (num * 10) + (ch - 48);
  358. wrkbuf[offs++] = ch;
  359. }
  360. if (num > awk->maxfield)
  361. awk->maxfield = num;
  362. /* Incomplete expression, a $ not followed by a number */
  363. if (wrkbuf[1] == 0) {
  364. fprintf(stderr, "Incomplete field descriptor at "
  365. "or near character %d in awk pattern\n", i+1);
  366. return -1;
  367. }
  368. break;
  369. /* Field separator */
  370. case ',':
  371. wrkbuf[offs++] = ' ';
  372. break;
  373. }
  374. /* if wrkbuf has nothing, we've got rubbish. Continue in the hopes
  375. * that something else makes sense. */
  376. if (offs == 0)
  377. continue;
  378. /* End of a field reached, put it into awk->fields */
  379. wrkbuf[offs] = '\0';
  380. awk->fields =
  381. (char **)realloc(awk->fields, (awk->numfields + 1)
  382. * sizeof(char *));
  383. if (!awk->fields ) {
  384. fprintf(stderr,
  385. "Memory allocation error (awk->fields) in awkcomp()\n");
  386. return -1;
  387. }
  388. awk->fields[awk->numfields] = strdup(wrkbuf);
  389. if (!awk->fields[awk->numfields]) {
  390. fprintf(stderr,
  391. "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
  392. awk->numfields);
  393. return -1;
  394. }
  395. memset(wrkbuf, 0, strlen(apattern) + 1);
  396. awk->numfields++;
  397. offs = 0;
  398. }
  399. free(wrkbuf);
  400. if (awk->numfields == 0) {
  401. fprintf(stderr,
  402. "Unable to parse and compile the pattern; no fields found\n");
  403. return -1;
  404. }
  405. return 0;
  406. }
  407. /* awkfree() - free a previously allocated awk_pat structure */
  408. static void awkfree(awk_pat_t *awk )
  409. {
  410. int i;
  411. for (i = 0; i < awk->numfields; i++)
  412. free(awk->fields[i]);
  413. free(awk->fields);
  414. }
  415. int main(int argc, char **argv)
  416. {
  417. char *apattern = NULL, *gpattern = NULL;
  418. char **files = NULL;
  419. int numfiles = 0, i = 0, c = 0;
  420. int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
  421. if (argc < 3) {
  422. usage();
  423. return EXIT_FAILURE;
  424. }
  425. tokdelim = strdup("\t\r\n ");
  426. while (1) {
  427. int opt_ind = 0;
  428. while (c != -1) {
  429. c = getopt_long(argc, argv, "wWhinF:", long_options, &opt_ind);
  430. switch (c) {
  431. case 'w':
  432. with_filename = 0;
  433. no_filename = 1;
  434. break;
  435. case 'i':
  436. ignore_case = 1;
  437. break;
  438. case 'W':
  439. with_filename = 1;
  440. no_filename = 0;
  441. break;
  442. case 'n':
  443. line_number = 1;
  444. break;
  445. case 'F':
  446. tokdelim = realloc(tokdelim, 3 + strlen(optarg) + 1);
  447. memset(tokdelim, 0, 3 + strlen( optarg ) + 1);
  448. sprintf(tokdelim, "\t\r\n%s", optarg);
  449. break;
  450. case 'h':
  451. usage();
  452. free(tokdelim);
  453. return EXIT_SUCCESS;
  454. break;
  455. case 'v':
  456. printf("%s\n", VERSION);
  457. free(tokdelim);
  458. return EXIT_SUCCESS;
  459. break;
  460. }
  461. }
  462. /* Now we'll grab our patterns and files. */
  463. if ((argc - optind) < 2) {
  464. usage();
  465. free(tokdelim);
  466. return EXIT_FAILURE;
  467. }
  468. /* pattern one will be our "grep" pattern */
  469. gpattern = strdup(argv[optind]);
  470. if (gpattern == NULL) {
  471. fprintf(stderr, "Memory allocation error");
  472. exit(EXIT_FAILURE);
  473. }
  474. optind++;
  475. /* pattern two is our "awk" pattern */
  476. apattern = strdup(argv[optind]);
  477. if(apattern == NULL) {
  478. fprintf(stderr, "Memory allocation error");
  479. exit(EXIT_FAILURE);
  480. }
  481. optind++;
  482. /* Anything that remains is a file or wildcard which should be
  483. * expanded by the calling shell. */
  484. if (optind < argc) {
  485. numfiles = argc - optind;
  486. files = (char **)malloc(sizeof(char *) * (numfiles + 1));
  487. for (i = 0; i < numfiles; i++) {
  488. files[i] = strdup(argv[optind + i]);
  489. }
  490. }
  491. /* If the number of files is greater than 1 then we default to
  492. * showing the filename unless specifically directed against it.*/
  493. if (numfiles > 1 && no_filename == 0)
  494. with_filename = 1;
  495. break;
  496. }
  497. /* Process everything */
  498. regex_t re;
  499. int cflags = 0, rc = 0;
  500. if (ignore_case)
  501. cflags = REG_ICASE;
  502. /* compile the regular expression parser */
  503. if (regcomp(&re, gpattern, cflags)) {
  504. fprintf(stderr,
  505. "Error compiling grep-style pattern [%s]\n", gpattern);
  506. return EXIT_FAILURE;
  507. }
  508. awk_pat_t awk;
  509. if (awkcomp(&awk, apattern))
  510. {
  511. fprintf(stderr,
  512. "Error compiling awk-style pattern [%s]\n", apattern);
  513. return EXIT_FAILURE;
  514. }
  515. if (numfiles > 0) {
  516. if(process_files(
  517. numfiles, files, re, awk, with_filename, line_number) == 0)
  518. rc = 255; // We'll return 255 if no lines were found.
  519. } else {
  520. if(process_pipe(re, awk, line_number) == 0)
  521. rc = 255;
  522. }
  523. /* Destructor */
  524. for (i = 0; i < numfiles; i++) {
  525. if (files[i])
  526. free(files[i]);
  527. }
  528. free(files);
  529. /* Awk pattern */
  530. free(apattern);
  531. /* Grep pattern */
  532. free(gpattern);
  533. /* Grep regex */
  534. regfree(&re);
  535. /* Awk pattern structure */
  536. awkfree(&awk);
  537. /* Token delimiter (might have been freed elsewhere) */
  538. if (tokdelim)
  539. free(tokdelim);
  540. return rc;
  541. }