/junkcode/tinkertim@gmail.com-grawk/grawk.c
C | 600 lines | 438 code | 59 blank | 103 comment | 98 complexity | 1bae13728bf6d7cf68fc1ca01658f1d6 MD5 | raw file
Possible License(s): Apache-2.0, GPL-3.0, BSD-3-Clause, LGPL-3.0, GPL-2.0, LGPL-2.1, CC0-1.0
- /* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of the original program's authors nor the names of its
- * contributors may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
- /* Some example usages:
- * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
- * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
- * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
- * cat syslog | grawk dhclient '$0'
- * cat myservice.log | grawk -F , error '$3'
- *
- * Contributors:
- * Tim Post, Nicholas Clements, Alex Karlov
- * We hope that you find this useful! */
- /* FIXME:
- * readline() should probably be renamed
- */
- /* TODO:
- * Add a tail -f like behavior that applies expressions and fields
- * Recursive (like grep -r) or at least honor symlinks ? */
#include <getopt.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
/* Release string; printed by usage() and by the version option in main(). */
#define VERSION "1.0.7"
/* Contact printed on the "Report bugs to" line of usage(). */
#define MAINTAINER "Tim Post <echo@echoreply.us>"
/* Compiled form of an awk-style output pattern.
 * awkcomp() fills it in and awkfree() releases the strings it owns. */
typedef struct awk_pattern
{
    int maxfield;   /* Largest field number referenced by a $N entry */
    int numfields;  /* Number of entries stored in fields[] */
    char **fields;  /* Heap-allocated entries: literal text, or '\1'
                     * followed by the digits of a $N reference */
} awk_pat_t;
/* Long option table handed to getopt_long() in main(). Each entry maps a
 * --long-name onto the short option character that main() switches on.
 * The all-zero entry terminates the table. */
static struct option const long_options[] = {
    { .name = "ignore-case",     .has_arg = no_argument,       .flag = NULL, .val = 'i' },
    { .name = "with-filename",   .has_arg = no_argument,       .flag = NULL, .val = 'W' },
    { .name = "no-filename",     .has_arg = no_argument,       .flag = NULL, .val = 'w' },
    { .name = "line-number",     .has_arg = no_argument,       .flag = NULL, .val = 'n' },
    { .name = "field-separator", .has_arg = required_argument, .flag = NULL, .val = 'F' },
    { .name = "help",            .has_arg = no_argument,       .flag = NULL, .val = 'h' },
    { .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
    { .name = NULL,              .has_arg = 0,                 .flag = NULL, .val = 0 }
};
/* Delimiter set used when splitting a matched line into fields.
 * main() points it at a heap string before any line is processed;
 * process_line() passes it to strtok(). */
char *tokdelim = NULL;
/* Program name as shown in the usage text */
const char *progname = "grawk";
- /* Prototypes */
- static void usage(void);
- static int process(FILE *, regex_t, awk_pat_t, char *, int);
- static int process_line(char *, awk_pat_t, char *, char *);
- static int process_files(int, char **, regex_t, awk_pat_t, int, int);
- static int process_pipe(regex_t, awk_pat_t, int);
- static int awkcomp(awk_pat_t *, char *);
- static void awkfree(awk_pat_t *);
- static char *readline(FILE *);
- static void usage(void)
- {
- printf("%s %s\n", progname, VERSION);
- printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
- progname);
- printf("Options:\n");
- printf(" --help "
- "show help and examples\n");
- printf(" -i, --ignore-case "
- "ignore case distinctions\n");
- printf(" -W, --with-filename "
- "Print filename for each match\n");
- printf(" -w, --no-filename "
- "Never print filename for each match\n");
- printf(" -n, --line-number "
- "Prefix each line of output with line number.\n");
- printf(" -F fs, --field-separator=fs "
- "Use fs as the field separator\n");
- printf(" -h, --help "
- "Print a brief help summary\n");
- printf(" -v, --version "
- "Print version information and exit normally\n");
- printf(" PATTERN "
- "a basic regular expression\n");
- printf(" OUTPUT_PATTERN "
- "awk-style print statement; defines "
- "output fields\n");
- printf("\nExamples:\n");
- printf(" Retrieve joe123's home directory from /etc/passwd:\n");
- printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
- printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
- printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
- printf("\n Use as a simple grep:\n");
- printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
- printf("\nReport bugs to %s\n", MAINTAINER);
- }
/* readline() - read one line from the file handle.
 *
 * Reads up to (and consumes) the next '\n' or '\r'; the terminator is not
 * stored. The result is always NUL-terminated and must be free()d by the
 * caller.
 *
 * End-of-file contract (callers test feof(fp) after each call):
 *   - If EOF is reached after at least one character was read (a final
 *     line with no trailing newline), that text is returned and the EOF
 *     indicator is cleared so the caller still treats it as a line; the
 *     following call then reports EOF.
 *   - If EOF is reached before any character was read, an empty string is
 *     returned with the EOF indicator left set.
 *
 * Returns NULL if memory cannot be allocated or a read error occurs.
 * The stream is never closed here; it stays owned by the caller. */
static char *readline(FILE *fp)
{
    enum { CHUNK = 256 };
    size_t cap = CHUNK, len = 0;
    char *str = malloc(cap);

    if (str == NULL)
        return NULL;

    for (;;) {
        int ch = fgetc(fp);

        if (ch == EOF) {
            if (ferror(fp)) {
                /* A read error would otherwise repeat forever */
                free(str);
                return NULL;
            }
            if (len > 0)
                clearerr(fp);   /* keep the unterminated last line */
            break;
        }
        if (ch == '\n' || ch == '\r')
            break;

        str[len++] = (char)ch;

        /* Always keep one spare byte for the terminating NUL */
        if (len + 1 == cap) {
            char *bigger = realloc(str, cap + CHUNK);
            if (bigger == NULL) {
                free(str);
                return NULL;
            }
            str = bigger;
            cap += CHUNK;
        }
    }

    str[len] = '\0';
    return str;
}
- /* process() - this is the actual processing where we compare against a
- * previously compiled grep pattern and output based on the awk pattern.
- * The file is opened by the calling function. We pass in an empty string
- * if we don't want to show the filename. If we want to show the line number,
- * the value of show_lineno is 1. If we find a line, return 1. If no line is
- * found, return 0. If an error occurs, return -1. */
- static int process(FILE *fp, regex_t re, awk_pat_t awk,
- char *filename, int show_lineno)
- {
- char *inbuf = NULL;
- char slineno[32];
- memset(slineno, 0, sizeof(slineno));
- long lineno = 0;
- int found = 0;
- while (1) {
- inbuf = readline(fp);
- if (!inbuf)
- break;
- if (feof(fp))
- break;
- lineno++;
- if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) {
- found = 1; // Found a line.
- if (show_lineno)
- sprintf(slineno, "%ld:", lineno);
- if (process_line(inbuf, awk, filename, slineno)) {
- fprintf (stderr, "Error processing line [%s]\n", inbuf);
- free (inbuf);
- return -1;
- }
- }
- free (inbuf);
- }
- if (inbuf)
- free(inbuf);
- return found;
- }
- /* process_files() - process one or more files from the command-line.
- * If at least one line is found, return 1, else return 0 if no lines
- * were found or an error occurs. */
- static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
- int show_filename, int show_lineno)
- {
- int i, found = 0;
- FILE *fp = NULL;
- struct stat fstat;
- char filename[1024];
- memset(filename, 0, sizeof(filename));
- for(i = 0; i < numfiles; i++) {
- if (stat(files[i], &fstat) == -1) {
- /* Did a file get deleted from the time we started running? */
- fprintf (stderr,
- "Error accessing file %s. No such file\n", files[i]);
- continue;
- }
- if (show_filename)
- sprintf( filename, "%s:", files[i] );
- /* For now, we aren't recursive. Perhaps allow symlinks? */
- if ((fstat.st_mode & S_IFMT) != S_IFREG)
- continue;
- if (NULL == (fp = fopen(files[i], "r"))) {
- fprintf(stderr,
- "Error opening file %s. Permission denied\n", files[i]);
- continue;
- }
- if (process(fp, re, awk, filename, show_lineno) == 1)
- found = 1;
- fclose(fp);
- }
- return found;
- }
- /* process_pipe() - process input from stdin */
- static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
- {
- if (process(stdin, re, awk, "", show_lineno) == 1)
- return 1;
- return 0;
- }
- /* process_line() - process the line based on the awk-style pattern and output
- * the results. */
- static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno)
- {
- char full_line[3] = { '\1', '0', '\0' };
- if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
- /* If the caller only wants the whole string, oblige, quickly. */
- fprintf (stdout, "%s%s%s\n", filename, lineno, inbuf);
- return 0;
- }
- /* Build an array of fields from the line using strtok()
- * TODO: make this re-entrant so that grawk can be spawned as a thread */
- char **linefields = (char **)malloc((awk.maxfield + 1) * sizeof(char *));
- char *wrkbuf = strdup(inbuf), *tbuf;
- int count = 0, n = 1, i;
- for (i = 0; i < (awk.maxfield + 1); i++) {
- linefields[i] = NULL;
- }
- tbuf = strtok(wrkbuf, tokdelim);
- if(tbuf)
- linefields[0] = strdup(tbuf);
- while (tbuf != NULL) {
- tbuf = strtok(NULL, tokdelim);
- if (!tbuf)
- break;
- count++;
- if (count > awk.maxfield)
- break;
- linefields[count] = strdup(tbuf);
- if (!linefields[count]) {
- fprintf(stderr, "Could not allocate memory to process file %s\n",
- filename);
- return -1;
- }
- }
- /* For each field in the awk structure,
- * find the field and print it to stdout.*/
- fprintf(stdout, "%s%s", filename, lineno); /* if needed */
- for (i = 0; i < awk.numfields; i++) {
- if (awk.fields[i][0] == '\1') {
- n = atoi(&awk.fields[i][1]);
- if (n == 0) {
- fprintf(stdout, "%s", inbuf);
- continue;
- }
- if (linefields[n-1])
- fprintf(stdout, "%s", linefields[n-1]);
- continue;
- } else
- fprintf(stdout, "%s", awk.fields[i]);
- }
- fprintf(stdout, "\n");
- /* Cleanup */
- if (wrkbuf)
- free(wrkbuf);
- for (i = 0; i < count; i++) {
- free(linefields[i]);
- linefields[i] = (char *) NULL;
- }
- free(linefields);
- linefields = (char **)NULL;
- return 0;
- }
- /* awkcomp() - little awk-style print format compilation routine.
- * Returns structure with the apattern broken down into an array for easier
- * comparison and printing. Handles string literals as well as fields and
- * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
- * Returns -1 on error, else 0. */
- static int awkcomp(awk_pat_t *awk, char *apattern)
- {
- awk->maxfield = 0;
- awk->numfields = 0;
- awk->fields = NULL;
- awk->fields = (char **)malloc(sizeof(char *));
- int i, num = 0;
- char *wrkbuf;
- wrkbuf = (char *)malloc(strlen(apattern) + 1);
- if (wrkbuf == NULL) {
- free(awk);
- fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
- return -1;
- }
- int inString = 0, offs = 0;
- char ch;
- for (i = 0; i < strlen( apattern ); i++) {
- ch = apattern[i];
- if (inString && ch != '"' && ch != '\\') {
- wrkbuf[offs++] = ch;
- continue;
- }
- if (ch == ' ')
- continue;
- switch (ch) {
- /* Handle delimited strings inside of literal strings */
- case '\\':
- if (inString) {
- wrkbuf[offs++] = apattern[++i];
- continue;
- } else {
- /* Unexpected and unconventional escape (can get these
- * from improper invocations of sed in a pipe with grawk),
- * if sed is used to build the field delimiters */
- fprintf(stderr,
- "Unexpected character \'\\\' in output format\n");
- return -1;
- }
- break;
- /* Beginning or ending of a literal string */
- case '"':
- inString = !inString;
- if (inString)
- continue;
- break;
- /* Handle the awk-like $# field variables */
- case '$':
- /* We use a non-printable ASCII character to
- * delimit the string field values.*/
- wrkbuf[offs++] = '\1';
- /* We also need the max. field number */
- num = 0;
- while (1) {
- ch = apattern[++i];
- /* Not a number, exit this loop */
- if (ch < 48 || ch > 57) {
- i--;
- break;
- }
- num = (num * 10) + (ch - 48);
- wrkbuf[offs++] = ch;
- }
- if (num > awk->maxfield)
- awk->maxfield = num;
- /* Incomplete expression, a $ not followed by a number */
- if (wrkbuf[1] == 0) {
- fprintf(stderr, "Incomplete field descriptor at "
- "or near character %d in awk pattern\n", i+1);
- return -1;
- }
- break;
- /* Field separator */
- case ',':
- wrkbuf[offs++] = ' ';
- break;
- }
- /* if wrkbuf has nothing, we've got rubbish. Continue in the hopes
- * that something else makes sense. */
- if (offs == 0)
- continue;
- /* End of a field reached, put it into awk->fields */
- wrkbuf[offs] = '\0';
- awk->fields =
- (char **)realloc(awk->fields, (awk->numfields + 1)
- * sizeof(char *));
- if (!awk->fields ) {
- fprintf(stderr,
- "Memory allocation error (awk->fields) in awkcomp()\n");
- return -1;
- }
- awk->fields[awk->numfields] = strdup(wrkbuf);
- if (!awk->fields[awk->numfields]) {
- fprintf(stderr,
- "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
- awk->numfields);
- return -1;
- }
- memset(wrkbuf, 0, strlen(apattern) + 1);
- awk->numfields++;
- offs = 0;
- }
- free(wrkbuf);
- if (awk->numfields == 0) {
- fprintf(stderr,
- "Unable to parse and compile the pattern; no fields found\n");
- return -1;
- }
- return 0;
- }
- /* awkfree() - free a previously allocated awk_pat structure */
- static void awkfree(awk_pat_t *awk )
- {
- int i;
- for (i = 0; i < awk->numfields; i++)
- free(awk->fields[i]);
- free(awk->fields);
- }
- int main(int argc, char **argv)
- {
- char *apattern = NULL, *gpattern = NULL;
- char **files = NULL;
- int numfiles = 0, i = 0, c = 0;
- int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
- if (argc < 3) {
- usage();
- return EXIT_FAILURE;
- }
- tokdelim = strdup("\t\r\n ");
- while (1) {
- int opt_ind = 0;
- while (c != -1) {
- c = getopt_long(argc, argv, "wWhinF:", long_options, &opt_ind);
- switch (c) {
- case 'w':
- with_filename = 0;
- no_filename = 1;
- break;
- case 'i':
- ignore_case = 1;
- break;
- case 'W':
- with_filename = 1;
- no_filename = 0;
- break;
- case 'n':
- line_number = 1;
- break;
- case 'F':
- tokdelim = realloc(tokdelim, 3 + strlen(optarg) + 1);
- memset(tokdelim, 0, 3 + strlen( optarg ) + 1);
- sprintf(tokdelim, "\t\r\n%s", optarg);
- break;
- case 'h':
- usage();
- free(tokdelim);
- return EXIT_SUCCESS;
- break;
- case 'v':
- printf("%s\n", VERSION);
- free(tokdelim);
- return EXIT_SUCCESS;
- break;
- }
- }
- /* Now we'll grab our patterns and files. */
- if ((argc - optind) < 2) {
- usage();
- free(tokdelim);
- return EXIT_FAILURE;
- }
- /* pattern one will be our "grep" pattern */
- gpattern = strdup(argv[optind]);
- if (gpattern == NULL) {
- fprintf(stderr, "Memory allocation error");
- exit(EXIT_FAILURE);
- }
- optind++;
- /* pattern two is our "awk" pattern */
- apattern = strdup(argv[optind]);
- if(apattern == NULL) {
- fprintf(stderr, "Memory allocation error");
- exit(EXIT_FAILURE);
- }
- optind++;
- /* Anything that remains is a file or wildcard which should be
- * expanded by the calling shell. */
- if (optind < argc) {
- numfiles = argc - optind;
- files = (char **)malloc(sizeof(char *) * (numfiles + 1));
- for (i = 0; i < numfiles; i++) {
- files[i] = strdup(argv[optind + i]);
- }
- }
- /* If the number of files is greater than 1 then we default to
- * showing the filename unless specifically directed against it.*/
- if (numfiles > 1 && no_filename == 0)
- with_filename = 1;
- break;
- }
- /* Process everything */
- regex_t re;
- int cflags = 0, rc = 0;
- if (ignore_case)
- cflags = REG_ICASE;
- /* compile the regular expression parser */
- if (regcomp(&re, gpattern, cflags)) {
- fprintf(stderr,
- "Error compiling grep-style pattern [%s]\n", gpattern);
- return EXIT_FAILURE;
- }
- awk_pat_t awk;
- if (awkcomp(&awk, apattern))
- {
- fprintf(stderr,
- "Error compiling awk-style pattern [%s]\n", apattern);
- return EXIT_FAILURE;
- }
- if (numfiles > 0) {
- if(process_files(
- numfiles, files, re, awk, with_filename, line_number) == 0)
- rc = 255; // We'll return 255 if no lines were found.
- } else {
- if(process_pipe(re, awk, line_number) == 0)
- rc = 255;
- }
- /* Destructor */
- for (i = 0; i < numfiles; i++) {
- if (files[i])
- free(files[i]);
- }
- free(files);
- /* Awk pattern */
- free(apattern);
- /* Grep pattern */
- free(gpattern);
- /* Grep regex */
- regfree(&re);
- /* Awk pattern structure */
- awkfree(&awk);
- /* Token delimiter (might have been freed elsewhere) */
- if (tokdelim)
- free(tokdelim);
- return rc;
- }