PageRenderTime 74ms CodeModel.GetById 15ms app.highlight 49ms RepoModel.GetById 1ms app.codeStats 0ms

/contrib/cvs/diff/diff.c

https://bitbucket.org/freebsd/freebsd-head/
C | 1266 lines | 937 code | 154 blank | 175 comment | 233 complexity | 287c8ea734c89721622e462ecbe70210 MD5 | raw file
   1/* GNU DIFF entry routine.
   2   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
   3
   4This file is part of GNU DIFF.
   5
   6GNU DIFF is free software; you can redistribute it and/or modify
   7it under the terms of the GNU General Public License as published by
   8the Free Software Foundation; either version 2, or (at your option)
   9any later version.
  10
  11GNU DIFF is distributed in the hope that it will be useful,
  12but WITHOUT ANY WARRANTY; without even the implied warranty of
  13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14GNU General Public License for more details.
  15
  16*/
  17
  18/* GNU DIFF was written by Mike Haertel, David Hayes,
  19   Richard Stallman, Len Tower, and Paul Eggert.  */
  20
  21#define GDIFF_MAIN
  22#include "diff.h"
  23#include <signal.h>
  24#include "getopt.h"
  25
  26#ifdef HAVE_FNMATCH
  27# include <fnmatch.h> /* This is supposed to be available on Posix systems */
  28#else /* HAVE_FNMATCH */
  29# include "fnmatch.h" /* Our substitute */
  30#endif /* HAVE_FNMATCH */
  31
  32#ifndef DEFAULT_WIDTH
  33#define DEFAULT_WIDTH 130
  34#endif
  35
  36#ifndef GUTTER_WIDTH_MINIMUM
  37#define GUTTER_WIDTH_MINIMUM 3
  38#endif
  39
  40/* diff.c has a real initialize_main function. */
  41#ifdef initialize_main
  42#undef initialize_main
  43#endif
  44
  45static char const *filetype PARAMS((struct stat const *));
  46static char *option_list PARAMS((char **, int));
  47static int add_exclude_file PARAMS((char const *));
  48static int ck_atoi PARAMS((char const *, int *));
  49static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
  50static int specify_format PARAMS((char **, char *));
  51static void add_exclude PARAMS((char const *));
  52static void add_regexp PARAMS((struct regexp_list **, char const *));
  53static void specify_style PARAMS((enum output_style));
  54static int try_help PARAMS((char const *));
  55static void check_output PARAMS((FILE *));
  56static void usage PARAMS((void));
  57static void initialize_main PARAMS((int *, char ***));
  58
  59/* Nonzero for -r: if comparing two directories,
  60   compare their common subdirectories recursively.  */
  61
  62static int recursive;
  63
  64/* For debugging: don't do discard_confusing_lines.  */
  65
  66int no_discards;
  67
  68#if HAVE_SETMODE
  69/* I/O mode: nonzero only if using binary input/output.  */
  70static int binary_I_O;
  71#endif
  72
  73/* Return a string containing the command options with which diff was invoked.
  74   Spaces appear between what were separate ARGV-elements.
  75   There is a space at the beginning but none at the end.
  76   If there were no options, the result is an empty string.
  77
  78   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
  79   the length of that vector.  */
  80
  81static char *
  82option_list (optionvec, count)
  83     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
  84     int count;
  85{
  86  int i;
  87  size_t length = 0;
  88  char *result;
  89
  90  for (i = 0; i < count; i++)
  91    length += strlen (optionvec[i]) + 1;
  92
  93  result = xmalloc (length + 1);
  94  result[0] = 0;
  95
  96  for (i = 0; i < count; i++)
  97    {
  98      strcat (result, " ");
  99      strcat (result, optionvec[i]);
 100    }
 101
 102  return result;
 103}
 104
 105/* Convert STR to a positive integer, storing the result in *OUT.
 106   If STR is not a valid integer, return -1 (otherwise 0). */
 107static int
 108ck_atoi (str, out)
 109     char const *str;
 110     int *out;
 111{
 112  char const *p;
 113  for (p = str; *p; p++)
 114    if (*p < '0' || *p > '9')
 115      return -1;
 116
 117  *out = atoi (optarg);
 118  return 0;
 119}
 120
 121/* Keep track of excluded file name patterns.  */
 122
 123static char const **exclude;
 124static int exclude_alloc, exclude_count;
 125
 126int
 127excluded_filename (f)
 128     char const *f;
 129{
 130  int i;
 131  for (i = 0;  i < exclude_count;  i++)
 132    if (fnmatch (exclude[i], f, 0) == 0)
 133      return 1;
 134  return 0;
 135}
 136
 137static void
 138add_exclude (pattern)
 139     char const *pattern;
 140{
 141  if (exclude_alloc <= exclude_count)
 142    exclude = (char const **)
 143	      (exclude_alloc == 0
 144	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
 145	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
 146
 147  exclude[exclude_count++] = pattern;
 148}
 149
 150static int
 151add_exclude_file (name)
 152     char const *name;
 153{
 154  struct file_data f;
 155  char *p, *q, *lim;
 156
 157  f.name = optarg;
 158  f.desc = (strcmp (optarg, "-") == 0
 159	    ? STDIN_FILENO
 160	    : open (optarg, O_RDONLY, 0));
 161  if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
 162    return -1;
 163
 164  sip (&f, 1);
 165  slurp (&f);
 166
 167  for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
 168    {
 169      q = (char *) memchr (p, '\n', lim - p);
 170      if (!q)
 171	q = lim;
 172      *q++ = 0;
 173      add_exclude (p);
 174    }
 175
 176  return close (f.desc);
 177}
 178
 179/* The numbers 129- that appear in the fourth element of some entries
 180   tell the big switch in `diff_run' how to process those options.  */
 181
 182static struct option const longopts[] =
 183{
 184  {"ignore-blank-lines", 0, 0, 'B'},
 185  {"context", 2, 0, 'C'},
 186  {"ifdef", 1, 0, 'D'},
 187  {"show-function-line", 1, 0, 'F'},
 188  {"speed-large-files", 0, 0, 'H'},
 189  {"ignore-matching-lines", 1, 0, 'I'},
 190  {"label", 1, 0, 'L'},
 191  {"file-label", 1, 0, 'L'},	/* An alias, no longer recommended */
 192  {"new-file", 0, 0, 'N'},
 193  {"entire-new-file", 0, 0, 'N'},	/* An alias, no longer recommended */
 194  {"unidirectional-new-file", 0, 0, 'P'},
 195  {"starting-file", 1, 0, 'S'},
 196  {"initial-tab", 0, 0, 'T'},
 197  {"width", 1, 0, 'W'},
 198  {"text", 0, 0, 'a'},
 199  {"ascii", 0, 0, 'a'},		/* An alias, no longer recommended */
 200  {"ignore-space-change", 0, 0, 'b'},
 201  {"minimal", 0, 0, 'd'},
 202  {"ed", 0, 0, 'e'},
 203  {"forward-ed", 0, 0, 'f'},
 204  {"ignore-case", 0, 0, 'i'},
 205  {"paginate", 0, 0, 'l'},
 206  {"print", 0, 0, 'l'},		/* An alias, no longer recommended */
 207  {"rcs", 0, 0, 'n'},
 208  {"show-c-function", 0, 0, 'p'},
 209  {"brief", 0, 0, 'q'},
 210  {"recursive", 0, 0, 'r'},
 211  {"report-identical-files", 0, 0, 's'},
 212  {"expand-tabs", 0, 0, 't'},
 213  {"version", 0, 0, 'v'},
 214  {"ignore-all-space", 0, 0, 'w'},
 215  {"exclude", 1, 0, 'x'},
 216  {"exclude-from", 1, 0, 'X'},
 217  {"side-by-side", 0, 0, 'y'},
 218  {"unified", 2, 0, 'U'},
 219  {"left-column", 0, 0, 129},
 220  {"suppress-common-lines", 0, 0, 130},
 221  {"sdiff-merge-assist", 0, 0, 131},
 222  {"old-line-format", 1, 0, 132},
 223  {"new-line-format", 1, 0, 133},
 224  {"unchanged-line-format", 1, 0, 134},
 225  {"line-format", 1, 0, 135},
 226  {"old-group-format", 1, 0, 136},
 227  {"new-group-format", 1, 0, 137},
 228  {"unchanged-group-format", 1, 0, 138},
 229  {"changed-group-format", 1, 0, 139},
 230  {"horizon-lines", 1, 0, 140},
 231  {"help", 0, 0, 141},
 232  {"binary", 0, 0, 142},
 233  {0, 0, 0, 0}
 234};
 235
 236
 237
 238int
 239diff_run (argc, argv, out, callbacks_arg)
 240     int argc;
 241     char *argv[];
 242     const char *out;
 243     const struct diff_callbacks *callbacks_arg;
 244{
 245  int val;
 246  int c;
 247  int prev = -1;
 248  int width = DEFAULT_WIDTH;
 249  int show_c_function = 0;
 250  int optind_old;
 251  int opened_file = 0;
 252
 253  callbacks = callbacks_arg;
 254
 255  /* Do our initializations.  */
 256  initialize_main (&argc, &argv);
 257  optind_old = optind;
 258  optind = 0;
 259
 260  /* Set the jump buffer, so that diff may abort execution without
 261     terminating the process. */
 262  val = setjmp (diff_abort_buf);
 263  if (val != 0)
 264    {
 265      optind = optind_old;
 266      if (opened_file)
 267	fclose (outfile);
 268      return val;
 269    }
 270
 271  /* Decode the options.  */
 272  while ((c = getopt_long (argc, argv,
 273			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
 274			   longopts, 0)) != EOF)
 275    {
 276      switch (c)
 277	{
 278	  /* All digits combine in decimal to specify the context-size.  */
 279	case '1':
 280	case '2':
 281	case '3':
 282	case '4':
 283	case '5':
 284	case '6':
 285	case '7':
 286	case '8':
 287	case '9':
 288	case '0':
 289	  if (context == -1)
 290	    context = 0;
 291	  /* If a context length has already been specified,
 292	     more digits allowed only if they follow right after the others.
 293	     Reject two separate runs of digits, or digits after -C.  */
 294	  else if (prev < '0' || prev > '9')
 295	    fatal ("context length specified twice");
 296
 297	  context = context * 10 + c - '0';
 298	  break;
 299
 300	case 'a':
 301	  /* Treat all files as text files; never treat as binary.  */
 302	  always_text_flag = 1;
 303	  break;
 304
 305	case 'b':
 306	  /* Ignore changes in amount of white space.  */
 307	  ignore_space_change_flag = 1;
 308	  ignore_some_changes = 1;
 309	  ignore_some_line_changes = 1;
 310	  break;
 311
 312	case 'B':
 313	  /* Ignore changes affecting only blank lines.  */
 314	  ignore_blank_lines_flag = 1;
 315	  ignore_some_changes = 1;
 316	  break;
 317
 318	case 'C':		/* +context[=lines] */
 319	case 'U':		/* +unified[=lines] */
 320	  if (optarg)
 321	    {
 322	      if (context >= 0)
 323		fatal ("context length specified twice");
 324
 325	      if (ck_atoi (optarg, &context))
 326		fatal ("invalid context length argument");
 327	    }
 328
 329	  /* Falls through.  */
 330	case 'c':
 331	  /* Make context-style output.  */
 332	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
 333	  break;
 334
 335	case 'd':
 336	  /* Don't discard lines.  This makes things slower (sometimes much
 337	     slower) but will find a guaranteed minimal set of changes.  */
 338	  no_discards = 1;
 339	  break;
 340
 341	case 'D':
 342	  /* Make merged #ifdef output.  */
 343	  specify_style (OUTPUT_IFDEF);
 344	  {
 345	    int i, err = 0;
 346	    static char const C_ifdef_group_formats[] =
 347	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
 348	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
 349			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
 350			       - 8 /* 5*"%%" + 3*"%c" */);
 351	    sprintf (b, C_ifdef_group_formats,
 352		     optarg, optarg, 0,
 353		     optarg, optarg, 0, 0,
 354		     optarg, optarg, optarg);
 355	    for (i = 0; i < 4; i++)
 356	      {
 357		err |= specify_format (&group_format[i], b);
 358		b += strlen (b) + 1;
 359	      }
 360	    if (err)
 361	      diff_error ("conflicting #ifdef formats", 0, 0);
 362	  }
 363	  break;
 364
 365	case 'e':
 366	  /* Make output that is a valid `ed' script.  */
 367	  specify_style (OUTPUT_ED);
 368	  break;
 369
 370	case 'f':
 371	  /* Make output that looks vaguely like an `ed' script
 372	     but has changes in the order they appear in the file.  */
 373	  specify_style (OUTPUT_FORWARD_ED);
 374	  break;
 375
 376	case 'F':
 377	  /* Show, for each set of changes, the previous line that
 378	     matches the specified regexp.  Currently affects only
 379	     context-style output.  */
 380	  add_regexp (&function_regexp_list, optarg);
 381	  break;
 382
 383	case 'h':
 384	  /* Split the files into chunks of around 1500 lines
 385	     for faster processing.  Usually does not change the result.
 386
 387	     This currently has no effect.  */
 388	  break;
 389
 390	case 'H':
 391	  /* Turn on heuristics that speed processing of large files
 392	     with a small density of changes.  */
 393	  heuristic = 1;
 394	  break;
 395
 396	case 'i':
 397	  /* Ignore changes in case.  */
 398	  ignore_case_flag = 1;
 399	  ignore_some_changes = 1;
 400	  ignore_some_line_changes = 1;
 401	  break;
 402
 403	case 'I':
 404	  /* Ignore changes affecting only lines that match the
 405	     specified regexp.  */
 406	  add_regexp (&ignore_regexp_list, optarg);
 407	  ignore_some_changes = 1;
 408	  break;
 409
 410	case 'l':
 411	  /* Pass the output through `pr' to paginate it.  */
 412	  paginate_flag = 1;
 413#if !defined(SIGCHLD) && defined(SIGCLD)
 414#define SIGCHLD SIGCLD
 415#endif
 416#ifdef SIGCHLD
 417	  /* Pagination requires forking and waiting, and
 418	     System V fork+wait does not work if SIGCHLD is ignored.  */
 419	  signal (SIGCHLD, SIG_DFL);
 420#endif
 421	  break;
 422
 423	case 'L':
 424	  /* Specify file labels for `-c' output headers.  */
 425	  if (!file_label[0])
 426	    file_label[0] = optarg;
 427	  else if (!file_label[1])
 428	    file_label[1] = optarg;
 429	  else
 430	    fatal ("too many file label options");
 431	  break;
 432
 433	case 'n':
 434	  /* Output RCS-style diffs, like `-f' except that each command
 435	     specifies the number of lines affected.  */
 436	  specify_style (OUTPUT_RCS);
 437	  break;
 438
 439	case 'N':
 440	  /* When comparing directories, if a file appears only in one
 441	     directory, treat it as present but empty in the other.  */
 442	  entire_new_file_flag = 1;
 443	  break;
 444
 445	case 'p':
 446	  /* Make context-style output and show name of last C function.  */
 447	  show_c_function = 1;
 448	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
 449	  break;
 450
 451	case 'P':
 452	  /* When comparing directories, if a file appears only in
 453	     the second directory of the two,
 454	     treat it as present but empty in the other.  */
 455	  unidirectional_new_file_flag = 1;
 456	  break;
 457
 458	case 'q':
 459	  no_details_flag = 1;
 460	  break;
 461
 462	case 'r':
 463	  /* When comparing directories,
 464	     recursively compare any subdirectories found.  */
 465	  recursive = 1;
 466	  break;
 467
 468	case 's':
 469	  /* Print a message if the files are the same.  */
 470	  print_file_same_flag = 1;
 471	  break;
 472
 473	case 'S':
 474	  /* When comparing directories, start with the specified
 475	     file name.  This is used for resuming an aborted comparison.  */
 476	  dir_start_file = optarg;
 477	  break;
 478
 479	case 't':
 480	  /* Expand tabs to spaces in the output so that it preserves
 481	     the alignment of the input files.  */
 482	  tab_expand_flag = 1;
 483	  break;
 484
 485	case 'T':
 486	  /* Use a tab in the output, rather than a space, before the
 487	     text of an input line, so as to keep the proper alignment
 488	     in the input line without changing the characters in it.  */
 489	  tab_align_flag = 1;
 490	  break;
 491
 492	case 'u':
 493	  /* Output the context diff in unidiff format.  */
 494	  specify_style (OUTPUT_UNIFIED);
 495	  break;
 496
 497	case 'v':
 498	  if (callbacks && callbacks->write_stdout)
 499	    {
 500	      (*callbacks->write_stdout) ("diff - GNU diffutils version ");
 501	      (*callbacks->write_stdout) (diff_version_string);
 502	      (*callbacks->write_stdout) ("\n");
 503	    }
 504	  else
 505	    printf ("diff - GNU diffutils version %s\n", diff_version_string);
 506	  return 0;
 507
 508	case 'w':
 509	  /* Ignore horizontal white space when comparing lines.  */
 510	  ignore_all_space_flag = 1;
 511	  ignore_some_changes = 1;
 512	  ignore_some_line_changes = 1;
 513	  break;
 514
 515	case 'x':
 516	  add_exclude (optarg);
 517	  break;
 518
 519	case 'X':
 520	  if (add_exclude_file (optarg) != 0)
 521	    pfatal_with_name (optarg);
 522	  break;
 523
 524	case 'y':
 525	  /* Use side-by-side (sdiff-style) columnar output. */
 526	  specify_style (OUTPUT_SDIFF);
 527	  break;
 528
 529	case 'W':
 530	  /* Set the line width for OUTPUT_SDIFF.  */
 531	  if (ck_atoi (optarg, &width) || width <= 0)
 532	    fatal ("column width must be a positive integer");
 533	  break;
 534
 535	case 129:
 536	  sdiff_left_only = 1;
 537	  break;
 538
 539	case 130:
 540	  sdiff_skip_common_lines = 1;
 541	  break;
 542
 543	case 131:
 544	  /* sdiff-style columns output. */
 545	  specify_style (OUTPUT_SDIFF);
 546	  sdiff_help_sdiff = 1;
 547	  break;
 548
 549	case 132:
 550	case 133:
 551	case 134:
 552	  specify_style (OUTPUT_IFDEF);
 553	  if (specify_format (&line_format[c - 132], optarg) != 0)
 554	    diff_error ("conflicting line format", 0, 0);
 555	  break;
 556
 557	case 135:
 558	  specify_style (OUTPUT_IFDEF);
 559	  {
 560	    int i, err = 0;
 561	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
 562	      err |= specify_format (&line_format[i], optarg);
 563	    if (err)
 564	      diff_error ("conflicting line format", 0, 0);
 565	  }
 566	  break;
 567
 568	case 136:
 569	case 137:
 570	case 138:
 571	case 139:
 572	  specify_style (OUTPUT_IFDEF);
 573	  if (specify_format (&group_format[c - 136], optarg) != 0)
 574	    diff_error ("conflicting group format", 0, 0);
 575	  break;
 576
 577	case 140:
 578	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
 579	    fatal ("horizon must be a nonnegative integer");
 580	  break;
 581
 582	case 141:
 583	  usage ();
 584	  if (! callbacks || ! callbacks->write_stdout)
 585	    check_output (stdout);
 586	  return 0;
 587
 588	case 142:
 589	  /* Use binary I/O when reading and writing data.
 590	     On Posix hosts, this has no effect.  */
 591#if HAVE_SETMODE
 592	  binary_I_O = 1;
 593#  if 0
 594	  /* Because this code is leftover from pre-library days,
 595	     there is no way to set stdout back to the default mode
 596	     when we are done.  As it turns out, I think the only
 597	     parts of CVS that pass out == NULL, and thus cause diff
 598	     to write to stdout, are "cvs diff" and "cvs rdiff".  So
 599	     I'm not going to worry about this too much yet.  */
 600	  setmode (STDOUT_FILENO, O_BINARY);
 601#  else
 602	  if (out == NULL)
 603	    error (0, 0, "warning: did not set stdout to binary mode");
 604#  endif
 605#endif
 606	  break;
 607
 608	default:
 609	  return try_help (0);
 610	}
 611      prev = c;
 612    }
 613
 614  if (argc - optind != 2)
 615    return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
 616
 617  {
 618    /*
 619     *	We maximize first the half line width, and then the gutter width,
 620     *	according to the following constraints:
 621     *	1.  Two half lines plus a gutter must fit in a line.
 622     *	2.  If the half line width is nonzero:
 623     *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
 624     *	    b.  If tabs are not expanded to spaces,
 625     *		a half line plus a gutter is an integral number of tabs,
 626     *		so that tabs in the right column line up.
 627     */
 628    int t = tab_expand_flag ? 1 : TAB_WIDTH;
 629    int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
 630    sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
 631    sdiff_column2_offset = sdiff_half_width ? off : width;
 632  }
 633
 634  if (show_c_function && output_style != OUTPUT_UNIFIED)
 635    specify_style (OUTPUT_CONTEXT);
 636
 637  if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
 638    context = 0;
 639  else if (context == -1)
 640    /* Default amount of context for -c.  */
 641    context = 3;
 642
 643  if (output_style == OUTPUT_IFDEF)
 644    {
 645      /* Format arrays are char *, not char const *,
 646	 because integer formats are temporarily modified.
 647	 But it is safe to assign a constant like "%=" to a format array,
 648	 since "%=" does not format any integers.  */
 649      int i;
 650      for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
 651	if (!line_format[i])
 652	  line_format[i] = "%l\n";
 653      if (!group_format[OLD])
 654	group_format[OLD]
 655	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
 656      if (!group_format[NEW])
 657	group_format[NEW]
 658	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
 659      if (!group_format[UNCHANGED])
 660	group_format[UNCHANGED] = "%=";
 661      if (!group_format[CHANGED])
 662	group_format[CHANGED] = concat (group_format[OLD],
 663					group_format[NEW], "");
 664    }
 665
 666  no_diff_means_no_output =
 667    (output_style == OUTPUT_IFDEF ?
 668      (!*group_format[UNCHANGED]
 669       || (strcmp (group_format[UNCHANGED], "%=") == 0
 670	   && !*line_format[UNCHANGED]))
 671     : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
 672
 673  switch_string = option_list (argv + 1, optind - 1);
 674
 675  if (callbacks && callbacks->write_output)
 676    {
 677      if (out != NULL)
 678	{
 679	  diff_error ("write callback with output file", 0, 0);
 680	  return 2;
 681	}
 682    }
 683  else
 684    {
 685      if (out == NULL)
 686	outfile = stdout;
 687      else
 688	{
 689#if HAVE_SETMODE
 690	  /* A diff which is full of ^Z and such isn't going to work
 691	     very well in text mode.  */
 692	  if (binary_I_O)
 693	    outfile = fopen (out, "wb");
 694	  else
 695#endif
 696	    outfile = fopen (out, "w");
 697	  if (outfile == NULL)
 698	    {
 699	      perror_with_name ("could not open output file");
 700	      return 2;
 701	    }
 702	  opened_file = 1;
 703	}
 704    }
 705
 706  val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
 707
 708  /* Print any messages that were saved up for last.  */
 709  print_message_queue ();
 710
 711  free (switch_string);
 712
 713  optind = optind_old;
 714
 715  if (! callbacks || ! callbacks->write_output)
 716    check_output (outfile);
 717
 718  if (opened_file)
 719    if (fclose (outfile) != 0)
 720	perror_with_name ("close error on output file");
 721
 722  return val;
 723}
 724
 725/* Add the compiled form of regexp PATTERN to REGLIST.  */
 726
 727static void
 728add_regexp (reglist, pattern)
 729     struct regexp_list **reglist;
 730     char const *pattern;
 731{
 732  struct regexp_list *r;
 733  char const *m;
 734
 735  r = (struct regexp_list *) xmalloc (sizeof (*r));
 736  bzero (r, sizeof (*r));
 737  r->buf.fastmap = xmalloc (256);
 738  m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
 739  if (m != 0)
 740    diff_error ("%s: %s", pattern, m);
 741
 742  /* Add to the start of the list, since it's easier than the end.  */
 743  r->next = *reglist;
 744  *reglist = r;
 745}
 746
 747static int
 748try_help (reason)
 749     char const *reason;
 750{
 751  if (reason)
 752    diff_error ("%s", reason, 0);
 753  diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
 754  return 2;
 755}
 756
 757static void
 758check_output (file)
 759    FILE *file;
 760{
 761  if (ferror (file) || fflush (file) != 0)
 762    fatal ("write error");
 763}
 764
 765static char const * const option_help[] = {
 766"-i  --ignore-case  Consider upper- and lower-case to be the same.",
 767"-w  --ignore-all-space  Ignore all white space.",
 768"-b  --ignore-space-change  Ignore changes in the amount of white space.",
 769"-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
 770"-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
 771#if HAVE_SETMODE
 772"--binary  Read and write data in binary mode.",
 773#endif
 774"-a  --text  Treat all files as text.\n",
 775"-c  -C NUM  --context[=NUM]  Output NUM (default 2) lines of copied context.",
 776"-u  -U NUM  --unified[=NUM]  Output NUM (default 2) lines of unified context.",
 777"  -NUM  Use NUM context lines.",
 778"  -L LABEL  --label LABEL  Use LABEL instead of file name.",
 779"  -p  --show-c-function  Show which C function each change is in.",
 780"  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
 781"-q  --brief  Output only whether files differ.",
 782"-e  --ed  Output an ed script.",
 783"-n  --rcs  Output an RCS format diff.",
 784"-y  --side-by-side  Output in two columns.",
 785"  -W NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
 786"  --left-column  Output only the left column of common lines.",
 787"  --suppress-common-lines  Do not output common lines.",
 788"-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
 789"--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
 790"--line-format=LFMT  Similar, but format all input lines with LFMT.",
 791"--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
 792"  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
 793"  GFMT may contain:",
 794"    %<  lines from FILE1",
 795"    %>  lines from FILE2",
 796"    %=  lines common to FILE1 and FILE2",
 797"    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
 798"      LETTERs are as follows for new group, lower case for old group:",
 799"        F  first line number",
 800"        L  last line number",
 801"        N  number of lines = L-F+1",
 802"        E  F-1",
 803"        M  L+1",
 804"  LFMT may contain:",
 805"    %L  contents of line",
 806"    %l  contents of line, excluding any trailing newline",
 807"    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
 808"  Either GFMT or LFMT may contain:",
 809"    %%  %",
 810"    %c'C'  the single character C",
 811"    %c'\\OOO'  the character with octal code OOO\n",
 812"-l  --paginate  Pass the output through `pr' to paginate it.",
 813"-t  --expand-tabs  Expand tabs to spaces in output.",
 814"-T  --initial-tab  Make tabs line up by prepending a tab.\n",
 815"-r  --recursive  Recursively compare any subdirectories found.",
 816"-N  --new-file  Treat absent files as empty.",
 817"-P  --unidirectional-new-file  Treat absent first files as empty.",
 818"-s  --report-identical-files  Report when two files are the same.",
 819"-x PAT  --exclude=PAT  Exclude files that match PAT.",
 820"-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
 821"-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
 822"--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
 823"-d  --minimal  Try hard to find a smaller set of changes.",
 824"-H  --speed-large-files  Assume large files and many scattered small changes.\n",
 825"-v  --version  Output version info.",
 826"--help  Output this help.",
 8270
 828};
 829
 830static void
 831usage ()
 832{
 833  char const * const *p;
 834
 835  if (callbacks && callbacks->write_stdout)
 836    {
 837      (*callbacks->write_stdout) ("Usage: ");
 838      (*callbacks->write_stdout) (diff_program_name);
 839      (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
 840      for (p = option_help;  *p;  p++)
 841	{
 842	  (*callbacks->write_stdout) ("  ");
 843	  (*callbacks->write_stdout) (*p);
 844	  (*callbacks->write_stdout) ("\n");
 845	}
 846      (*callbacks->write_stdout)
 847	("\nIf FILE1 or FILE2 is `-', read standard input.\n");
 848    }
 849  else
 850    {
 851      printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
 852      for (p = option_help;  *p;  p++)
 853	printf ("  %s\n", *p);
 854      printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
 855    }
 856}
 857
 858static int
 859specify_format (var, value)
 860     char **var;
 861     char *value;
 862{
 863  int err = *var ? strcmp (*var, value) : 0;
 864  *var = value;
 865  return err;
 866}
 867
 868static void
 869specify_style (style)
 870     enum output_style style;
 871{
 872  if (output_style != OUTPUT_NORMAL
 873      && output_style != style)
 874    diff_error ("conflicting specifications of output style", 0, 0);
 875  output_style = style;
 876}
 877
 878static char const *
 879filetype (st)
 880     struct stat const *st;
 881{
 882  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
 883     To keep diagnostics grammatical, the returned string must start
 884     with a consonant.  */
 885
 886  if (S_ISREG (st->st_mode))
 887    {
 888      if (st->st_size == 0)
 889	return "regular empty file";
 890      /* Posix.2 section 5.14.2 seems to suggest that we must read the file
 891	 and guess whether it's C, Fortran, etc., but this is somewhat useless
 892	 and doesn't reflect historical practice.  We're allowed to guess
 893	 wrong, so we don't bother to read the file.  */
 894      return "regular file";
 895    }
 896  if (S_ISDIR (st->st_mode)) return "directory";
 897
 898  /* other Posix.1 file types */
 899#ifdef S_ISBLK
 900  if (S_ISBLK (st->st_mode)) return "block special file";
 901#endif
 902#ifdef S_ISCHR
 903  if (S_ISCHR (st->st_mode)) return "character special file";
 904#endif
 905#ifdef S_ISFIFO
 906  if (S_ISFIFO (st->st_mode)) return "fifo";
 907#endif
 908
 909  /* other Posix.1b file types */
 910#ifdef S_TYPEISMQ
 911  if (S_TYPEISMQ (st)) return "message queue";
 912#endif
 913#ifdef S_TYPEISSEM
 914  if (S_TYPEISSEM (st)) return "semaphore";
 915#endif
 916#ifdef S_TYPEISSHM
 917  if (S_TYPEISSHM (st)) return "shared memory object";
 918#endif
 919
 920  /* other popular file types */
 921  /* S_ISLNK is impossible with `fstat' and `stat'.  */
 922#ifdef S_ISSOCK
 923  if (S_ISSOCK (st->st_mode)) return "socket";
 924#endif
 925
 926  return "weird file";
 927}
 928
 929/* Compare two files (or dirs) with specified names
 930   DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
 931   (if DIR0 is 0, then the name is just NAME0, etc.)
 932   This is self-contained; it opens the files and closes them.
 933
 934   Value is 0 if files are the same, 1 if different,
 935   2 if there is a problem opening them.  */
 936
 937static int
 938compare_files (dir0, name0, dir1, name1, depth)
 939     char const *dir0, *dir1;
 940     char const *name0, *name1;
 941     int depth;
 942{
 943  struct file_data inf[2];
 944  register int i;
 945  int val;
 946  int same_files;
 947  int failed = 0;
 948  char *free0 = 0, *free1 = 0;
 949
 950  /* If this is directory comparison, perhaps we have a file
 951     that exists only in one of the directories.
 952     If so, just print a message to that effect.  */
 953
 954  if (! ((name0 != 0 && name1 != 0)
 955	 || (unidirectional_new_file_flag && name1 != 0)
 956	 || entire_new_file_flag))
 957    {
 958      char const *name = name0 == 0 ? name1 : name0;
 959      char const *dir = name0 == 0 ? dir1 : dir0;
 960      message ("Only in %s: %s\n", dir, name);
 961      /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
 962      return 1;
 963    }
 964
 965  bzero (inf, sizeof (inf));
 966
 967  /* Mark any nonexistent file with -1 in the desc field.  */
 968  /* Mark unopened files (e.g. directories) with -2. */
 969
 970  inf[0].desc = name0 == 0 ? -1 : -2;
 971  inf[1].desc = name1 == 0 ? -1 : -2;
 972
 973  /* Now record the full name of each file, including nonexistent ones.  */
 974
 975  if (name0 == 0)
 976    name0 = name1;
 977  if (name1 == 0)
 978    name1 = name0;
 979
 980  inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
 981  inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
 982
 983  /* Stat the files.  Record whether they are directories.  */
 984
 985  for (i = 0; i <= 1; i++)
 986    {
 987      if (inf[i].desc != -1)
 988	{
 989	  int stat_result;
 990
 991	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
 992	    {
 993	      inf[i].stat = inf[0].stat;
 994	      stat_result = 0;
 995	    }
 996	  else if (strcmp (inf[i].name, "-") == 0)
 997	    {
 998	      inf[i].desc = STDIN_FILENO;
 999	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
1000	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
1001		{
1002		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1003		  if (pos == -1)
1004		    stat_result = -1;
1005		  else
1006		    {
1007		      if (pos <= inf[i].stat.st_size)
1008			inf[i].stat.st_size -= pos;
1009		      else
1010			inf[i].stat.st_size = 0;
1011		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
1012		      time (&inf[i].stat.st_mtime);
1013		    }
1014		}
1015	    }
1016	  else
1017	    stat_result = stat (inf[i].name, &inf[i].stat);
1018
1019	  if (stat_result != 0)
1020	    {
1021	      perror_with_name (inf[i].name);
1022	      failed = 1;
1023	    }
1024	  else
1025	    {
1026	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1027	      if (inf[1 - i].desc == -1)
1028		{
1029		  inf[1 - i].dir_p = inf[i].dir_p;
1030		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1031		}
1032	    }
1033	}
1034    }
1035
1036  if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1037    {
1038      /* If one is a directory, and it was specified in the command line,
1039	 use the file in that dir with the other file's basename.  */
1040
1041      int fnm_arg = inf[0].dir_p;
1042      int dir_arg = 1 - fnm_arg;
1043      char const *fnm = inf[fnm_arg].name;
1044      char const *dir = inf[dir_arg].name;
1045      char const *p = filename_lastdirchar (fnm);
1046      char const *filename = inf[dir_arg].name
1047	= dir_file_pathname (dir, p ? p + 1 : fnm);
1048
1049      if (strcmp (fnm, "-") == 0)
1050	fatal ("can't compare - to a directory");
1051
1052      if (stat (filename, &inf[dir_arg].stat) != 0)
1053	{
1054	  perror_with_name (filename);
1055	  failed = 1;
1056	}
1057      else
1058	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1059    }
1060
1061  if (failed)
1062    {
1063
1064      /* If either file should exist but does not, return 2.  */
1065
1066      val = 2;
1067
1068    }
1069  else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1070			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1071	   && no_diff_means_no_output)
1072    {
1073      /* The two named files are actually the same physical file.
1074	 We know they are identical without actually reading them.  */
1075
1076      val = 0;
1077    }
1078  else if (inf[0].dir_p & inf[1].dir_p)
1079    {
1080      if (output_style == OUTPUT_IFDEF)
1081	fatal ("-D option not supported with directories");
1082
1083      /* If both are directories, compare the files in them.  */
1084
1085      if (depth > 0 && !recursive)
1086	{
1087	  /* But don't compare dir contents one level down
1088	     unless -r was specified.  */
1089	  message ("Common subdirectories: %s and %s\n",
1090		   inf[0].name, inf[1].name);
1091	  val = 0;
1092	}
1093      else
1094	{
1095	  val = diff_dirs (inf, compare_files, depth);
1096	}
1097
1098    }
1099  else if ((inf[0].dir_p | inf[1].dir_p)
1100	   || (depth > 0
1101	       && (! S_ISREG (inf[0].stat.st_mode)
1102		   || ! S_ISREG (inf[1].stat.st_mode))))
1103    {
1104      /* Perhaps we have a subdirectory that exists only in one directory.
1105	 If so, just print a message to that effect.  */
1106
1107      if (inf[0].desc == -1 || inf[1].desc == -1)
1108	{
1109	  if ((inf[0].dir_p | inf[1].dir_p)
1110	      && recursive
1111	      && (entire_new_file_flag
1112		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1113	    val = diff_dirs (inf, compare_files, depth);
1114	  else
1115	    {
1116	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1117	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1118	      message ("Only in %s: %s\n", dir, name0);
1119	      val = 1;
1120	    }
1121	}
1122      else
1123	{
1124	  /* We have two files that are not to be compared.  */
1125
1126	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1127	  message5 ("File %s is a %s while file %s is a %s\n",
1128		    inf[0].name, filetype (&inf[0].stat),
1129		    inf[1].name, filetype (&inf[1].stat));
1130
1131	  /* This is a difference.  */
1132	  val = 1;
1133	}
1134    }
1135  else if ((no_details_flag & ~ignore_some_changes)
1136	   && inf[0].stat.st_size != inf[1].stat.st_size
1137	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1138	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1139    {
1140      message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1141      val = 1;
1142    }
1143  else
1144    {
1145      /* Both exist and neither is a directory.  */
1146
1147      /* Open the files and record their descriptors.  */
1148
1149      if (inf[0].desc == -2)
1150	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1151	  {
1152	    perror_with_name (inf[0].name);
1153	    failed = 1;
1154	  }
1155      if (inf[1].desc == -2)
1156	{
1157	  if (same_files)
1158	    inf[1].desc = inf[0].desc;
1159	  else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1160	    {
1161	      perror_with_name (inf[1].name);
1162	      failed = 1;
1163	    }
1164	}
1165
1166#if HAVE_SETMODE
1167      if (binary_I_O)
1168	for (i = 0; i <= 1; i++)
1169	  if (0 <= inf[i].desc)
1170	    setmode (inf[i].desc, O_BINARY);
1171#endif
1172
1173      /* Compare the files, if no error was found.  */
1174
1175      val = failed ? 2 : diff_2_files (inf, depth);
1176
1177      /* Close the file descriptors.  */
1178
1179      if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1180	{
1181	  perror_with_name (inf[0].name);
1182	  val = 2;
1183	}
1184      if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1185	  && close (inf[1].desc) != 0)
1186	{
1187	  perror_with_name (inf[1].name);
1188	  val = 2;
1189	}
1190    }
1191
1192  /* Now the comparison has been done, if no error prevented it,
1193     and VAL is the value this function will return.  */
1194
1195  if (val == 0 && !inf[0].dir_p)
1196    {
1197      if (print_file_same_flag)
1198	message ("Files %s and %s are identical\n",
1199		 inf[0].name, inf[1].name);
1200    }
1201  else
1202    flush_output ();
1203
1204  if (free0)
1205    free (free0);
1206  if (free1)
1207    free (free1);
1208
1209  return val;
1210}
1211
1212/* Initialize status variables and flag variables used in libdiff,
1213   to permit repeated calls to diff_run. */
1214
1215static void
1216initialize_main (argcp, argvp)
1217    int *argcp;
1218    char ***argvp;
1219{
1220  /* These variables really must be reset each time diff_run is called. */
1221  output_style = OUTPUT_NORMAL;
1222  context = -1;
1223  file_label[0] = NULL;
1224  file_label[1] = NULL;
1225  diff_program_name = (*argvp)[0];
1226  outfile = NULL;
1227
1228  /* Reset these also, just for safety's sake. (If one invocation turns
1229     on ignore_case_flag, it must be turned off before diff_run is called
1230     again.  But it is possible to make many diffs before encountering
1231     such a problem. */
1232  recursive = 0;
1233  no_discards = 0;
1234#if HAVE_SETMODE
1235  binary_I_O = 0;
1236#endif
1237  no_diff_means_no_output = 0;
1238  always_text_flag = 0;
1239  horizon_lines = 0;
1240  ignore_space_change_flag = 0;
1241  ignore_all_space_flag = 0;
1242  ignore_blank_lines_flag = 0;
1243  ignore_some_line_changes = 0;
1244  ignore_some_changes = 0;
1245  ignore_case_flag = 0;
1246  function_regexp_list = NULL;
1247  ignore_regexp_list = NULL;
1248  no_details_flag = 0;
1249  print_file_same_flag = 0;
1250  tab_align_flag = 0;
1251  tab_expand_flag = 0;
1252  dir_start_file = NULL;
1253  entire_new_file_flag = 0;
1254  unidirectional_new_file_flag = 0;
1255  paginate_flag = 0;
1256  bzero (group_format, sizeof (group_format));
1257  bzero (line_format, sizeof (line_format));
1258  sdiff_help_sdiff = 0;
1259  sdiff_left_only = 0;
1260  sdiff_skip_common_lines = 0;
1261  sdiff_half_width = 0;
1262  sdiff_column2_offset = 0;
1263  switch_string = NULL;
1264  heuristic = 0;
1265  bzero (files, sizeof (files));
1266}