coreutils/diff.c

   1 /* vi: set sw=4 ts=4: */
   2 /*
   3  * Mini diff implementation for busybox, adapted from OpenBSD diff.
   4  *
   5  * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
   6  *
   7  * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
   8  */
   9
  10 /*
  11  * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
  12  *
  13  * Permission to
  14  * use, copy, modify, and distribute this software for any
  15  * purpose with or without fee is hereby granted, provided that the above
  16  * copyright notice and this permission notice appear in all copies.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  19  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  20  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  21  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  22  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  23  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  24  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  25  *
  26  * Sponsored in part by the Defense Advanced Research Projects
  27  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  28  * Materiel Command, USAF, under agreement number F39502-99-1-0512.
  29  */
  30
  31 #include <time.h>
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/stat.h>
  35 #include <ctype.h>
  36 #include <errno.h>
  37 #include <signal.h>
  38 #include <stdlib.h>
  39 #include <stdio.h>
  40 #include <stdarg.h>
  41 #include <string.h>
  42 #include <unistd.h>
  43 #include <sys/wait.h>
  44 #include <fcntl.h>
  45 #include <stddef.h>
  46 #include <paths.h>
  47 #include <dirent.h>
  48 #include "busybox.h"
  49
/*
 * Cap on the file size that prepare() uses when estimating how many
 * line records to allocate up front; larger files start from this
 * estimate and grow the array as needed.
 */
#define FSIZE_MAX 32768
  51
  52 /*
  53  * Output flags
  54  */
  55 #define D_HEADER        1        /* Print a header/footer between files */
  56 #define D_EMPTY1        2        /* Treat first file as empty (/dev/null) */
  57 #define D_EMPTY2        4        /* Treat second file as empty (/dev/null) */
  58
  59 /*
  60  * Status values for print_status() and diffreg() return values
  61  * Guide:
  62  * D_SAME - files are the same
  63  * D_DIFFER - files differ
  64  * D_BINARY - binary files differ
  65  * D_COMMON - subdirectory common to both dirs
  66  * D_ONLY - file only exists in one dir
  67  * D_MISMATCH1 - path1 a dir, path2 a file
  68  * D_MISMATCH2 - path1 a file, path2 a dir
  69  * D_ERROR - error occurred
  70  * D_SKIPPED1 - skipped path1 as it is a special file
  71  * D_SKIPPED2 - skipped path2 as it is a special file
  72  */
  73
  74 #define D_SAME          0
  75 #define D_DIFFER        (1<<0)
  76 #define D_BINARY        (1<<1)
  77 #define D_COMMON        (1<<2)
  78 #define D_ONLY          (1<<3)
  79 #define D_MISMATCH1     (1<<4)
  80 #define D_MISMATCH2     (1<<5)
  81 #define D_ERROR         (1<<6)
  82 #define D_SKIPPED1      (1<<7)
  83 #define D_SKIPPED2      (1<<8)
  84
/*
 * Command line options.
 * cmd_flags is a bitmask of the FLAG_* values below. The notes describe
 * only what the code visible in this part of the file does with each bit;
 * flags without a note are not referenced here.
 */
static unsigned long cmd_flags;
#define FLAG_a  (1<<0)  /* asciifile() reports every file as text */
#define FLAG_b  (1<<1)  /* readhash()/check(): whitespace runs are not compared exactly */
#define FLAG_d  (1<<2)  /* stone(): no retry bound (needs CONFIG_FEATURE_DIFF_MINIMAL) */
#define FLAG_i  (1<<3)  /* check(): takes the char-by-char path; presumably "ignore case" -- confirm against option parsing */
#define FLAG_N  (1<<4)
#define FLAG_q  (1<<5)  /* print_status() reports "differ"; change() records nothing */
#define FLAG_r  (1<<6)
#define FLAG_s  (1<<7)  /* print_status() reports identical files */
#define FLAG_S  (1<<8)
#define FLAG_t  (1<<9)  /* fetch() expands tabs to spaces */
#define FLAG_T  (1<<10) /* fetch() prints a tab after the line prefix character */
#define FLAG_U  (1<<11)
#define FLAG_w  (1<<12) /* readhash()/check(): whitespace is skipped entirely */
 100
/* context: number of context lines used by change() and dump_unified_vec() */
int context, status;
/* label[0]/label[1]: when non-NULL, printed by print_header() instead of file name and mtime */
char *start, *label[2];
/* stat data of the first and second file; size, mode and mtime are read in this file */
struct stat stb1, stb2;
char **dl;
int dl_count = 0;
 106
/*
 * One candidate match created by newcand(): line x of the first file
 * paired with line y of the second. pred is the index in clist of the
 * previous candidate in the chain that unravel() follows.
 */
struct cand {
        int x;
        int y;
        int pred;
};
 112
/*
 * One input line. prepare() stores the line hash in value; prune() stores
 * the position inside the pruned range in serial. file[0] and file[1] are
 * the per-file arrays; prepare() fills them starting at index 1.
 */
struct line {
        int serial;
        int value;
} *file[2];
 117
/*
 * The following struct is used to record change information when
 * doing a "context" or "unified" diff.  (see routine "change" to
 * understand the highly mnemonic field names)
 */
struct context_vec {
        int a;                        /* start line in old file */
        int b;                        /* end line in old file */
        int c;                        /* start line in new file */
        int d;                        /* end line in new file */
};
 129
/* Working state shared by the routines below. */
static int  *J;                 /* will be overlaid on class; J[i] is the second-file line matched to first-file line i, 0 if none */
static int  *class;             /* will be overlaid on file[0] */
static int  *klist;             /* will be overlaid on file[0] after class */
static int  *member;            /* will be overlaid on file[1] */
static int   clen;              /* number of entries of clist in use (next index newcand() hands out) */
static int   len[2];            /* number of lines read from each file by prepare() */
static int   pref, suff;        /* length of prefix and suffix */
static int   slen[2];           /* per-file line count left after prune() removed prefix and suffix */
static int   anychange;         /* set to 1 by change() once the header has been printed */
static long *ixnew;             /* will be overlaid on file[1]; check() stores the end offset of each second-file line */
static long *ixold;             /* will be overlaid on klist; check() stores the end offset of each first-file line */
static struct cand *clist;      /* merely a free storage pot for candidates */
static int   clistlen;          /* the length of clist */
static struct line *sfile[2];   /* shortened by pruning common prefix/suffix */
static struct context_vec *context_vec_start;   /* first record of the change buffer */
static struct context_vec *context_vec_end;     /* one past the last allocated record */
static struct context_vec *context_vec_ptr;     /* last record in use; start - 1 when the buffer is empty */
 147
 148 static void print_only(const char *path, size_t dirlen, const char *entry)
 149 {
 150         if (dirlen > 1)
 151                 dirlen--;
 152         printf("Only in %.*s: %s\n", (int)dirlen, path, entry);
 153 }
 154
 155 static void print_status(int val, char *path1, char *path2, char *entry)
 156 {
 157         switch (val) {
 158         case D_ONLY:
 159                 print_only(path1, strlen(path1), entry);
 160                 break;
 161         case D_COMMON:
 162                 printf("Common subdirectories: %s%s and %s%s\n",
 163                     path1, entry ? entry : "", path2, entry ? entry : "");
 164                 break;
 165         case D_BINARY:
 166                 printf("Binary files %s%s and %s%s differ\n",
 167                     path1, entry ? entry : "", path2, entry ? entry : "");
 168                 break;
 169         case D_DIFFER:
 170                 if (cmd_flags & FLAG_q)
 171                         printf("Files %s%s and %s%s differ\n",
 172                             path1, entry ? entry : "",
 173                             path2, entry ? entry : "");
 174                 break;
 175         case D_SAME:
 176                 if (cmd_flags & FLAG_s)
 177                         printf("Files %s%s and %s%s are identical\n",
 178                             path1, entry ? entry : "",
 179                             path2, entry ? entry : "");
 180                 break;
 181         case D_MISMATCH1:
 182                 printf("File %s%s is a directory while file %s%s is a regular file\n",
 183                     path1, entry ? entry : "", path2, entry ? entry : "");
 184                 break;
 185         case D_MISMATCH2:
 186                 printf("File %s%s is a regular file while file %s%s is a directory\n",
 187                     path1, entry ? entry : "", path2, entry ? entry : "");
 188                 break;
 189         case D_SKIPPED1:
 190                 printf("File %s%s is not a regular file or directory and was skipped\n",
 191                     path1, entry ? entry : "");
 192                 break;
 193         case D_SKIPPED2:
 194                 printf("File %s%s is not a regular file or directory and was skipped\n",
 195                     path2, entry ? entry : "");
 196                 break;
 197         }
 198 }
 199
 200 /*
 201  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
 202  */
 203 static int readhash(FILE *f)
 204 {
 205         int i, t, space;
 206         int sum;
 207
 208         sum = 1;
 209         space = 0;
 210         if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
 211                 if (FLAG_i)
 212                         for (i = 0; (t = getc(f)) != '\n'; i++) {
 213                                 if (t == EOF) {
 214                                         if (i == 0)
 215                                                 return (0);
 216                                         break;
 217                                 }
 218                                 sum = sum * 127 + t;
 219                         }
 220                 else
 221                         for (i = 0; (t = getc(f)) != '\n'; i++) {
 222                                 if (t == EOF) {
 223                                         if (i == 0)
 224                                                 return (0);
 225                                         break;
 226                                 }
 227                                 sum = sum * 127 + t;
 228                         }
 229         } else {
 230                 for (i = 0;;) {
 231                         switch (t = getc(f)) {
 232                         case '\t':
 233                         case '\r':
 234                         case '\v':
 235                         case '\f':
 236                         case ' ':
 237                                 space++;
 238                                 continue;
 239                         default:
 240                                 if (space && !(cmd_flags & FLAG_w)) {
 241                                         i++;
 242                                         space = 0;
 243                                 }
 244                                 sum = sum * 127 + t;
 245                                 i++;
 246                                 continue;
 247                         case EOF:
 248                                 if (i == 0)
 249                                         return (0);
 250                                 /* FALLTHROUGH */
 251                         case '\n':
 252                                 break;
 253                         }
 254                         break;
 255                 }
 256         }
 257         /*
 258          * There is a remote possibility that we end up with a zero sum.
 259          * Zero is used as an EOF marker, so return 1 instead.
 260          */
 261         return (sum == 0 ? 1 : sum);
 262 }
 263
 264
 265
 266 /*
 267  * Check to see if the given files differ.
 268  * Returns 0 if they are the same, 1 if different, and -1 on error.
 269  */
 270 static int files_differ(FILE *f1, FILE *f2, int flags)
 271 {
 272         char buf1[BUFSIZ], buf2[BUFSIZ];
 273         size_t i, j;
 274
 275         if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
 276             (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
 277                 return (1);
 278         while(1) {
 279                 i = fread(buf1, 1, sizeof(buf1), f1);
 280                 j = fread(buf2, 1, sizeof(buf2), f2);
 281                 if (i != j)
 282                         return (1);
 283                 if (i == 0 && j == 0) {
 284                         if (ferror(f1) || ferror(f2))
 285                                 return (1);
 286                         return (0);
 287                 }
 288                 if (memcmp(buf1, buf2, i) != 0)
 289                         return (1);
 290         }
 291 }
 292
 293 static void prepare(int i, FILE *fd, off_t filesize)
 294 {
 295         struct line *p;
 296         int j, h;
 297         size_t sz;
 298
 299         rewind(fd);
 300
 301         sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
 302         if (sz < 100)
 303                 sz = 100;
 304
 305         p = xmalloc((sz + 3) * sizeof(struct line));
 306         for (j = 0; (h = readhash(fd));) {
 307                 if (j == sz) {
 308                         sz = sz * 3 / 2;
 309                         p = xrealloc(p, (sz + 3) * sizeof(struct line));
 310                 }
 311                 p[++j].value = h;
 312         }
 313         len[i] = j;
 314         file[i] = p;
 315 }
 316
 317 static void prune(void)
 318 {
 319         int i, j;
 320
 321         for (pref = 0; pref < len[0] && pref < len[1] &&
 322             file[0][pref + 1].value == file[1][pref + 1].value;
 323             pref++)
 324                 ;
 325         for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
 326             file[0][len[0] - suff].value == file[1][len[1] - suff].value;
 327             suff++)
 328                 ;
 329         for (j = 0; j < 2; j++) {
 330                 sfile[j] = file[j] + pref;
 331                 slen[j] = len[j] - pref - suff;
 332                 for (i = 0; i <= slen[j]; i++)
 333                         sfile[j][i].serial = i;
 334         }
 335 }
 336
 337 static void equiv(struct line *a, int n, struct line *b, int m, int *c)
 338 {
 339         int i, j;
 340
 341         i = j = 1;
 342         while (i <= n && j <= m) {
 343                 if (a[i].value < b[j].value)
 344                         a[i++].value = 0;
 345                 else if (a[i].value == b[j].value)
 346                         a[i++].value = j;
 347                 else
 348                         j++;
 349         }
 350         while (i <= n)
 351                 a[i++].value = 0;
 352         b[m + 1].value = 0;
 353         j = 0;
 354         while (++j <= m) {
 355                 c[j] = -b[j].serial;
 356                 while (b[j + 1].value == b[j].value) {
 357                         j++;
 358                         c[j] = b[j].serial;
 359                 }
 360         }
 361         c[j] = -1;
 362 }
 363
 364 static int isqrt(int n) {
 365         int y, x = 1;
 366         if (n == 0) return(0);
 367
 368         do {
 369                 y = x;
 370                 x = n / x;
 371                 x += y;
 372                 x /= 2;
 373         } while ((x - y) > 1 || (x - y) < -1);
 374
 375         return (x);
 376 }
 377
 378
 379 static int newcand(int x, int y, int pred)
 380 {
 381         struct cand *q;
 382
 383         if (clen == clistlen) {
 384                 clistlen = clistlen * 11 / 10;
 385                 clist = xrealloc(clist, clistlen * sizeof(struct cand));
 386         }
 387         q = clist + clen;
 388         q->x = x;
 389         q->y = y;
 390         q->pred = pred;
 391         return (clen++);
 392 }
 393
 394
 395 static int search(int *c, int k, int y)
 396 {
 397         int i, j, l, t;
 398
 399         if (clist[c[k]].y < y)        /* quick look for typical case */
 400                 return (k + 1);
 401         i = 0;
 402         j = k + 1;
 403         while (1) {
 404                 l = i + j;
 405                 if ((l >>= 1) <= i)
 406                         break;
 407                 t = clist[c[l]].y;
 408                 if (t > y)
 409                         j = l;
 410                 else if (t < y)
 411                         i = l;
 412                 else
 413                         return (l);
 414         }
 415         return (l + 1);
 416 }
 417
 418
/*
 * Build the candidate chains for the common subsequence and return k, the
 * highest index of c that received a candidate.
 *
 * a[1..n] holds, per line, an index into b (0 means nothing to try for
 * that line). b holds runs of values in which the first value of a run is
 * stored negated and the run ends at the next non-positive value. c[0..k]
 * holds clist indices handed out by newcand().
 * NOTE(review): a, b and c are presumably class, member and klist -- the
 * call site is not visible here, confirm.
 */
static int stone(int *a, int n, int *b, int *c)
{
        int i, k, y, j, l;
        int oldc, tc, oldl;
        u_int numtries;
        /*
         * Upper limit on replacements tried per line of a; unlimited only
         * when FLAG_d is set and CONFIG_FEATURE_DIFF_MINIMAL is compiled in.
         */
#ifdef CONFIG_FEATURE_DIFF_MINIMAL
        const u_int bound = (cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
#else
        const u_int bound = MAX(256, isqrt(n));
#endif
        k = 0;
        /* Candidate with y == 0: the value unravel() stops on. */
        c[0] = newcand(0, 0, 0);
        for (i = 1; i <= n; i++) {
                j = a[i];
                if (j == 0)
                        continue;
                /* The first value of a run is stored negated; undo that. */
                y = -b[j];
                oldl = 0;
                oldc = c[0];
                numtries = 0;
                /*
                 * Each "continue" below jumps to the loop condition, which
                 * loads the next value of the run into y.
                 */
                do {
                        if (y <= clist[oldc].y)
                                continue;
                        l = search(c, k, y);
                        if (l != oldl + 1)
                                oldc = c[l - 1];
                        if (l <= k) {
                                if (clist[c[l]].y <= y)
                                        continue;
                                /* Replace c[l], remembering the old entry as the next predecessor. */
                                tc = c[l];
                                c[l] = newcand(i, y, oldc);
                                oldc = tc;
                                oldl = l;
                                numtries++;
                        } else {
                                /* Extends past the current end: one more slot of c in use. */
                                c[l] = newcand(i, y, oldc);
                                k++;
                                break;
                        }
                } while ((y = b[++j]) > 0 && numtries < bound);
        }
        return (k);
}
 462
 463 static void unravel(int p)
 464 {
 465         struct cand *q;
 466         int i;
 467
 468         for (i = 0; i <= len[0]; i++)
 469                 J[i] = i <= pref ? i :
 470                     i > len[0] - suff ? i + len[1] - len[0] : 0;
 471         for (q = clist + p; q->y != 0; q = clist + q->pred)
 472                 J[q->x + pref] = q->y + pref;
 473 }
 474
 475
 476 static void unsort(struct line *f, int l, int *b)
 477 {
 478         int *a, i;
 479
 480         a = xmalloc((l + 1) * sizeof(int));
 481         for (i = 1; i <= l; i++)
 482                 a[f[i].serial] = f[i].value;
 483         for (i = 1; i <= l; i++)
 484                 b[i] = a[i];
 485         free(a);
 486 }
 487
 488 static int skipline(FILE *f)
 489 {
 490         int i, c;
 491
 492         for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
 493                 continue;
 494         return (i);
 495 }
 496
 497
 498 /*
 499  * Check does double duty:
 500  *  1.  ferret out any fortuitous correspondences due
 501  *      to confounding by hashing (which result in "jackpot")
 502  *  2.  collect random access indexes to the two files
 503  */
 504 static void check(FILE *f1, FILE *f2)
 505 {
 506         int i, j, jackpot, c, d;
 507         long ctold, ctnew;
 508
 509         rewind(f1);
 510         rewind(f2);
 511         j = 1;
 512         ixold[0] = ixnew[0] = 0;
 513         jackpot = 0;
 514         ctold = ctnew = 0;
 515         for (i = 1; i <= len[0]; i++) {
 516                 if (J[i] == 0) {
 517                         ixold[i] = ctold += skipline(f1);
 518                         continue;
 519                 }
 520                 while (j < J[i]) {
 521                         ixnew[j] = ctnew += skipline(f2);
 522                         j++;
 523                 }
 524                 if ((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w) || (cmd_flags & FLAG_i)) {
 525                         while (1) {
 526                                 c = getc(f1);
 527                                 d = getc(f2);
 528                                 /*
 529                                  * GNU diff ignores a missing newline
 530                                  * in one file if bflag || wflag.
 531                                  */
 532                                 if (((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)) &&
 533                                     ((c == EOF && d == '\n') ||
 534                                     (c == '\n' && d == EOF))) {
 535                                         break;
 536                                 }
 537                                 ctold++;
 538                                 ctnew++;
 539                                 if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
 540                                         do {
 541                                                 if (c == '\n')
 542                                                         break;
 543                                                 ctold++;
 544                                         } while (isspace(c = getc(f1)));
 545                                         do {
 546                                                 if (d == '\n')
 547                                                         break;
 548                                                 ctnew++;
 549                                         } while (isspace(d = getc(f2)));
 550                                 } else if (cmd_flags & FLAG_w) {
 551                                         while (isspace(c) && c != '\n') {
 552                                                 c = getc(f1);
 553                                                 ctold++;
 554                                         }
 555                                         while (isspace(d) && d != '\n') {
 556                                                 d = getc(f2);
 557                                                 ctnew++;
 558                                         }
 559                                 }
 560                                 if (c != d) {
 561                                         jackpot++;
 562                                         J[i] = 0;
 563                                         if (c != '\n' && c != EOF)
 564                                                 ctold += skipline(f1);
 565                                         if (d != '\n' && c != EOF)
 566                                                 ctnew += skipline(f2);
 567                                         break;
 568                                 }
 569                                 if (c == '\n' || c == EOF)
 570                                         break;
 571                         }
 572                 } else {
 573                         while (1) {
 574                                 ctold++;
 575                                 ctnew++;
 576                                 if ((c = getc(f1)) != (d = getc(f2))) {
 577                                         J[i] = 0;
 578                                         if (c != '\n' && c != EOF)
 579                                                 ctold += skipline(f1);
 580                                         if (d != '\n' && c != EOF)
 581                                                 ctnew += skipline(f2);
 582                                         break;
 583                                 }
 584                                 if (c == '\n' || c == EOF)
 585                                         break;
 586                         }
 587                 }
 588                 ixold[i] = ctold;
 589                 ixnew[j] = ctnew;
 590                 j++;
 591         }
 592         for (; j <= len[1]; j++)
 593                 ixnew[j] = ctnew += skipline(f2);
 594 }
 595
 596 /* shellsort CACM #201 */
 597 static void sort(struct line *a, int n)
 598 {
 599         struct line *ai, *aim, w;
 600         int j, m = 0, k;
 601
 602         if (n == 0)
 603                 return;
 604         for (j = 1; j <= n; j *= 2)
 605                 m = 2 * j - 1;
 606         for (m /= 2; m != 0; m /= 2) {
 607                 k = n - m;
 608                 for (j = 1; j <= k; j++) {
 609                         for (ai = &a[j]; ai > a; ai -= m) {
 610                                 aim = &ai[m];
 611                                 if (aim < ai)
 612                                         break;        /* wraparound */
 613                                 if (aim->value > ai[0].value ||
 614                                     (aim->value == ai[0].value &&
 615                                         aim->serial > ai[0].serial))
 616                                         break;
 617                                 w.value = ai[0].value;
 618                                 ai[0].value = aim->value;
 619                                 aim->value = w.value;
 620                                 w.serial = ai[0].serial;
 621                                 ai[0].serial = aim->serial;
 622                                 aim->serial = w.serial;
 623                         }
 624                 }
 625         }
 626 }
 627
 628
 629 static void uni_range(int a, int b)
 630 {
 631         if (a < b)
 632                 printf("%d,%d", a, b - a + 1);
 633         else if (a == b)
 634                 printf("%d", b);
 635         else
 636                 printf("%d,0", b);
 637 }
 638
 639 static int fetch(long *f, int a, int b, FILE *lb, int ch)
 640 {
 641         int i, j, c, lastc, col, nc;
 642
 643         if (a > b)
 644                 return (0);
 645         for (i = a; i <= b; i++) {
 646                 fseek(lb, f[i - 1], SEEK_SET);
 647                 nc = f[i] - f[i - 1];
 648                 if (ch != '\0') {
 649                         putchar(ch);
 650                         if (cmd_flags & FLAG_T)
 651                                 putchar('\t');
 652                 }
 653                 col = 0;
 654                 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
 655                         if ((c = getc(lb)) == EOF) {
 656                                 puts("\n\\ No newline at end of file");
 657                                 return (0);
 658                         }
 659                         if (c == '\t' && (cmd_flags & FLAG_t)) {
 660                                 do {
 661                                         putchar(' ');
 662                                 } while (++col & 7);
 663                         } else {
 664                                 putchar(c);
 665                                 col++;
 666                         }
 667                 }
 668         }
 669         return (0);
 670 }
 671
 672 static int asciifile(FILE *f)
 673 {
 674
 675         if ((cmd_flags & FLAG_a) || f == NULL)
 676                 return (1);
 677 #ifdef CONFIG_FEATURE_DIFF_BINARY
 678         unsigned char buf[BUFSIZ];
 679         int i, cnt;
 680
 681         rewind(f);
 682         cnt = fread(buf, 1, sizeof(buf), f);
 683         for (i = 0; i < cnt; i++)
 684                 if (!isprint(buf[i]) && !isspace(buf[i]))
 685                         return (0);
 686 #endif
 687         return (1);
 688 }
 689
 690 /* dump accumulated "unified" diff changes */
 691 static void dump_unified_vec(FILE *f1, FILE *f2)
 692 {
 693         struct context_vec *cvp = context_vec_start;
 694         int lowa, upb, lowc, upd;
 695         int a, b, c, d;
 696         char ch;
 697
 698         if (context_vec_start > context_vec_ptr)
 699                 return;
 700
 701         b = d = 0;                /* gcc */
 702         lowa = MAX(1, cvp->a - context);
 703         upb = MIN(len[0], context_vec_ptr->b + context);
 704         lowc = MAX(1, cvp->c - context);
 705         upd = MIN(len[1], context_vec_ptr->d + context);
 706
 707         fputs("@@ -", stdout);
 708         uni_range(lowa, upb);
 709         fputs(" +", stdout);
 710         uni_range(lowc, upd);
 711         fputs(" @@", stdout);
 712         putchar('\n');
 713
 714         /*
 715          * Output changes in "unified" diff format--the old and new lines
 716          * are printed together.
 717          */
 718         for (; cvp <= context_vec_ptr; cvp++) {
 719                 a = cvp->a;
 720                 b = cvp->b;
 721                 c = cvp->c;
 722                 d = cvp->d;
 723
 724                 /*
 725                  * c: both new and old changes
 726                  * d: only changes in the old file
 727                  * a: only changes in the new file
 728                  */
 729                 if (a <= b && c <= d)
 730                         ch = 'c';
 731                 else
 732                         ch = (a <= b) ? 'd' : 'a';
 733
 734                 switch (ch) {
 735                 case 'c':
 736                         fetch(ixold, lowa, a - 1, f1, ' ');
 737                         fetch(ixold, a, b, f1, '-');
 738                         fetch(ixnew, c, d, f2, '+');
 739                         break;
 740                 case 'd':
 741                         fetch(ixold, lowa, a - 1, f1, ' ');
 742                         fetch(ixold, a, b, f1, '-');
 743                         break;
 744                 case 'a':
 745                         fetch(ixnew, lowc, c - 1, f2, ' ');
 746                         fetch(ixnew, c, d, f2, '+');
 747                         break;
 748                 }
 749                 lowa = b + 1;
 750                 lowc = d + 1;
 751         }
 752         fetch(ixnew, d + 1, upd, f2, ' ');
 753
 754         context_vec_ptr = context_vec_start - 1;
 755 }
 756
 757
 758 static void print_header(const char *file1, const char *file2)
 759 {
 760         if (label[0] != NULL)
 761                 printf("%s %s\n", "---",
 762                     label[0]);
 763         else
 764                 printf("%s %s\t%s", "---",
 765                     file1, ctime(&stb1.st_mtime));
 766         if (label[1] != NULL)
 767                 printf("%s %s\n", "+++",
 768                     label[1]);
 769         else
 770                 printf("%s %s\t%s", "+++",
 771                     file2, ctime(&stb2.st_mtime));
 772 }
 773
 774
 775
 776 /*
 777  * Indicate that there is a difference between lines a and b of the from file
 778  * to get to lines c to d of the to file.  If a is greater then b then there
 779  * are no lines in the from file involved and this means that there were
 780  * lines appended (beginning at b).  If c is greater than d then there are
 781  * lines missing from the to file.
 782  */
 783 static void change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
 784 {
 785         static size_t max_context = 64;
 786
 787         if (a > b && c > d)     return;
 788         if (cmd_flags & FLAG_q) return;
 789
 790         /*
 791         * Allocate change records as needed.
 792         */
 793                 if (context_vec_ptr == context_vec_end - 1) {
 794                         ptrdiff_t offset = context_vec_ptr - context_vec_start;
 795                         max_context <<= 1;
 796                         context_vec_start = xrealloc(context_vec_start,
 797                             max_context * sizeof(struct context_vec));
 798                         context_vec_end = context_vec_start + max_context;
 799                         context_vec_ptr = context_vec_start + offset;
 800                 }
 801                 if (anychange == 0) {
 802                         /*
 803                          * Print the context/unidiff header first time through.
 804                          */
 805                         print_header(file1, file2);
 806                         anychange = 1;
 807                 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
 808                     c > context_vec_ptr->d + (2 * context) + 1) {
 809                         /*
 810                          * If this change is more than 'context' lines from the
 811                          * previous change, dump the record and reset it.
 812                          */
 813                         dump_unified_vec(f1, f2);
 814                 }
 815                 context_vec_ptr++;
 816                 context_vec_ptr->a = a;
 817                 context_vec_ptr->b = b;
 818                 context_vec_ptr->c = c;
 819                 context_vec_ptr->d = d;
 820                 return;
 821
 822 }
 823
 824
 825 static void output(char *file1, FILE *f1, char *file2, FILE *f2)
 826 {
 827         int m, i0, i1, j0, j1;
 828
 829         rewind(f1);
 830         rewind(f2);
 831         m = len[0];
 832         J[0] = 0;
 833         J[m + 1] = len[1] + 1;
 834         for (i0 = 1; i0 <= m; i0 = i1 + 1) {
 835                         while (i0 <= m && J[i0] == J[i0 - 1] + 1)
 836                                 i0++;
 837                         j0 = J[i0 - 1] + 1;
 838                         i1 = i0 - 1;
 839                         while (i1 < m && J[i1 + 1] == 0)
 840                                 i1++;
 841                         j1 = J[i1 + 1] - 1;
 842                         J[i1] = j1;
 843                         change(file1, f1, file2, f2, i0, i1, j0, j1);
 844         }
 845         if (m == 0) {
 846                 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
 847         }
 848         if (anychange != 0) {
 849                 dump_unified_vec(f1, f2);
 850         }
 851 }
 852
 853 /*
 854  *      The following code uses an algorithm due to Harold Stone,
 855  *      which finds a pair of longest identical subsequences in
 856  *      the two files.
 857  *
 858  *      The major goal is to generate the match vector J.
 859  *      J[i] is the index of the line in file1 corresponding
 860  *      to line i of file0. J[i] = 0 if there is no
 861  *      such line in file1.
 862  *
 863  *      Lines are hashed so as to work in core. All potential
 864  *      matches are located by sorting the lines of each file
 865  *      on the hash (called ``value''). In particular, this
 866  *      collects the equivalence classes in file1 together.
 867  *      Subroutine equiv replaces the value of each line in
 868  *      file0 by the index of the first element of its
 869  *      matching equivalence in (the reordered) file1.
 870  *      To save space equiv squeezes file1 into a single
 871  *      array member in which the equivalence classes
 872  *      are simply concatenated, except that their first
 873  *      members are flagged by changing sign.
 874  *
 875  *      Next the indices that point into member are unsorted into
 876  *      array class according to the original order of file0.
 877  *
 878  *      The cleverness lies in routine stone. This marches
 879  *      through the lines of file0, developing a vector klist
 880  *      of "k-candidates". At step i a k-candidate is a matched
 881  *      pair of lines x,y (x in file0 y in file1) such that
 882  *      there is a common subsequence of length k
 883  *      between the first i lines of file0 and the first y
 884  *      lines of file1, but there is no such subsequence for
 885  *      any smaller y. x is the earliest possible mate to y
 886  *      that occurs in such a subsequence.
 887  *
 888  *      Whenever any of the members of the equivalence class of
 889  *      lines in file1 matable to a line in file0 has serial number
 890  *      less than the y of some k-candidate, that k-candidate
 891  *      with the smallest such y is replaced. The new
 892  *      k-candidate is chained (via pred) to the current
 893  *      k-1 candidate so that the actual subsequence can
 894  *      be recovered. When a member has serial number greater
 895  *      than the y of all k-candidates, the klist is extended.
 896  *      At the end, the longest subsequence is pulled out
 897  *      and placed in the array J by unravel.
 898  *
 899  *      With J in hand, the matches there recorded are
 900  *      checked against reality to assure that no spurious
 901  *      matches have crept in due to hashing. If they have,
 902  *      they are broken, and "jackpot" is recorded--a harmless
 903  *      matter except that a true match for a spuriously
 904  *      mated line may now be unnecessarily reported as a change.
 905  *
 906  *      Much of the complexity of the program comes simply
 907  *      from trying to minimize core utilization and
 908  *      maximize the range of doable problems by dynamically
 909  *      allocating what is needed and reusing what is not.
 910  *      The core requirements for problems larger than somewhat
 911  *      are (in words) 2*length(file0) + length(file1) +
 912  *      3*(number of k-candidates installed),  typically about
 913  *      6n words for files of length n.
 914  */
 915
 916 static int diffreg(char *ofile1, char *ofile2, int flags)
 917 {
 918         char *file1 = ofile1;
 919         char *file2 = ofile2;
 920         FILE *f1 = NULL;
 921         FILE *f2 = NULL;
 922         int rval = D_SAME;
 923         int i;
 924
 925         anychange = 0;
 926         context_vec_ptr = context_vec_start - 1;
 927
 928         if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
 929                 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
 930         if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
 931                 goto closem;
 932
 933         if (flags & D_EMPTY1)
 934                 f1 = bb_xfopen(_PATH_DEVNULL, "r");
 935         else {
 936                 if (strcmp(file1, "-") == 0)
 937                         f1 = stdin;
 938                 else
 939                         f1 = bb_xfopen(file1, "r");
 940         }
 941
 942         if (flags & D_EMPTY2)
 943                 f2 = bb_xfopen(_PATH_DEVNULL, "r");
 944         else {
 945                 if (strcmp(file2, "-") == 0)
 946                         f2 = stdin;
 947                 else
 948                         f2 = bb_xfopen(file2, "r");
 949         }
 950
 951         switch (files_differ(f1, f2, flags)) {
 952         case 0:
 953                 goto closem;
 954         case 1:
 955                 break;
 956         default:
 957                 /* error */
 958                 status |= 2;
 959                 goto closem;
 960         }
 961
 962         if (!asciifile(f1) || !asciifile(f2)) {
 963                 rval = D_BINARY;
 964                 status |= 1;
 965                 goto closem;
 966         }
 967
 968         prepare(0, f1, stb1.st_size);
 969         prepare(1, f2, stb2.st_size);
 970         prune();
 971         sort(sfile[0], slen[0]);
 972         sort(sfile[1], slen[1]);
 973
 974         member = (int *)file[1];
 975         equiv(sfile[0], slen[0], sfile[1], slen[1], member);
 976         member = xrealloc(member, (slen[1] + 2) * sizeof(int));
 977
 978         class = (int *)file[0];
 979         unsort(sfile[0], slen[0], class);
 980         class = xrealloc(class, (slen[0] + 2) * sizeof(int));
 981
 982         klist = xmalloc((slen[0] + 2) * sizeof(int));
 983         clen = 0;
 984         clistlen = 100;
 985         clist = xmalloc(clistlen * sizeof(struct cand));
 986         i = stone(class, slen[0], member, klist);
 987         free(member);
 988         free(class);
 989
 990         J = xrealloc(J, (len[0] + 2) * sizeof(int));
 991         unravel(klist[i]);
 992         free(clist);
 993         free(klist);
 994
 995         ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
 996         ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
 997         check(f1, f2);
 998         output(file1, f1, file2, f2);
 999
1000 closem:
1001         if (anychange) {
1002                 status |= 1;
1003                 if (rval == D_SAME)
1004                         rval = D_DIFFER;
1005         }
1006         if (f1 != NULL)
1007                 fclose(f1);
1008         if (f2 != NULL)
1009                 fclose(f2);
1010         if (file1 != ofile1)
1011                 free(file1);
1012         if (file2 != ofile2)
1013                 free(file2);
1014         return (rval);
1015 }
1016
1017 #if ENABLE_FEATURE_DIFF_DIR
1018 static void do_diff (char *dir1, char *path1, char *dir2, char *path2) {
1019
1020         int flags = D_HEADER;
1021         int val;
1022
1023         char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
1024         char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
1025
1026         if (stat(fullpath1, &stb1) != 0) {
1027                 flags |= D_EMPTY1;
1028                 memset(&stb1, 0, sizeof(stb1));
1029                 fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
1030         }
1031         if (stat(fullpath2, &stb2) != 0) {
1032                 flags |= D_EMPTY2;
1033                 memset(&stb2, 0, sizeof(stb2));
1034                 stb2.st_mode = stb1.st_mode;
1035                 fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
1036         }
1037
1038         if (stb1.st_mode == 0)
1039                 stb1.st_mode = stb2.st_mode;
1040
1041         if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1042                 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1043                 return;
1044         }
1045
1046         if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1047                 val = D_SKIPPED1;
1048         else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1049                 val = D_SKIPPED2;
1050         else
1051                 val = diffreg(fullpath1, fullpath2, flags);
1052
1053         print_status(val, fullpath1, fullpath2, NULL);
1054 }
1055 #endif
1056
1057 #ifdef CONFIG_FEATURE_DIFF_DIR
/*
 * qsort() comparison callback for an array of C strings: p1 and p2 each
 * point at an array element (a char *), and the strings they refer to are
 * ordered with strcmp().
 */
static int dir_strcmp(const void *p1, const void *p2)
{
        const char *lhs = *(char * const *)p1;
        const char *rhs = *(char * const *)p2;

        return strcmp(lhs, rhs);
}
1061
1062 /* This function adds a filename to dl, the directory listing. */
1063
1064 static int add_to_dirlist (const char *filename,
1065                 struct stat ATTRIBUTE_UNUSED *sb, void *userdata) {
1066         dl_count++;
1067         dl = xrealloc(dl, dl_count * sizeof(char *));
1068         dl[dl_count - 1] = bb_xstrdup(filename);
1069         if (cmd_flags & FLAG_r) {
1070                 int *pp = (int *) userdata;
1071                 int path_len = *pp + 1;
1072                 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
1073         }
1074         return TRUE;
1075 }
1076
1077 /* This returns a sorted directory listing. */
1078 static char **get_dir(char *path) {
1079
1080         int i;
1081
1082         /* Reset dl_count - there's no need to free dl as bb_xrealloc does
1083          * the job nicely. */
1084         dl_count = 0;
1085
1086         /* If -r has been set, then the recursive_action function will be
1087          * used. Unfortunately, this outputs the root directory along with
1088          * the recursed paths, so use void *userdata to specify the string
1089          * length of the root directory. It can then be removed in
1090          * add_to_dirlist. */
1091
1092         int path_len = strlen(path);
1093         void *userdata = &path_len;
1094
1095         /* Now fill dl with a listing. */
1096         if (cmd_flags & FLAG_r)
1097                 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata);
1098         else {
1099                 DIR *dp;
1100                 struct dirent *ep;
1101                 if ((dp = opendir(path)) == NULL)
1102                         bb_error_msg("Error reading directory");
1103                 while ((ep = readdir(dp))) {
1104                         if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
1105                                 continue;
1106                         add_to_dirlist(ep->d_name, NULL, NULL);
1107                 }
1108                 closedir(dp);
1109         }
1110
1111         /* Sort dl alphabetically. */
1112         qsort(dl, dl_count, sizeof(char *), dir_strcmp);
1113
1114         /* Copy dl so that we can return it. */
1115         char **retval = xmalloc(dl_count * sizeof(char *));
1116         for (i = 0; i < dl_count; i++)
1117                 retval[i] = bb_xstrdup(dl[i]);
1118
1119         return retval;
1120 }
1121
1122 static void diffdir (char *p1, char *p2) {
1123
1124         char **dirlist1, **dirlist2;
1125         char *dp1, *dp2;
1126         int dirlist1_count, dirlist2_count;
1127         int pos;
1128
1129         /* Check for trailing slashes. */
1130
1131         if (p1[strlen(p1) - 1] == '/')
1132                 p1[strlen(p1) - 1] = '\0';
1133         if (p2[strlen(p2) - 1] == '/')
1134                 p2[strlen(p2) - 1] = '\0';
1135
1136         /* Get directory listings for p1 and p2. */
1137
1138         dirlist1 = get_dir(p1);
1139         dirlist1_count = dl_count;
1140         dirlist1[dirlist1_count] = NULL;
1141         dirlist2 = get_dir(p2);
1142         dirlist2_count = dl_count;
1143         dirlist2[dirlist2_count] = NULL;
1144
1145         /* If -S was set, find the starting point. */
1146         if (start) {
1147                 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
1148                         dirlist1++;
1149                 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
1150                         dirlist2++;
1151                 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
1152                         bb_error_msg("Invalid argument to -S");
1153         }
1154
1155         /* Now that both dirlist1 and dirlist2 contain sorted directory
1156          * listings, we can start to go through dirlist1. If both listings
1157          * contain the same file, then do a normal diff. Otherwise, behaviour
1158          * is determined by whether the -N flag is set. */
1159         while (*dirlist1 != NULL || *dirlist2 != NULL) {
1160                 dp1 = *dirlist1;
1161                 dp2 = *dirlist2;
1162                 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
1163                 if (pos == 0) {
1164                         do_diff(p1, dp1, p2, dp2);
1165                         dirlist1++;
1166                         dirlist2++;
1167                 }
1168                 else if (pos < 0) {
1169                         if (cmd_flags & FLAG_N)
1170                                 do_diff(p1, dp1, p2, NULL);
1171                         else
1172                                 print_only(p1, strlen(p1) + 1, dp1);
1173                         dirlist1++;
1174                 }
1175                 else {
1176                         if (cmd_flags & FLAG_N)
1177                                 do_diff(p1, NULL, p2, dp2);
1178                         else
1179                                 print_only(p2, strlen(p2) + 1, dp2);
1180                         dirlist2++;
1181                 }
1182         }
1183 }
1184 #endif
1185
1186
1187
1188 extern int diff_main(int argc, char **argv) {
1189         char *ep;
1190         int gotstdin = 0;
1191
1192         char *U_opt;
1193         cmd_flags = bb_getopt_ulflags(argc, argv, "abdiNqrsS:tTU:wu", &start, &U_opt);
1194
1195         context = 3;    /* This is the default number of lines of context. */
1196         if (cmd_flags & FLAG_U) {
1197                 context = strtol(U_opt, &ep, 10);
1198                 if (context == 0) {
1199                         bb_error_msg("Invalid context length");
1200                         bb_show_usage();
1201                 }
1202         }
1203         argc -= optind;
1204         argv += optind;
1205
1206         /*
1207          * Do sanity checks, fill in stb1 and stb2 and call the appropriate
1208          * driver routine.  Both drivers use the contents of stb1 and stb2.
1209          */
1210         if (argc < 2) {
1211                 bb_error_msg("Missing filename");
1212                 bb_show_usage();
1213         }
1214         if (strcmp(argv[0], "-") == 0) {
1215                 fstat(STDIN_FILENO, &stb1);
1216                 gotstdin = 1;
1217         } else if (stat(argv[0], &stb1) != 0)
1218                 bb_perror_msg_and_die("Couldn't stat %s", argv[0]);
1219         if (strcmp(argv[1], "-") == 0) {
1220                 fstat(STDIN_FILENO, &stb2);
1221                 gotstdin = 1;
1222         } else if (stat(argv[1], &stb2) != 0)
1223                 bb_perror_msg_and_die("Couldn't stat %s", argv[1]);
1224         if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
1225                 bb_error_msg_and_die("Can't compare - to a directory");
1226         if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1227 #ifdef CONFIG_FEATURE_DIFF_DIR
1228                 diffdir(argv[0], argv[1]);
1229 #else
1230                 bb_error_msg_and_die("Directory comparison not supported");
1231 #endif
1232         }
1233         else {
1234                 if (S_ISDIR(stb1.st_mode)) {
1235                         argv[0] = concat_path_file(argv[0], argv[1]);
1236                         if (stat(argv[0], &stb1) < 0)
1237                                 bb_perror_msg_and_die("Couldn't stat %s", argv[0]);
1238                 }
1239                 if (S_ISDIR(stb2.st_mode)) {
1240                         argv[1] = concat_path_file(argv[1], argv[0]);
1241                         if (stat(argv[1], &stb2) < 0)
1242                                 bb_perror_msg_and_die("Couldn't stat %s", argv[1]);
1243                 }
1244                 print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
1245         }
1246         exit(status);
1247 }
1248