1 /* vi: set sw=4 ts=4: */
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
5 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
6 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
8 * Sponsored in part by the Defense Advanced Research Projects
9 * Agency (DARPA) and Air Force Research Laboratory, Air Force
10 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
12 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
17 // #define FSIZE_MAX 32768
19 /* NOINLINEs added to prevent gcc from merging too much into diffreg()
20 * (it bites more than it can (efficiently) chew). */
26 /* Print a header/footer between files */
27 /* D_HEADER = 1, - unused */
28 /* Treat file as empty (/dev/null) */
29 D_EMPTY1 = 2 * ENABLE_FEATURE_DIFF_DIR,
30 D_EMPTY2 = 4 * ENABLE_FEATURE_DIFF_DIR,
34 * Status values for print_status() and diffreg() return values
36 * D_SAME - files are the same
37 * D_DIFFER - files differ
38 * D_BINARY - binary files differ
39 * D_COMMON - subdirectory common to both dirs
40 * D_ONLY - file only exists in one dir
41 * D_ISDIR1 - path1 a dir, path2 a file
42 * D_ISDIR2 - path1 a file, path2 a dir
43 * D_ERROR - error occurred
44 * D_SKIPPED1 - skipped path1 as it is a special file
45 * D_SKIPPED2 - skipped path2 as it is a special file
48 #define D_DIFFER (1 << 0)
49 #define D_BINARY (1 << 1)
50 #define D_COMMON (1 << 2)
51 /*#define D_ONLY (1 << 3) - unused */
52 #define D_ISDIR1 (1 << 4)
53 #define D_ISDIR2 (1 << 5)
54 #define D_ERROR (1 << 6)
55 #define D_SKIPPED1 (1 << 7)
56 #define D_SKIPPED2 (1 << 8)
58 /* Command line options */
59 #define FLAG_a (1 << 0)
60 #define FLAG_b (1 << 1)
61 #define FLAG_d (1 << 2)
62 #define FLAG_i (1 << 3)
63 #define FLAG_L (1 << 4)
64 #define FLAG_N (1 << 5)
65 #define FLAG_q (1 << 6)
66 #define FLAG_r (1 << 7)
67 #define FLAG_s (1 << 8)
68 #define FLAG_S (1 << 9)
69 #define FLAG_t (1 << 10)
70 #define FLAG_T (1 << 11)
71 #define FLAG_U (1 << 12)
72 #define FLAG_w (1 << 13)
87 * The following struct is used to record change information
88 * doing a "context" or "unified" diff. (see routine "change" to
89 * understand the highly mnemonic field names)
92 int a; /* start line in old file */
93 int b; /* end line in old file */
94 int c; /* start line in new file */
95 int d; /* end line in new file */
99 #define g_read_buf bb_common_bufsiz1
103 smallint exit_status;
105 size_t max_context; /* size of context_vec_start */
106 USE_FEATURE_DIFF_DIR(int dl_count;)
107 USE_FEATURE_DIFF_DIR(char **dl;)
111 int *J; /* will be overlaid on class */
113 int pref, suff; /* length of prefix and suffix */
116 int clistlen; /* the length of clist */
117 struct cand *clist; /* merely a free storage pot for candidates */
118 long *ixnew; /* will be overlaid on nfile[1] */
119 long *ixold; /* will be overlaid on klist */
120 struct line *nfile[2];
121 struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
122 struct context_vec *context_vec_start;
123 struct context_vec *context_vec_end;
124 struct context_vec *context_vec_ptr;
125 char *tempname1, *tempname2;
126 struct stat stb1, stb2;
128 #define G (*ptr_to_globals)
129 #define anychange (G.anychange )
130 #define exit_status (G.exit_status )
131 #define opt_U_context (G.opt_U_context )
132 #define max_context (G.max_context )
133 #define dl_count (G.dl_count )
135 #define opt_S_start (G.opt_S_start )
136 #define label1 (G.label1 )
137 #define label2 (G.label2 )
139 #define clen (G.clen )
140 #define pref (G.pref )
141 #define suff (G.suff )
142 #define nlen (G.nlen )
143 #define slen (G.slen )
144 #define clistlen (G.clistlen )
145 #define clist (G.clist )
146 #define ixnew (G.ixnew )
147 #define ixold (G.ixold )
148 #define nfile (G.nfile )
149 #define sfile (G.sfile )
150 #define context_vec_start (G.context_vec_start )
151 #define context_vec_end (G.context_vec_end )
152 #define context_vec_ptr (G.context_vec_ptr )
153 #define stb1 (G.stb1 )
154 #define stb2 (G.stb2 )
155 #define tempname1 (G.tempname1 )
156 #define tempname2 (G.tempname2 )
157 #define INIT_G() do { \
158 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
164 /*static void print_only(const char *path, size_t dirlen, const char *entry)*/
165 static void print_only(const char *path, const char *entry)
167 printf("Only in %s: %s\n", path, entry);
171 /*static void print_status(int val, char *path1, char *path2, char *entry)*/
172 static void print_status(int val, char *_path1, char *_path2)
174 /*const char *const _entry = entry ? entry : "";*/
175 /*char *const _path1 = entry ? concat_path_file(path1, _entry) : path1;*/
176 /*char *const _path2 = entry ? concat_path_file(path2, _entry) : path2;*/
180 print_only(path1, entry);
184 printf("Common subdirectories: %s and %s\n", _path1, _path2);
187 printf("Binary files %s and %s differ\n", _path1, _path2);
190 if (option_mask32 & FLAG_q)
191 printf("Files %s and %s differ\n", _path1, _path2);
194 if (option_mask32 & FLAG_s)
195 printf("Files %s and %s are identical\n", _path1, _path2);
198 printf("File %s is a %s while file %s is a %s\n",
199 _path1, "directory", _path2, "regular file");
202 printf("File %s is a %s while file %s is a %s\n",
203 _path1, "regular file", _path2, "directory");
206 printf("File %s is not a regular file or directory and was skipped\n",
210 printf("File %s is not a regular file or directory and was skipped\n",
223 /* Read line, return its nonzero hash. Return 0 if EOF.
225 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
227 static ALWAYS_INLINE int fiddle_sum(int sum, int t)
229 return sum * 127 + t;
231 static int readhash(FILE *fp)
239 if (!(option_mask32 & (FLAG_b | FLAG_w))) {
240 while ((t = getc(fp)) != '\n') {
246 sum = fiddle_sum(sum, t);
251 switch (t = getc(fp)) {
260 if (space && !(option_mask32 & FLAG_w)) {
264 sum = fiddle_sum(sum, t);
278 * There is a remote possibility that we end up with a zero sum.
279 * Zero is used as an EOF marker, so return 1 instead.
281 return (sum == 0 ? 1 : sum);
285 static char *make_temp(FILE *f, struct stat *sb)
290 if (S_ISREG(sb->st_mode) || S_ISBLK(sb->st_mode))
292 name = xstrdup("/tmp/difXXXXXX");
295 bb_perror_msg_and_die("mkstemp");
296 if (bb_copyfd_eof(fileno(f), fd) < 0) {
299 xfunc_die(); /* error message is printed by bb_copyfd_eof */
303 if (freopen(name, "r+", f) == NULL) {
304 bb_perror_msg("freopen");
312 * Check to see if the given files differ.
313 * Returns 0 if they are the same, 1 if different, and -1 on error.
315 static NOINLINE int files_differ(FILE *f1, FILE *f2)
319 /* Prevent making copies for "/dev/null" (too common) */
320 /* Deal with input from pipes etc */
321 tempname1 = make_temp(f1, &stb1);
322 tempname2 = make_temp(f2, &stb2);
323 if (stb1.st_size != stb2.st_size) {
327 i = fread(g_read_buf, 1, COMMON_BUFSIZE/2, f1);
328 j = fread(g_read_buf + COMMON_BUFSIZE/2, 1, COMMON_BUFSIZE/2, f2);
332 return (ferror(f1) || ferror(f2)) ? -1 : 0;
333 if (memcmp(g_read_buf,
334 g_read_buf + COMMON_BUFSIZE/2, i) != 0)
340 static void prepare(int i, FILE *fp /*, off_t filesize*/)
348 /*sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;*/
352 p = xmalloc((sz + 3) * sizeof(p[0]));
354 while ((h = readhash(fp)) != 0) { /* while not EOF */
357 p = xrealloc(p, (sz + 3) * sizeof(p[0]));
366 static void prune(void)
370 for (pref = 0; pref < nlen[0] && pref < nlen[1] &&
371 nfile[0][pref + 1].value == nfile[1][pref + 1].value; pref++)
373 for (suff = 0; suff < nlen[0] - pref && suff < nlen[1] - pref &&
374 nfile[0][nlen[0] - suff].value == nfile[1][nlen[1] - suff].value;
377 for (j = 0; j < 2; j++) {
378 sfile[j] = nfile[j] + pref;
379 slen[j] = nlen[j] - pref - suff;
380 for (i = 0; i <= slen[j]; i++)
381 sfile[j][i].serial = i;
386 static void equiv(struct line *a, int n, struct line *b, int m, int *c)
391 while (i <= n && j <= m) {
392 if (a[i].value < b[j].value)
394 else if (a[i].value == b[j].value)
405 while (b[j + 1].value == b[j].value) {
414 static int isqrt(int n)
426 } while ((x - y) > 1 || (x - y) < -1);
432 static int newcand(int x, int y, int pred)
436 if (clen == clistlen) {
437 clistlen = clistlen * 11 / 10;
438 clist = xrealloc(clist, clistlen * sizeof(struct cand));
448 static int search(int *c, int k, int y)
452 if (clist[c[k]].y < y) /* quick look for typical case */
472 static int stone(int *a, int n, int *b, int *c)
476 unsigned int numtries;
477 #if ENABLE_FEATURE_DIFF_MINIMAL
478 const unsigned int bound =
479 (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
481 const unsigned int bound = MAX(256, isqrt(n));
485 c[0] = newcand(0, 0, 0);
486 for (i = 1; i <= n; i++) {
495 if (y <= clist[oldc].y)
501 if (clist[c[l]].y <= y)
504 c[l] = newcand(i, y, oldc);
509 c[l] = newcand(i, y, oldc);
513 } while ((y = b[++j]) > 0 && numtries < bound);
519 static void unravel(int p)
524 for (i = 0; i <= nlen[0]; i++)
525 J[i] = i <= pref ? i : i > nlen[0] - suff ? i + nlen[1] - nlen[0] : 0;
526 for (q = clist + p; q->y != 0; q = clist + q->pred)
527 J[q->x + pref] = q->y + pref;
531 static void unsort(struct line *f, int l, int *b)
535 a = xmalloc((l + 1) * sizeof(int));
536 for (i = 1; i <= l; i++)
537 a[f[i].serial] = f[i].value;
538 for (i = 1; i <= l; i++)
544 static int skipline(FILE *f)
548 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
555 * Check does double duty:
556 * 1. ferret out any fortuitous correspondences due
557 * to confounding by hashing (which result in "jackpot")
558 * 2. collect random access indexes to the two files
560 static NOINLINE void check(FILE *f1, FILE *f2)
562 int i, j, jackpot, c, d;
568 ixold[0] = ixnew[0] = 0;
571 for (i = 1; i <= nlen[0]; i++) {
573 ixold[i] = ctold += skipline(f1);
577 ixnew[j] = ctnew += skipline(f2);
580 if (option_mask32 & (FLAG_b | FLAG_w | FLAG_i)) {
585 * GNU diff ignores a missing newline
586 * in one file if bflag || wflag.
588 if ((option_mask32 & (FLAG_b | FLAG_w))
589 && ((c == EOF && d == '\n') || (c == '\n' && d == EOF))
595 if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) {
601 } while (isspace(c));
607 } while (isspace(d));
608 } else if (option_mask32 & FLAG_w) {
609 while (isspace(c) && c != '\n') {
613 while (isspace(d) && d != '\n') {
621 if (c != '\n' && c != EOF)
622 ctold += skipline(f1);
623 if (d != '\n' && c != EOF)
624 ctnew += skipline(f2);
627 if (c == '\n' || c == EOF)
638 if (c != '\n' && c != EOF)
639 ctold += skipline(f1);
640 /* was buggy? "if (d != '\n' && c != EOF)" */
641 if (d != '\n' && d != EOF)
642 ctnew += skipline(f2);
645 if (c == '\n' || c == EOF)
653 for (; j <= nlen[1]; j++)
654 ixnew[j] = ctnew += skipline(f2);
658 /* shellsort CACM #201 */
659 static void sort(struct line *a, int n)
661 struct line *ai, *aim, w;
666 for (j = 1; j <= n; j *= 2)
668 for (m /= 2; m != 0; m /= 2) {
670 for (j = 1; j <= k; j++) {
671 for (ai = &a[j]; ai > a; ai -= m) {
674 break; /* wraparound */
675 if (aim->value > ai[0].value
676 || (aim->value == ai[0].value && aim->serial > ai[0].serial)
680 w.value = ai[0].value;
681 ai[0].value = aim->value;
682 aim->value = w.value;
683 w.serial = ai[0].serial;
684 ai[0].serial = aim->serial;
685 aim->serial = w.serial;
692 static void uni_range(int a, int b)
695 printf("%d,%d", a, b - a + 1);
703 static void fetch(long *f, int a, int b, FILE *lb, int ch)
705 int i, j, c, lastc, col, nc;
709 for (i = a; i <= b; i++) {
710 fseek(lb, f[i - 1], SEEK_SET);
711 nc = f[i] - f[i - 1];
714 if (option_mask32 & FLAG_T)
718 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
721 printf("\n\\ No newline at end of file\n");
724 if (c == '\t' && (option_mask32 & FLAG_t)) {
737 #if ENABLE_FEATURE_DIFF_BINARY
738 static int asciifile(FILE *f)
742 if (option_mask32 & FLAG_a)
745 cnt = fread(g_read_buf, 1, COMMON_BUFSIZE, f);
746 for (i = 0; i < cnt; i++) {
747 if (!isprint(g_read_buf[i])
748 && !isspace(g_read_buf[i])
756 #define asciifile(f) 1
760 /* dump accumulated "unified" diff changes */
761 static void dump_unified_vec(FILE *f1, FILE *f2)
763 struct context_vec *cvp = context_vec_start;
764 int lowa, upb, lowc, upd;
768 if (context_vec_start > context_vec_ptr)
772 lowa = MAX(1, cvp->a - opt_U_context);
773 upb = MIN(nlen[0], context_vec_ptr->b + opt_U_context);
774 lowc = MAX(1, cvp->c - opt_U_context);
775 upd = MIN(nlen[1], context_vec_ptr->d + opt_U_context);
778 uni_range(lowa, upb);
780 uni_range(lowc, upd);
784 * Output changes in "unified" diff format--the old and new lines
785 * are printed together.
787 for (; cvp <= context_vec_ptr; cvp++) {
794 * c: both new and old changes
795 * d: only changes in the old file
796 * a: only changes in the new file
798 if (a <= b && c <= d)
801 ch = (a <= b) ? 'd' : 'a';
806 fetch(ixold, lowa, a - 1, f1, ' ');
807 fetch(ixold, a, b, f1, '-');
808 fetch(ixnew, c, d, f2, '+');
811 fetch(ixold, lowa, a - 1, f1, ' ');
812 fetch(ixold, a, b, f1, '-');
815 fetch(ixnew, lowc, c - 1, f2, ' ');
816 fetch(ixnew, c, d, f2, '+');
820 if (ch == 'c' || ch == 'd') {
821 fetch(ixold, lowa, a - 1, f1, ' ');
822 fetch(ixold, a, b, f1, '-');
825 fetch(ixnew, lowc, c - 1, f2, ' ');
826 if (ch == 'c' || ch == 'a')
827 fetch(ixnew, c, d, f2, '+');
832 fetch(ixnew, d + 1, upd, f2, ' ');
834 context_vec_ptr = context_vec_start - 1;
838 static void print_header(const char *file1, const char *file2)
841 printf("--- %s\n", label1);
843 printf("--- %s\t%s", file1, ctime(&stb1.st_mtime));
845 printf("+++ %s\n", label2);
847 printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime));
852 * Indicate that there is a difference between lines a and b of the from file
853 * to get to lines c to d of the to file. If a is greater than b then there
854 * are no lines in the from file involved and this means that there were
855 * lines appended (beginning at b). If c is greater than d then there are
856 * lines missing from the to file.
858 static void change(char *file1, FILE *f1, char *file2, FILE *f2,
859 int a, int b, int c, int d)
861 if ((a > b && c > d) || (option_mask32 & FLAG_q)) {
867 * Allocate change records as needed.
869 if (context_vec_ptr == context_vec_end - 1) {
870 ptrdiff_t offset = context_vec_ptr - context_vec_start;
873 context_vec_start = xrealloc(context_vec_start,
874 max_context * sizeof(struct context_vec));
875 context_vec_end = context_vec_start + max_context;
876 context_vec_ptr = context_vec_start + offset;
878 if (anychange == 0) {
880 * Print the context/unidiff header first time through.
882 print_header(file1, file2);
883 } else if (a > context_vec_ptr->b + (2 * opt_U_context) + 1
884 && c > context_vec_ptr->d + (2 * opt_U_context) + 1
887 * If this change is more than 'context' lines from the
888 * previous change, dump the record and reset it.
890 // dump_unified_vec() seeks!
891 dump_unified_vec(f1, f2);
894 context_vec_ptr->a = a;
895 context_vec_ptr->b = b;
896 context_vec_ptr->c = c;
897 context_vec_ptr->d = d;
902 static void output(char *file1, FILE *f1, char *file2, FILE *f2)
904 /* Note that j0 and j1 can't be used as they are defined in math.h.
905 * This also allows the rather amusing variable 'j00'... */
906 int m, i0, i1, j00, j01;
912 J[m + 1] = nlen[1] + 1;
913 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
914 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
918 while (i1 < m && J[i1 + 1] == 0)
923 change(file1, f1, file2, f2, i0, i1, j00, j01);
927 change(file1, f1, file2, f2, 1, 0, 1, nlen[1]);
929 if (anychange != 0 && !(option_mask32 & FLAG_q)) {
930 // dump_unified_vec() seeks!
931 dump_unified_vec(f1, f2);
936 * The following code uses an algorithm due to Harold Stone,
937 * which finds a pair of longest identical subsequences in
940 * The major goal is to generate the match vector J.
941 * J[i] is the index of the line in file1 corresponding
942 * to line i in file0. J[i] = 0 if there is no
943 * such line in file1.
945 * Lines are hashed so as to work in core. All potential
946 * matches are located by sorting the lines of each file
947 * on the hash (called "value"). In particular, this
948 * collects the equivalence classes in file1 together.
949 * Subroutine equiv replaces the value of each line in
950 * file0 by the index of the first element of its
951 * matching equivalence in (the reordered) file1.
952 * To save space equiv squeezes file1 into a single
953 * array member in which the equivalence classes
954 * are simply concatenated, except that their first
955 * members are flagged by changing sign.
957 * Next the indices that point into member are unsorted into
958 * array class according to the original order of file0.
960 * The cleverness lies in routine stone. This marches
961 * through the lines of file0, developing a vector klist
962 * of "k-candidates". At step i a k-candidate is a matched
963 * pair of lines x,y (x in file0, y in file1) such that
964 * there is a common subsequence of length k
965 * between the first i lines of file0 and the first y
966 * lines of file1, but there is no such subsequence for
967 * any smaller y. x is the earliest possible mate to y
968 * that occurs in such a subsequence.
970 * Whenever any of the members of the equivalence class of
971 * lines in file1 matable to a line in file0 has serial number
972 * less than the y of some k-candidate, that k-candidate
973 * with the smallest such y is replaced. The new
974 * k-candidate is chained (via pred) to the current
975 * k-1 candidate so that the actual subsequence can
976 * be recovered. When a member has serial number greater
977 * that the y of all k-candidates, the klist is extended.
978 * At the end, the longest subsequence is pulled out
979 * and placed in the array J by unravel
981 * With J in hand, the matches there recorded are
982 * checked against reality to assure that no spurious
983 * matches have crept in due to hashing. If they have,
984 * they are broken, and "jackpot" is recorded--a harmless
985 * matter except that a true match for a spuriously
986 * mated line may now be unnecessarily reported as a change.
988 * Much of the complexity of the program comes simply
989 * from trying to minimize core utilization and
990 * maximize the range of doable problems by dynamically
991 * allocating what is needed and reusing what is not.
992 * The core requirements for problems larger than somewhat
993 * are (in words) 2*length(file0) + length(file1) +
994 * 3*(number of k-candidates installed), typically about
995 * 6n words for files of length n.
997 /* NB: files can be not REGular. The only sure thing that they
998 * are not both DIRectories. */
999 static unsigned diffreg(char *file1, char *file2, int flags)
1001 int *member; /* will be overlaid on nfile[1] */
1002 int *class; /* will be overlaid on nfile[0] */
1003 int *klist; /* will be overlaid on nfile[0] after class */
1010 context_vec_ptr = context_vec_start - 1;
1011 tempname1 = tempname2 = NULL;
1013 /* Is any of them a directory? Then it's simple */
1014 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
1015 return (S_ISDIR(stb1.st_mode) ? D_ISDIR1 : D_ISDIR2);
1017 /* None of them are directories */
1020 if (flags & D_EMPTY1)
1021 f1 = xfopen(bb_dev_null, "r");
1023 f1 = xfopen_stdin(file1);
1024 if (flags & D_EMPTY2)
1025 f2 = xfopen(bb_dev_null, "r");
1027 f2 = xfopen_stdin(file2);
1029 /* NB: if D_EMPTY1/2 is set, other file is always a regular file,
1030 * not pipe/fifo/chardev/etc - D_EMPTY is used by "diff -r" only,
1031 * and it never diffs non-ordinary files in subdirs. */
1032 if (!(flags & (D_EMPTY1 | D_EMPTY2))) {
1033 /* Quick check whether they are different */
1034 /* NB: copies non-REG files to tempfiles and fills tempname1/2 */
1035 i = files_differ(f1, f2);
1036 if (i != 1) { /* not different? */
1037 if (i != 0) /* error? */
1043 if (!asciifile(f1) || !asciifile(f2)) {
1050 prepare(0, f1 /*, stb1.st_size*/);
1051 prepare(1, f2 /*, stb2.st_size*/);
1053 sort(sfile[0], slen[0]);
1054 sort(sfile[1], slen[1]);
1056 member = (int *) nfile[1];
1057 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
1058 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
1060 class = (int *) nfile[0];
1061 unsort(sfile[0], slen[0], class);
1062 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
1064 klist = xmalloc((slen[0] + 2) * sizeof(int));
1067 clist = xmalloc(clistlen * sizeof(struct cand));
1068 i = stone(class, slen[0], member, klist);
1072 J = xrealloc(J, (nlen[0] + 2) * sizeof(int));
1077 ixold = xrealloc(ixold, (nlen[0] + 2) * sizeof(long));
1078 ixnew = xrealloc(ixnew, (nlen[1] + 2) * sizeof(long));
1082 output(file1, f1, file2, f2);
1090 fclose_if_not_stdin(f1);
1091 fclose_if_not_stdin(f2);
1104 #if ENABLE_FEATURE_DIFF_DIR
1105 static void do_diff(char *dir1, char *path1, char *dir2, char *path2)
1107 int flags = 0; /*D_HEADER;*/
1109 char *fullpath1 = NULL; /* if -N */
1110 char *fullpath2 = NULL;
1113 fullpath1 = concat_path_file(dir1, path1);
1115 fullpath2 = concat_path_file(dir2, path2);
1117 if (!fullpath1 || stat(fullpath1, &stb1) != 0) {
1119 memset(&stb1, 0, sizeof(stb1));
1122 fullpath1 = concat_path_file(dir1, path2);
1125 if (!fullpath2 || stat(fullpath2, &stb2) != 0) {
1127 memset(&stb2, 0, sizeof(stb2));
1128 stb2.st_mode = stb1.st_mode;
1131 fullpath2 = concat_path_file(dir2, path1);
1135 if (stb1.st_mode == 0)
1136 stb1.st_mode = stb2.st_mode;
1138 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1139 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1143 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1145 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1148 /* Both files are either REGular or DIRectories */
1149 val = diffreg(fullpath1, fullpath2, flags);
1152 print_status(val, fullpath1, fullpath2 /*, NULL*/);
1160 #if ENABLE_FEATURE_DIFF_DIR
1161 /* This function adds a filename to dl, the directory listing. */
1162 static int add_to_dirlist(const char *filename,
1163 struct stat ATTRIBUTE_UNUSED *sb,
1165 int depth ATTRIBUTE_UNUSED)
1167 /* +2: with space for eventual trailing NULL */
1168 dl = xrealloc(dl, (dl_count+2) * sizeof(dl[0]));
1169 dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata);
1175 /* This returns a sorted directory listing. */
1176 static char **get_recursive_dirlist(char *path)
1179 dl = xzalloc(sizeof(dl[0]));
1181 /* If -r has been set, then the recursive_action function will be
1182 * used. Unfortunately, this outputs the root directory along with
1183 * the recursed paths, so use void *userdata to specify the string
1184 * length of the root directory - '(void*)(strlen(path)+)'.
1185 * add_to_dirlist then removes root dir prefix. */
1186 if (option_mask32 & FLAG_r) {
1187 recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS,
1188 add_to_dirlist, NULL,
1189 (void*)(strlen(path)+1), 0);
1194 dp = warn_opendir(path);
1195 while ((ep = readdir(dp))) {
1196 if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.'))
1198 add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0);
1203 /* Sort dl alphabetically. */
1204 qsort_string_vector(dl, dl_count);
1206 dl[dl_count] = NULL;
1211 static void diffdir(char *p1, char *p2)
1213 char **dirlist1, **dirlist2;
1217 /* Check for trailing slashes. */
1218 dp1 = last_char_is(p1, '/');
1221 dp2 = last_char_is(p2, '/');
1225 /* Get directory listings for p1 and p2. */
1226 dirlist1 = get_recursive_dirlist(p1);
1227 dirlist2 = get_recursive_dirlist(p2);
1229 /* If -S was set, find the starting point. */
1231 while (*dirlist1 != NULL && strcmp(*dirlist1, opt_S_start) < 0)
1233 while (*dirlist2 != NULL && strcmp(*dirlist2, opt_S_start) < 0)
1235 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
1236 bb_error_msg(bb_msg_invalid_arg, "NULL", "-S");
1239 /* Now that both dirlist1 and dirlist2 contain sorted directory
1240 * listings, we can start to go through dirlist1. If both listings
1241 * contain the same file, then do a normal diff. Otherwise, behaviour
1242 * is determined by whether the -N flag is set. */
1243 while (*dirlist1 != NULL || *dirlist2 != NULL) {
1246 pos = dp1 == NULL ? 1 : (dp2 == NULL ? -1 : strcmp(dp1, dp2));
1248 do_diff(p1, dp1, p2, dp2);
1251 } else if (pos < 0) {
1252 if (option_mask32 & FLAG_N)
1253 do_diff(p1, dp1, p2, NULL);
1255 print_only(p1, dp1);
1258 if (option_mask32 & FLAG_N)
1259 do_diff(p1, NULL, p2, dp2);
1261 print_only(p2, dp2);
1269 int diff_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1270 int diff_main(int argc ATTRIBUTE_UNUSED, char **argv)
1274 llist_t *L_arg = NULL;
1278 /* exactly 2 params; collect multiple -L <label>; -U N */
1279 opt_complementary = "=2:L::U+";
1280 getopt32(argv, "abdiL:NqrsS:tTU:wu"
1281 "p" /* ignored (for compatibility) */,
1282 &L_arg, &opt_S_start, &opt_U_context);
1286 if (label1 && label2)
1289 label1 = L_arg->data;
1290 else { /* then label2 is NULL */
1292 label1 = L_arg->data;
1294 /* we leak L_arg here... */
1295 L_arg = L_arg->link;
1299 * Do sanity checks, fill in stb1 and stb2 and call the appropriate
1300 * driver routine. Both drivers use the contents of stb1 and stb2.
1304 if (LONE_DASH(f1)) {
1305 fstat(STDIN_FILENO, &stb1);
1309 if (LONE_DASH(f2)) {
1310 fstat(STDIN_FILENO, &stb2);
1315 if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
1316 bb_error_msg_and_die("can't compare stdin to a directory");
1318 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1319 #if ENABLE_FEATURE_DIFF_DIR
1323 bb_error_msg_and_die("no support for directory comparison");
1327 if (S_ISDIR(stb1.st_mode)) { /* "diff dir file" */
1328 /* NB: "diff dir dir2/dir3/file" must become
1329 * "diff dir/file dir2/dir3/file" */
1330 char *slash = strrchr(f2, '/');
1331 f1 = concat_path_file(f1, slash ? slash+1 : f2);
1334 if (S_ISDIR(stb2.st_mode)) {
1335 char *slash = strrchr(f1, '/');
1336 f2 = concat_path_file(f2, slash ? slash+1 : f1);
1340 /* diffreg can get non-regular files here,
1341 * they are not both DIRestories */
1342 print_status((gotstdin > 1 ? D_SAME : diffreg(f1, f2, 0)),