aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBernhard Reutner-Fischer <rep.dot.nop@gmail.com>2006-04-06 08:15:24 +0000
committerBernhard Reutner-Fischer <rep.dot.nop@gmail.com>2006-04-06 08:15:24 +0000
commit693a93608ae354b08583b5b18db4aedfe83e8e07 (patch)
treef1967971df4effb3958b01f2eb41d4e3ee00efdf
parent8f7d38970046c0ea53de911084561b79ffb2d6b9 (diff)
downloadbusybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.tar.gz
busybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.tar.bz2
busybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.zip
- move code around to avoid the need for the prototypes.
-rw-r--r--coreutils/diff.c1144
1 files changed, 557 insertions, 587 deletions
diff --git a/coreutils/diff.c b/coreutils/diff.c
index b2945dd87..17975ad20 100644
--- a/coreutils/diff.c
+++ b/coreutils/diff.c
@@ -3,14 +3,14 @@
3 * Mini diff implementation for busybox, adapted from OpenBSD diff. 3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
4 * 4 *
5 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> 5 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
6 * 6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
8 */ 8 */
9 9
10/* 10/*
11 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> 11 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
12 * 12 *
13 * Permission to 13 * Permission to
14 * use, copy, modify, and distribute this software for any 14 * use, copy, modify, and distribute this software for any
15 * purpose with or without fee is hereby granted, provided that the above 15 * purpose with or without fee is hereby granted, provided that the above
16 * copyright notice and this permission notice appear in all copies. 16 * copyright notice and this permission notice appear in all copies.
@@ -71,8 +71,8 @@
71 * D_SKIPPED2 - skipped path2 as it is a special file 71 * D_SKIPPED2 - skipped path2 as it is a special file
72 */ 72 */
73 73
74#define D_SAME 0 74#define D_SAME 0
75#define D_DIFFER 1 75#define D_DIFFER (1<<0)
76#define D_BINARY (1<<1) 76#define D_BINARY (1<<1)
77#define D_COMMON (1<<2) 77#define D_COMMON (1<<2)
78#define D_ONLY (1<<3) 78#define D_ONLY (1<<3)
@@ -84,7 +84,7 @@
84 84
85/* Command line options */ 85/* Command line options */
86static unsigned long cmd_flags; 86static unsigned long cmd_flags;
87#define FLAG_a 1 87#define FLAG_a (1<<0)
88#define FLAG_b (1<<1) 88#define FLAG_b (1<<1)
89#define FLAG_d (1<<2) 89#define FLAG_d (1<<2)
90#define FLAG_i (1<<3) 90#define FLAG_i (1<<3)
@@ -145,207 +145,14 @@ static struct context_vec *context_vec_start;
145static struct context_vec *context_vec_end; 145static struct context_vec *context_vec_end;
146static struct context_vec *context_vec_ptr; 146static struct context_vec *context_vec_ptr;
147 147
148static void output(char *, FILE *, char *, FILE *); 148static void print_only(const char *path, size_t dirlen, const char *entry)
149static void check(char *, FILE *, char *, FILE *);
150static void uni_range(int, int);
151static void dump_unified_vec(FILE *, FILE *);
152static void prepare(int, FILE *, off_t);
153static void prune(void);
154static void equiv(struct line *, int, struct line *, int, int *);
155static void unravel(int);
156static void unsort(struct line *, int, int *);
157static void change(char *, FILE *, char *, FILE *, int, int, int, int);
158static void sort(struct line *, int);
159static void print_header(const char *, const char *);
160static int asciifile(FILE *);
161static int fetch(long *, int, int, FILE *, int, int);
162static int newcand(int, int, int);
163static int search(int *, int, int);
164static int skipline(FILE *);
165static int stone(int *, int, int *, int *);
166static int readhash(FILE *);
167static int files_differ(FILE *, FILE *, int);
168
169static int diffreg(char *, char *, int);
170static void print_only(const char *, size_t, const char *);
171static void print_status(int, char *, char *, char *);
172
173#ifdef CONFIG_FEATURE_DIFF_DIR
174static int dir_strcmp(const void *p1, const void *p2) {
175 return strcmp(*(char * const *)p1, *(char * const *)p2);
176}
177
178/* This function adds a filename to dl, the directory listing. */
179
180static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) {
181 dl_count++;
182 dl = xrealloc(dl, dl_count * sizeof(char *));
183 dl[dl_count - 1] = bb_xstrdup(filename);
184 if (cmd_flags & FLAG_r) {
185 int *pp = (int *) userdata;
186 int path_len = *pp + 1;
187 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
188 }
189 return TRUE;
190}
191
192/* This returns a sorted directory listing. */
193static char **get_dir(char *path) {
194
195 int i;
196
197 /* Reset dl_count - there's no need to free dl as bb_xrealloc does
198 * the job nicely. */
199 dl_count = 0;
200
201 /* If -r has been set, then the recursive_action function will be
202 * used. Unfortunately, this outputs the root directory along with
203 * the recursed paths, so use void *userdata to specify the string
204 * length of the root directory. It can then be removed in
205 * add_to_dirlist. */
206
207 int path_len = strlen(path);
208 void *userdata = &path_len;
209
210 /* Now fill dl with a listing. */
211 if (cmd_flags & FLAG_r)
212 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata);
213 else {
214 DIR *dp;
215 struct dirent *ep;
216 if ((dp = opendir(path)) == NULL)
217 bb_error_msg("Error reading directory");
218 while ((ep = readdir(dp))) {
219 if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
220 continue;
221 add_to_dirlist(ep->d_name, NULL, NULL);
222 }
223 closedir(dp);
224 }
225
226 /* Sort dl alphabetically. */
227 qsort(dl, dl_count, sizeof(char *), dir_strcmp);
228
229 /* Copy dl so that we can return it. */
230 char **retval = xmalloc(dl_count * sizeof(char *));
231 for (i = 0; i < dl_count; i++)
232 retval[i] = bb_xstrdup(dl[i]);
233
234 return retval;
235}
236
237static void do_diff (char *dir1, char *path1, char *dir2, char *path2) {
238
239 int flags = D_HEADER;
240 int val;
241
242 char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
243 char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
244
245 if (stat(fullpath1, &stb1) != 0) {
246 flags |= D_EMPTY1;
247 memset(&stb1, 0, sizeof(stb1));
248 fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
249 }
250 if (stat(fullpath2, &stb2) != 0) {
251 flags |= D_EMPTY2;
252 memset(&stb2, 0, sizeof(stb2));
253 stb2.st_mode = stb1.st_mode;
254 fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
255 }
256
257 if (stb1.st_mode == 0)
258 stb1.st_mode = stb2.st_mode;
259
260 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
261 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
262 return;
263 }
264
265 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
266 val = D_SKIPPED1;
267 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
268 val = D_SKIPPED2;
269 else
270 val = diffreg(fullpath1, fullpath2, flags);
271
272 print_status(val, fullpath1, fullpath2, NULL);
273}
274
275static void diffdir (char *p1, char *p2) {
276
277 char **dirlist1, **dirlist2;
278 char *dp1, *dp2;
279 int dirlist1_count, dirlist2_count;
280 int pos;
281
282 /* Check for trailing slashes. */
283
284 if (p1[strlen(p1) - 1] == '/')
285 p1[strlen(p1) - 1] = '\0';
286 if (p2[strlen(p2) - 1] == '/')
287 p2[strlen(p2) - 1] = '\0';
288
289 /* Get directory listings for p1 and p2. */
290
291 dirlist1 = get_dir(p1);
292 dirlist1_count = dl_count;
293 dirlist1[dirlist1_count] = NULL;
294 dirlist2 = get_dir(p2);
295 dirlist2_count = dl_count;
296 dirlist2[dirlist2_count] = NULL;
297
298 /* If -S was set, find the starting point. */
299 if (start) {
300 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
301 dirlist1++;
302 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
303 dirlist2++;
304 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
305 bb_error_msg("Invalid argument to -S");
306 }
307
308 /* Now that both dirlist1 and dirlist2 contain sorted directory
309 * listings, we can start to go through dirlist1. If both listings
310 * contain the same file, then do a normal diff. Otherwise, behaviour
311 * is determined by whether the -N flag is set. */
312 while (*dirlist1 != NULL || *dirlist2 != NULL) {
313 dp1 = *dirlist1;
314 dp2 = *dirlist2;
315 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
316 if (pos == 0) {
317 do_diff(p1, dp1, p2, dp2);
318 dirlist1++;
319 dirlist2++;
320 }
321 else if (pos < 0) {
322 if (cmd_flags & FLAG_N)
323 do_diff(p1, dp1, p2, NULL);
324 else
325 print_only(p1, strlen(p1) + 1, dp1);
326 dirlist1++;
327 }
328 else {
329 if (cmd_flags & FLAG_N)
330 do_diff(p1, NULL, p2, dp2);
331 else
332 print_only(p2, strlen(p2) + 1, dp2);
333 dirlist2++;
334 }
335 }
336}
337#endif
338
339static void
340print_only(const char *path, size_t dirlen, const char *entry)
341{ 149{
342 if (dirlen > 1) 150 if (dirlen > 1)
343 dirlen--; 151 dirlen--;
344 printf("Only in %.*s: %s\n", (int)dirlen, path, entry); 152 printf("Only in %.*s: %s\n", (int)dirlen, path, entry);
345} 153}
346 154
347static void 155static void print_status(int val, char *path1, char *path2, char *entry)
348print_status(int val, char *path1, char *path2, char *entry)
349{ 156{
350 switch (val) { 157 switch (val) {
351 case D_ONLY: 158 case D_ONLY:
@@ -391,175 +198,76 @@ print_status(int val, char *path1, char *path2, char *entry)
391} 198}
392 199
393/* 200/*
394 * The following code uses an algorithm due to Harold Stone, 201 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
395 * which finds a pair of longest identical subsequences in
396 * the two files.
397 *
398 * The major goal is to generate the match vector J.
399 * J[i] is the index of the line in file1 corresponding
400 * to line i file0. J[i] = 0 if there is no
401 * such line in file1.
402 *
403 * Lines are hashed so as to work in core. All potential
404 * matches are located by sorting the lines of each file
405 * on the hash (called ``value''). In particular, this
406 * collects the equivalence classes in file1 together.
407 * Subroutine equiv replaces the value of each line in
408 * file0 by the index of the first element of its
409 * matching equivalence in (the reordered) file1.
410 * To save space equiv squeezes file1 into a single
411 * array member in which the equivalence classes
412 * are simply concatenated, except that their first
413 * members are flagged by changing sign.
414 *
415 * Next the indices that point into member are unsorted into
416 * array class according to the original order of file0.
417 *
418 * The cleverness lies in routine stone. This marches
419 * through the lines of file0, developing a vector klist
420 * of "k-candidates". At step i a k-candidate is a matched
421 * pair of lines x,y (x in file0 y in file1) such that
422 * there is a common subsequence of length k
423 * between the first i lines of file0 and the first y
424 * lines of file1, but there is no such subsequence for
425 * any smaller y. x is the earliest possible mate to y
426 * that occurs in such a subsequence.
427 *
428 * Whenever any of the members of the equivalence class of
429 * lines in file1 matable to a line in file0 has serial number
430 * less than the y of some k-candidate, that k-candidate
431 * with the smallest such y is replaced. The new
432 * k-candidate is chained (via pred) to the current
433 * k-1 candidate so that the actual subsequence can
434 * be recovered. When a member has serial number greater
435 * that the y of all k-candidates, the klist is extended.
436 * At the end, the longest subsequence is pulled out
437 * and placed in the array J by unravel
438 *
439 * With J in hand, the matches there recorded are
440 * checked against reality to assure that no spurious
441 * matches have crept in due to hashing. If they have,
442 * they are broken, and "jackpot" is recorded--a harmless
443 * matter except that a true match for a spuriously
444 * mated line may now be unnecessarily reported as a change.
445 *
446 * Much of the complexity of the program comes simply
447 * from trying to minimize core utilization and
448 * maximize the range of doable problems by dynamically
449 * allocating what is needed and reusing what is not.
450 * The core requirements for problems larger than somewhat
451 * are (in words) 2*length(file0) + length(file1) +
452 * 3*(number of k-candidates installed), typically about
453 * 6n words for files of length n.
454 */ 202 */
455 203static int readhash(FILE *f)
456static int diffreg(char *ofile1, char *ofile2, int flags)
457{ 204{
458 char *file1 = ofile1; 205 int i, t, space;
459 char *file2 = ofile2; 206 int sum;
460 FILE *f1 = NULL;
461 FILE *f2 = NULL;
462 int rval = D_SAME;
463 int i;
464
465 anychange = 0;
466 context_vec_ptr = context_vec_start - 1;
467
468 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
469 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
470 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
471 goto closem;
472
473 if (flags & D_EMPTY1)
474 f1 = bb_xfopen(_PATH_DEVNULL, "r");
475 else {
476 if (strcmp(file1, "-") == 0)
477 f1 = stdin;
478 else
479 f1 = bb_xfopen(file1, "r");
480 }
481 207
482 if (flags & D_EMPTY2) 208 sum = 1;
483 f2 = bb_xfopen(_PATH_DEVNULL, "r"); 209 space = 0;
484 else { 210 if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
485 if (strcmp(file2, "-") == 0) 211 if (FLAG_i)
486 f2 = stdin; 212 for (i = 0; (t = getc(f)) != '\n'; i++) {
213 if (t == EOF) {
214 if (i == 0)
215 return (0);
216 break;
217 }
218 sum = sum * 127 + t;
219 }
487 else 220 else
488 f2 = bb_xfopen(file2, "r"); 221 for (i = 0; (t = getc(f)) != '\n'; i++) {
489 } 222 if (t == EOF) {
490 223 if (i == 0)
491 switch (files_differ(f1, f2, flags)) { 224 return (0);
492 case 0: 225 break;
493 goto closem; 226 }
494 case 1: 227 sum = sum * 127 + t;
495 break; 228 }
496 default: 229 } else {
497 /* error */ 230 for (i = 0;;) {
498 status |= 2; 231 switch (t = getc(f)) {
499 goto closem; 232 case '\t':
500 } 233 case '\r':
501 234 case '\v':
502 if (!asciifile(f1) || !asciifile(f2)) { 235 case '\f':
503 rval = D_BINARY; 236 case ' ':
504 status |= 1; 237 space++;
505 goto closem; 238 continue;
239 default:
240 if (space && !(cmd_flags & FLAG_w)) {
241 i++;
242 space = 0;
243 }
244 sum = sum * 127 + t;
245 i++;
246 continue;
247 case EOF:
248 if (i == 0)
249 return (0);
250 /* FALLTHROUGH */
251 case '\n':
252 break;
253 }
254 break;
255 }
506 } 256 }
257 /*
258 * There is a remote possibility that we end up with a zero sum.
259 * Zero is used as an EOF marker, so return 1 instead.
260 */
261 return (sum == 0 ? 1 : sum);
262}
507 263
508 prepare(0, f1, stb1.st_size);
509 prepare(1, f2, stb2.st_size);
510 prune();
511 sort(sfile[0], slen[0]);
512 sort(sfile[1], slen[1]);
513
514 member = (int *)file[1];
515 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
516 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
517
518 class = (int *)file[0];
519 unsort(sfile[0], slen[0], class);
520 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
521
522 klist = xmalloc((slen[0] + 2) * sizeof(int));
523 clen = 0;
524 clistlen = 100;
525 clist = xmalloc(clistlen * sizeof(struct cand));
526 i = stone(class, slen[0], member, klist);
527 free(member);
528 free(class);
529 264
530 J = xrealloc(J, (len[0] + 2) * sizeof(int));
531 unravel(klist[i]);
532 free(clist);
533 free(klist);
534
535 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
536 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
537 check(file1, f1, file2, f2);
538 output(file1, f1, file2, f2);
539
540closem:
541 if (anychange) {
542 status |= 1;
543 if (rval == D_SAME)
544 rval = D_DIFFER;
545 }
546 if (f1 != NULL)
547 fclose(f1);
548 if (f2 != NULL)
549 fclose(f2);
550 if (file1 != ofile1)
551 free(file1);
552 if (file2 != ofile2)
553 free(file2);
554 return (rval);
555}
556 265
557/* 266/*
558 * Check to see if the given files differ. 267 * Check to see if the given files differ.
559 * Returns 0 if they are the same, 1 if different, and -1 on error. 268 * Returns 0 if they are the same, 1 if different, and -1 on error.
560 */ 269 */
561static int 270static int files_differ(FILE *f1, FILE *f2, int flags)
562files_differ(FILE *f1, FILE *f2, int flags)
563{ 271{
564 char buf1[BUFSIZ], buf2[BUFSIZ]; 272 char buf1[BUFSIZ], buf2[BUFSIZ];
565 size_t i, j; 273 size_t i, j;
@@ -582,8 +290,7 @@ files_differ(FILE *f1, FILE *f2, int flags)
582 } 290 }
583} 291}
584 292
585static void 293static void prepare(int i, FILE *fd, off_t filesize)
586prepare(int i, FILE *fd, off_t filesize)
587{ 294{
588 struct line *p; 295 struct line *p;
589 int j, h; 296 int j, h;
@@ -607,8 +314,7 @@ prepare(int i, FILE *fd, off_t filesize)
607 file[i] = p; 314 file[i] = p;
608} 315}
609 316
610static void 317static void prune(void)
611prune(void)
612{ 318{
613 int i, j; 319 int i, j;
614 320
@@ -628,8 +334,7 @@ prune(void)
628 } 334 }
629} 335}
630 336
631static void 337static void equiv(struct line *a, int n, struct line *b, int m, int *c)
632equiv(struct line *a, int n, struct line *b, int m, int *c)
633{ 338{
634 int i, j; 339 int i, j;
635 340
@@ -658,8 +363,8 @@ equiv(struct line *a, int n, struct line *b, int m, int *c)
658 363
659static int isqrt(int n) { 364static int isqrt(int n) {
660 int y, x = 1; 365 int y, x = 1;
661 if (n == 0) return(0); 366 if (n == 0) return(0);
662 367
663 do { 368 do {
664 y = x; 369 y = x;
665 x = n / x; 370 x = n / x;
@@ -670,8 +375,48 @@ static int isqrt(int n) {
670 return (x); 375 return (x);
671} 376}
672 377
673static int 378
674stone(int *a, int n, int *b, int *c) 379static int newcand(int x, int y, int pred)
380{
381 struct cand *q;
382
383 if (clen == clistlen) {
384 clistlen = clistlen * 11 / 10;
385 clist = xrealloc(clist, clistlen * sizeof(struct cand));
386 }
387 q = clist + clen;
388 q->x = x;
389 q->y = y;
390 q->pred = pred;
391 return (clen++);
392}
393
394
395static int search(int *c, int k, int y)
396{
397 int i, j, l, t;
398
399 if (clist[c[k]].y < y) /* quick look for typical case */
400 return (k + 1);
401 i = 0;
402 j = k + 1;
403 while (1) {
404 l = i + j;
405 if ((l >>= 1) <= i)
406 break;
407 t = clist[c[l]].y;
408 if (t > y)
409 j = l;
410 else if (t < y)
411 i = l;
412 else
413 return (l);
414 }
415 return (l + 1);
416}
417
418
419static int stone(int *a, int n, int *b, int *c)
675{ 420{
676 int i, k, y, j, l; 421 int i, k, y, j, l;
677 int oldc, tc, oldl; 422 int oldc, tc, oldl;
@@ -715,67 +460,48 @@ stone(int *a, int n, int *b, int *c)
715 return (k); 460 return (k);
716} 461}
717 462
718static int 463static void unravel(int p)
719newcand(int x, int y, int pred)
720{ 464{
721 struct cand *q; 465 struct cand *q;
466 int i;
722 467
723 if (clen == clistlen) { 468 for (i = 0; i <= len[0]; i++)
724 clistlen = clistlen * 11 / 10; 469 J[i] = i <= pref ? i :
725 clist = xrealloc(clist, clistlen * sizeof(struct cand)); 470 i > len[0] - suff ? i + len[1] - len[0] : 0;
726 } 471 for (q = clist + p; q->y != 0; q = clist + q->pred)
727 q = clist + clen; 472 J[q->x + pref] = q->y + pref;
728 q->x = x;
729 q->y = y;
730 q->pred = pred;
731 return (clen++);
732} 473}
733 474
734static int 475
735search(int *c, int k, int y) 476static void unsort(struct line *f, int l, int *b)
736{ 477{
737 int i, j, l, t; 478 int *a, i;
738 479
739 if (clist[c[k]].y < y) /* quick look for typical case */ 480 a = xmalloc((l + 1) * sizeof(int));
740 return (k + 1); 481 for (i = 1; i <= l; i++)
741 i = 0; 482 a[f[i].serial] = f[i].value;
742 j = k + 1; 483 for (i = 1; i <= l; i++)
743 while (1) { 484 b[i] = a[i];
744 l = i + j; 485 free(a);
745 if ((l >>= 1) <= i)
746 break;
747 t = clist[c[l]].y;
748 if (t > y)
749 j = l;
750 else if (t < y)
751 i = l;
752 else
753 return (l);
754 }
755 return (l + 1);
756} 486}
757 487
758static void 488static int skipline(FILE *f)
759unravel(int p)
760{ 489{
761 struct cand *q; 490 int i, c;
762 int i;
763 491
764 for (i = 0; i <= len[0]; i++) 492 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
765 J[i] = i <= pref ? i : 493 continue;
766 i > len[0] - suff ? i + len[1] - len[0] : 0; 494 return (i);
767 for (q = clist + p; q->y != 0; q = clist + q->pred)
768 J[q->x + pref] = q->y + pref;
769} 495}
770 496
497
771/* 498/*
772 * Check does double duty: 499 * Check does double duty:
773 * 1. ferret out any fortuitous correspondences due 500 * 1. ferret out any fortuitous correspondences due
774 * to confounding by hashing (which result in "jackpot") 501 * to confounding by hashing (which result in "jackpot")
775 * 2. collect random access indexes to the two files 502 * 2. collect random access indexes to the two files
776 */ 503 */
777static void 504static void check(char *file1, FILE *f1, char *file2, FILE *f2)
778check(char *file1, FILE *f1, char *file2, FILE *f2)
779{ 505{
780 int i, j, jackpot, c, d; 506 int i, j, jackpot, c, d;
781 long ctold, ctnew; 507 long ctold, ctnew;
@@ -868,8 +594,7 @@ check(char *file1, FILE *f1, char *file2, FILE *f2)
868} 594}
869 595
870/* shellsort CACM #201 */ 596/* shellsort CACM #201 */
871static void 597static void sort(struct line *a, int n)
872sort(struct line *a, int n)
873{ 598{
874 struct line *ai, *aim, w; 599 struct line *ai, *aim, w;
875 int j, m = 0, k; 600 int j, m = 0, k;
@@ -900,57 +625,6 @@ sort(struct line *a, int n)
900 } 625 }
901} 626}
902 627
903static void
904unsort(struct line *f, int l, int *b)
905{
906 int *a, i;
907
908 a = xmalloc((l + 1) * sizeof(int));
909 for (i = 1; i <= l; i++)
910 a[f[i].serial] = f[i].value;
911 for (i = 1; i <= l; i++)
912 b[i] = a[i];
913 free(a);
914}
915
916static int
917skipline(FILE *f)
918{
919 int i, c;
920
921 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
922 continue;
923 return (i);
924}
925
926static void
927output(char *file1, FILE *f1, char *file2, FILE *f2)
928{
929 int m, i0, i1, j0, j1;
930
931 rewind(f1);
932 rewind(f2);
933 m = len[0];
934 J[0] = 0;
935 J[m + 1] = len[1] + 1;
936 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
937 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
938 i0++;
939 j0 = J[i0 - 1] + 1;
940 i1 = i0 - 1;
941 while (i1 < m && J[i1 + 1] == 0)
942 i1++;
943 j1 = J[i1 + 1] - 1;
944 J[i1] = j1;
945 change(file1, f1, file2, f2, i0, i1, j0, j1);
946 }
947 if (m == 0) {
948 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
949 }
950 if (anychange != 0) {
951 dump_unified_vec(f1, f2);
952 }
953}
954 628
955static void uni_range(int a, int b) 629static void uni_range(int a, int b)
956{ 630{
@@ -962,57 +636,7 @@ static void uni_range(int a, int b)
962 printf("%d,0", b); 636 printf("%d,0", b);
963} 637}
964 638
965/* 639static int fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
966 * Indicate that there is a difference between lines a and b of the from file
967 * to get to lines c to d of the to file. If a is greater then b then there
968 * are no lines in the from file involved and this means that there were
969 * lines appended (beginning at b). If c is greater than d then there are
970 * lines missing from the to file.
971 */
972static void
973change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
974{
975 static size_t max_context = 64;
976
977 if (a > b && c > d) return;
978 if (cmd_flags & FLAG_q) return;
979
980 /*
981 * Allocate change records as needed.
982 */
983 if (context_vec_ptr == context_vec_end - 1) {
984 ptrdiff_t offset = context_vec_ptr - context_vec_start;
985 max_context <<= 1;
986 context_vec_start = xrealloc(context_vec_start,
987 max_context * sizeof(struct context_vec));
988 context_vec_end = context_vec_start + max_context;
989 context_vec_ptr = context_vec_start + offset;
990 }
991 if (anychange == 0) {
992 /*
993 * Print the context/unidiff header first time through.
994 */
995 print_header(file1, file2);
996 anychange = 1;
997 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
998 c > context_vec_ptr->d + (2 * context) + 1) {
999 /*
1000 * If this change is more than 'context' lines from the
1001 * previous change, dump the record and reset it.
1002 */
1003 dump_unified_vec(f1, f2);
1004 }
1005 context_vec_ptr++;
1006 context_vec_ptr->a = a;
1007 context_vec_ptr->b = b;
1008 context_vec_ptr->c = c;
1009 context_vec_ptr->d = d;
1010 return;
1011
1012}
1013
1014static int
1015fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1016{ 640{
1017 int i, j, c, lastc, col, nc; 641 int i, j, c, lastc, col, nc;
1018 642
@@ -1045,81 +669,15 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1045 return (0); 669 return (0);
1046} 670}
1047 671
1048/* 672static int asciifile(FILE *f)
1049 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1050 */
1051static int
1052readhash(FILE *f)
1053{ 673{
1054 int i, t, space;
1055 int sum;
1056 674
1057 sum = 1; 675 if ((cmd_flags & FLAG_a) || f == NULL)
1058 space = 0; 676 return (1);
1059 if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
1060 if (FLAG_i)
1061 for (i = 0; (t = getc(f)) != '\n'; i++) {
1062 if (t == EOF) {
1063 if (i == 0)
1064 return (0);
1065 break;
1066 }
1067 sum = sum * 127 + t;
1068 }
1069 else
1070 for (i = 0; (t = getc(f)) != '\n'; i++) {
1071 if (t == EOF) {
1072 if (i == 0)
1073 return (0);
1074 break;
1075 }
1076 sum = sum * 127 + t;
1077 }
1078 } else {
1079 for (i = 0;;) {
1080 switch (t = getc(f)) {
1081 case '\t':
1082 case '\r':
1083 case '\v':
1084 case '\f':
1085 case ' ':
1086 space++;
1087 continue;
1088 default:
1089 if (space && !(cmd_flags & FLAG_w)) {
1090 i++;
1091 space = 0;
1092 }
1093 sum = sum * 127 + t;
1094 i++;
1095 continue;
1096 case EOF:
1097 if (i == 0)
1098 return (0);
1099 /* FALLTHROUGH */
1100 case '\n':
1101 break;
1102 }
1103 break;
1104 }
1105 }
1106 /*
1107 * There is a remote possibility that we end up with a zero sum.
1108 * Zero is used as an EOF marker, so return 1 instead.
1109 */
1110 return (sum == 0 ? 1 : sum);
1111}
1112
1113static int
1114asciifile(FILE *f)
1115{
1116
1117 if ((cmd_flags & FLAG_a) || f == NULL)
1118 return (1);
1119#ifdef CONFIG_FEATURE_DIFF_BINARY 677#ifdef CONFIG_FEATURE_DIFF_BINARY
1120 unsigned char buf[BUFSIZ]; 678 unsigned char buf[BUFSIZ];
1121 int i, cnt; 679 int i, cnt;
1122 680
1123 rewind(f); 681 rewind(f);
1124 cnt = fread(buf, 1, sizeof(buf), f); 682 cnt = fread(buf, 1, sizeof(buf), f);
1125 for (i = 0; i < cnt; i++) 683 for (i = 0; i < cnt; i++)
@@ -1130,8 +688,7 @@ asciifile(FILE *f)
1130} 688}
1131 689
1132/* dump accumulated "unified" diff changes */ 690/* dump accumulated "unified" diff changes */
1133static void 691static void dump_unified_vec(FILE *f1, FILE *f2)
1134dump_unified_vec(FILE *f1, FILE *f2)
1135{ 692{
1136 struct context_vec *cvp = context_vec_start; 693 struct context_vec *cvp = context_vec_start;
1137 int lowa, upb, lowc, upd; 694 int lowa, upb, lowc, upd;
@@ -1197,8 +754,8 @@ dump_unified_vec(FILE *f1, FILE *f2)
1197 context_vec_ptr = context_vec_start - 1; 754 context_vec_ptr = context_vec_start - 1;
1198} 755}
1199 756
1200static void 757
1201print_header(const char *file1, const char *file2) 758static void print_header(const char *file1, const char *file2)
1202{ 759{
1203 if (label[0] != NULL) 760 if (label[0] != NULL)
1204 printf("%s %s\n", "---", 761 printf("%s %s\n", "---",
@@ -1214,6 +771,419 @@ print_header(const char *file1, const char *file2)
1214 file2, ctime(&stb2.st_mtime)); 771 file2, ctime(&stb2.st_mtime));
1215} 772}
1216 773
774
775
776/*
777 * Indicate that there is a difference between lines a and b of the from file
778 * to get to lines c to d of the to file. If a is greater then b then there
779 * are no lines in the from file involved and this means that there were
780 * lines appended (beginning at b). If c is greater than d then there are
781 * lines missing from the to file.
782 */
783static void change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
784{
785 static size_t max_context = 64;
786
787 if (a > b && c > d) return;
788 if (cmd_flags & FLAG_q) return;
789
790 /*
791 * Allocate change records as needed.
792 */
793 if (context_vec_ptr == context_vec_end - 1) {
794 ptrdiff_t offset = context_vec_ptr - context_vec_start;
795 max_context <<= 1;
796 context_vec_start = xrealloc(context_vec_start,
797 max_context * sizeof(struct context_vec));
798 context_vec_end = context_vec_start + max_context;
799 context_vec_ptr = context_vec_start + offset;
800 }
801 if (anychange == 0) {
802 /*
803 * Print the context/unidiff header first time through.
804 */
805 print_header(file1, file2);
806 anychange = 1;
807 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
808 c > context_vec_ptr->d + (2 * context) + 1) {
809 /*
810 * If this change is more than 'context' lines from the
811 * previous change, dump the record and reset it.
812 */
813 dump_unified_vec(f1, f2);
814 }
815 context_vec_ptr++;
816 context_vec_ptr->a = a;
817 context_vec_ptr->b = b;
818 context_vec_ptr->c = c;
819 context_vec_ptr->d = d;
820 return;
821
822}
823
824
825static void output(char *file1, FILE *f1, char *file2, FILE *f2)
826{
827 int m, i0, i1, j0, j1;
828
829 rewind(f1);
830 rewind(f2);
831 m = len[0];
832 J[0] = 0;
833 J[m + 1] = len[1] + 1;
834 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
835 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
836 i0++;
837 j0 = J[i0 - 1] + 1;
838 i1 = i0 - 1;
839 while (i1 < m && J[i1 + 1] == 0)
840 i1++;
841 j1 = J[i1 + 1] - 1;
842 J[i1] = j1;
843 change(file1, f1, file2, f2, i0, i1, j0, j1);
844 }
845 if (m == 0) {
846 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
847 }
848 if (anychange != 0) {
849 dump_unified_vec(f1, f2);
850 }
851}
852
853/*
854 * The following code uses an algorithm due to Harold Stone,
855 * which finds a pair of longest identical subsequences in
856 * the two files.
857 *
858 * The major goal is to generate the match vector J.
859 * J[i] is the index of the line in file1 corresponding
860 * to line i file0. J[i] = 0 if there is no
861 * such line in file1.
862 *
863 * Lines are hashed so as to work in core. All potential
864 * matches are located by sorting the lines of each file
865 * on the hash (called ``value''). In particular, this
866 * collects the equivalence classes in file1 together.
867 * Subroutine equiv replaces the value of each line in
868 * file0 by the index of the first element of its
869 * matching equivalence in (the reordered) file1.
870 * To save space equiv squeezes file1 into a single
871 * array member in which the equivalence classes
872 * are simply concatenated, except that their first
873 * members are flagged by changing sign.
874 *
875 * Next the indices that point into member are unsorted into
876 * array class according to the original order of file0.
877 *
878 * The cleverness lies in routine stone. This marches
879 * through the lines of file0, developing a vector klist
880 * of "k-candidates". At step i a k-candidate is a matched
881 * pair of lines x,y (x in file0 y in file1) such that
882 * there is a common subsequence of length k
883 * between the first i lines of file0 and the first y
884 * lines of file1, but there is no such subsequence for
885 * any smaller y. x is the earliest possible mate to y
886 * that occurs in such a subsequence.
887 *
888 * Whenever any of the members of the equivalence class of
889 * lines in file1 matable to a line in file0 has serial number
890 * less than the y of some k-candidate, that k-candidate
891 * with the smallest such y is replaced. The new
892 * k-candidate is chained (via pred) to the current
893 * k-1 candidate so that the actual subsequence can
894 * be recovered. When a member has serial number greater
895 * that the y of all k-candidates, the klist is extended.
896 * At the end, the longest subsequence is pulled out
897 * and placed in the array J by unravel
898 *
899 * With J in hand, the matches there recorded are
900 * checked against reality to assure that no spurious
901 * matches have crept in due to hashing. If they have,
902 * they are broken, and "jackpot" is recorded--a harmless
903 * matter except that a true match for a spuriously
904 * mated line may now be unnecessarily reported as a change.
905 *
906 * Much of the complexity of the program comes simply
907 * from trying to minimize core utilization and
908 * maximize the range of doable problems by dynamically
909 * allocating what is needed and reusing what is not.
910 * The core requirements for problems larger than somewhat
911 * are (in words) 2*length(file0) + length(file1) +
912 * 3*(number of k-candidates installed), typically about
913 * 6n words for files of length n.
914 */
915
916static int diffreg(char *ofile1, char *ofile2, int flags)
917{
918 char *file1 = ofile1;
919 char *file2 = ofile2;
920 FILE *f1 = NULL;
921 FILE *f2 = NULL;
922 int rval = D_SAME;
923 int i;
924
925 anychange = 0;
926 context_vec_ptr = context_vec_start - 1;
927
928 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
929 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
930 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
931 goto closem;
932
933 if (flags & D_EMPTY1)
934 f1 = bb_xfopen(_PATH_DEVNULL, "r");
935 else {
936 if (strcmp(file1, "-") == 0)
937 f1 = stdin;
938 else
939 f1 = bb_xfopen(file1, "r");
940 }
941
942 if (flags & D_EMPTY2)
943 f2 = bb_xfopen(_PATH_DEVNULL, "r");
944 else {
945 if (strcmp(file2, "-") == 0)
946 f2 = stdin;
947 else
948 f2 = bb_xfopen(file2, "r");
949 }
950
951 switch (files_differ(f1, f2, flags)) {
952 case 0:
953 goto closem;
954 case 1:
955 break;
956 default:
957 /* error */
958 status |= 2;
959 goto closem;
960 }
961
962 if (!asciifile(f1) || !asciifile(f2)) {
963 rval = D_BINARY;
964 status |= 1;
965 goto closem;
966 }
967
968 prepare(0, f1, stb1.st_size);
969 prepare(1, f2, stb2.st_size);
970 prune();
971 sort(sfile[0], slen[0]);
972 sort(sfile[1], slen[1]);
973
974 member = (int *)file[1];
975 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
976 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
977
978 class = (int *)file[0];
979 unsort(sfile[0], slen[0], class);
980 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
981
982 klist = xmalloc((slen[0] + 2) * sizeof(int));
983 clen = 0;
984 clistlen = 100;
985 clist = xmalloc(clistlen * sizeof(struct cand));
986 i = stone(class, slen[0], member, klist);
987 free(member);
988 free(class);
989
990 J = xrealloc(J, (len[0] + 2) * sizeof(int));
991 unravel(klist[i]);
992 free(clist);
993 free(klist);
994
995 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
996 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
997 check(file1, f1, file2, f2);
998 output(file1, f1, file2, f2);
999
1000closem:
1001 if (anychange) {
1002 status |= 1;
1003 if (rval == D_SAME)
1004 rval = D_DIFFER;
1005 }
1006 if (f1 != NULL)
1007 fclose(f1);
1008 if (f2 != NULL)
1009 fclose(f2);
1010 if (file1 != ofile1)
1011 free(file1);
1012 if (file2 != ofile2)
1013 free(file2);
1014 return (rval);
1015}
1016
1017#if ENABLE_FEATURE_DIFF_DIR
1018static void do_diff (char *dir1, char *path1, char *dir2, char *path2) {
1019
1020 int flags = D_HEADER;
1021 int val;
1022
1023 char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
1024 char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
1025
1026 if (stat(fullpath1, &stb1) != 0) {
1027 flags |= D_EMPTY1;
1028 memset(&stb1, 0, sizeof(stb1));
1029 fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
1030 }
1031 if (stat(fullpath2, &stb2) != 0) {
1032 flags |= D_EMPTY2;
1033 memset(&stb2, 0, sizeof(stb2));
1034 stb2.st_mode = stb1.st_mode;
1035 fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
1036 }
1037
1038 if (stb1.st_mode == 0)
1039 stb1.st_mode = stb2.st_mode;
1040
1041 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1042 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
1043 return;
1044 }
1045
1046 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
1047 val = D_SKIPPED1;
1048 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
1049 val = D_SKIPPED2;
1050 else
1051 val = diffreg(fullpath1, fullpath2, flags);
1052
1053 print_status(val, fullpath1, fullpath2, NULL);
1054}
1055#endif
1056
1057#ifdef CONFIG_FEATURE_DIFF_DIR
1058static int dir_strcmp(const void *p1, const void *p2) {
1059 return strcmp(*(char * const *)p1, *(char * const *)p2);
1060}
1061
1062/* This function adds a filename to dl, the directory listing. */
1063
1064static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) {
1065 dl_count++;
1066 dl = xrealloc(dl, dl_count * sizeof(char *));
1067 dl[dl_count - 1] = bb_xstrdup(filename);
1068 if (cmd_flags & FLAG_r) {
1069 int *pp = (int *) userdata;
1070 int path_len = *pp + 1;
1071 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
1072 }
1073 return TRUE;
1074}
1075
1076/* This returns a sorted directory listing. */
1077static char **get_dir(char *path) {
1078
1079 int i;
1080
1081 /* Reset dl_count - there's no need to free dl as bb_xrealloc does
1082 * the job nicely. */
1083 dl_count = 0;
1084
1085 /* If -r has been set, then the recursive_action function will be
1086 * used. Unfortunately, this outputs the root directory along with
1087 * the recursed paths, so use void *userdata to specify the string
1088 * length of the root directory. It can then be removed in
1089 * add_to_dirlist. */
1090
1091 int path_len = strlen(path);
1092 void *userdata = &path_len;
1093
1094 /* Now fill dl with a listing. */
1095 if (cmd_flags & FLAG_r)
1096 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata);
1097 else {
1098 DIR *dp;
1099 struct dirent *ep;
1100 if ((dp = opendir(path)) == NULL)
1101 bb_error_msg("Error reading directory");
1102 while ((ep = readdir(dp))) {
1103 if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
1104 continue;
1105 add_to_dirlist(ep->d_name, NULL, NULL);
1106 }
1107 closedir(dp);
1108 }
1109
1110 /* Sort dl alphabetically. */
1111 qsort(dl, dl_count, sizeof(char *), dir_strcmp);
1112
1113 /* Copy dl so that we can return it. */
1114 char **retval = xmalloc(dl_count * sizeof(char *));
1115 for (i = 0; i < dl_count; i++)
1116 retval[i] = bb_xstrdup(dl[i]);
1117
1118 return retval;
1119}
1120
1121static void diffdir (char *p1, char *p2) {
1122
1123 char **dirlist1, **dirlist2;
1124 char *dp1, *dp2;
1125 int dirlist1_count, dirlist2_count;
1126 int pos;
1127
1128 /* Check for trailing slashes. */
1129
1130 if (p1[strlen(p1) - 1] == '/')
1131 p1[strlen(p1) - 1] = '\0';
1132 if (p2[strlen(p2) - 1] == '/')
1133 p2[strlen(p2) - 1] = '\0';
1134
1135 /* Get directory listings for p1 and p2. */
1136
1137 dirlist1 = get_dir(p1);
1138 dirlist1_count = dl_count;
1139 dirlist1[dirlist1_count] = NULL;
1140 dirlist2 = get_dir(p2);
1141 dirlist2_count = dl_count;
1142 dirlist2[dirlist2_count] = NULL;
1143
1144 /* If -S was set, find the starting point. */
1145 if (start) {
1146 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
1147 dirlist1++;
1148 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
1149 dirlist2++;
1150 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
1151 bb_error_msg("Invalid argument to -S");
1152 }
1153
1154 /* Now that both dirlist1 and dirlist2 contain sorted directory
1155 * listings, we can start to go through dirlist1. If both listings
1156 * contain the same file, then do a normal diff. Otherwise, behaviour
1157 * is determined by whether the -N flag is set. */
1158 while (*dirlist1 != NULL || *dirlist2 != NULL) {
1159 dp1 = *dirlist1;
1160 dp2 = *dirlist2;
1161 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
1162 if (pos == 0) {
1163 do_diff(p1, dp1, p2, dp2);
1164 dirlist1++;
1165 dirlist2++;
1166 }
1167 else if (pos < 0) {
1168 if (cmd_flags & FLAG_N)
1169 do_diff(p1, dp1, p2, NULL);
1170 else
1171 print_only(p1, strlen(p1) + 1, dp1);
1172 dirlist1++;
1173 }
1174 else {
1175 if (cmd_flags & FLAG_N)
1176 do_diff(p1, NULL, p2, dp2);
1177 else
1178 print_only(p2, strlen(p2) + 1, dp2);
1179 dirlist2++;
1180 }
1181 }
1182}
1183#endif
1184
1185
1186
1217extern int diff_main(int argc, char **argv) { 1187extern int diff_main(int argc, char **argv) {
1218 char *ep; 1188 char *ep;
1219 int gotstdin = 0; 1189 int gotstdin = 0;