aboutsummaryrefslogtreecommitdiff
path: root/coreutils/diff.c
diff options
context:
space:
mode:
authorBernhard Reutner-Fischer <rep.dot.nop@gmail.com>2006-04-06 08:11:08 +0000
committerBernhard Reutner-Fischer <rep.dot.nop@gmail.com>2006-04-06 08:11:08 +0000
commit8f7d38970046c0ea53de911084561b79ffb2d6b9 (patch)
treeb19ed436f9361694ce3d071f3a8ec5957042c8e4 /coreutils/diff.c
parente11a01cc34ab024b81ca83fec5182c6a444d4798 (diff)
downloadbusybox-w32-8f7d38970046c0ea53de911084561b79ffb2d6b9.tar.gz
busybox-w32-8f7d38970046c0ea53de911084561b79ffb2d6b9.tar.bz2
busybox-w32-8f7d38970046c0ea53de911084561b79ffb2d6b9.zip
- new applet diff. Rob Sullivan writes:
Here's my attempt at a mini diff applet - it's adapted from the code at http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/diff/, and only supports unified diffs. I've busyboxified everything to a reasonable degree, so I think the code is suitable enough to be included, but there's still a fair bit of cleaning up to be done.
Diffstat (limited to 'coreutils/diff.c')
-rw-r--r--coreutils/diff.c1277
1 files changed, 1277 insertions, 0 deletions
diff --git a/coreutils/diff.c b/coreutils/diff.c
new file mode 100644
index 000000000..b2945dd87
--- /dev/null
+++ b/coreutils/diff.c
@@ -0,0 +1,1277 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Mini diff implementation for busybox, adapted from OpenBSD diff.
4 *
5 * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
8 */
9
10/*
11 * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com>
12 *
13 * Permission to
14 * use, copy, modify, and distribute this software for any
15 * purpose with or without fee is hereby granted, provided that the above
16 * copyright notice and this permission notice appear in all copies.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
19 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
21 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
24 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25 *
26 * Sponsored in part by the Defense Advanced Research Projects
27 * Agency (DARPA) and Air Force Research Laboratory, Air Force
28 * Materiel Command, USAF, under agreement number F39502-99-1-0512.
29 */
30
31#include <time.h>
32#include <sys/types.h>
33#include <sys/param.h>
34#include <sys/stat.h>
35#include <ctype.h>
36#include <errno.h>
37#include <signal.h>
38#include <stdlib.h>
39#include <stdio.h>
40#include <stdarg.h>
41#include <string.h>
42#include <unistd.h>
43#include <sys/wait.h>
44#include <fcntl.h>
45#include <stddef.h>
46#include <paths.h>
47#include <dirent.h>
48#include "busybox.h"
49
50#define FSIZE_MAX 32768
51
52/*
53 * Output flags
54 */
55#define D_HEADER 1 /* Print a header/footer between files */
56#define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */
57#define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */
58
59/*
60 * Status values for print_status() and diffreg() return values
61 * Guide:
62 * D_SAME - files are the same
63 * D_DIFFER - files differ
64 * D_BINARY - binary files differ
65 * D_COMMON - subdirectory common to both dirs
66 * D_ONLY - file only exists in one dir
67 * D_MISMATCH1 - path1 a dir, path2 a file
68 * D_MISMATCH2 - path1 a file, path2 a dir
69 * D_ERROR - error occurred
70 * D_SKIPPED1 - skipped path1 as it is a special file
71 * D_SKIPPED2 - skipped path2 as it is a special file
72 */
73
74#define D_SAME 0
75#define D_DIFFER 1
76#define D_BINARY (1<<1)
77#define D_COMMON (1<<2)
78#define D_ONLY (1<<3)
79#define D_MISMATCH1 (1<<4)
80#define D_MISMATCH2 (1<<5)
81#define D_ERROR (1<<6)
82#define D_SKIPPED1 (1<<7)
83#define D_SKIPPED2 (1<<8)
84
85/* Command line options */
86static unsigned long cmd_flags;
87#define FLAG_a 1
88#define FLAG_b (1<<1)
89#define FLAG_d (1<<2)
90#define FLAG_i (1<<3)
91#define FLAG_N (1<<4)
92#define FLAG_q (1<<5)
93#define FLAG_r (1<<6)
94#define FLAG_s (1<<7)
95#define FLAG_S (1<<8)
96#define FLAG_t (1<<9)
97#define FLAG_T (1<<10)
98#define FLAG_U (1<<11)
99#define FLAG_w (1<<12)
100
101int context, status;
102char *start, *label[2];
103struct stat stb1, stb2;
104char **dl;
105int dl_count = 0;
106
107struct cand {
108 int x;
109 int y;
110 int pred;
111};
112
113struct line {
114 int serial;
115 int value;
116} *file[2];
117
118/*
119 * The following struct is used to record change information
120 * doing a "context" or "unified" diff. (see routine "change" to
121 * understand the highly mnemonic field names)
122 */
123struct context_vec {
124 int a; /* start line in old file */
125 int b; /* end line in old file */
126 int c; /* start line in new file */
127 int d; /* end line in new file */
128};
129
130static int *J; /* will be overlaid on class */
131static int *class; /* will be overlaid on file[0] */
132static int *klist; /* will be overlaid on file[0] after class */
133static int *member; /* will be overlaid on file[1] */
134static int clen;
135static int len[2];
136static int pref, suff; /* length of prefix and suffix */
137static int slen[2];
138static int anychange;
139static long *ixnew; /* will be overlaid on file[1] */
140static long *ixold; /* will be overlaid on klist */
141static struct cand *clist; /* merely a free storage pot for candidates */
142static int clistlen; /* the length of clist */
143static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
144static struct context_vec *context_vec_start;
145static struct context_vec *context_vec_end;
146static struct context_vec *context_vec_ptr;
147
148static void output(char *, FILE *, char *, FILE *);
149static void check(char *, FILE *, char *, FILE *);
150static void uni_range(int, int);
151static void dump_unified_vec(FILE *, FILE *);
152static void prepare(int, FILE *, off_t);
153static void prune(void);
154static void equiv(struct line *, int, struct line *, int, int *);
155static void unravel(int);
156static void unsort(struct line *, int, int *);
157static void change(char *, FILE *, char *, FILE *, int, int, int, int);
158static void sort(struct line *, int);
159static void print_header(const char *, const char *);
160static int asciifile(FILE *);
161static int fetch(long *, int, int, FILE *, int, int);
162static int newcand(int, int, int);
163static int search(int *, int, int);
164static int skipline(FILE *);
165static int stone(int *, int, int *, int *);
166static int readhash(FILE *);
167static int files_differ(FILE *, FILE *, int);
168
169static int diffreg(char *, char *, int);
170static void print_only(const char *, size_t, const char *);
171static void print_status(int, char *, char *, char *);
172
173#ifdef CONFIG_FEATURE_DIFF_DIR
174static int dir_strcmp(const void *p1, const void *p2) {
175 return strcmp(*(char * const *)p1, *(char * const *)p2);
176}
177
178/* This function adds a filename to dl, the directory listing. */
179
180static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) {
181 dl_count++;
182 dl = xrealloc(dl, dl_count * sizeof(char *));
183 dl[dl_count - 1] = bb_xstrdup(filename);
184 if (cmd_flags & FLAG_r) {
185 int *pp = (int *) userdata;
186 int path_len = *pp + 1;
187 dl[dl_count - 1] = &(dl[dl_count - 1])[path_len];
188 }
189 return TRUE;
190}
191
192/* This returns a sorted directory listing. */
193static char **get_dir(char *path) {
194
195 int i;
196
197 /* Reset dl_count - there's no need to free dl as bb_xrealloc does
198 * the job nicely. */
199 dl_count = 0;
200
201 /* If -r has been set, then the recursive_action function will be
202 * used. Unfortunately, this outputs the root directory along with
203 * the recursed paths, so use void *userdata to specify the string
204 * length of the root directory. It can then be removed in
205 * add_to_dirlist. */
206
207 int path_len = strlen(path);
208 void *userdata = &path_len;
209
210 /* Now fill dl with a listing. */
211 if (cmd_flags & FLAG_r)
212 recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata);
213 else {
214 DIR *dp;
215 struct dirent *ep;
216 if ((dp = opendir(path)) == NULL)
217 bb_error_msg("Error reading directory");
218 while ((ep = readdir(dp))) {
219 if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, ".")))
220 continue;
221 add_to_dirlist(ep->d_name, NULL, NULL);
222 }
223 closedir(dp);
224 }
225
226 /* Sort dl alphabetically. */
227 qsort(dl, dl_count, sizeof(char *), dir_strcmp);
228
229 /* Copy dl so that we can return it. */
230 char **retval = xmalloc(dl_count * sizeof(char *));
231 for (i = 0; i < dl_count; i++)
232 retval[i] = bb_xstrdup(dl[i]);
233
234 return retval;
235}
236
237static void do_diff (char *dir1, char *path1, char *dir2, char *path2) {
238
239 int flags = D_HEADER;
240 int val;
241
242 char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1);
243 char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2);
244
245 if (stat(fullpath1, &stb1) != 0) {
246 flags |= D_EMPTY1;
247 memset(&stb1, 0, sizeof(stb1));
248 fullpath1 = bb_xasprintf("%s/%s", dir1, path2);
249 }
250 if (stat(fullpath2, &stb2) != 0) {
251 flags |= D_EMPTY2;
252 memset(&stb2, 0, sizeof(stb2));
253 stb2.st_mode = stb1.st_mode;
254 fullpath2 = bb_xasprintf("%s/%s", dir2, path1);
255 }
256
257 if (stb1.st_mode == 0)
258 stb1.st_mode = stb2.st_mode;
259
260 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
261 printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2);
262 return;
263 }
264
265 if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode))
266 val = D_SKIPPED1;
267 else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode))
268 val = D_SKIPPED2;
269 else
270 val = diffreg(fullpath1, fullpath2, flags);
271
272 print_status(val, fullpath1, fullpath2, NULL);
273}
274
275static void diffdir (char *p1, char *p2) {
276
277 char **dirlist1, **dirlist2;
278 char *dp1, *dp2;
279 int dirlist1_count, dirlist2_count;
280 int pos;
281
282 /* Check for trailing slashes. */
283
284 if (p1[strlen(p1) - 1] == '/')
285 p1[strlen(p1) - 1] = '\0';
286 if (p2[strlen(p2) - 1] == '/')
287 p2[strlen(p2) - 1] = '\0';
288
289 /* Get directory listings for p1 and p2. */
290
291 dirlist1 = get_dir(p1);
292 dirlist1_count = dl_count;
293 dirlist1[dirlist1_count] = NULL;
294 dirlist2 = get_dir(p2);
295 dirlist2_count = dl_count;
296 dirlist2[dirlist2_count] = NULL;
297
298 /* If -S was set, find the starting point. */
299 if (start) {
300 while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0)
301 dirlist1++;
302 while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0)
303 dirlist2++;
304 if ((*dirlist1 == NULL) || (*dirlist2 == NULL))
305 bb_error_msg("Invalid argument to -S");
306 }
307
308 /* Now that both dirlist1 and dirlist2 contain sorted directory
309 * listings, we can start to go through dirlist1. If both listings
310 * contain the same file, then do a normal diff. Otherwise, behaviour
311 * is determined by whether the -N flag is set. */
312 while (*dirlist1 != NULL || *dirlist2 != NULL) {
313 dp1 = *dirlist1;
314 dp2 = *dirlist2;
315 pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2);
316 if (pos == 0) {
317 do_diff(p1, dp1, p2, dp2);
318 dirlist1++;
319 dirlist2++;
320 }
321 else if (pos < 0) {
322 if (cmd_flags & FLAG_N)
323 do_diff(p1, dp1, p2, NULL);
324 else
325 print_only(p1, strlen(p1) + 1, dp1);
326 dirlist1++;
327 }
328 else {
329 if (cmd_flags & FLAG_N)
330 do_diff(p1, NULL, p2, dp2);
331 else
332 print_only(p2, strlen(p2) + 1, dp2);
333 dirlist2++;
334 }
335 }
336}
337#endif
338
339static void
340print_only(const char *path, size_t dirlen, const char *entry)
341{
342 if (dirlen > 1)
343 dirlen--;
344 printf("Only in %.*s: %s\n", (int)dirlen, path, entry);
345}
346
347static void
348print_status(int val, char *path1, char *path2, char *entry)
349{
350 switch (val) {
351 case D_ONLY:
352 print_only(path1, strlen(path1), entry);
353 break;
354 case D_COMMON:
355 printf("Common subdirectories: %s%s and %s%s\n",
356 path1, entry ? entry : "", path2, entry ? entry : "");
357 break;
358 case D_BINARY:
359 printf("Binary files %s%s and %s%s differ\n",
360 path1, entry ? entry : "", path2, entry ? entry : "");
361 break;
362 case D_DIFFER:
363 if (cmd_flags & FLAG_q)
364 printf("Files %s%s and %s%s differ\n",
365 path1, entry ? entry : "",
366 path2, entry ? entry : "");
367 break;
368 case D_SAME:
369 if (cmd_flags & FLAG_s)
370 printf("Files %s%s and %s%s are identical\n",
371 path1, entry ? entry : "",
372 path2, entry ? entry : "");
373 break;
374 case D_MISMATCH1:
375 printf("File %s%s is a directory while file %s%s is a regular file\n",
376 path1, entry ? entry : "", path2, entry ? entry : "");
377 break;
378 case D_MISMATCH2:
379 printf("File %s%s is a regular file while file %s%s is a directory\n",
380 path1, entry ? entry : "", path2, entry ? entry : "");
381 break;
382 case D_SKIPPED1:
383 printf("File %s%s is not a regular file or directory and was skipped\n",
384 path1, entry ? entry : "");
385 break;
386 case D_SKIPPED2:
387 printf("File %s%s is not a regular file or directory and was skipped\n",
388 path2, entry ? entry : "");
389 break;
390 }
391}
392
393/*
394 * The following code uses an algorithm due to Harold Stone,
395 * which finds a pair of longest identical subsequences in
396 * the two files.
397 *
398 * The major goal is to generate the match vector J.
399 * J[i] is the index of the line in file1 corresponding
400 * to line i file0. J[i] = 0 if there is no
401 * such line in file1.
402 *
403 * Lines are hashed so as to work in core. All potential
404 * matches are located by sorting the lines of each file
405 * on the hash (called ``value''). In particular, this
406 * collects the equivalence classes in file1 together.
407 * Subroutine equiv replaces the value of each line in
408 * file0 by the index of the first element of its
409 * matching equivalence in (the reordered) file1.
410 * To save space equiv squeezes file1 into a single
411 * array member in which the equivalence classes
412 * are simply concatenated, except that their first
413 * members are flagged by changing sign.
414 *
415 * Next the indices that point into member are unsorted into
416 * array class according to the original order of file0.
417 *
418 * The cleverness lies in routine stone. This marches
419 * through the lines of file0, developing a vector klist
420 * of "k-candidates". At step i a k-candidate is a matched
421 * pair of lines x,y (x in file0 y in file1) such that
422 * there is a common subsequence of length k
423 * between the first i lines of file0 and the first y
424 * lines of file1, but there is no such subsequence for
425 * any smaller y. x is the earliest possible mate to y
426 * that occurs in such a subsequence.
427 *
428 * Whenever any of the members of the equivalence class of
429 * lines in file1 matable to a line in file0 has serial number
430 * less than the y of some k-candidate, that k-candidate
431 * with the smallest such y is replaced. The new
432 * k-candidate is chained (via pred) to the current
433 * k-1 candidate so that the actual subsequence can
434 * be recovered. When a member has serial number greater
435 * that the y of all k-candidates, the klist is extended.
436 * At the end, the longest subsequence is pulled out
437 * and placed in the array J by unravel
438 *
439 * With J in hand, the matches there recorded are
440 * checked against reality to assure that no spurious
441 * matches have crept in due to hashing. If they have,
442 * they are broken, and "jackpot" is recorded--a harmless
443 * matter except that a true match for a spuriously
444 * mated line may now be unnecessarily reported as a change.
445 *
446 * Much of the complexity of the program comes simply
447 * from trying to minimize core utilization and
448 * maximize the range of doable problems by dynamically
449 * allocating what is needed and reusing what is not.
450 * The core requirements for problems larger than somewhat
451 * are (in words) 2*length(file0) + length(file1) +
452 * 3*(number of k-candidates installed), typically about
453 * 6n words for files of length n.
454 */
455
456static int diffreg(char *ofile1, char *ofile2, int flags)
457{
458 char *file1 = ofile1;
459 char *file2 = ofile2;
460 FILE *f1 = NULL;
461 FILE *f2 = NULL;
462 int rval = D_SAME;
463 int i;
464
465 anychange = 0;
466 context_vec_ptr = context_vec_start - 1;
467
468 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
469 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
470 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
471 goto closem;
472
473 if (flags & D_EMPTY1)
474 f1 = bb_xfopen(_PATH_DEVNULL, "r");
475 else {
476 if (strcmp(file1, "-") == 0)
477 f1 = stdin;
478 else
479 f1 = bb_xfopen(file1, "r");
480 }
481
482 if (flags & D_EMPTY2)
483 f2 = bb_xfopen(_PATH_DEVNULL, "r");
484 else {
485 if (strcmp(file2, "-") == 0)
486 f2 = stdin;
487 else
488 f2 = bb_xfopen(file2, "r");
489 }
490
491 switch (files_differ(f1, f2, flags)) {
492 case 0:
493 goto closem;
494 case 1:
495 break;
496 default:
497 /* error */
498 status |= 2;
499 goto closem;
500 }
501
502 if (!asciifile(f1) || !asciifile(f2)) {
503 rval = D_BINARY;
504 status |= 1;
505 goto closem;
506 }
507
508 prepare(0, f1, stb1.st_size);
509 prepare(1, f2, stb2.st_size);
510 prune();
511 sort(sfile[0], slen[0]);
512 sort(sfile[1], slen[1]);
513
514 member = (int *)file[1];
515 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
516 member = xrealloc(member, (slen[1] + 2) * sizeof(int));
517
518 class = (int *)file[0];
519 unsort(sfile[0], slen[0], class);
520 class = xrealloc(class, (slen[0] + 2) * sizeof(int));
521
522 klist = xmalloc((slen[0] + 2) * sizeof(int));
523 clen = 0;
524 clistlen = 100;
525 clist = xmalloc(clistlen * sizeof(struct cand));
526 i = stone(class, slen[0], member, klist);
527 free(member);
528 free(class);
529
530 J = xrealloc(J, (len[0] + 2) * sizeof(int));
531 unravel(klist[i]);
532 free(clist);
533 free(klist);
534
535 ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long));
536 ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long));
537 check(file1, f1, file2, f2);
538 output(file1, f1, file2, f2);
539
540closem:
541 if (anychange) {
542 status |= 1;
543 if (rval == D_SAME)
544 rval = D_DIFFER;
545 }
546 if (f1 != NULL)
547 fclose(f1);
548 if (f2 != NULL)
549 fclose(f2);
550 if (file1 != ofile1)
551 free(file1);
552 if (file2 != ofile2)
553 free(file2);
554 return (rval);
555}
556
557/*
558 * Check to see if the given files differ.
559 * Returns 0 if they are the same, 1 if different, and -1 on error.
560 */
561static int
562files_differ(FILE *f1, FILE *f2, int flags)
563{
564 char buf1[BUFSIZ], buf2[BUFSIZ];
565 size_t i, j;
566
567 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
568 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
569 return (1);
570 while(1) {
571 i = fread(buf1, 1, sizeof(buf1), f1);
572 j = fread(buf2, 1, sizeof(buf2), f2);
573 if (i != j)
574 return (1);
575 if (i == 0 && j == 0) {
576 if (ferror(f1) || ferror(f2))
577 return (1);
578 return (0);
579 }
580 if (memcmp(buf1, buf2, i) != 0)
581 return (1);
582 }
583}
584
585static void
586prepare(int i, FILE *fd, off_t filesize)
587{
588 struct line *p;
589 int j, h;
590 size_t sz;
591
592 rewind(fd);
593
594 sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25;
595 if (sz < 100)
596 sz = 100;
597
598 p = xmalloc((sz + 3) * sizeof(struct line));
599 for (j = 0; (h = readhash(fd));) {
600 if (j == sz) {
601 sz = sz * 3 / 2;
602 p = xrealloc(p, (sz + 3) * sizeof(struct line));
603 }
604 p[++j].value = h;
605 }
606 len[i] = j;
607 file[i] = p;
608}
609
610static void
611prune(void)
612{
613 int i, j;
614
615 for (pref = 0; pref < len[0] && pref < len[1] &&
616 file[0][pref + 1].value == file[1][pref + 1].value;
617 pref++)
618 ;
619 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
620 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
621 suff++)
622 ;
623 for (j = 0; j < 2; j++) {
624 sfile[j] = file[j] + pref;
625 slen[j] = len[j] - pref - suff;
626 for (i = 0; i <= slen[j]; i++)
627 sfile[j][i].serial = i;
628 }
629}
630
631static void
632equiv(struct line *a, int n, struct line *b, int m, int *c)
633{
634 int i, j;
635
636 i = j = 1;
637 while (i <= n && j <= m) {
638 if (a[i].value < b[j].value)
639 a[i++].value = 0;
640 else if (a[i].value == b[j].value)
641 a[i++].value = j;
642 else
643 j++;
644 }
645 while (i <= n)
646 a[i++].value = 0;
647 b[m + 1].value = 0;
648 j = 0;
649 while (++j <= m) {
650 c[j] = -b[j].serial;
651 while (b[j + 1].value == b[j].value) {
652 j++;
653 c[j] = b[j].serial;
654 }
655 }
656 c[j] = -1;
657}
658
659static int isqrt(int n) {
660 int y, x = 1;
661 if (n == 0) return(0);
662
663 do {
664 y = x;
665 x = n / x;
666 x += y;
667 x /= 2;
668 } while ((x - y) > 1 || (x - y) < -1);
669
670 return (x);
671}
672
673static int
674stone(int *a, int n, int *b, int *c)
675{
676 int i, k, y, j, l;
677 int oldc, tc, oldl;
678 u_int numtries;
679#ifdef CONFIG_FEATURE_DIFF_MINIMAL
680 const u_int bound = (cmd_flags & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n));
681#else
682 const u_int bound = MAX(256, isqrt(n));
683#endif
684 k = 0;
685 c[0] = newcand(0, 0, 0);
686 for (i = 1; i <= n; i++) {
687 j = a[i];
688 if (j == 0)
689 continue;
690 y = -b[j];
691 oldl = 0;
692 oldc = c[0];
693 numtries = 0;
694 do {
695 if (y <= clist[oldc].y)
696 continue;
697 l = search(c, k, y);
698 if (l != oldl + 1)
699 oldc = c[l - 1];
700 if (l <= k) {
701 if (clist[c[l]].y <= y)
702 continue;
703 tc = c[l];
704 c[l] = newcand(i, y, oldc);
705 oldc = tc;
706 oldl = l;
707 numtries++;
708 } else {
709 c[l] = newcand(i, y, oldc);
710 k++;
711 break;
712 }
713 } while ((y = b[++j]) > 0 && numtries < bound);
714 }
715 return (k);
716}
717
718static int
719newcand(int x, int y, int pred)
720{
721 struct cand *q;
722
723 if (clen == clistlen) {
724 clistlen = clistlen * 11 / 10;
725 clist = xrealloc(clist, clistlen * sizeof(struct cand));
726 }
727 q = clist + clen;
728 q->x = x;
729 q->y = y;
730 q->pred = pred;
731 return (clen++);
732}
733
734static int
735search(int *c, int k, int y)
736{
737 int i, j, l, t;
738
739 if (clist[c[k]].y < y) /* quick look for typical case */
740 return (k + 1);
741 i = 0;
742 j = k + 1;
743 while (1) {
744 l = i + j;
745 if ((l >>= 1) <= i)
746 break;
747 t = clist[c[l]].y;
748 if (t > y)
749 j = l;
750 else if (t < y)
751 i = l;
752 else
753 return (l);
754 }
755 return (l + 1);
756}
757
758static void
759unravel(int p)
760{
761 struct cand *q;
762 int i;
763
764 for (i = 0; i <= len[0]; i++)
765 J[i] = i <= pref ? i :
766 i > len[0] - suff ? i + len[1] - len[0] : 0;
767 for (q = clist + p; q->y != 0; q = clist + q->pred)
768 J[q->x + pref] = q->y + pref;
769}
770
771/*
772 * Check does double duty:
773 * 1. ferret out any fortuitous correspondences due
774 * to confounding by hashing (which result in "jackpot")
775 * 2. collect random access indexes to the two files
776 */
777static void
778check(char *file1, FILE *f1, char *file2, FILE *f2)
779{
780 int i, j, jackpot, c, d;
781 long ctold, ctnew;
782
783 rewind(f1);
784 rewind(f2);
785 j = 1;
786 ixold[0] = ixnew[0] = 0;
787 jackpot = 0;
788 ctold = ctnew = 0;
789 for (i = 1; i <= len[0]; i++) {
790 if (J[i] == 0) {
791 ixold[i] = ctold += skipline(f1);
792 continue;
793 }
794 while (j < J[i]) {
795 ixnew[j] = ctnew += skipline(f2);
796 j++;
797 }
798 if ((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w) || (cmd_flags & FLAG_i)) {
799 while (1) {
800 c = getc(f1);
801 d = getc(f2);
802 /*
803 * GNU diff ignores a missing newline
804 * in one file if bflag || wflag.
805 */
806 if (((cmd_flags & FLAG_b) || (cmd_flags & FLAG_w)) &&
807 ((c == EOF && d == '\n') ||
808 (c == '\n' && d == EOF))) {
809 break;
810 }
811 ctold++;
812 ctnew++;
813 if ((cmd_flags & FLAG_b) && isspace(c) && isspace(d)) {
814 do {
815 if (c == '\n')
816 break;
817 ctold++;
818 } while (isspace(c = getc(f1)));
819 do {
820 if (d == '\n')
821 break;
822 ctnew++;
823 } while (isspace(d = getc(f2)));
824 } else if (cmd_flags & FLAG_w) {
825 while (isspace(c) && c != '\n') {
826 c = getc(f1);
827 ctold++;
828 }
829 while (isspace(d) && d != '\n') {
830 d = getc(f2);
831 ctnew++;
832 }
833 }
834 if (c != d) {
835 jackpot++;
836 J[i] = 0;
837 if (c != '\n' && c != EOF)
838 ctold += skipline(f1);
839 if (d != '\n' && c != EOF)
840 ctnew += skipline(f2);
841 break;
842 }
843 if (c == '\n' || c == EOF)
844 break;
845 }
846 } else {
847 while (1) {
848 ctold++;
849 ctnew++;
850 if ((c = getc(f1)) != (d = getc(f2))) {
851 J[i] = 0;
852 if (c != '\n' && c != EOF)
853 ctold += skipline(f1);
854 if (d != '\n' && c != EOF)
855 ctnew += skipline(f2);
856 break;
857 }
858 if (c == '\n' || c == EOF)
859 break;
860 }
861 }
862 ixold[i] = ctold;
863 ixnew[j] = ctnew;
864 j++;
865 }
866 for (; j <= len[1]; j++)
867 ixnew[j] = ctnew += skipline(f2);
868}
869
870/* shellsort CACM #201 */
871static void
872sort(struct line *a, int n)
873{
874 struct line *ai, *aim, w;
875 int j, m = 0, k;
876
877 if (n == 0)
878 return;
879 for (j = 1; j <= n; j *= 2)
880 m = 2 * j - 1;
881 for (m /= 2; m != 0; m /= 2) {
882 k = n - m;
883 for (j = 1; j <= k; j++) {
884 for (ai = &a[j]; ai > a; ai -= m) {
885 aim = &ai[m];
886 if (aim < ai)
887 break; /* wraparound */
888 if (aim->value > ai[0].value ||
889 (aim->value == ai[0].value &&
890 aim->serial > ai[0].serial))
891 break;
892 w.value = ai[0].value;
893 ai[0].value = aim->value;
894 aim->value = w.value;
895 w.serial = ai[0].serial;
896 ai[0].serial = aim->serial;
897 aim->serial = w.serial;
898 }
899 }
900 }
901}
902
903static void
904unsort(struct line *f, int l, int *b)
905{
906 int *a, i;
907
908 a = xmalloc((l + 1) * sizeof(int));
909 for (i = 1; i <= l; i++)
910 a[f[i].serial] = f[i].value;
911 for (i = 1; i <= l; i++)
912 b[i] = a[i];
913 free(a);
914}
915
916static int
917skipline(FILE *f)
918{
919 int i, c;
920
921 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
922 continue;
923 return (i);
924}
925
926static void
927output(char *file1, FILE *f1, char *file2, FILE *f2)
928{
929 int m, i0, i1, j0, j1;
930
931 rewind(f1);
932 rewind(f2);
933 m = len[0];
934 J[0] = 0;
935 J[m + 1] = len[1] + 1;
936 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
937 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
938 i0++;
939 j0 = J[i0 - 1] + 1;
940 i1 = i0 - 1;
941 while (i1 < m && J[i1 + 1] == 0)
942 i1++;
943 j1 = J[i1 + 1] - 1;
944 J[i1] = j1;
945 change(file1, f1, file2, f2, i0, i1, j0, j1);
946 }
947 if (m == 0) {
948 change(file1, f1, file2, f2, 1, 0, 1, len[1]);
949 }
950 if (anychange != 0) {
951 dump_unified_vec(f1, f2);
952 }
953}
954
955static void uni_range(int a, int b)
956{
957 if (a < b)
958 printf("%d,%d", a, b - a + 1);
959 else if (a == b)
960 printf("%d", b);
961 else
962 printf("%d,0", b);
963}
964
965/*
966 * Indicate that there is a difference between lines a and b of the from file
967 * to get to lines c to d of the to file. If a is greater then b then there
968 * are no lines in the from file involved and this means that there were
969 * lines appended (beginning at b). If c is greater than d then there are
970 * lines missing from the to file.
971 */
972static void
973change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
974{
975 static size_t max_context = 64;
976
977 if (a > b && c > d) return;
978 if (cmd_flags & FLAG_q) return;
979
980 /*
981 * Allocate change records as needed.
982 */
983 if (context_vec_ptr == context_vec_end - 1) {
984 ptrdiff_t offset = context_vec_ptr - context_vec_start;
985 max_context <<= 1;
986 context_vec_start = xrealloc(context_vec_start,
987 max_context * sizeof(struct context_vec));
988 context_vec_end = context_vec_start + max_context;
989 context_vec_ptr = context_vec_start + offset;
990 }
991 if (anychange == 0) {
992 /*
993 * Print the context/unidiff header first time through.
994 */
995 print_header(file1, file2);
996 anychange = 1;
997 } else if (a > context_vec_ptr->b + (2 * context) + 1 &&
998 c > context_vec_ptr->d + (2 * context) + 1) {
999 /*
1000 * If this change is more than 'context' lines from the
1001 * previous change, dump the record and reset it.
1002 */
1003 dump_unified_vec(f1, f2);
1004 }
1005 context_vec_ptr++;
1006 context_vec_ptr->a = a;
1007 context_vec_ptr->b = b;
1008 context_vec_ptr->c = c;
1009 context_vec_ptr->d = d;
1010 return;
1011
1012}
1013
1014static int
1015fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1016{
1017 int i, j, c, lastc, col, nc;
1018
1019 if (a > b)
1020 return (0);
1021 for (i = a; i <= b; i++) {
1022 fseek(lb, f[i - 1], SEEK_SET);
1023 nc = f[i] - f[i - 1];
1024 if (ch != '\0') {
1025 putchar(ch);
1026 if (cmd_flags & FLAG_T)
1027 putchar('\t');
1028 }
1029 col = 0;
1030 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1031 if ((c = getc(lb)) == EOF) {
1032 puts("\n\\ No newline at end of file");
1033 return (0);
1034 }
1035 if (c == '\t' && (cmd_flags & FLAG_t)) {
1036 do {
1037 putchar(' ');
1038 } while (++col & 7);
1039 } else {
1040 putchar(c);
1041 col++;
1042 }
1043 }
1044 }
1045 return (0);
1046}
1047
1048/*
1049 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1050 */
1051static int
1052readhash(FILE *f)
1053{
1054 int i, t, space;
1055 int sum;
1056
1057 sum = 1;
1058 space = 0;
1059 if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) {
1060 if (FLAG_i)
1061 for (i = 0; (t = getc(f)) != '\n'; i++) {
1062 if (t == EOF) {
1063 if (i == 0)
1064 return (0);
1065 break;
1066 }
1067 sum = sum * 127 + t;
1068 }
1069 else
1070 for (i = 0; (t = getc(f)) != '\n'; i++) {
1071 if (t == EOF) {
1072 if (i == 0)
1073 return (0);
1074 break;
1075 }
1076 sum = sum * 127 + t;
1077 }
1078 } else {
1079 for (i = 0;;) {
1080 switch (t = getc(f)) {
1081 case '\t':
1082 case '\r':
1083 case '\v':
1084 case '\f':
1085 case ' ':
1086 space++;
1087 continue;
1088 default:
1089 if (space && !(cmd_flags & FLAG_w)) {
1090 i++;
1091 space = 0;
1092 }
1093 sum = sum * 127 + t;
1094 i++;
1095 continue;
1096 case EOF:
1097 if (i == 0)
1098 return (0);
1099 /* FALLTHROUGH */
1100 case '\n':
1101 break;
1102 }
1103 break;
1104 }
1105 }
1106 /*
1107 * There is a remote possibility that we end up with a zero sum.
1108 * Zero is used as an EOF marker, so return 1 instead.
1109 */
1110 return (sum == 0 ? 1 : sum);
1111}
1112
1113static int
1114asciifile(FILE *f)
1115{
1116
1117 if ((cmd_flags & FLAG_a) || f == NULL)
1118 return (1);
1119#ifdef CONFIG_FEATURE_DIFF_BINARY
1120 unsigned char buf[BUFSIZ];
1121 int i, cnt;
1122
1123 rewind(f);
1124 cnt = fread(buf, 1, sizeof(buf), f);
1125 for (i = 0; i < cnt; i++)
1126 if (!isprint(buf[i]) && !isspace(buf[i]))
1127 return (0);
1128#endif
1129 return (1);
1130}
1131
1132/* dump accumulated "unified" diff changes */
1133static void
1134dump_unified_vec(FILE *f1, FILE *f2)
1135{
1136 struct context_vec *cvp = context_vec_start;
1137 int lowa, upb, lowc, upd;
1138 int a, b, c, d;
1139 char ch;
1140
1141 if (context_vec_start > context_vec_ptr)
1142 return;
1143
1144 b = d = 0; /* gcc */
1145 lowa = MAX(1, cvp->a - context);
1146 upb = MIN(len[0], context_vec_ptr->b + context);
1147 lowc = MAX(1, cvp->c - context);
1148 upd = MIN(len[1], context_vec_ptr->d + context);
1149
1150 fputs("@@ -", stdout);
1151 uni_range(lowa, upb);
1152 fputs(" +", stdout);
1153 uni_range(lowc, upd);
1154 fputs(" @@", stdout);
1155 putchar('\n');
1156
1157 /*
1158 * Output changes in "unified" diff format--the old and new lines
1159 * are printed together.
1160 */
1161 for (; cvp <= context_vec_ptr; cvp++) {
1162 a = cvp->a;
1163 b = cvp->b;
1164 c = cvp->c;
1165 d = cvp->d;
1166
1167 /*
1168 * c: both new and old changes
1169 * d: only changes in the old file
1170 * a: only changes in the new file
1171 */
1172 if (a <= b && c <= d)
1173 ch = 'c';
1174 else
1175 ch = (a <= b) ? 'd' : 'a';
1176
1177 switch (ch) {
1178 case 'c':
1179 fetch(ixold, lowa, a - 1, f1, ' ', 0);
1180 fetch(ixold, a, b, f1, '-', 0);
1181 fetch(ixnew, c, d, f2, '+', 0);
1182 break;
1183 case 'd':
1184 fetch(ixold, lowa, a - 1, f1, ' ', 0);
1185 fetch(ixold, a, b, f1, '-', 0);
1186 break;
1187 case 'a':
1188 fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1189 fetch(ixnew, c, d, f2, '+', 0);
1190 break;
1191 }
1192 lowa = b + 1;
1193 lowc = d + 1;
1194 }
1195 fetch(ixnew, d + 1, upd, f2, ' ', 0);
1196
1197 context_vec_ptr = context_vec_start - 1;
1198}
1199
1200static void
1201print_header(const char *file1, const char *file2)
1202{
1203 if (label[0] != NULL)
1204 printf("%s %s\n", "---",
1205 label[0]);
1206 else
1207 printf("%s %s\t%s", "---",
1208 file1, ctime(&stb1.st_mtime));
1209 if (label[1] != NULL)
1210 printf("%s %s\n", "+++",
1211 label[1]);
1212 else
1213 printf("%s %s\t%s", "+++",
1214 file2, ctime(&stb2.st_mtime));
1215}
1216
1217extern int diff_main(int argc, char **argv) {
1218 char *ep;
1219 int gotstdin = 0;
1220
1221 char *U_opt;
1222 cmd_flags = bb_getopt_ulflags(argc, argv, "abdiNqrsS:tTU:wu", &start, &U_opt);
1223
1224 context = 3; /* This is the default number of lines of context. */
1225 if (cmd_flags & FLAG_U) {
1226 context = strtol(U_opt, &ep, 10);
1227 if (context == 0) {
1228 bb_error_msg("Invalid context length");
1229 bb_show_usage();
1230 }
1231 }
1232 argc -= optind;
1233 argv += optind;
1234
1235 /*
1236 * Do sanity checks, fill in stb1 and stb2 and call the appropriate
1237 * driver routine. Both drivers use the contents of stb1 and stb2.
1238 */
1239 if (argc < 2) {
1240 bb_error_msg("Missing filename");
1241 bb_show_usage();
1242 }
1243 if (strcmp(argv[0], "-") == 0) {
1244 fstat(STDIN_FILENO, &stb1);
1245 gotstdin = 1;
1246 } else if (stat(argv[0], &stb1) != 0)
1247 bb_perror_msg_and_die("Couldn't stat %s", argv[0]);
1248 if (strcmp(argv[1], "-") == 0) {
1249 fstat(STDIN_FILENO, &stb2);
1250 gotstdin = 1;
1251 } else if (stat(argv[1], &stb2) != 0)
1252 bb_perror_msg_and_die("Couldn't stat %s", argv[1]);
1253 if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode)))
1254 bb_error_msg_and_die("Can't compare - to a directory");
1255 if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) {
1256#ifdef CONFIG_FEATURE_DIFF_DIR
1257 diffdir(argv[0], argv[1]);
1258#else
1259 bb_error_msg_and_die("Directory comparison not supported");
1260#endif
1261 }
1262 else {
1263 if (S_ISDIR(stb1.st_mode)) {
1264 argv[0] = concat_path_file(argv[0], argv[1]);
1265 if (stat(argv[0], &stb1) < 0)
1266 bb_perror_msg_and_die("Couldn't stat %s", argv[0]);
1267 }
1268 if (S_ISDIR(stb2.st_mode)) {
1269 argv[1] = concat_path_file(argv[1], argv[0]);
1270 if (stat(argv[1], &stb2) < 0)
1271 bb_perror_msg_and_die("Couldn't stat %s", argv[1]);
1272 }
1273 print_status(diffreg(argv[0], argv[1], 0), argv[0], argv[1], NULL);
1274 }
1275 exit(status);
1276}
1277