diff options
author | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2006-04-06 08:15:24 +0000 |
---|---|---|
committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2006-04-06 08:15:24 +0000 |
commit | 693a93608ae354b08583b5b18db4aedfe83e8e07 (patch) | |
tree | f1967971df4effb3958b01f2eb41d4e3ee00efdf | |
parent | 8f7d38970046c0ea53de911084561b79ffb2d6b9 (diff) | |
download | busybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.tar.gz busybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.tar.bz2 busybox-w32-693a93608ae354b08583b5b18db4aedfe83e8e07.zip |
- move code around to avoid the need for the prototypes.
-rw-r--r-- | coreutils/diff.c | 1144 |
1 files changed, 557 insertions, 587 deletions
diff --git a/coreutils/diff.c b/coreutils/diff.c index b2945dd87..17975ad20 100644 --- a/coreutils/diff.c +++ b/coreutils/diff.c | |||
@@ -3,14 +3,14 @@ | |||
3 | * Mini diff implementation for busybox, adapted from OpenBSD diff. | 3 | * Mini diff implementation for busybox, adapted from OpenBSD diff. |
4 | * | 4 | * |
5 | * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> | 5 | * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> |
6 | * | 6 | * |
7 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. | 7 | * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> | 11 | * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> |
12 | * | 12 | * |
13 | * Permission to | 13 | * Permission to |
14 | * use, copy, modify, and distribute this software for any | 14 | * use, copy, modify, and distribute this software for any |
15 | * purpose with or without fee is hereby granted, provided that the above | 15 | * purpose with or without fee is hereby granted, provided that the above |
16 | * copyright notice and this permission notice appear in all copies. | 16 | * copyright notice and this permission notice appear in all copies. |
@@ -71,8 +71,8 @@ | |||
71 | * D_SKIPPED2 - skipped path2 as it is a special file | 71 | * D_SKIPPED2 - skipped path2 as it is a special file |
72 | */ | 72 | */ |
73 | 73 | ||
74 | #define D_SAME 0 | 74 | #define D_SAME 0 |
75 | #define D_DIFFER 1 | 75 | #define D_DIFFER (1<<0) |
76 | #define D_BINARY (1<<1) | 76 | #define D_BINARY (1<<1) |
77 | #define D_COMMON (1<<2) | 77 | #define D_COMMON (1<<2) |
78 | #define D_ONLY (1<<3) | 78 | #define D_ONLY (1<<3) |
@@ -84,7 +84,7 @@ | |||
84 | 84 | ||
85 | /* Command line options */ | 85 | /* Command line options */ |
86 | static unsigned long cmd_flags; | 86 | static unsigned long cmd_flags; |
87 | #define FLAG_a 1 | 87 | #define FLAG_a (1<<0) |
88 | #define FLAG_b (1<<1) | 88 | #define FLAG_b (1<<1) |
89 | #define FLAG_d (1<<2) | 89 | #define FLAG_d (1<<2) |
90 | #define FLAG_i (1<<3) | 90 | #define FLAG_i (1<<3) |
@@ -145,207 +145,14 @@ static struct context_vec *context_vec_start; | |||
145 | static struct context_vec *context_vec_end; | 145 | static struct context_vec *context_vec_end; |
146 | static struct context_vec *context_vec_ptr; | 146 | static struct context_vec *context_vec_ptr; |
147 | 147 | ||
148 | static void output(char *, FILE *, char *, FILE *); | 148 | static void print_only(const char *path, size_t dirlen, const char *entry) |
149 | static void check(char *, FILE *, char *, FILE *); | ||
150 | static void uni_range(int, int); | ||
151 | static void dump_unified_vec(FILE *, FILE *); | ||
152 | static void prepare(int, FILE *, off_t); | ||
153 | static void prune(void); | ||
154 | static void equiv(struct line *, int, struct line *, int, int *); | ||
155 | static void unravel(int); | ||
156 | static void unsort(struct line *, int, int *); | ||
157 | static void change(char *, FILE *, char *, FILE *, int, int, int, int); | ||
158 | static void sort(struct line *, int); | ||
159 | static void print_header(const char *, const char *); | ||
160 | static int asciifile(FILE *); | ||
161 | static int fetch(long *, int, int, FILE *, int, int); | ||
162 | static int newcand(int, int, int); | ||
163 | static int search(int *, int, int); | ||
164 | static int skipline(FILE *); | ||
165 | static int stone(int *, int, int *, int *); | ||
166 | static int readhash(FILE *); | ||
167 | static int files_differ(FILE *, FILE *, int); | ||
168 | |||
169 | static int diffreg(char *, char *, int); | ||
170 | static void print_only(const char *, size_t, const char *); | ||
171 | static void print_status(int, char *, char *, char *); | ||
172 | |||
173 | #ifdef CONFIG_FEATURE_DIFF_DIR | ||
174 | static int dir_strcmp(const void *p1, const void *p2) { | ||
175 | return strcmp(*(char * const *)p1, *(char * const *)p2); | ||
176 | } | ||
177 | |||
178 | /* This function adds a filename to dl, the directory listing. */ | ||
179 | |||
180 | static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) { | ||
181 | dl_count++; | ||
182 | dl = xrealloc(dl, dl_count * sizeof(char *)); | ||
183 | dl[dl_count - 1] = bb_xstrdup(filename); | ||
184 | if (cmd_flags & FLAG_r) { | ||
185 | int *pp = (int *) userdata; | ||
186 | int path_len = *pp + 1; | ||
187 | dl[dl_count - 1] = &(dl[dl_count - 1])[path_len]; | ||
188 | } | ||
189 | return TRUE; | ||
190 | } | ||
191 | |||
192 | /* This returns a sorted directory listing. */ | ||
193 | static char **get_dir(char *path) { | ||
194 | |||
195 | int i; | ||
196 | |||
197 | /* Reset dl_count - there's no need to free dl as bb_xrealloc does | ||
198 | * the job nicely. */ | ||
199 | dl_count = 0; | ||
200 | |||
201 | /* If -r has been set, then the recursive_action function will be | ||
202 | * used. Unfortunately, this outputs the root directory along with | ||
203 | * the recursed paths, so use void *userdata to specify the string | ||
204 | * length of the root directory. It can then be removed in | ||
205 | * add_to_dirlist. */ | ||
206 | |||
207 | int path_len = strlen(path); | ||
208 | void *userdata = &path_len; | ||
209 | |||
210 | /* Now fill dl with a listing. */ | ||
211 | if (cmd_flags & FLAG_r) | ||
212 | recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata); | ||
213 | else { | ||
214 | DIR *dp; | ||
215 | struct dirent *ep; | ||
216 | if ((dp = opendir(path)) == NULL) | ||
217 | bb_error_msg("Error reading directory"); | ||
218 | while ((ep = readdir(dp))) { | ||
219 | if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, "."))) | ||
220 | continue; | ||
221 | add_to_dirlist(ep->d_name, NULL, NULL); | ||
222 | } | ||
223 | closedir(dp); | ||
224 | } | ||
225 | |||
226 | /* Sort dl alphabetically. */ | ||
227 | qsort(dl, dl_count, sizeof(char *), dir_strcmp); | ||
228 | |||
229 | /* Copy dl so that we can return it. */ | ||
230 | char **retval = xmalloc(dl_count * sizeof(char *)); | ||
231 | for (i = 0; i < dl_count; i++) | ||
232 | retval[i] = bb_xstrdup(dl[i]); | ||
233 | |||
234 | return retval; | ||
235 | } | ||
236 | |||
237 | static void do_diff (char *dir1, char *path1, char *dir2, char *path2) { | ||
238 | |||
239 | int flags = D_HEADER; | ||
240 | int val; | ||
241 | |||
242 | char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1); | ||
243 | char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2); | ||
244 | |||
245 | if (stat(fullpath1, &stb1) != 0) { | ||
246 | flags |= D_EMPTY1; | ||
247 | memset(&stb1, 0, sizeof(stb1)); | ||
248 | fullpath1 = bb_xasprintf("%s/%s", dir1, path2); | ||
249 | } | ||
250 | if (stat(fullpath2, &stb2) != 0) { | ||
251 | flags |= D_EMPTY2; | ||
252 | memset(&stb2, 0, sizeof(stb2)); | ||
253 | stb2.st_mode = stb1.st_mode; | ||
254 | fullpath2 = bb_xasprintf("%s/%s", dir2, path1); | ||
255 | } | ||
256 | |||
257 | if (stb1.st_mode == 0) | ||
258 | stb1.st_mode = stb2.st_mode; | ||
259 | |||
260 | if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { | ||
261 | printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); | ||
262 | return; | ||
263 | } | ||
264 | |||
265 | if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) | ||
266 | val = D_SKIPPED1; | ||
267 | else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) | ||
268 | val = D_SKIPPED2; | ||
269 | else | ||
270 | val = diffreg(fullpath1, fullpath2, flags); | ||
271 | |||
272 | print_status(val, fullpath1, fullpath2, NULL); | ||
273 | } | ||
274 | |||
275 | static void diffdir (char *p1, char *p2) { | ||
276 | |||
277 | char **dirlist1, **dirlist2; | ||
278 | char *dp1, *dp2; | ||
279 | int dirlist1_count, dirlist2_count; | ||
280 | int pos; | ||
281 | |||
282 | /* Check for trailing slashes. */ | ||
283 | |||
284 | if (p1[strlen(p1) - 1] == '/') | ||
285 | p1[strlen(p1) - 1] = '\0'; | ||
286 | if (p2[strlen(p2) - 1] == '/') | ||
287 | p2[strlen(p2) - 1] = '\0'; | ||
288 | |||
289 | /* Get directory listings for p1 and p2. */ | ||
290 | |||
291 | dirlist1 = get_dir(p1); | ||
292 | dirlist1_count = dl_count; | ||
293 | dirlist1[dirlist1_count] = NULL; | ||
294 | dirlist2 = get_dir(p2); | ||
295 | dirlist2_count = dl_count; | ||
296 | dirlist2[dirlist2_count] = NULL; | ||
297 | |||
298 | /* If -S was set, find the starting point. */ | ||
299 | if (start) { | ||
300 | while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) | ||
301 | dirlist1++; | ||
302 | while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) | ||
303 | dirlist2++; | ||
304 | if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) | ||
305 | bb_error_msg("Invalid argument to -S"); | ||
306 | } | ||
307 | |||
308 | /* Now that both dirlist1 and dirlist2 contain sorted directory | ||
309 | * listings, we can start to go through dirlist1. If both listings | ||
310 | * contain the same file, then do a normal diff. Otherwise, behaviour | ||
311 | * is determined by whether the -N flag is set. */ | ||
312 | while (*dirlist1 != NULL || *dirlist2 != NULL) { | ||
313 | dp1 = *dirlist1; | ||
314 | dp2 = *dirlist2; | ||
315 | pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); | ||
316 | if (pos == 0) { | ||
317 | do_diff(p1, dp1, p2, dp2); | ||
318 | dirlist1++; | ||
319 | dirlist2++; | ||
320 | } | ||
321 | else if (pos < 0) { | ||
322 | if (cmd_flags & FLAG_N) | ||
323 | do_diff(p1, dp1, p2, NULL); | ||
324 | else | ||
325 | print_only(p1, strlen(p1) + 1, dp1); | ||
326 | dirlist1++; | ||
327 | } | ||
328 | else { | ||
329 | if (cmd_flags & FLAG_N) | ||
330 | do_diff(p1, NULL, p2, dp2); | ||
331 | else | ||
332 | print_only(p2, strlen(p2) + 1, dp2); | ||
333 | dirlist2++; | ||
334 | } | ||
335 | } | ||
336 | } | ||
337 | #endif | ||
338 | |||
339 | static void | ||
340 | print_only(const char *path, size_t dirlen, const char *entry) | ||
341 | { | 149 | { |
342 | if (dirlen > 1) | 150 | if (dirlen > 1) |
343 | dirlen--; | 151 | dirlen--; |
344 | printf("Only in %.*s: %s\n", (int)dirlen, path, entry); | 152 | printf("Only in %.*s: %s\n", (int)dirlen, path, entry); |
345 | } | 153 | } |
346 | 154 | ||
347 | static void | 155 | static void print_status(int val, char *path1, char *path2, char *entry) |
348 | print_status(int val, char *path1, char *path2, char *entry) | ||
349 | { | 156 | { |
350 | switch (val) { | 157 | switch (val) { |
351 | case D_ONLY: | 158 | case D_ONLY: |
@@ -391,175 +198,76 @@ print_status(int val, char *path1, char *path2, char *entry) | |||
391 | } | 198 | } |
392 | 199 | ||
393 | /* | 200 | /* |
394 | * The following code uses an algorithm due to Harold Stone, | 201 | * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. |
395 | * which finds a pair of longest identical subsequences in | ||
396 | * the two files. | ||
397 | * | ||
398 | * The major goal is to generate the match vector J. | ||
399 | * J[i] is the index of the line in file1 corresponding | ||
400 | * to line i file0. J[i] = 0 if there is no | ||
401 | * such line in file1. | ||
402 | * | ||
403 | * Lines are hashed so as to work in core. All potential | ||
404 | * matches are located by sorting the lines of each file | ||
405 | * on the hash (called ``value''). In particular, this | ||
406 | * collects the equivalence classes in file1 together. | ||
407 | * Subroutine equiv replaces the value of each line in | ||
408 | * file0 by the index of the first element of its | ||
409 | * matching equivalence in (the reordered) file1. | ||
410 | * To save space equiv squeezes file1 into a single | ||
411 | * array member in which the equivalence classes | ||
412 | * are simply concatenated, except that their first | ||
413 | * members are flagged by changing sign. | ||
414 | * | ||
415 | * Next the indices that point into member are unsorted into | ||
416 | * array class according to the original order of file0. | ||
417 | * | ||
418 | * The cleverness lies in routine stone. This marches | ||
419 | * through the lines of file0, developing a vector klist | ||
420 | * of "k-candidates". At step i a k-candidate is a matched | ||
421 | * pair of lines x,y (x in file0 y in file1) such that | ||
422 | * there is a common subsequence of length k | ||
423 | * between the first i lines of file0 and the first y | ||
424 | * lines of file1, but there is no such subsequence for | ||
425 | * any smaller y. x is the earliest possible mate to y | ||
426 | * that occurs in such a subsequence. | ||
427 | * | ||
428 | * Whenever any of the members of the equivalence class of | ||
429 | * lines in file1 matable to a line in file0 has serial number | ||
430 | * less than the y of some k-candidate, that k-candidate | ||
431 | * with the smallest such y is replaced. The new | ||
432 | * k-candidate is chained (via pred) to the current | ||
433 | * k-1 candidate so that the actual subsequence can | ||
434 | * be recovered. When a member has serial number greater | ||
435 | * that the y of all k-candidates, the klist is extended. | ||
436 | * At the end, the longest subsequence is pulled out | ||
437 | * and placed in the array J by unravel | ||
438 | * | ||
439 | * With J in hand, the matches there recorded are | ||
440 | * checked against reality to assure that no spurious | ||
441 | * matches have crept in due to hashing. If they have, | ||
442 | * they are broken, and "jackpot" is recorded--a harmless | ||
443 | * matter except that a true match for a spuriously | ||
444 | * mated line may now be unnecessarily reported as a change. | ||
445 | * | ||
446 | * Much of the complexity of the program comes simply | ||
447 | * from trying to minimize core utilization and | ||
448 | * maximize the range of doable problems by dynamically | ||
449 | * allocating what is needed and reusing what is not. | ||
450 | * The core requirements for problems larger than somewhat | ||
451 | * are (in words) 2*length(file0) + length(file1) + | ||
452 | * 3*(number of k-candidates installed), typically about | ||
453 | * 6n words for files of length n. | ||
454 | */ | 202 | */ |
455 | 203 | static int readhash(FILE *f) | |
456 | static int diffreg(char *ofile1, char *ofile2, int flags) | ||
457 | { | 204 | { |
458 | char *file1 = ofile1; | 205 | int i, t, space; |
459 | char *file2 = ofile2; | 206 | int sum; |
460 | FILE *f1 = NULL; | ||
461 | FILE *f2 = NULL; | ||
462 | int rval = D_SAME; | ||
463 | int i; | ||
464 | |||
465 | anychange = 0; | ||
466 | context_vec_ptr = context_vec_start - 1; | ||
467 | |||
468 | if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) | ||
469 | return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); | ||
470 | if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) | ||
471 | goto closem; | ||
472 | |||
473 | if (flags & D_EMPTY1) | ||
474 | f1 = bb_xfopen(_PATH_DEVNULL, "r"); | ||
475 | else { | ||
476 | if (strcmp(file1, "-") == 0) | ||
477 | f1 = stdin; | ||
478 | else | ||
479 | f1 = bb_xfopen(file1, "r"); | ||
480 | } | ||
481 | 207 | ||
482 | if (flags & D_EMPTY2) | 208 | sum = 1; |
483 | f2 = bb_xfopen(_PATH_DEVNULL, "r"); | 209 | space = 0; |
484 | else { | 210 | if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) { |
485 | if (strcmp(file2, "-") == 0) | 211 | if (FLAG_i) |
486 | f2 = stdin; | 212 | for (i = 0; (t = getc(f)) != '\n'; i++) { |
213 | if (t == EOF) { | ||
214 | if (i == 0) | ||
215 | return (0); | ||
216 | break; | ||
217 | } | ||
218 | sum = sum * 127 + t; | ||
219 | } | ||
487 | else | 220 | else |
488 | f2 = bb_xfopen(file2, "r"); | 221 | for (i = 0; (t = getc(f)) != '\n'; i++) { |
489 | } | 222 | if (t == EOF) { |
490 | 223 | if (i == 0) | |
491 | switch (files_differ(f1, f2, flags)) { | 224 | return (0); |
492 | case 0: | 225 | break; |
493 | goto closem; | 226 | } |
494 | case 1: | 227 | sum = sum * 127 + t; |
495 | break; | 228 | } |
496 | default: | 229 | } else { |
497 | /* error */ | 230 | for (i = 0;;) { |
498 | status |= 2; | 231 | switch (t = getc(f)) { |
499 | goto closem; | 232 | case '\t': |
500 | } | 233 | case '\r': |
501 | 234 | case '\v': | |
502 | if (!asciifile(f1) || !asciifile(f2)) { | 235 | case '\f': |
503 | rval = D_BINARY; | 236 | case ' ': |
504 | status |= 1; | 237 | space++; |
505 | goto closem; | 238 | continue; |
239 | default: | ||
240 | if (space && !(cmd_flags & FLAG_w)) { | ||
241 | i++; | ||
242 | space = 0; | ||
243 | } | ||
244 | sum = sum * 127 + t; | ||
245 | i++; | ||
246 | continue; | ||
247 | case EOF: | ||
248 | if (i == 0) | ||
249 | return (0); | ||
250 | /* FALLTHROUGH */ | ||
251 | case '\n': | ||
252 | break; | ||
253 | } | ||
254 | break; | ||
255 | } | ||
506 | } | 256 | } |
257 | /* | ||
258 | * There is a remote possibility that we end up with a zero sum. | ||
259 | * Zero is used as an EOF marker, so return 1 instead. | ||
260 | */ | ||
261 | return (sum == 0 ? 1 : sum); | ||
262 | } | ||
507 | 263 | ||
508 | prepare(0, f1, stb1.st_size); | ||
509 | prepare(1, f2, stb2.st_size); | ||
510 | prune(); | ||
511 | sort(sfile[0], slen[0]); | ||
512 | sort(sfile[1], slen[1]); | ||
513 | |||
514 | member = (int *)file[1]; | ||
515 | equiv(sfile[0], slen[0], sfile[1], slen[1], member); | ||
516 | member = xrealloc(member, (slen[1] + 2) * sizeof(int)); | ||
517 | |||
518 | class = (int *)file[0]; | ||
519 | unsort(sfile[0], slen[0], class); | ||
520 | class = xrealloc(class, (slen[0] + 2) * sizeof(int)); | ||
521 | |||
522 | klist = xmalloc((slen[0] + 2) * sizeof(int)); | ||
523 | clen = 0; | ||
524 | clistlen = 100; | ||
525 | clist = xmalloc(clistlen * sizeof(struct cand)); | ||
526 | i = stone(class, slen[0], member, klist); | ||
527 | free(member); | ||
528 | free(class); | ||
529 | 264 | ||
530 | J = xrealloc(J, (len[0] + 2) * sizeof(int)); | ||
531 | unravel(klist[i]); | ||
532 | free(clist); | ||
533 | free(klist); | ||
534 | |||
535 | ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); | ||
536 | ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); | ||
537 | check(file1, f1, file2, f2); | ||
538 | output(file1, f1, file2, f2); | ||
539 | |||
540 | closem: | ||
541 | if (anychange) { | ||
542 | status |= 1; | ||
543 | if (rval == D_SAME) | ||
544 | rval = D_DIFFER; | ||
545 | } | ||
546 | if (f1 != NULL) | ||
547 | fclose(f1); | ||
548 | if (f2 != NULL) | ||
549 | fclose(f2); | ||
550 | if (file1 != ofile1) | ||
551 | free(file1); | ||
552 | if (file2 != ofile2) | ||
553 | free(file2); | ||
554 | return (rval); | ||
555 | } | ||
556 | 265 | ||
557 | /* | 266 | /* |
558 | * Check to see if the given files differ. | 267 | * Check to see if the given files differ. |
559 | * Returns 0 if they are the same, 1 if different, and -1 on error. | 268 | * Returns 0 if they are the same, 1 if different, and -1 on error. |
560 | */ | 269 | */ |
561 | static int | 270 | static int files_differ(FILE *f1, FILE *f2, int flags) |
562 | files_differ(FILE *f1, FILE *f2, int flags) | ||
563 | { | 271 | { |
564 | char buf1[BUFSIZ], buf2[BUFSIZ]; | 272 | char buf1[BUFSIZ], buf2[BUFSIZ]; |
565 | size_t i, j; | 273 | size_t i, j; |
@@ -582,8 +290,7 @@ files_differ(FILE *f1, FILE *f2, int flags) | |||
582 | } | 290 | } |
583 | } | 291 | } |
584 | 292 | ||
585 | static void | 293 | static void prepare(int i, FILE *fd, off_t filesize) |
586 | prepare(int i, FILE *fd, off_t filesize) | ||
587 | { | 294 | { |
588 | struct line *p; | 295 | struct line *p; |
589 | int j, h; | 296 | int j, h; |
@@ -607,8 +314,7 @@ prepare(int i, FILE *fd, off_t filesize) | |||
607 | file[i] = p; | 314 | file[i] = p; |
608 | } | 315 | } |
609 | 316 | ||
610 | static void | 317 | static void prune(void) |
611 | prune(void) | ||
612 | { | 318 | { |
613 | int i, j; | 319 | int i, j; |
614 | 320 | ||
@@ -628,8 +334,7 @@ prune(void) | |||
628 | } | 334 | } |
629 | } | 335 | } |
630 | 336 | ||
631 | static void | 337 | static void equiv(struct line *a, int n, struct line *b, int m, int *c) |
632 | equiv(struct line *a, int n, struct line *b, int m, int *c) | ||
633 | { | 338 | { |
634 | int i, j; | 339 | int i, j; |
635 | 340 | ||
@@ -658,8 +363,8 @@ equiv(struct line *a, int n, struct line *b, int m, int *c) | |||
658 | 363 | ||
659 | static int isqrt(int n) { | 364 | static int isqrt(int n) { |
660 | int y, x = 1; | 365 | int y, x = 1; |
661 | if (n == 0) return(0); | 366 | if (n == 0) return(0); |
662 | 367 | ||
663 | do { | 368 | do { |
664 | y = x; | 369 | y = x; |
665 | x = n / x; | 370 | x = n / x; |
@@ -670,8 +375,48 @@ static int isqrt(int n) { | |||
670 | return (x); | 375 | return (x); |
671 | } | 376 | } |
672 | 377 | ||
673 | static int | 378 | |
674 | stone(int *a, int n, int *b, int *c) | 379 | static int newcand(int x, int y, int pred) |
380 | { | ||
381 | struct cand *q; | ||
382 | |||
383 | if (clen == clistlen) { | ||
384 | clistlen = clistlen * 11 / 10; | ||
385 | clist = xrealloc(clist, clistlen * sizeof(struct cand)); | ||
386 | } | ||
387 | q = clist + clen; | ||
388 | q->x = x; | ||
389 | q->y = y; | ||
390 | q->pred = pred; | ||
391 | return (clen++); | ||
392 | } | ||
393 | |||
394 | |||
395 | static int search(int *c, int k, int y) | ||
396 | { | ||
397 | int i, j, l, t; | ||
398 | |||
399 | if (clist[c[k]].y < y) /* quick look for typical case */ | ||
400 | return (k + 1); | ||
401 | i = 0; | ||
402 | j = k + 1; | ||
403 | while (1) { | ||
404 | l = i + j; | ||
405 | if ((l >>= 1) <= i) | ||
406 | break; | ||
407 | t = clist[c[l]].y; | ||
408 | if (t > y) | ||
409 | j = l; | ||
410 | else if (t < y) | ||
411 | i = l; | ||
412 | else | ||
413 | return (l); | ||
414 | } | ||
415 | return (l + 1); | ||
416 | } | ||
417 | |||
418 | |||
419 | static int stone(int *a, int n, int *b, int *c) | ||
675 | { | 420 | { |
676 | int i, k, y, j, l; | 421 | int i, k, y, j, l; |
677 | int oldc, tc, oldl; | 422 | int oldc, tc, oldl; |
@@ -715,67 +460,48 @@ stone(int *a, int n, int *b, int *c) | |||
715 | return (k); | 460 | return (k); |
716 | } | 461 | } |
717 | 462 | ||
718 | static int | 463 | static void unravel(int p) |
719 | newcand(int x, int y, int pred) | ||
720 | { | 464 | { |
721 | struct cand *q; | 465 | struct cand *q; |
466 | int i; | ||
722 | 467 | ||
723 | if (clen == clistlen) { | 468 | for (i = 0; i <= len[0]; i++) |
724 | clistlen = clistlen * 11 / 10; | 469 | J[i] = i <= pref ? i : |
725 | clist = xrealloc(clist, clistlen * sizeof(struct cand)); | 470 | i > len[0] - suff ? i + len[1] - len[0] : 0; |
726 | } | 471 | for (q = clist + p; q->y != 0; q = clist + q->pred) |
727 | q = clist + clen; | 472 | J[q->x + pref] = q->y + pref; |
728 | q->x = x; | ||
729 | q->y = y; | ||
730 | q->pred = pred; | ||
731 | return (clen++); | ||
732 | } | 473 | } |
733 | 474 | ||
734 | static int | 475 | |
735 | search(int *c, int k, int y) | 476 | static void unsort(struct line *f, int l, int *b) |
736 | { | 477 | { |
737 | int i, j, l, t; | 478 | int *a, i; |
738 | 479 | ||
739 | if (clist[c[k]].y < y) /* quick look for typical case */ | 480 | a = xmalloc((l + 1) * sizeof(int)); |
740 | return (k + 1); | 481 | for (i = 1; i <= l; i++) |
741 | i = 0; | 482 | a[f[i].serial] = f[i].value; |
742 | j = k + 1; | 483 | for (i = 1; i <= l; i++) |
743 | while (1) { | 484 | b[i] = a[i]; |
744 | l = i + j; | 485 | free(a); |
745 | if ((l >>= 1) <= i) | ||
746 | break; | ||
747 | t = clist[c[l]].y; | ||
748 | if (t > y) | ||
749 | j = l; | ||
750 | else if (t < y) | ||
751 | i = l; | ||
752 | else | ||
753 | return (l); | ||
754 | } | ||
755 | return (l + 1); | ||
756 | } | 486 | } |
757 | 487 | ||
758 | static void | 488 | static int skipline(FILE *f) |
759 | unravel(int p) | ||
760 | { | 489 | { |
761 | struct cand *q; | 490 | int i, c; |
762 | int i; | ||
763 | 491 | ||
764 | for (i = 0; i <= len[0]; i++) | 492 | for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) |
765 | J[i] = i <= pref ? i : | 493 | continue; |
766 | i > len[0] - suff ? i + len[1] - len[0] : 0; | 494 | return (i); |
767 | for (q = clist + p; q->y != 0; q = clist + q->pred) | ||
768 | J[q->x + pref] = q->y + pref; | ||
769 | } | 495 | } |
770 | 496 | ||
497 | |||
771 | /* | 498 | /* |
772 | * Check does double duty: | 499 | * Check does double duty: |
773 | * 1. ferret out any fortuitous correspondences due | 500 | * 1. ferret out any fortuitous correspondences due |
774 | * to confounding by hashing (which result in "jackpot") | 501 | * to confounding by hashing (which result in "jackpot") |
775 | * 2. collect random access indexes to the two files | 502 | * 2. collect random access indexes to the two files |
776 | */ | 503 | */ |
777 | static void | 504 | static void check(char *file1, FILE *f1, char *file2, FILE *f2) |
778 | check(char *file1, FILE *f1, char *file2, FILE *f2) | ||
779 | { | 505 | { |
780 | int i, j, jackpot, c, d; | 506 | int i, j, jackpot, c, d; |
781 | long ctold, ctnew; | 507 | long ctold, ctnew; |
@@ -868,8 +594,7 @@ check(char *file1, FILE *f1, char *file2, FILE *f2) | |||
868 | } | 594 | } |
869 | 595 | ||
870 | /* shellsort CACM #201 */ | 596 | /* shellsort CACM #201 */ |
871 | static void | 597 | static void sort(struct line *a, int n) |
872 | sort(struct line *a, int n) | ||
873 | { | 598 | { |
874 | struct line *ai, *aim, w; | 599 | struct line *ai, *aim, w; |
875 | int j, m = 0, k; | 600 | int j, m = 0, k; |
@@ -900,57 +625,6 @@ sort(struct line *a, int n) | |||
900 | } | 625 | } |
901 | } | 626 | } |
902 | 627 | ||
903 | static void | ||
904 | unsort(struct line *f, int l, int *b) | ||
905 | { | ||
906 | int *a, i; | ||
907 | |||
908 | a = xmalloc((l + 1) * sizeof(int)); | ||
909 | for (i = 1; i <= l; i++) | ||
910 | a[f[i].serial] = f[i].value; | ||
911 | for (i = 1; i <= l; i++) | ||
912 | b[i] = a[i]; | ||
913 | free(a); | ||
914 | } | ||
915 | |||
916 | static int | ||
917 | skipline(FILE *f) | ||
918 | { | ||
919 | int i, c; | ||
920 | |||
921 | for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) | ||
922 | continue; | ||
923 | return (i); | ||
924 | } | ||
925 | |||
926 | static void | ||
927 | output(char *file1, FILE *f1, char *file2, FILE *f2) | ||
928 | { | ||
929 | int m, i0, i1, j0, j1; | ||
930 | |||
931 | rewind(f1); | ||
932 | rewind(f2); | ||
933 | m = len[0]; | ||
934 | J[0] = 0; | ||
935 | J[m + 1] = len[1] + 1; | ||
936 | for (i0 = 1; i0 <= m; i0 = i1 + 1) { | ||
937 | while (i0 <= m && J[i0] == J[i0 - 1] + 1) | ||
938 | i0++; | ||
939 | j0 = J[i0 - 1] + 1; | ||
940 | i1 = i0 - 1; | ||
941 | while (i1 < m && J[i1 + 1] == 0) | ||
942 | i1++; | ||
943 | j1 = J[i1 + 1] - 1; | ||
944 | J[i1] = j1; | ||
945 | change(file1, f1, file2, f2, i0, i1, j0, j1); | ||
946 | } | ||
947 | if (m == 0) { | ||
948 | change(file1, f1, file2, f2, 1, 0, 1, len[1]); | ||
949 | } | ||
950 | if (anychange != 0) { | ||
951 | dump_unified_vec(f1, f2); | ||
952 | } | ||
953 | } | ||
954 | 628 | ||
955 | static void uni_range(int a, int b) | 629 | static void uni_range(int a, int b) |
956 | { | 630 | { |
@@ -962,57 +636,7 @@ static void uni_range(int a, int b) | |||
962 | printf("%d,0", b); | 636 | printf("%d,0", b); |
963 | } | 637 | } |
964 | 638 | ||
965 | /* | 639 | static int fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) |
966 | * Indicate that there is a difference between lines a and b of the from file | ||
967 | * to get to lines c to d of the to file. If a is greater then b then there | ||
968 | * are no lines in the from file involved and this means that there were | ||
969 | * lines appended (beginning at b). If c is greater than d then there are | ||
970 | * lines missing from the to file. | ||
971 | */ | ||
972 | static void | ||
973 | change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) | ||
974 | { | ||
975 | static size_t max_context = 64; | ||
976 | |||
977 | if (a > b && c > d) return; | ||
978 | if (cmd_flags & FLAG_q) return; | ||
979 | |||
980 | /* | ||
981 | * Allocate change records as needed. | ||
982 | */ | ||
983 | if (context_vec_ptr == context_vec_end - 1) { | ||
984 | ptrdiff_t offset = context_vec_ptr - context_vec_start; | ||
985 | max_context <<= 1; | ||
986 | context_vec_start = xrealloc(context_vec_start, | ||
987 | max_context * sizeof(struct context_vec)); | ||
988 | context_vec_end = context_vec_start + max_context; | ||
989 | context_vec_ptr = context_vec_start + offset; | ||
990 | } | ||
991 | if (anychange == 0) { | ||
992 | /* | ||
993 | * Print the context/unidiff header first time through. | ||
994 | */ | ||
995 | print_header(file1, file2); | ||
996 | anychange = 1; | ||
997 | } else if (a > context_vec_ptr->b + (2 * context) + 1 && | ||
998 | c > context_vec_ptr->d + (2 * context) + 1) { | ||
999 | /* | ||
1000 | * If this change is more than 'context' lines from the | ||
1001 | * previous change, dump the record and reset it. | ||
1002 | */ | ||
1003 | dump_unified_vec(f1, f2); | ||
1004 | } | ||
1005 | context_vec_ptr++; | ||
1006 | context_vec_ptr->a = a; | ||
1007 | context_vec_ptr->b = b; | ||
1008 | context_vec_ptr->c = c; | ||
1009 | context_vec_ptr->d = d; | ||
1010 | return; | ||
1011 | |||
1012 | } | ||
1013 | |||
1014 | static int | ||
1015 | fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) | ||
1016 | { | 640 | { |
1017 | int i, j, c, lastc, col, nc; | 641 | int i, j, c, lastc, col, nc; |
1018 | 642 | ||
@@ -1045,81 +669,15 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) | |||
1045 | return (0); | 669 | return (0); |
1046 | } | 670 | } |
1047 | 671 | ||
1048 | /* | 672 | static int asciifile(FILE *f) |
1049 | * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. | ||
1050 | */ | ||
1051 | static int | ||
1052 | readhash(FILE *f) | ||
1053 | { | 673 | { |
1054 | int i, t, space; | ||
1055 | int sum; | ||
1056 | 674 | ||
1057 | sum = 1; | 675 | if ((cmd_flags & FLAG_a) || f == NULL) |
1058 | space = 0; | 676 | return (1); |
1059 | if (!(cmd_flags & FLAG_b) && !(cmd_flags & FLAG_w)) { | ||
1060 | if (FLAG_i) | ||
1061 | for (i = 0; (t = getc(f)) != '\n'; i++) { | ||
1062 | if (t == EOF) { | ||
1063 | if (i == 0) | ||
1064 | return (0); | ||
1065 | break; | ||
1066 | } | ||
1067 | sum = sum * 127 + t; | ||
1068 | } | ||
1069 | else | ||
1070 | for (i = 0; (t = getc(f)) != '\n'; i++) { | ||
1071 | if (t == EOF) { | ||
1072 | if (i == 0) | ||
1073 | return (0); | ||
1074 | break; | ||
1075 | } | ||
1076 | sum = sum * 127 + t; | ||
1077 | } | ||
1078 | } else { | ||
1079 | for (i = 0;;) { | ||
1080 | switch (t = getc(f)) { | ||
1081 | case '\t': | ||
1082 | case '\r': | ||
1083 | case '\v': | ||
1084 | case '\f': | ||
1085 | case ' ': | ||
1086 | space++; | ||
1087 | continue; | ||
1088 | default: | ||
1089 | if (space && !(cmd_flags & FLAG_w)) { | ||
1090 | i++; | ||
1091 | space = 0; | ||
1092 | } | ||
1093 | sum = sum * 127 + t; | ||
1094 | i++; | ||
1095 | continue; | ||
1096 | case EOF: | ||
1097 | if (i == 0) | ||
1098 | return (0); | ||
1099 | /* FALLTHROUGH */ | ||
1100 | case '\n': | ||
1101 | break; | ||
1102 | } | ||
1103 | break; | ||
1104 | } | ||
1105 | } | ||
1106 | /* | ||
1107 | * There is a remote possibility that we end up with a zero sum. | ||
1108 | * Zero is used as an EOF marker, so return 1 instead. | ||
1109 | */ | ||
1110 | return (sum == 0 ? 1 : sum); | ||
1111 | } | ||
1112 | |||
1113 | static int | ||
1114 | asciifile(FILE *f) | ||
1115 | { | ||
1116 | |||
1117 | if ((cmd_flags & FLAG_a) || f == NULL) | ||
1118 | return (1); | ||
1119 | #ifdef CONFIG_FEATURE_DIFF_BINARY | 677 | #ifdef CONFIG_FEATURE_DIFF_BINARY |
1120 | unsigned char buf[BUFSIZ]; | 678 | unsigned char buf[BUFSIZ]; |
1121 | int i, cnt; | 679 | int i, cnt; |
1122 | 680 | ||
1123 | rewind(f); | 681 | rewind(f); |
1124 | cnt = fread(buf, 1, sizeof(buf), f); | 682 | cnt = fread(buf, 1, sizeof(buf), f); |
1125 | for (i = 0; i < cnt; i++) | 683 | for (i = 0; i < cnt; i++) |
@@ -1130,8 +688,7 @@ asciifile(FILE *f) | |||
1130 | } | 688 | } |
1131 | 689 | ||
1132 | /* dump accumulated "unified" diff changes */ | 690 | /* dump accumulated "unified" diff changes */ |
1133 | static void | 691 | static void dump_unified_vec(FILE *f1, FILE *f2) |
1134 | dump_unified_vec(FILE *f1, FILE *f2) | ||
1135 | { | 692 | { |
1136 | struct context_vec *cvp = context_vec_start; | 693 | struct context_vec *cvp = context_vec_start; |
1137 | int lowa, upb, lowc, upd; | 694 | int lowa, upb, lowc, upd; |
@@ -1197,8 +754,8 @@ dump_unified_vec(FILE *f1, FILE *f2) | |||
1197 | context_vec_ptr = context_vec_start - 1; | 754 | context_vec_ptr = context_vec_start - 1; |
1198 | } | 755 | } |
1199 | 756 | ||
1200 | static void | 757 | |
1201 | print_header(const char *file1, const char *file2) | 758 | static void print_header(const char *file1, const char *file2) |
1202 | { | 759 | { |
1203 | if (label[0] != NULL) | 760 | if (label[0] != NULL) |
1204 | printf("%s %s\n", "---", | 761 | printf("%s %s\n", "---", |
@@ -1214,6 +771,419 @@ print_header(const char *file1, const char *file2) | |||
1214 | file2, ctime(&stb2.st_mtime)); | 771 | file2, ctime(&stb2.st_mtime)); |
1215 | } | 772 | } |
1216 | 773 | ||
774 | |||
775 | |||
776 | /* | ||
777 | * Indicate that there is a difference between lines a and b of the from file | ||
778 | * to get to lines c to d of the to file. If a is greater then b then there | ||
779 | * are no lines in the from file involved and this means that there were | ||
780 | * lines appended (beginning at b). If c is greater than d then there are | ||
781 | * lines missing from the to file. | ||
782 | */ | ||
783 | static void change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) | ||
784 | { | ||
785 | static size_t max_context = 64; | ||
786 | |||
787 | if (a > b && c > d) return; | ||
788 | if (cmd_flags & FLAG_q) return; | ||
789 | |||
790 | /* | ||
791 | * Allocate change records as needed. | ||
792 | */ | ||
793 | if (context_vec_ptr == context_vec_end - 1) { | ||
794 | ptrdiff_t offset = context_vec_ptr - context_vec_start; | ||
795 | max_context <<= 1; | ||
796 | context_vec_start = xrealloc(context_vec_start, | ||
797 | max_context * sizeof(struct context_vec)); | ||
798 | context_vec_end = context_vec_start + max_context; | ||
799 | context_vec_ptr = context_vec_start + offset; | ||
800 | } | ||
801 | if (anychange == 0) { | ||
802 | /* | ||
803 | * Print the context/unidiff header first time through. | ||
804 | */ | ||
805 | print_header(file1, file2); | ||
806 | anychange = 1; | ||
807 | } else if (a > context_vec_ptr->b + (2 * context) + 1 && | ||
808 | c > context_vec_ptr->d + (2 * context) + 1) { | ||
809 | /* | ||
810 | * If this change is more than 'context' lines from the | ||
811 | * previous change, dump the record and reset it. | ||
812 | */ | ||
813 | dump_unified_vec(f1, f2); | ||
814 | } | ||
815 | context_vec_ptr++; | ||
816 | context_vec_ptr->a = a; | ||
817 | context_vec_ptr->b = b; | ||
818 | context_vec_ptr->c = c; | ||
819 | context_vec_ptr->d = d; | ||
820 | return; | ||
821 | |||
822 | } | ||
823 | |||
824 | |||
825 | static void output(char *file1, FILE *f1, char *file2, FILE *f2) | ||
826 | { | ||
827 | int m, i0, i1, j0, j1; | ||
828 | |||
829 | rewind(f1); | ||
830 | rewind(f2); | ||
831 | m = len[0]; | ||
832 | J[0] = 0; | ||
833 | J[m + 1] = len[1] + 1; | ||
834 | for (i0 = 1; i0 <= m; i0 = i1 + 1) { | ||
835 | while (i0 <= m && J[i0] == J[i0 - 1] + 1) | ||
836 | i0++; | ||
837 | j0 = J[i0 - 1] + 1; | ||
838 | i1 = i0 - 1; | ||
839 | while (i1 < m && J[i1 + 1] == 0) | ||
840 | i1++; | ||
841 | j1 = J[i1 + 1] - 1; | ||
842 | J[i1] = j1; | ||
843 | change(file1, f1, file2, f2, i0, i1, j0, j1); | ||
844 | } | ||
845 | if (m == 0) { | ||
846 | change(file1, f1, file2, f2, 1, 0, 1, len[1]); | ||
847 | } | ||
848 | if (anychange != 0) { | ||
849 | dump_unified_vec(f1, f2); | ||
850 | } | ||
851 | } | ||
852 | |||
853 | /* | ||
854 | * The following code uses an algorithm due to Harold Stone, | ||
855 | * which finds a pair of longest identical subsequences in | ||
856 | * the two files. | ||
857 | * | ||
858 | * The major goal is to generate the match vector J. | ||
859 | * J[i] is the index of the line in file1 corresponding | ||
860 | * to line i file0. J[i] = 0 if there is no | ||
861 | * such line in file1. | ||
862 | * | ||
863 | * Lines are hashed so as to work in core. All potential | ||
864 | * matches are located by sorting the lines of each file | ||
865 | * on the hash (called ``value''). In particular, this | ||
866 | * collects the equivalence classes in file1 together. | ||
867 | * Subroutine equiv replaces the value of each line in | ||
868 | * file0 by the index of the first element of its | ||
869 | * matching equivalence in (the reordered) file1. | ||
870 | * To save space equiv squeezes file1 into a single | ||
871 | * array member in which the equivalence classes | ||
872 | * are simply concatenated, except that their first | ||
873 | * members are flagged by changing sign. | ||
874 | * | ||
875 | * Next the indices that point into member are unsorted into | ||
876 | * array class according to the original order of file0. | ||
877 | * | ||
878 | * The cleverness lies in routine stone. This marches | ||
879 | * through the lines of file0, developing a vector klist | ||
880 | * of "k-candidates". At step i a k-candidate is a matched | ||
881 | * pair of lines x,y (x in file0 y in file1) such that | ||
882 | * there is a common subsequence of length k | ||
883 | * between the first i lines of file0 and the first y | ||
884 | * lines of file1, but there is no such subsequence for | ||
885 | * any smaller y. x is the earliest possible mate to y | ||
886 | * that occurs in such a subsequence. | ||
887 | * | ||
888 | * Whenever any of the members of the equivalence class of | ||
889 | * lines in file1 matable to a line in file0 has serial number | ||
890 | * less than the y of some k-candidate, that k-candidate | ||
891 | * with the smallest such y is replaced. The new | ||
892 | * k-candidate is chained (via pred) to the current | ||
893 | * k-1 candidate so that the actual subsequence can | ||
894 | * be recovered. When a member has serial number greater | ||
895 | * that the y of all k-candidates, the klist is extended. | ||
896 | * At the end, the longest subsequence is pulled out | ||
897 | * and placed in the array J by unravel | ||
898 | * | ||
899 | * With J in hand, the matches there recorded are | ||
900 | * checked against reality to assure that no spurious | ||
901 | * matches have crept in due to hashing. If they have, | ||
902 | * they are broken, and "jackpot" is recorded--a harmless | ||
903 | * matter except that a true match for a spuriously | ||
904 | * mated line may now be unnecessarily reported as a change. | ||
905 | * | ||
906 | * Much of the complexity of the program comes simply | ||
907 | * from trying to minimize core utilization and | ||
908 | * maximize the range of doable problems by dynamically | ||
909 | * allocating what is needed and reusing what is not. | ||
910 | * The core requirements for problems larger than somewhat | ||
911 | * are (in words) 2*length(file0) + length(file1) + | ||
912 | * 3*(number of k-candidates installed), typically about | ||
913 | * 6n words for files of length n. | ||
914 | */ | ||
915 | |||
916 | static int diffreg(char *ofile1, char *ofile2, int flags) | ||
917 | { | ||
918 | char *file1 = ofile1; | ||
919 | char *file2 = ofile2; | ||
920 | FILE *f1 = NULL; | ||
921 | FILE *f2 = NULL; | ||
922 | int rval = D_SAME; | ||
923 | int i; | ||
924 | |||
925 | anychange = 0; | ||
926 | context_vec_ptr = context_vec_start - 1; | ||
927 | |||
928 | if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) | ||
929 | return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); | ||
930 | if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) | ||
931 | goto closem; | ||
932 | |||
933 | if (flags & D_EMPTY1) | ||
934 | f1 = bb_xfopen(_PATH_DEVNULL, "r"); | ||
935 | else { | ||
936 | if (strcmp(file1, "-") == 0) | ||
937 | f1 = stdin; | ||
938 | else | ||
939 | f1 = bb_xfopen(file1, "r"); | ||
940 | } | ||
941 | |||
942 | if (flags & D_EMPTY2) | ||
943 | f2 = bb_xfopen(_PATH_DEVNULL, "r"); | ||
944 | else { | ||
945 | if (strcmp(file2, "-") == 0) | ||
946 | f2 = stdin; | ||
947 | else | ||
948 | f2 = bb_xfopen(file2, "r"); | ||
949 | } | ||
950 | |||
951 | switch (files_differ(f1, f2, flags)) { | ||
952 | case 0: | ||
953 | goto closem; | ||
954 | case 1: | ||
955 | break; | ||
956 | default: | ||
957 | /* error */ | ||
958 | status |= 2; | ||
959 | goto closem; | ||
960 | } | ||
961 | |||
962 | if (!asciifile(f1) || !asciifile(f2)) { | ||
963 | rval = D_BINARY; | ||
964 | status |= 1; | ||
965 | goto closem; | ||
966 | } | ||
967 | |||
968 | prepare(0, f1, stb1.st_size); | ||
969 | prepare(1, f2, stb2.st_size); | ||
970 | prune(); | ||
971 | sort(sfile[0], slen[0]); | ||
972 | sort(sfile[1], slen[1]); | ||
973 | |||
974 | member = (int *)file[1]; | ||
975 | equiv(sfile[0], slen[0], sfile[1], slen[1], member); | ||
976 | member = xrealloc(member, (slen[1] + 2) * sizeof(int)); | ||
977 | |||
978 | class = (int *)file[0]; | ||
979 | unsort(sfile[0], slen[0], class); | ||
980 | class = xrealloc(class, (slen[0] + 2) * sizeof(int)); | ||
981 | |||
982 | klist = xmalloc((slen[0] + 2) * sizeof(int)); | ||
983 | clen = 0; | ||
984 | clistlen = 100; | ||
985 | clist = xmalloc(clistlen * sizeof(struct cand)); | ||
986 | i = stone(class, slen[0], member, klist); | ||
987 | free(member); | ||
988 | free(class); | ||
989 | |||
990 | J = xrealloc(J, (len[0] + 2) * sizeof(int)); | ||
991 | unravel(klist[i]); | ||
992 | free(clist); | ||
993 | free(klist); | ||
994 | |||
995 | ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); | ||
996 | ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); | ||
997 | check(file1, f1, file2, f2); | ||
998 | output(file1, f1, file2, f2); | ||
999 | |||
1000 | closem: | ||
1001 | if (anychange) { | ||
1002 | status |= 1; | ||
1003 | if (rval == D_SAME) | ||
1004 | rval = D_DIFFER; | ||
1005 | } | ||
1006 | if (f1 != NULL) | ||
1007 | fclose(f1); | ||
1008 | if (f2 != NULL) | ||
1009 | fclose(f2); | ||
1010 | if (file1 != ofile1) | ||
1011 | free(file1); | ||
1012 | if (file2 != ofile2) | ||
1013 | free(file2); | ||
1014 | return (rval); | ||
1015 | } | ||
1016 | |||
1017 | #if ENABLE_FEATURE_DIFF_DIR | ||
1018 | static void do_diff (char *dir1, char *path1, char *dir2, char *path2) { | ||
1019 | |||
1020 | int flags = D_HEADER; | ||
1021 | int val; | ||
1022 | |||
1023 | char *fullpath1 = bb_xasprintf("%s/%s", dir1, path1); | ||
1024 | char *fullpath2 = bb_xasprintf("%s/%s", dir2, path2); | ||
1025 | |||
1026 | if (stat(fullpath1, &stb1) != 0) { | ||
1027 | flags |= D_EMPTY1; | ||
1028 | memset(&stb1, 0, sizeof(stb1)); | ||
1029 | fullpath1 = bb_xasprintf("%s/%s", dir1, path2); | ||
1030 | } | ||
1031 | if (stat(fullpath2, &stb2) != 0) { | ||
1032 | flags |= D_EMPTY2; | ||
1033 | memset(&stb2, 0, sizeof(stb2)); | ||
1034 | stb2.st_mode = stb1.st_mode; | ||
1035 | fullpath2 = bb_xasprintf("%s/%s", dir2, path1); | ||
1036 | } | ||
1037 | |||
1038 | if (stb1.st_mode == 0) | ||
1039 | stb1.st_mode = stb2.st_mode; | ||
1040 | |||
1041 | if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { | ||
1042 | printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); | ||
1043 | return; | ||
1044 | } | ||
1045 | |||
1046 | if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) | ||
1047 | val = D_SKIPPED1; | ||
1048 | else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) | ||
1049 | val = D_SKIPPED2; | ||
1050 | else | ||
1051 | val = diffreg(fullpath1, fullpath2, flags); | ||
1052 | |||
1053 | print_status(val, fullpath1, fullpath2, NULL); | ||
1054 | } | ||
1055 | #endif | ||
1056 | |||
1057 | #ifdef CONFIG_FEATURE_DIFF_DIR | ||
1058 | static int dir_strcmp(const void *p1, const void *p2) { | ||
1059 | return strcmp(*(char * const *)p1, *(char * const *)p2); | ||
1060 | } | ||
1061 | |||
1062 | /* This function adds a filename to dl, the directory listing. */ | ||
1063 | |||
1064 | static int add_to_dirlist (const char *filename, struct stat *statbuf, void *userdata) { | ||
1065 | dl_count++; | ||
1066 | dl = xrealloc(dl, dl_count * sizeof(char *)); | ||
1067 | dl[dl_count - 1] = bb_xstrdup(filename); | ||
1068 | if (cmd_flags & FLAG_r) { | ||
1069 | int *pp = (int *) userdata; | ||
1070 | int path_len = *pp + 1; | ||
1071 | dl[dl_count - 1] = &(dl[dl_count - 1])[path_len]; | ||
1072 | } | ||
1073 | return TRUE; | ||
1074 | } | ||
1075 | |||
1076 | /* This returns a sorted directory listing. */ | ||
1077 | static char **get_dir(char *path) { | ||
1078 | |||
1079 | int i; | ||
1080 | |||
1081 | /* Reset dl_count - there's no need to free dl as bb_xrealloc does | ||
1082 | * the job nicely. */ | ||
1083 | dl_count = 0; | ||
1084 | |||
1085 | /* If -r has been set, then the recursive_action function will be | ||
1086 | * used. Unfortunately, this outputs the root directory along with | ||
1087 | * the recursed paths, so use void *userdata to specify the string | ||
1088 | * length of the root directory. It can then be removed in | ||
1089 | * add_to_dirlist. */ | ||
1090 | |||
1091 | int path_len = strlen(path); | ||
1092 | void *userdata = &path_len; | ||
1093 | |||
1094 | /* Now fill dl with a listing. */ | ||
1095 | if (cmd_flags & FLAG_r) | ||
1096 | recursive_action(path, TRUE, TRUE, FALSE, add_to_dirlist, NULL, userdata); | ||
1097 | else { | ||
1098 | DIR *dp; | ||
1099 | struct dirent *ep; | ||
1100 | if ((dp = opendir(path)) == NULL) | ||
1101 | bb_error_msg("Error reading directory"); | ||
1102 | while ((ep = readdir(dp))) { | ||
1103 | if ((!strcmp(ep->d_name, "..")) || (!strcmp(ep->d_name, "."))) | ||
1104 | continue; | ||
1105 | add_to_dirlist(ep->d_name, NULL, NULL); | ||
1106 | } | ||
1107 | closedir(dp); | ||
1108 | } | ||
1109 | |||
1110 | /* Sort dl alphabetically. */ | ||
1111 | qsort(dl, dl_count, sizeof(char *), dir_strcmp); | ||
1112 | |||
1113 | /* Copy dl so that we can return it. */ | ||
1114 | char **retval = xmalloc(dl_count * sizeof(char *)); | ||
1115 | for (i = 0; i < dl_count; i++) | ||
1116 | retval[i] = bb_xstrdup(dl[i]); | ||
1117 | |||
1118 | return retval; | ||
1119 | } | ||
1120 | |||
1121 | static void diffdir (char *p1, char *p2) { | ||
1122 | |||
1123 | char **dirlist1, **dirlist2; | ||
1124 | char *dp1, *dp2; | ||
1125 | int dirlist1_count, dirlist2_count; | ||
1126 | int pos; | ||
1127 | |||
1128 | /* Check for trailing slashes. */ | ||
1129 | |||
1130 | if (p1[strlen(p1) - 1] == '/') | ||
1131 | p1[strlen(p1) - 1] = '\0'; | ||
1132 | if (p2[strlen(p2) - 1] == '/') | ||
1133 | p2[strlen(p2) - 1] = '\0'; | ||
1134 | |||
1135 | /* Get directory listings for p1 and p2. */ | ||
1136 | |||
1137 | dirlist1 = get_dir(p1); | ||
1138 | dirlist1_count = dl_count; | ||
1139 | dirlist1[dirlist1_count] = NULL; | ||
1140 | dirlist2 = get_dir(p2); | ||
1141 | dirlist2_count = dl_count; | ||
1142 | dirlist2[dirlist2_count] = NULL; | ||
1143 | |||
1144 | /* If -S was set, find the starting point. */ | ||
1145 | if (start) { | ||
1146 | while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) | ||
1147 | dirlist1++; | ||
1148 | while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) | ||
1149 | dirlist2++; | ||
1150 | if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) | ||
1151 | bb_error_msg("Invalid argument to -S"); | ||
1152 | } | ||
1153 | |||
1154 | /* Now that both dirlist1 and dirlist2 contain sorted directory | ||
1155 | * listings, we can start to go through dirlist1. If both listings | ||
1156 | * contain the same file, then do a normal diff. Otherwise, behaviour | ||
1157 | * is determined by whether the -N flag is set. */ | ||
1158 | while (*dirlist1 != NULL || *dirlist2 != NULL) { | ||
1159 | dp1 = *dirlist1; | ||
1160 | dp2 = *dirlist2; | ||
1161 | pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); | ||
1162 | if (pos == 0) { | ||
1163 | do_diff(p1, dp1, p2, dp2); | ||
1164 | dirlist1++; | ||
1165 | dirlist2++; | ||
1166 | } | ||
1167 | else if (pos < 0) { | ||
1168 | if (cmd_flags & FLAG_N) | ||
1169 | do_diff(p1, dp1, p2, NULL); | ||
1170 | else | ||
1171 | print_only(p1, strlen(p1) + 1, dp1); | ||
1172 | dirlist1++; | ||
1173 | } | ||
1174 | else { | ||
1175 | if (cmd_flags & FLAG_N) | ||
1176 | do_diff(p1, NULL, p2, dp2); | ||
1177 | else | ||
1178 | print_only(p2, strlen(p2) + 1, dp2); | ||
1179 | dirlist2++; | ||
1180 | } | ||
1181 | } | ||
1182 | } | ||
1183 | #endif | ||
1184 | |||
1185 | |||
1186 | |||
1217 | extern int diff_main(int argc, char **argv) { | 1187 | extern int diff_main(int argc, char **argv) { |
1218 | char *ep; | 1188 | char *ep; |
1219 | int gotstdin = 0; | 1189 | int gotstdin = 0; |