diff options
Diffstat (limited to 'bzip2recover.c')
-rw-r--r-- | bzip2recover.c | 161 |
1 file changed, 136 insertions, 25 deletions
diff --git a/bzip2recover.c b/bzip2recover.c
index ba3d175..286873b 100644
--- a/bzip2recover.c
+++ b/bzip2recover.c
@@ -9,7 +9,7 @@ | |||
9 | salvage from damaged files created by the accompanying | 9 | salvage from damaged files created by the accompanying |
10 | bzip2-1.0 program. | 10 | bzip2-1.0 program. |
11 | 11 | ||
12 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. | 12 | Copyright (C) 1996-2002 Julian R Seward. All rights reserved. |
13 | 13 | ||
14 | Redistribution and use in source and binary forms, with or without | 14 | Redistribution and use in source and binary forms, with or without |
15 | modification, are permitted provided that the following conditions | 15 | modification, are permitted provided that the following conditions |
@@ -57,6 +57,29 @@ | |||
57 | #include <stdlib.h> | 57 | #include <stdlib.h> |
58 | #include <string.h> | 58 | #include <string.h> |
59 | 59 | ||
60 | |||
61 | /* This program records bit locations in the file to be recovered. | ||
62 | That means that if 64-bit ints are not supported, we will not | ||
63 | be able to recover .bz2 files over 512MB (2^32 bits) long. | ||
64 | On GNU supported platforms, we take advantage of the 64-bit | ||
65 | int support to circumvent this problem. Ditto MSVC. | ||
66 | |||
67 | This change occurred in version 1.0.2; all prior versions have | ||
68 | the 512MB limitation. | ||
69 | */ | ||
70 | #ifdef __GNUC__ | ||
71 | typedef unsigned long long int MaybeUInt64; | ||
72 | # define MaybeUInt64_FMT "%Lu" | ||
73 | #else | ||
74 | #ifdef _MSC_VER | ||
75 | typedef unsigned __int64 MaybeUInt64; | ||
76 | # define MaybeUInt64_FMT "%I64u" | ||
77 | #else | ||
78 | typedef unsigned int MaybeUInt64; | ||
79 | # define MaybeUInt64_FMT "%u" | ||
80 | #endif | ||
81 | #endif | ||
82 | |||
60 | typedef unsigned int UInt32; | 83 | typedef unsigned int UInt32; |
61 | typedef int Int32; | 84 | typedef int Int32; |
62 | typedef unsigned char UChar; | 85 | typedef unsigned char UChar; |
@@ -66,13 +89,25 @@ typedef unsigned char Bool; | |||
66 | #define False ((Bool)0) | 89 | #define False ((Bool)0) |
67 | 90 | ||
68 | 91 | ||
69 | Char inFileName[2000]; | 92 | #define BZ_MAX_FILENAME 2000 |
70 | Char outFileName[2000]; | 93 | |
71 | Char progName[2000]; | 94 | Char inFileName[BZ_MAX_FILENAME]; |
95 | Char outFileName[BZ_MAX_FILENAME]; | ||
96 | Char progName[BZ_MAX_FILENAME]; | ||
97 | |||
98 | MaybeUInt64 bytesOut = 0; | ||
99 | MaybeUInt64 bytesIn = 0; | ||
72 | 100 | ||
73 | UInt32 bytesOut = 0; | ||
74 | UInt32 bytesIn = 0; | ||
75 | 101 | ||
102 | /*---------------------------------------------------*/ | ||
103 | /*--- Header bytes ---*/ | ||
104 | /*---------------------------------------------------*/ | ||
105 | |||
106 | #define BZ_HDR_B 0x42 /* 'B' */ | ||
107 | #define BZ_HDR_Z 0x5a /* 'Z' */ | ||
108 | #define BZ_HDR_h 0x68 /* 'h' */ | ||
109 | #define BZ_HDR_0 0x30 /* '0' */ | ||
110 | |||
76 | 111 | ||
77 | /*---------------------------------------------------*/ | 112 | /*---------------------------------------------------*/ |
78 | /*--- I/O errors ---*/ | 113 | /*--- I/O errors ---*/ |
@@ -116,6 +151,23 @@ void mallocFail ( Int32 n ) | |||
116 | } | 151 | } |
117 | 152 | ||
118 | 153 | ||
154 | /*---------------------------------------------*/ | ||
155 | void tooManyBlocks ( Int32 max_handled_blocks ) | ||
156 | { | ||
157 | fprintf ( stderr, | ||
158 | "%s: `%s' appears to contain more than %d blocks\n", | ||
159 | progName, inFileName, max_handled_blocks ); | ||
160 | fprintf ( stderr, | ||
161 | "%s: and cannot be handled. To fix, increase\n", | ||
162 | progName ); | ||
163 | fprintf ( stderr, | ||
164 | "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n", | ||
165 | progName ); | ||
166 | exit ( 1 ); | ||
167 | } | ||
168 | |||
169 | |||
170 | |||
119 | /*---------------------------------------------------*/ | 171 | /*---------------------------------------------------*/ |
120 | /*--- Bit stream I/O ---*/ | 172 | /*--- Bit stream I/O ---*/ |
121 | /*---------------------------------------------------*/ | 173 | /*---------------------------------------------------*/ |
@@ -254,27 +306,37 @@ Bool endsInBz2 ( Char* name ) | |||
254 | /*--- ---*/ | 306 | /*--- ---*/ |
255 | /*---------------------------------------------------*/ | 307 | /*---------------------------------------------------*/ |
256 | 308 | ||
309 | /* This logic isn't really right when it comes to Cygwin. */ | ||
310 | #ifdef _WIN32 | ||
311 | # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */ | ||
312 | #else | ||
313 | # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */ | ||
314 | #endif | ||
315 | |||
257 | #define BLOCK_HEADER_HI 0x00003141UL | 316 | #define BLOCK_HEADER_HI 0x00003141UL |
258 | #define BLOCK_HEADER_LO 0x59265359UL | 317 | #define BLOCK_HEADER_LO 0x59265359UL |
259 | 318 | ||
260 | #define BLOCK_ENDMARK_HI 0x00001772UL | 319 | #define BLOCK_ENDMARK_HI 0x00001772UL |
261 | #define BLOCK_ENDMARK_LO 0x45385090UL | 320 | #define BLOCK_ENDMARK_LO 0x45385090UL |
262 | 321 | ||
322 | /* Increase if necessary. However, a .bz2 file with > 50000 blocks | ||
323 | would have an uncompressed size of at least 40GB, so the chances | ||
324 | are low you'll need to up this. | ||
325 | */ | ||
326 | #define BZ_MAX_HANDLED_BLOCKS 50000 | ||
263 | 327 | ||
264 | UInt32 bStart[20000]; | 328 | MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS]; |
265 | UInt32 bEnd[20000]; | 329 | MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS]; |
266 | UInt32 rbStart[20000]; | 330 | MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS]; |
267 | UInt32 rbEnd[20000]; | 331 | MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS]; |
268 | 332 | ||
269 | Int32 main ( Int32 argc, Char** argv ) | 333 | Int32 main ( Int32 argc, Char** argv ) |
270 | { | 334 | { |
271 | FILE* inFile; | 335 | FILE* inFile; |
272 | FILE* outFile; | 336 | FILE* outFile; |
273 | BitStream* bsIn, *bsWr; | 337 | BitStream* bsIn, *bsWr; |
274 | Int32 currBlock, b, wrBlock; | 338 | Int32 b, wrBlock, currBlock, rbCtr; |
275 | UInt32 bitsRead; | 339 | MaybeUInt64 bitsRead; |
276 | Int32 rbCtr; | ||
277 | |||
278 | 340 | ||
279 | UInt32 buffHi, buffLo, blockCRC; | 341 | UInt32 buffHi, buffLo, blockCRC; |
280 | Char* p; | 342 | Char* p; |
@@ -282,11 +344,37 @@ Int32 main ( Int32 argc, Char** argv ) | |||
282 | strcpy ( progName, argv[0] ); | 344 | strcpy ( progName, argv[0] ); |
283 | inFileName[0] = outFileName[0] = 0; | 345 | inFileName[0] = outFileName[0] = 0; |
284 | 346 | ||
285 | fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" ); | 347 | fprintf ( stderr, |
348 | "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" ); | ||
286 | 349 | ||
287 | if (argc != 2) { | 350 | if (argc != 2) { |
288 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", | 351 | fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", |
289 | progName, progName ); | 352 | progName, progName ); |
353 | switch (sizeof(MaybeUInt64)) { | ||
354 | case 8: | ||
355 | fprintf(stderr, | ||
356 | "\trestrictions on size of recovered file: None\n"); | ||
357 | break; | ||
358 | case 4: | ||
359 | fprintf(stderr, | ||
360 | "\trestrictions on size of recovered file: 512 MB\n"); | ||
361 | fprintf(stderr, | ||
362 | "\tto circumvent, recompile with MaybeUInt64 as an\n" | ||
363 | "\tunsigned 64-bit int.\n"); | ||
364 | break; | ||
365 | default: | ||
366 | fprintf(stderr, | ||
367 | "\tsizeof(MaybeUInt64) is not 4 or 8 -- " | ||
368 | "configuration error.\n"); | ||
369 | break; | ||
370 | } | ||
371 | exit(1); | ||
372 | } | ||
373 | |||
374 | if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) { | ||
375 | fprintf ( stderr, | ||
376 | "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n", | ||
377 | progName, strlen(argv[1]) ); | ||
290 | exit(1); | 378 | exit(1); |
291 | } | 379 | } |
292 | 380 | ||
@@ -316,7 +404,8 @@ Int32 main ( Int32 argc, Char** argv ) | |||
316 | (bitsRead - bStart[currBlock]) >= 40) { | 404 | (bitsRead - bStart[currBlock]) >= 40) { |
317 | bEnd[currBlock] = bitsRead-1; | 405 | bEnd[currBlock] = bitsRead-1; |
318 | if (currBlock > 0) | 406 | if (currBlock > 0) |
319 | fprintf ( stderr, " block %d runs from %d to %d (incomplete)\n", | 407 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT |
408 | " to " MaybeUInt64_FMT " (incomplete)\n", | ||
320 | currBlock, bStart[currBlock], bEnd[currBlock] ); | 409 | currBlock, bStart[currBlock], bEnd[currBlock] ); |
321 | } else | 410 | } else |
322 | currBlock--; | 411 | currBlock--; |
@@ -330,17 +419,22 @@ Int32 main ( Int32 argc, Char** argv ) | |||
330 | ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI | 419 | ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI |
331 | && buffLo == BLOCK_ENDMARK_LO) | 420 | && buffLo == BLOCK_ENDMARK_LO) |
332 | ) { | 421 | ) { |
333 | if (bitsRead > 49) | 422 | if (bitsRead > 49) { |
334 | bEnd[currBlock] = bitsRead-49; else | 423 | bEnd[currBlock] = bitsRead-49; |
424 | } else { | ||
335 | bEnd[currBlock] = 0; | 425 | bEnd[currBlock] = 0; |
426 | } | ||
336 | if (currBlock > 0 && | 427 | if (currBlock > 0 && |
337 | (bEnd[currBlock] - bStart[currBlock]) >= 130) { | 428 | (bEnd[currBlock] - bStart[currBlock]) >= 130) { |
338 | fprintf ( stderr, " block %d runs from %d to %d\n", | 429 | fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT |
430 | " to " MaybeUInt64_FMT "\n", | ||
339 | rbCtr+1, bStart[currBlock], bEnd[currBlock] ); | 431 | rbCtr+1, bStart[currBlock], bEnd[currBlock] ); |
340 | rbStart[rbCtr] = bStart[currBlock]; | 432 | rbStart[rbCtr] = bStart[currBlock]; |
341 | rbEnd[rbCtr] = bEnd[currBlock]; | 433 | rbEnd[rbCtr] = bEnd[currBlock]; |
342 | rbCtr++; | 434 | rbCtr++; |
343 | } | 435 | } |
436 | if (currBlock >= BZ_MAX_HANDLED_BLOCKS) | ||
437 | tooManyBlocks(BZ_MAX_HANDLED_BLOCKS); | ||
344 | currBlock++; | 438 | currBlock++; |
345 | 439 | ||
346 | bStart[currBlock] = bitsRead; | 440 | bStart[currBlock] = bitsRead; |
@@ -400,10 +494,25 @@ Int32 main ( Int32 argc, Char** argv ) | |||
400 | wrBlock++; | 494 | wrBlock++; |
401 | } else | 495 | } else |
402 | if (bitsRead == rbStart[wrBlock]) { | 496 | if (bitsRead == rbStart[wrBlock]) { |
403 | outFileName[0] = 0; | 497 | /* Create the output file name, correctly handling leading paths. |
404 | sprintf ( outFileName, "rec%4d", wrBlock+1 ); | 498 | (31.10.2001 by Sergey E. Kusikov) */ |
405 | for (p = outFileName; *p != 0; p++) if (*p == ' ') *p = '0'; | 499 | Char* split; |
406 | strcat ( outFileName, inFileName ); | 500 | Int32 ofs, k; |
501 | for (k = 0; k < BZ_MAX_FILENAME; k++) | ||
502 | outFileName[k] = 0; | ||
503 | strcpy (outFileName, inFileName); | ||
504 | split = strrchr (outFileName, BZ_SPLIT_SYM); | ||
505 | if (split == NULL) { | ||
506 | split = outFileName; | ||
507 | } else { | ||
508 | ++split; | ||
509 | } | ||
510 | /* Now split points to the start of the basename. */ | ||
511 | ofs = split - outFileName; | ||
512 | sprintf (split, "rec%5d", wrBlock+1); | ||
513 | for (p = split; *p != 0; p++) if (*p == ' ') *p = '0'; | ||
514 | strcat (outFileName, inFileName + ofs); | ||
515 | |||
407 | if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); | 516 | if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); |
408 | 517 | ||
409 | fprintf ( stderr, " writing block %d to `%s' ...\n", | 518 | fprintf ( stderr, " writing block %d to `%s' ...\n", |
@@ -416,8 +525,10 @@ Int32 main ( Int32 argc, Char** argv ) | |||
416 | exit(1); | 525 | exit(1); |
417 | } | 526 | } |
418 | bsWr = bsOpenWriteStream ( outFile ); | 527 | bsWr = bsOpenWriteStream ( outFile ); |
419 | bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' ); | 528 | bsPutUChar ( bsWr, BZ_HDR_B ); |
420 | bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' ); | 529 | bsPutUChar ( bsWr, BZ_HDR_Z ); |
530 | bsPutUChar ( bsWr, BZ_HDR_h ); | ||
531 | bsPutUChar ( bsWr, BZ_HDR_0 + 9 ); | ||
421 | bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); | 532 | bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); |
422 | bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); | 533 | bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); |
423 | bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); | 534 | bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); |