about | summary | refs | log | tree | commit | diff
path: root/bzip2recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'bzip2recover.c')
-rw-r--r-- bzip2recover.c 161
1 files changed, 136 insertions, 25 deletions
diff --git a/bzip2recover.c b/bzip2recover.c
index ba3d175..286873b 100644
--- a/bzip2recover.c
+++ b/bzip2recover.c
@@ -9,7 +9,7 @@
9 salvage from damaged files created by the accompanying 9 salvage from damaged files created by the accompanying
10 bzip2-1.0 program. 10 bzip2-1.0 program.
11 11
12 Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 12 Copyright (C) 1996-2002 Julian R Seward. All rights reserved.
13 13
14 Redistribution and use in source and binary forms, with or without 14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions 15 modification, are permitted provided that the following conditions
@@ -57,6 +57,29 @@
57#include <stdlib.h> 57#include <stdlib.h>
58#include <string.h> 58#include <string.h>
59 59
60
61/* This program records bit locations in the file to be recovered.
62 That means that if 64-bit ints are not supported, we will not
63 be able to recover .bz2 files over 512MB (2^32 bits) long.
64 On GNU supported platforms, we take advantage of the 64-bit
65 int support to circumvent this problem. Ditto MSVC.
66
67 This change occurred in version 1.0.2; all prior versions have
68 the 512MB limitation.
69*/
70#ifdef __GNUC__
71 typedef unsigned long long int MaybeUInt64;
72# define MaybeUInt64_FMT "%Lu"
73#else
74#ifdef _MSC_VER
75 typedef unsigned __int64 MaybeUInt64;
76# define MaybeUInt64_FMT "%I64u"
77#else
78 typedef unsigned int MaybeUInt64;
79# define MaybeUInt64_FMT "%u"
80#endif
81#endif
82
60typedef unsigned int UInt32; 83typedef unsigned int UInt32;
61typedef int Int32; 84typedef int Int32;
62typedef unsigned char UChar; 85typedef unsigned char UChar;
@@ -66,13 +89,25 @@ typedef unsigned char Bool;
66#define False ((Bool)0) 89#define False ((Bool)0)
67 90
68 91
69Char inFileName[2000]; 92#define BZ_MAX_FILENAME 2000
70Char outFileName[2000]; 93
71Char progName[2000]; 94Char inFileName[BZ_MAX_FILENAME];
95Char outFileName[BZ_MAX_FILENAME];
96Char progName[BZ_MAX_FILENAME];
97
98MaybeUInt64 bytesOut = 0;
99MaybeUInt64 bytesIn = 0;
72 100
73UInt32 bytesOut = 0;
74UInt32 bytesIn = 0;
75 101
102/*---------------------------------------------------*/
103/*--- Header bytes ---*/
104/*---------------------------------------------------*/
105
106#define BZ_HDR_B 0x42 /* 'B' */
107#define BZ_HDR_Z 0x5a /* 'Z' */
108#define BZ_HDR_h 0x68 /* 'h' */
109#define BZ_HDR_0 0x30 /* '0' */
110
76 111
77/*---------------------------------------------------*/ 112/*---------------------------------------------------*/
78/*--- I/O errors ---*/ 113/*--- I/O errors ---*/
@@ -116,6 +151,23 @@ void mallocFail ( Int32 n )
116} 151}
117 152
118 153
154/*---------------------------------------------*/
155void tooManyBlocks ( Int32 max_handled_blocks )
156{
157 fprintf ( stderr,
158 "%s: `%s' appears to contain more than %d blocks\n",
159 progName, inFileName, max_handled_blocks );
160 fprintf ( stderr,
161 "%s: and cannot be handled. To fix, increase\n",
162 progName );
163 fprintf ( stderr,
164 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
165 progName );
166 exit ( 1 );
167}
168
169
170
119/*---------------------------------------------------*/ 171/*---------------------------------------------------*/
120/*--- Bit stream I/O ---*/ 172/*--- Bit stream I/O ---*/
121/*---------------------------------------------------*/ 173/*---------------------------------------------------*/
@@ -254,27 +306,37 @@ Bool endsInBz2 ( Char* name )
254/*--- ---*/ 306/*--- ---*/
255/*---------------------------------------------------*/ 307/*---------------------------------------------------*/
256 308
309/* This logic isn't really right when it comes to Cygwin. */
310#ifdef _WIN32
311# define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */
312#else
313# define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
314#endif
315
257#define BLOCK_HEADER_HI 0x00003141UL 316#define BLOCK_HEADER_HI 0x00003141UL
258#define BLOCK_HEADER_LO 0x59265359UL 317#define BLOCK_HEADER_LO 0x59265359UL
259 318
260#define BLOCK_ENDMARK_HI 0x00001772UL 319#define BLOCK_ENDMARK_HI 0x00001772UL
261#define BLOCK_ENDMARK_LO 0x45385090UL 320#define BLOCK_ENDMARK_LO 0x45385090UL
262 321
322/* Increase if necessary. However, a .bz2 file with > 50000 blocks
323 would have an uncompressed size of at least 40GB, so the chances
324 are low you'll need to up this.
325*/
326#define BZ_MAX_HANDLED_BLOCKS 50000
263 327
264UInt32 bStart[20000]; 328MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
265UInt32 bEnd[20000]; 329MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
266UInt32 rbStart[20000]; 330MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
267UInt32 rbEnd[20000]; 331MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
268 332
269Int32 main ( Int32 argc, Char** argv ) 333Int32 main ( Int32 argc, Char** argv )
270{ 334{
271 FILE* inFile; 335 FILE* inFile;
272 FILE* outFile; 336 FILE* outFile;
273 BitStream* bsIn, *bsWr; 337 BitStream* bsIn, *bsWr;
274 Int32 currBlock, b, wrBlock; 338 Int32 b, wrBlock, currBlock, rbCtr;
275 UInt32 bitsRead; 339 MaybeUInt64 bitsRead;
276 Int32 rbCtr;
277
278 340
279 UInt32 buffHi, buffLo, blockCRC; 341 UInt32 buffHi, buffLo, blockCRC;
280 Char* p; 342 Char* p;
@@ -282,11 +344,37 @@ Int32 main ( Int32 argc, Char** argv )
282 strcpy ( progName, argv[0] ); 344 strcpy ( progName, argv[0] );
283 inFileName[0] = outFileName[0] = 0; 345 inFileName[0] = outFileName[0] = 0;
284 346
285 fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" ); 347 fprintf ( stderr,
348 "bzip2recover 1.0.2: extracts blocks from damaged .bz2 files.\n" );
286 349
287 if (argc != 2) { 350 if (argc != 2) {
288 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", 351 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
289 progName, progName ); 352 progName, progName );
353 switch (sizeof(MaybeUInt64)) {
354 case 8:
355 fprintf(stderr,
356 "\trestrictions on size of recovered file: None\n");
357 break;
358 case 4:
359 fprintf(stderr,
360 "\trestrictions on size of recovered file: 512 MB\n");
361 fprintf(stderr,
362 "\tto circumvent, recompile with MaybeUInt64 as an\n"
363 "\tunsigned 64-bit int.\n");
364 break;
365 default:
366 fprintf(stderr,
367 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
368 "configuration error.\n");
369 break;
370 }
371 exit(1);
372 }
373
374 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
375 fprintf ( stderr,
376 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
377 progName, strlen(argv[1]) );
290 exit(1); 378 exit(1);
291 } 379 }
292 380
@@ -316,7 +404,8 @@ Int32 main ( Int32 argc, Char** argv )
316 (bitsRead - bStart[currBlock]) >= 40) { 404 (bitsRead - bStart[currBlock]) >= 40) {
317 bEnd[currBlock] = bitsRead-1; 405 bEnd[currBlock] = bitsRead-1;
318 if (currBlock > 0) 406 if (currBlock > 0)
319 fprintf ( stderr, " block %d runs from %d to %d (incomplete)\n", 407 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
408 " to " MaybeUInt64_FMT " (incomplete)\n",
320 currBlock, bStart[currBlock], bEnd[currBlock] ); 409 currBlock, bStart[currBlock], bEnd[currBlock] );
321 } else 410 } else
322 currBlock--; 411 currBlock--;
@@ -330,17 +419,22 @@ Int32 main ( Int32 argc, Char** argv )
330 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 419 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
331 && buffLo == BLOCK_ENDMARK_LO) 420 && buffLo == BLOCK_ENDMARK_LO)
332 ) { 421 ) {
333 if (bitsRead > 49) 422 if (bitsRead > 49) {
334 bEnd[currBlock] = bitsRead-49; else 423 bEnd[currBlock] = bitsRead-49;
424 } else {
335 bEnd[currBlock] = 0; 425 bEnd[currBlock] = 0;
426 }
336 if (currBlock > 0 && 427 if (currBlock > 0 &&
337 (bEnd[currBlock] - bStart[currBlock]) >= 130) { 428 (bEnd[currBlock] - bStart[currBlock]) >= 130) {
338 fprintf ( stderr, " block %d runs from %d to %d\n", 429 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
430 " to " MaybeUInt64_FMT "\n",
339 rbCtr+1, bStart[currBlock], bEnd[currBlock] ); 431 rbCtr+1, bStart[currBlock], bEnd[currBlock] );
340 rbStart[rbCtr] = bStart[currBlock]; 432 rbStart[rbCtr] = bStart[currBlock];
341 rbEnd[rbCtr] = bEnd[currBlock]; 433 rbEnd[rbCtr] = bEnd[currBlock];
342 rbCtr++; 434 rbCtr++;
343 } 435 }
436 if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
437 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
344 currBlock++; 438 currBlock++;
345 439
346 bStart[currBlock] = bitsRead; 440 bStart[currBlock] = bitsRead;
@@ -400,10 +494,25 @@ Int32 main ( Int32 argc, Char** argv )
400 wrBlock++; 494 wrBlock++;
401 } else 495 } else
402 if (bitsRead == rbStart[wrBlock]) { 496 if (bitsRead == rbStart[wrBlock]) {
403 outFileName[0] = 0; 497 /* Create the output file name, correctly handling leading paths.
404 sprintf ( outFileName, "rec%4d", wrBlock+1 ); 498 (31.10.2001 by Sergey E. Kusikov) */
405 for (p = outFileName; *p != 0; p++) if (*p == ' ') *p = '0'; 499 Char* split;
406 strcat ( outFileName, inFileName ); 500 Int32 ofs, k;
501 for (k = 0; k < BZ_MAX_FILENAME; k++)
502 outFileName[k] = 0;
503 strcpy (outFileName, inFileName);
504 split = strrchr (outFileName, BZ_SPLIT_SYM);
505 if (split == NULL) {
506 split = outFileName;
507 } else {
508 ++split;
509 }
510 /* Now split points to the start of the basename. */
511 ofs = split - outFileName;
512 sprintf (split, "rec%5d", wrBlock+1);
513 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
514 strcat (outFileName, inFileName + ofs);
515
407 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" ); 516 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
408 517
409 fprintf ( stderr, " writing block %d to `%s' ...\n", 518 fprintf ( stderr, " writing block %d to `%s' ...\n",
@@ -416,8 +525,10 @@ Int32 main ( Int32 argc, Char** argv )
416 exit(1); 525 exit(1);
417 } 526 }
418 bsWr = bsOpenWriteStream ( outFile ); 527 bsWr = bsOpenWriteStream ( outFile );
419 bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' ); 528 bsPutUChar ( bsWr, BZ_HDR_B );
420 bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' ); 529 bsPutUChar ( bsWr, BZ_HDR_Z );
530 bsPutUChar ( bsWr, BZ_HDR_h );
531 bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
421 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 ); 532 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
422 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 ); 533 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
423 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 ); 534 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );