-
Notifications
You must be signed in to change notification settings - Fork 0
/
zpaq.cpp
3737 lines (3433 loc) · 121 KB
/
zpaq.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// zpaq.cpp - Journaling incremental deduplicating archiver
#define ZPAQ_VERSION "7.15"
/*
This software is provided as-is, with no warranty.
I, Matt Mahoney, release this software into
the public domain. This applies worldwide.
In some countries this may not be legally possible; if so:
I grant anyone the right to use this software for any purpose,
without any conditions, unless such conditions are required by law.
zpaq is a journaling (append-only) archiver for incremental backups.
Files are added only when the last-modified date has changed. Both the old
and new versions are saved. You can extract from old versions of the
archive by specifying a date or version number. zpaq supports 5
compression levels, deduplication, AES-256 encryption, and multi-threading
using an open, self-describing format for backward and forward
compatibility in Windows and Linux. See zpaq.pod for usage.
TO COMPILE:
This program needs libzpaq from http://mattmahoney.net/zpaq/
Recommended compile for Windows with MinGW:
g++ -O3 zpaq.cpp libzpaq.cpp -o zpaq
With Visual C++:
cl /O2 /EHsc zpaq.cpp libzpaq.cpp advapi32.lib
For Linux:
g++ -O3 -Dunix zpaq.cpp libzpaq.cpp -pthread -o zpaq
For BSD or OS/X
g++ -O3 -Dunix -DBSD zpaq.cpp libzpaq.cpp -pthread -o zpaq
Possible options:
-DDEBUG Enable run time checks and help screen for undocumented options.
-DNOJIT Don't assume x86 with SSE2 for libzpaq. Slower (disables JIT).
-Dunix Not Windows. Sometimes automatic in Linux. Needed for Mac OS/X.
-DBSD For BSD or OS/X.
-DPTHREAD Use Pthreads instead of Windows threads. Requires pthreadGC2.dll
or pthreadVC2.dll from http://sourceware.org/pthreads-win32/
-Dunixtest To make -Dunix work in Windows with MinGW.
-fopenmp Parallel divsufsort (faster, implies -pthread, broken in MinGW).
-pthread Required in Linux, implied by -fopenmp.
-O3 or /O2 Optimize (faster).
-o Name of output executable.
/EHsc Enable exception handing in VC++ (required).
advapi32.lib Required for libzpaq in VC++.
*/
#define _FILE_OFFSET_BITS 64 // In Linux make sizeof(off_t) == 8
#ifndef UNICODE
#define UNICODE // For Windows
#endif
#include "libzpaq.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <stdint.h>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <stdexcept>
#include <fcntl.h>
#ifndef DEBUG
#define NDEBUG 1
#endif
#include <assert.h>
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#ifndef unix
#define unix 1
#endif
#endif
#ifdef unix
#define PTHREAD 1
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <dirent.h>
#include <utime.h>
#include <errno.h>
#ifdef BSD
#include <sys/sysctl.h>
#endif
#else // Assume Windows
#include <windows.h>
#include <io.h>
#endif
// For testing -Dunix in Windows
#ifdef unixtest
#define lstat(a,b) stat(a,b)
#define mkdir(a,b) mkdir(a)
#ifndef fseeko
#define fseeko(a,b,c) fseeko64(a,b,c)
#endif
#ifndef ftello
#define ftello(a) ftello64(a)
#endif
#endif
using std::string;
using std::vector;
using std::map;
using std::min;
using std::max;
using libzpaq::StringBuffer;
// Handle errors in libzpaq and elsewhere
void libzpaq::error(const char* msg) {
if (strstr(msg, "ut of memory")) throw std::bad_alloc();
throw std::runtime_error(msg);
}
using libzpaq::error;
// Portable thread types and functions for Windows and Linux. Use like this:
//
// // Create mutex for locking thread-unsafe code
// Mutex mutex; // shared by all threads
// init_mutex(mutex); // initialize in unlocked state
// Semaphore sem(n); // n >= 0 is initial state
//
// // Declare a thread function
// ThreadReturn thread(void *arg) { // arg points to in/out parameters
// lock(mutex); // wait if another thread has it first
// release(mutex); // allow another waiting thread to continue
// sem.wait(); // wait until n>0, then --n
// sem.signal(); // ++n to allow waiting threads to continue
// return 0; // must return 0 to exit thread
// }
//
// // Start a thread
// ThreadID tid;
// run(tid, thread, &arg); // runs in parallel
// join(tid); // wait for thread to return
// destroy_mutex(mutex); // deallocate resources used by mutex
// sem.destroy(); // deallocate resources used by semaphore
#ifdef PTHREAD
#include <pthread.h>
typedef void* ThreadReturn; // job return type
typedef pthread_t ThreadID; // job ID type
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg)// start job
{pthread_create(&tid, NULL, f, arg);}
void join(ThreadID tid) {pthread_join(tid, NULL);} // wait for job
typedef pthread_mutex_t Mutex; // mutex type
void init_mutex(Mutex& m) {pthread_mutex_init(&m, 0);} // init mutex
void lock(Mutex& m) {pthread_mutex_lock(&m);} // wait for mutex
void release(Mutex& m) {pthread_mutex_unlock(&m);} // release mutex
void destroy_mutex(Mutex& m) {pthread_mutex_destroy(&m);} // destroy mutex
class Semaphore {
public:
Semaphore() {sem=-1;}
void init(int n) {
assert(n>=0);
assert(sem==-1);
pthread_cond_init(&cv, 0);
pthread_mutex_init(&mutex, 0);
sem=n;
}
void destroy() {
assert(sem>=0);
pthread_mutex_destroy(&mutex);
pthread_cond_destroy(&cv);
}
int wait() {
assert(sem>=0);
pthread_mutex_lock(&mutex);
int r=0;
if (sem==0) r=pthread_cond_wait(&cv, &mutex);
assert(sem>0);
--sem;
pthread_mutex_unlock(&mutex);
return r;
}
void signal() {
assert(sem>=0);
pthread_mutex_lock(&mutex);
++sem;
pthread_cond_signal(&cv);
pthread_mutex_unlock(&mutex);
}
private:
pthread_cond_t cv; // to signal FINISHED
pthread_mutex_t mutex; // protects cv
int sem; // semaphore count
};
#else // Windows
typedef DWORD ThreadReturn;
typedef HANDLE ThreadID;
void run(ThreadID& tid, ThreadReturn(*f)(void*), void* arg) {
tid=CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)f, arg, 0, NULL);
if (tid==NULL) error("CreateThread failed");
}
void join(ThreadID& tid) {WaitForSingleObject(tid, INFINITE);}
typedef HANDLE Mutex;
void init_mutex(Mutex& m) {m=CreateMutex(NULL, FALSE, NULL);}
void lock(Mutex& m) {WaitForSingleObject(m, INFINITE);}
void release(Mutex& m) {ReleaseMutex(m);}
void destroy_mutex(Mutex& m) {CloseHandle(m);}
class Semaphore {
public:
enum {MAXCOUNT=2000000000};
Semaphore(): h(NULL) {}
void init(int n) {assert(!h); h=CreateSemaphore(NULL, n, MAXCOUNT, NULL);}
void destroy() {assert(h); CloseHandle(h);}
int wait() {assert(h); return WaitForSingleObject(h, INFINITE);}
void signal() {assert(h); ReleaseSemaphore(h, 1, NULL);}
private:
HANDLE h; // Windows semaphore
};
#endif
// Global variables
int64_t global_start=0; // set to mtime() at start of main()
// In Windows, convert 16-bit wide string to UTF-8 and \ to /
#ifndef unix
string wtou(const wchar_t* s) {
assert(sizeof(wchar_t)==2); // Not true in Linux
assert((wchar_t)(-1)==65535);
string r;
if (!s) return r;
for (; *s; ++s) {
if (*s=='\\') r+='/';
else if (*s<128) r+=*s;
else if (*s<2048) r+=192+*s/64, r+=128+*s%64;
else r+=224+*s/4096, r+=128+*s/64%64, r+=128+*s%64;
}
return r;
}
// In Windows, convert UTF-8 string to wide string ignoring
// invalid UTF-8 or >64K. Convert "/" to slash (default "\").
std::wstring utow(const char* ss, char slash='\\') {
assert(sizeof(wchar_t)==2);
assert((wchar_t)(-1)==65535);
std::wstring r;
if (!ss) return r;
const unsigned char* s=(const unsigned char*)ss;
for (; s && *s; ++s) {
if (s[0]=='/') r+=slash;
else if (s[0]<128) r+=s[0];
else if (s[0]>=192 && s[0]<224 && s[1]>=128 && s[1]<192)
r+=(s[0]-192)*64+s[1]-128, ++s;
else if (s[0]>=224 && s[0]<240 && s[1]>=128 && s[1]<192
&& s[2]>=128 && s[2]<192)
r+=(s[0]-224)*4096+(s[1]-128)*64+s[2]-128, s+=2;
}
return r;
}
#endif
// Print a UTF-8 string to f (stdout, stderr) so it displays properly
void printUTF8(const char* s, FILE* f=stdout) {
assert(f);
assert(s);
#ifdef unix
fprintf(f, "%s", s);
#else
const HANDLE h=(HANDLE)_get_osfhandle(_fileno(f));
DWORD ft=GetFileType(h);
if (ft==FILE_TYPE_CHAR) {
fflush(f);
std::wstring w=utow(s, '/'); // Windows console: convert to UTF-16
DWORD n=0;
WriteConsole(h, w.c_str(), w.size(), &n, 0);
}
else // stdout redirected to file
fprintf(f, "%s", s);
#endif
}
// Return relative time in milliseconds
int64_t mtime() {
#ifdef unix
timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec*1000LL+tv.tv_usec/1000;
#else
int64_t t=GetTickCount();
if (t<global_start) t+=0x100000000LL;
return t;
#endif
}
// Convert 64 bit decimal YYYYMMDDHHMMSS to "YYYY-MM-DD HH:MM:SS"
// where -1 = unknown date, 0 = deleted.
string dateToString(int64_t date) {
if (date<=0) return " ";
string s="0000-00-00 00:00:00";
static const int t[]={18,17,15,14,12,11,9,8,6,5,3,2,1,0};
for (int i=0; i<14; ++i) s[t[i]]+=int(date%10), date/=10;
return s;
}
// Convert attributes to a readable format
string attrToString(int64_t attrib) {
string r=" ";
if ((attrib&255)=='u') {
r[0]="0pc3d5b7 9lBsDEF"[(attrib>>20)&15];
for (int i=0; i<4; ++i)
r[4-i]=(attrib>>(8+3*i))%8+'0';
}
else if ((attrib&255)=='w') {
for (int i=0, j=0; i<32; ++i) {
if ((attrib>>(i+8))&1) {
char c="RHS DAdFTprCoIEivs89012345678901"[i];
if (j<5) r[j]=c;
else r+=c;
++j;
}
}
}
return r;
}
// Convert seconds since 0000 1/1/1970 to 64 bit decimal YYYYMMDDHHMMSS
// Valid from 1970 to 2099.
int64_t decimal_time(time_t tt) {
if (tt==-1) tt=0;
int64_t t=(sizeof(tt)==4) ? unsigned(tt) : tt;
const int second=t%60;
const int minute=t/60%60;
const int hour=t/3600%24;
t/=86400; // days since Jan 1 1970
const int term=t/1461; // 4 year terms since 1970
t%=1461;
t+=(t>=59); // insert Feb 29 on non leap years
t+=(t>=425);
t+=(t>=1157);
const int year=term*4+t/366+1970; // actual year
t%=366;
t+=(t>=60)*2; // make Feb. 31 days
t+=(t>=123); // insert Apr 31
t+=(t>=185); // insert June 31
t+=(t>=278); // insert Sept 31
t+=(t>=340); // insert Nov 31
const int month=t/31+1;
const int day=t%31+1;
return year*10000000000LL+month*100000000+day*1000000
+hour*10000+minute*100+second;
}
// Convert decimal date to time_t - inverse of decimal_time()
time_t unix_time(int64_t date) {
if (date<=0) return -1;
static const int days[12]={0,31,59,90,120,151,181,212,243,273,304,334};
const int year=date/10000000000LL%10000;
const int month=(date/100000000%100-1)%12;
const int day=date/1000000%100;
const int hour=date/10000%100;
const int min=date/100%100;
const int sec=date%100;
return (day-1+days[month]+(year%4==0 && month>1)+((year-1970)*1461+1)/4)
*86400+hour*3600+min*60+sec;
}
/////////////////////////////// File //////////////////////////////////
// Windows/Linux compatible file type
#ifdef unix
typedef FILE* FP;
const FP FPNULL=NULL;
const char* const RB="rb";
const char* const WB="wb";
const char* const RBPLUS="rb+";
const char* const WBPLUS="wb+";
#else // Windows
typedef HANDLE FP;
const FP FPNULL=INVALID_HANDLE_VALUE;
typedef enum {RB, WB, RBPLUS, WBPLUS} MODE; // fopen modes
// Open file. Only modes "rb", "wb", "rb+" and "wb+" are supported.
FP fopen(const char* filename, MODE mode) {
assert(filename);
DWORD access=0;
if (mode!=WB) access=GENERIC_READ;
if (mode!=RB) access|=GENERIC_WRITE;
DWORD disp=OPEN_ALWAYS; // wb or wb+
if (mode==RB || mode==RBPLUS) disp=OPEN_EXISTING;
DWORD share=FILE_SHARE_READ;
if (mode==RB) share|=FILE_SHARE_WRITE|FILE_SHARE_DELETE;
return CreateFile(utow(filename).c_str(), access, share,
NULL, disp, FILE_ATTRIBUTE_NORMAL, NULL);
}
// Close file
int fclose(FP fp) {
return CloseHandle(fp) ? 0 : EOF;
}
// Read nobj objects of size size into ptr. Return number of objects read.
size_t fread(void* ptr, size_t size, size_t nobj, FP fp) {
DWORD r=0;
ReadFile(fp, ptr, size*nobj, &r, NULL);
if (size>1) r/=size;
return r;
}
// Write nobj objects of size size from ptr to fp. Return number written.
size_t fwrite(const void* ptr, size_t size, size_t nobj, FP fp) {
DWORD r=0;
WriteFile(fp, ptr, size*nobj, &r, NULL);
if (size>1) r/=size;
return r;
}
// Move file pointer by offset. origin is SEEK_SET (from start), SEEK_CUR,
// (from current position), or SEEK_END (from end).
int fseeko(FP fp, int64_t offset, int origin) {
if (origin==SEEK_SET) origin=FILE_BEGIN;
else if (origin==SEEK_CUR) origin=FILE_CURRENT;
else if (origin==SEEK_END) origin=FILE_END;
LONG h=uint64_t(offset)>>32;
SetFilePointer(fp, offset&0xffffffffull, &h, origin);
return GetLastError()!=NO_ERROR;
}
// Get file position
int64_t ftello(FP fp) {
LONG h=0;
DWORD r=SetFilePointer(fp, 0, &h, FILE_CURRENT);
return r+(uint64_t(h)<<32);
}
#endif
// Return true if a file or directory (UTF-8 without trailing /) exists.
bool exists(string filename) {
int len=filename.size();
if (len<1) return false;
if (filename[len-1]=='/') filename=filename.substr(0, len-1);
#ifdef unix
struct stat sb;
return !lstat(filename.c_str(), &sb);
#else
return GetFileAttributes(utow(filename.c_str()).c_str())
!=INVALID_FILE_ATTRIBUTES;
#endif
}
// Delete a file, return true if successful
bool delete_file(const char* filename) {
#ifdef unix
return remove(filename)==0;
#else
return DeleteFile(utow(filename).c_str());
#endif
}
#ifdef unix
// Print last error message
void printerr(const char* filename) {
perror(filename);
}
#else
// Print last error message
void printerr(const char* filename) {
fflush(stdout);
int err=GetLastError();
printUTF8(filename, stderr);
if (err==ERROR_FILE_NOT_FOUND)
fprintf(stderr, ": file not found\n");
else if (err==ERROR_PATH_NOT_FOUND)
fprintf(stderr, ": path not found\n");
else if (err==ERROR_ACCESS_DENIED)
fprintf(stderr, ": access denied\n");
else if (err==ERROR_SHARING_VIOLATION)
fprintf(stderr, ": sharing violation\n");
else if (err==ERROR_BAD_PATHNAME)
fprintf(stderr, ": bad pathname\n");
else if (err==ERROR_INVALID_NAME)
fprintf(stderr, ": invalid name\n");
else if (err==ERROR_NETNAME_DELETED)
fprintf(stderr, ": network name no longer available\n");
else
fprintf(stderr, ": Windows error %d\n", err);
}
#endif
// Close fp if open. Set date and attributes unless 0
void close(const char* filename, int64_t date, int64_t attr, FP fp=FPNULL) {
assert(filename);
#ifdef unix
if (fp!=FPNULL) fclose(fp);
if (date>0) {
struct utimbuf ub;
ub.actime=time(NULL);
ub.modtime=unix_time(date);
utime(filename, &ub);
}
if ((attr&255)=='u')
chmod(filename, attr>>8);
#else
const bool ads=strstr(filename, ":$DATA")!=0; // alternate data stream?
if (date>0 && !ads) {
if (fp==FPNULL)
fp=CreateFile(utow(filename).c_str(),
FILE_WRITE_ATTRIBUTES,
FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE,
NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
if (fp!=FPNULL) {
SYSTEMTIME st;
st.wYear=date/10000000000LL%10000;
st.wMonth=date/100000000%100;
st.wDayOfWeek=0; // ignored
st.wDay=date/1000000%100;
st.wHour=date/10000%100;
st.wMinute=date/100%100;
st.wSecond=date%100;
st.wMilliseconds=0;
FILETIME ft;
SystemTimeToFileTime(&st, &ft);
SetFileTime(fp, NULL, NULL, &ft);
}
}
if (fp!=FPNULL) CloseHandle(fp);
if ((attr&255)=='w' && !ads)
SetFileAttributes(utow(filename).c_str(), attr>>8);
#endif
}
// Print file open error and throw exception
void ioerr(const char* msg) {
printerr(msg);
throw std::runtime_error(msg);
}
// Create directories as needed. For example if path="/tmp/foo/bar"
// then create directories /, /tmp, and /tmp/foo unless they exist.
// Set date and attributes if not 0.
void makepath(string path, int64_t date=0, int64_t attr=0) {
for (unsigned i=0; i<path.size(); ++i) {
if (path[i]=='\\' || path[i]=='/') {
path[i]=0;
#ifdef unix
mkdir(path.c_str(), 0777);
#else
CreateDirectory(utow(path.c_str()).c_str(), 0);
#endif
path[i]='/';
}
}
// Set date and attributes
string filename=path;
if (filename!="" && filename[filename.size()-1]=='/')
filename=filename.substr(0, filename.size()-1); // remove trailing slash
close(filename.c_str(), date, attr);
}
#ifndef unix
// Truncate filename to length. Return -1 if error, else 0.
int truncate(const char* filename, int64_t length) {
std::wstring w=utow(filename);
HANDLE out=CreateFile(w.c_str(), GENERIC_READ | GENERIC_WRITE,
0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if (out!=INVALID_HANDLE_VALUE) {
LONG hi=length>>32;
if (SetFilePointer(out, length, &hi, FILE_BEGIN)
!=INVALID_SET_FILE_POINTER
&& SetEndOfFile(out)
&& CloseHandle(out))
return 0;
}
return -1;
}
#endif
/////////////////////////////// Archive ///////////////////////////////
// Convert non-negative decimal number x to string of at least n digits
string itos(int64_t x, int n=1) {
assert(x>=0);
assert(n>=0);
string r;
for (; x || n>0; x/=10, --n) r=string(1, '0'+x%10)+r;
return r;
}
// Replace * and ? in fn with part or digits of part
string subpart(string fn, int part) {
for (int j=fn.size()-1; j>=0; --j) {
if (fn[j]=='?')
fn[j]='0'+part%10, part/=10;
else if (fn[j]=='*')
fn=fn.substr(0, j)+itos(part)+fn.substr(j+1), part=0;
}
return fn;
}
// Base of InputArchive and OutputArchive
class ArchiveBase {
protected:
libzpaq::AES_CTR* aes; // NULL if not encrypted
FP fp; // currently open file or FPNULL
public:
ArchiveBase(): aes(0), fp(FPNULL) {}
~ArchiveBase() {
if (aes) delete aes;
if (fp!=FPNULL) fclose(fp);
}
bool isopen() {return fp!=FPNULL;}
};
// An InputArchive supports encrypted reading
class InputArchive: public ArchiveBase, public libzpaq::Reader {
vector<int64_t> sz; // part sizes
int64_t off; // current offset
string fn; // filename, possibly multi-part with wildcards
public:
// Open filename. If password then decrypt input.
InputArchive(const char* filename, const char* password=0);
// Read and return 1 byte or -1 (EOF)
int get() {
error("get() not implemented");
return -1;
}
// Read up to len bytes into obuf at current offset. Return 0..len bytes
// actually read. 0 indicates EOF.
int read(char* obuf, int len) {
int nr=fread(obuf, 1, len, fp);
if (nr==0) {
seek(0, SEEK_CUR);
nr=fread(obuf, 1, len, fp);
}
if (nr==0) return 0;
if (aes) aes->encrypt(obuf, nr, off);
off+=nr;
return nr;
}
// Like fseeko()
void seek(int64_t p, int whence);
// Like ftello()
int64_t tell() {
return off;
}
};
// Like fseeko. If p is out of range then close file.
void InputArchive::seek(int64_t p, int whence) {
if (!isopen()) return;
// Compute new offset
if (whence==SEEK_SET) off=p;
else if (whence==SEEK_CUR) off+=p;
else if (whence==SEEK_END) {
off=p;
for (unsigned i=0; i<sz.size(); ++i) off+=sz[i];
}
// Optimization for single file to avoid close and reopen
if (sz.size()==1) {
fseeko(fp, off, SEEK_SET);
return;
}
// Seek across multiple files
assert(sz.size()>1);
int64_t sum=0;
unsigned i;
for (i=0;; ++i) {
sum+=sz[i];
if (sum>off || i+1>=sz.size()) break;
}
const string next=subpart(fn, i+1);
fclose(fp);
fp=fopen(next.c_str(), RB);
if (fp==FPNULL) ioerr(next.c_str());
fseeko(fp, off-sum, SEEK_END);
}
// Open for input. Decrypt with password and using the salt in the
// first 32 bytes. If filename has wildcards then assume multi-part
// and read their concatenation.
InputArchive::InputArchive(const char* filename, const char* password):
off(0), fn(filename) {
assert(filename);
// Get file sizes
const string part0=subpart(filename, 0);
for (unsigned i=1; ; ++i) {
const string parti=subpart(filename, i);
if (i>1 && parti==part0) break;
fp=fopen(parti.c_str(), RB);
if (fp==FPNULL) break;
fseeko(fp, 0, SEEK_END);
sz.push_back(ftello(fp));
fclose(fp);
}
// Open first part
const string part1=subpart(filename, 1);
fp=fopen(part1.c_str(), RB);
if (!isopen()) ioerr(part1.c_str());
assert(fp!=FPNULL);
// Get encryption salt
if (password) {
char salt[32], key[32];
if (fread(salt, 1, 32, fp)!=32) error("cannot read salt");
libzpaq::stretchKey(key, password, salt);
aes=new libzpaq::AES_CTR(key, 32, salt);
off=32;
}
}
// An Archive is a file supporting encryption
class OutputArchive: public ArchiveBase, public libzpaq::Writer {
int64_t off; // preceding multi-part bytes
unsigned ptr; // write pointer in buf: 0 <= ptr <= BUFSIZE
enum {BUFSIZE=1<<16};
char buf[BUFSIZE]; // I/O buffer
public:
// Open. If password then encrypt output.
OutputArchive(const char* filename, const char* password=0,
const char* salt_=0, int64_t off_=0);
// Write pending output
void flush() {
assert(fp!=FPNULL);
if (aes) aes->encrypt(buf, ptr, ftello(fp)+off);
fwrite(buf, 1, ptr, fp);
ptr=0;
}
// Position the next read or write offset to p.
void seek(int64_t p, int whence) {
if (fp!=FPNULL) {
flush();
fseeko(fp, p, whence);
}
else if (whence==SEEK_SET) off=p;
else off+=p; // assume at end
}
// Return current file offset.
int64_t tell() const {
if (fp!=FPNULL) return ftello(fp)+ptr;
else return off;
}
// Write one byte
void put(int c) {
if (fp==FPNULL) ++off;
else {
if (ptr>=BUFSIZE) flush();
buf[ptr++]=c;
}
}
// Write buf[0..n-1]
void write(const char* ibuf, int len) {
if (fp==FPNULL) off+=len;
else while (len-->0) put(*ibuf++);
}
// Flush output and close
void close() {
if (fp!=FPNULL) {
flush();
fclose(fp);
}
fp=FPNULL;
}
};
// Create or update an existing archive or part. If filename is ""
// then keep track of position in off but do not write to disk. Otherwise
// open and encrypt with password if not 0. If the file exists then
// read the salt from the first 32 bytes and off_ must be 0. Otherwise
// encrypt assuming off_ previous bytes, of which the first 32 are salt_.
// If off_ is 0 then write salt_ to the first 32 bytes.
OutputArchive::OutputArchive(const char* filename, const char* password,
const char* salt_, int64_t off_): off(off_), ptr(0) {
assert(filename);
if (!*filename) return;
// Open existing file
char salt[32]={0};
fp=fopen(filename, RBPLUS);
if (isopen()) {
if (off!=0) error("file exists and off > 0");
if (password) {
if (fread(salt, 1, 32, fp)!=32) error("cannot read salt");
if (salt_ && memcmp(salt, salt_, 32)) error("salt mismatch");
}
seek(0, SEEK_END);
}
// Create new file
else {
fp=fopen(filename, WB);
if (!isopen()) ioerr(filename);
if (password) {
if (!salt_) error("salt not specified");
memcpy(salt, salt_, 32);
if (off==0 && fwrite(salt, 1, 32, fp)!=32) ioerr(filename);
}
}
// Set up encryption
if (password) {
char key[32];
libzpaq::stretchKey(key, password, salt);
aes=new libzpaq::AES_CTR(key, 32, salt);
}
}
///////////////////////// System info /////////////////////////////////
// Guess number of cores. In 32 bit mode, max is 2.
int numberOfProcessors() {
int rc=0; // result
#ifdef unix
#ifdef BSD // BSD or Mac OS/X
size_t rclen=sizeof(rc);
int mib[2]={CTL_HW, HW_NCPU};
if (sysctl(mib, 2, &rc, &rclen, 0, 0)!=0)
perror("sysctl");
#else // Linux
// Count lines of the form "processor\t: %d\n" in /proc/cpuinfo
// where %d is 0, 1, 2,..., rc-1
FILE *in=fopen("/proc/cpuinfo", "r");
if (!in) return 1;
std::string s;
int c;
while ((c=getc(in))!=EOF) {
if (c>='A' && c<='Z') c+='a'-'A'; // convert to lowercase
if (c>' ') s+=c; // remove white space
if (c=='\n') { // end of line?
if (s.size()>10 && s.substr(0, 10)=="processor:") {
c=atoi(s.c_str()+10);
if (c==rc) ++rc;
}
s="";
}
}
fclose(in);
#endif
#else
// In Windows return %NUMBER_OF_PROCESSORS%
const char* p=getenv("NUMBER_OF_PROCESSORS");
if (p) rc=atoi(p);
#endif
if (rc<1) rc=1;
if (sizeof(char*)==4 && rc>2) rc=2;
return rc;
}
////////////////////////////// misc ///////////////////////////////////
// For libzpaq output to a string less than 64K chars
struct StringWriter: public libzpaq::Writer {
string s;
void put(int c) {
if (s.size()>=65535) error("string too long");
s+=char(c);
}
};
// In Windows convert upper case to lower case.
inline int tolowerW(int c) {
#ifndef unix
if (c>='A' && c<='Z') return c-'A'+'a';
#endif
return c;
}
// Return true if strings a == b or a+"/" is a prefix of b
// or a ends in "/" and is a prefix of b.
// Match ? in a to any char in b.
// Match * in a to any string in b.
// In Windows, not case sensitive.
bool ispath(const char* a, const char* b) {
for (; *a; ++a, ++b) {
const int ca=tolowerW(*a);
const int cb=tolowerW(*b);
if (ca=='*') {
while (true) {
if (ispath(a+1, b)) return true;
if (!*b) return false;
++b;
}
}
else if (ca=='?') {
if (*b==0) return false;
}
else if (ca==cb && ca=='/' && a[1]==0)
return true;
else if (ca!=cb)
return false;
}
return *b==0 || *b=='/';
}
// Read 4 byte little-endian int and advance s
unsigned btoi(const char* &s) {
s+=4;
return (s[-4]&255)|((s[-3]&255)<<8)|((s[-2]&255)<<16)|((s[-1]&255)<<24);
}
// Read 8 byte little-endian int and advance s
int64_t btol(const char* &s) {
uint64_t r=btoi(s);
return r+(uint64_t(btoi(s))<<32);
}
/////////////////////////////// Jidac /////////////////////////////////
// A Jidac object represents an archive contents: a list of file
// fragments with hash, size, and archive offset, and a list of
// files with date, attributes, and list of fragment pointers.
// Methods add to, extract from, compare, and list the archive.
// enum for version
static const int64_t DEFAULT_VERSION=99999999999999LL; // unless -until
// fragment hash table entry
struct HT {
unsigned char sha1[20]; // fragment hash
int usize; // uncompressed size, -1 if unknown, -2 if not init
HT(const char* s=0, int u=-2) {
if (s) memcpy(sha1, s, 20);
else memset(sha1, 0, 20);
usize=u;
}
};
// filename entry
struct DT {
int64_t date; // decimal YYYYMMDDHHMMSS (UT) or 0 if deleted
int64_t size; // size or -1 if unknown
int64_t attr; // first 8 attribute bytes
int64_t data; // sort key or frags written. -1 = do not write
vector<unsigned> ptr; // fragment list
DT(): date(0), size(0), attr(0), data(0) {}
};
typedef map<string, DT> DTMap;
// list of blocks to extract
struct Block {
int64_t offset; // location in archive
int64_t usize; // uncompressed size, -1 if unknown (streaming)
int64_t bsize; // compressed size
vector<DTMap::iterator> files; // list of files pointing here
unsigned start; // index in ht of first fragment
unsigned size; // number of fragments to decompress
unsigned frags; // number of fragments in block
unsigned extracted; // number of fragments decompressed OK
enum {READY, WORKING, GOOD, BAD} state;
Block(unsigned s, int64_t o): offset(o), usize(-1), bsize(0), start(s),
size(0), frags(0), extracted(0), state(READY) {}
};
// Version info
struct VER {
int64_t date; // Date of C block, 0 if streaming
int64_t lastdate; // Latest date of any block
int64_t offset; // start of transaction C block
int64_t data_offset; // start of first D block
int64_t csize; // size of compressed data, -1 = no index
int updates; // file updates
int deletes; // file deletions
unsigned firstFragment;// first fragment ID
VER() {memset(this, 0, sizeof(*this));}