/par2cmdline-0.4-tbb-20100203/par2creatorsourcefile.cpp
C++ | 411 lines | 267 code | 62 blank | 82 comment | 39 complexity | 9d422ef0ef7dc49caa07c2571c431f07 MD5 | raw file
Possible License(s): GPL-2.0
//  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
//  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
//
//  Copyright (c) 2003 Peter Brian Clements
//
//  par2cmdline is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  par2cmdline is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
//  Modifications for concurrent processing, Unicode support, and hierarchical
//  directory support are Copyright (c) 2007-2008 Vincent Tan.
//  Search for "#if WANT_CONCURRENT" for concurrent code.
//  Concurrent processing utilises Intel Thread Building Blocks 2.0,
//  Copyright (c) 2007 Intel Corp.
25
26#include "par2cmdline.h"
27
28/* **TMP**
29 extern void
30 dump_utf8_as_utf16(const string& name);
31// **TMP** */
32
33#ifdef _MSC_VER
34#ifdef _DEBUG
35#undef THIS_FILE
36static char THIS_FILE[]=__FILE__;
37#define new DEBUG_NEW
38#endif
39#endif
40
41Par2CreatorSourceFile::Par2CreatorSourceFile(void)
42{
43 descriptionpacket = 0;
44 verificationpacket = 0;
45 diskfile = 0;
46 blockcount = 0;
47 //diskfilename;
48 //parfilename;
49 contextfull = 0;
50}
51
52Par2CreatorSourceFile::~Par2CreatorSourceFile(void)
53{
54 delete descriptionpacket;
55 delete verificationpacket;
56 delete diskfile;
57 delete contextfull;
58}
59
60// Open the source file, compute the MD5 Hash of the whole file and the first
61// 16k of the file, and then compute the FileId and store the results
62// in a file description packet and a file verification packet.
63
64bool Par2CreatorSourceFile::Open(CommandLine::NoiseLevel noiselevel, const CommandLine::ExtraFile &extrafile, u64 blocksize, bool deferhashcomputation
65#if WANT_CONCURRENT_PAR2_FILE_OPENING
66 , tbb::mutex& cout_mutex, tbb::tick_count& last_cout
67#endif
68 )
69{
70 // Get the filename and filesize
71 diskfilename = extrafile.FileName();
72 filesize = extrafile.FileSize();
73
74 // Work out how many blocks the file will be sliced into
75 blockcount = (u32)((filesize + blocksize-1) / blocksize);
76
77 // Determine what filename to record in the PAR2 files
78 CommandLine* cl = CommandLine::get();
79 if (!cl) {
80#if WANT_CONCURRENT_PAR2_FILE_OPENING
81 tbb::mutex::scoped_lock l(cout_mutex);
82#endif
83 cerr << "error: missing cmd line - this should not happen!" << endl;
84 return false; // something is wrong
85 }
86
87 const string& bd = cl->GetBaseDirectory();
88 if (bd.empty()) {
89 string::size_type where;
90 if (string::npos != (where = diskfilename.find_last_of('\\')) ||
91 string::npos != (where = diskfilename.find_last_of('/')))
92 {
93 parfilename = diskfilename.substr(where+1);
94 }
95 else
96 {
97 parfilename = diskfilename;
98 }
99 } else {
100 string s(DiskFile::GetCanonicalPathname(diskfilename));
101#if defined(WIN32) || defined(__APPLE_CC__)
102 if (0 != stricmp(s.substr(0, bd.length()).c_str(), bd.c_str()))
103#else
104 if (s.substr(0, bd.length()) != bd)
105#endif
106 {
107#if WANT_CONCURRENT_PAR2_FILE_OPENING
108 tbb::mutex::scoped_lock l(cout_mutex);
109#endif
110 cerr << "error: file '" << s << "' is not in the base directory '" << bd << "'" << endl;
111 return false;
112 }
113 s.erase(0, bd.length()); // remove base_dir -> sub-path
114 if (s.empty()) {
115#if WANT_CONCURRENT_PAR2_FILE_OPENING
116 tbb::mutex::scoped_lock l(cout_mutex);
117#endif
118 cerr << "error: file name missing after removing base directory (" << bd << ") from path (" <<
119 DiskFile::GetCanonicalPathname(diskfilename) << ")" << endl;
120 return false; // a file name is needed
121 }
122 parfilename = s;
123 }
124//printf("about to store this in packet:\n"); dump_utf8_as_utf16(parfilename);
125 // Create the Description and Verification packets
126 descriptionpacket = new DescriptionPacket;
127 descriptionpacket->Create(parfilename, filesize);
128
129 verificationpacket = new VerificationPacket;
130 verificationpacket->Create(blockcount);
131
132 // Create the diskfile object
133 diskfile = new DiskFile;
134 diskfile->SetBlockCount(blockcount);
135
136 // Open the source file
137 if (!diskfile->Open(diskfilename, filesize))
138 return false;
139
140 // Do we want to defer the computation of the full file hash, and
141 // the block crc and hashes. This is only permitted if there
142 // is sufficient memory available to create all recovery blocks
143 // in one pass of the source files (i.e. chunksize == blocksize)
144 if (deferhashcomputation)
145 {
146 // Initialise a buffer to read the first 16k of the source file
147 size_t buffersize = 16 * 1024;
148 if (buffersize > filesize)
149 buffersize = (size_t)filesize;
150 char *buffer = new char[buffersize];
151
152 // Read the data from the file
153 if (!cl->GetCreateDummyParFiles() && !diskfile->Read(0, buffer, buffersize))
154 {
155 diskfile->Close();
156 delete [] buffer;
157 return false;
158 }
159
160 // Compute the hash of the data read from the file
161 MD5Context context;
162 context.Update(buffer, buffersize);
163 delete [] buffer;
164 MD5Hash hash;
165 context.Final(hash);
166
167 // Store the hash in the descriptionpacket and compute the file id
168 descriptionpacket->Hash16k(hash);
169
170 // Compute the fileid and store it in the verification packet.
171 descriptionpacket->ComputeFileId();
172 verificationpacket->FileId(descriptionpacket->FileId());
173
174 // Allocate an MD5 context for computing the file hash
175 // during the recovery data generation phase
176 contextfull = new MD5Context;
177 }
178 else
179 {
180 // Initialise a buffer to read the source file
181 size_t buffersize = 1024*1024;
182 if (buffersize > min(blocksize,filesize))
183 buffersize = (size_t)min(blocksize,filesize);
184 char *buffer = new char[buffersize];
185
186 // Get ready to start reading source file to compute the hashes and crcs
187 u64 offset = 0;
188 u32 blocknumber = 0;
189 u64 need = blocksize;
190
191 MD5Context filecontext;
192 MD5Context blockcontext;
193 u32 blockcrc = 0;
194
195 // Whilst we have not reached the end of the file
196 for (size_t want; offset < filesize; offset += want)
197 {
198 // Work out how much we can read
199 want = (size_t)min(filesize-offset, (u64)buffersize);
200
201 // Read some data from the file into the buffer
202 if (!cl->GetCreateDummyParFiles() && !diskfile->Read(offset, buffer, want))
203 {
204 diskfile->Close();
205 delete [] buffer;
206 return false;
207 }
208
209 // If the new data passes the 16k boundary, compute the 16k hash for the file
210 if (offset < 16384 && offset + want >= 16384)
211 {
212 filecontext.Update(buffer, (size_t)(16384-offset));
213
214 MD5Context temp = filecontext;
215 MD5Hash hash;
216 temp.Final(hash);
217
218 // Store the 16k hash in the file description packet
219 descriptionpacket->Hash16k(hash);
220
221 if (offset + want > 16384)
222 {
223 filecontext.Update(&buffer[16384-offset], (size_t)(offset+want)-16384);
224 }
225 }
226 else
227 {
228 filecontext.Update(buffer, want);
229 }
230
231 // Get ready to update block hashes and crcs
232 u32 used = 0;
233
234 // Whilst we have not used all of the data we just read
235 while (used < want)
236 {
237 // How much of it can we use for the current block
238 u32 use = (u32)min(need, (u64)(want-used));
239
240 blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, use, &buffer[used]);
241 blockcontext.Update(&buffer[used], use);
242
243 used += use;
244 need -= use;
245
246 // Have we finished the current block
247 if (need == 0)
248 {
249 MD5Hash blockhash;
250 blockcontext.Final(blockhash);
251
252 // Store the block hash and block crc in the file verification packet.
253 verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
254
255 blocknumber++;
256
257 // More blocks
258 if (blocknumber < blockcount)
259 {
260 need = blocksize;
261
262 blockcontext.Reset();
263 blockcrc = 0;
264 }
265 }
266 }
267
268 if (noiselevel > CommandLine::nlQuiet)
269 {
270#if WANT_CONCURRENT_PAR2_FILE_OPENING
271 tbb::tick_count now = tbb::tick_count::now();
272 if ((now - last_cout).seconds() >= 0.1) { // only update every 0.1 seconds
273#endif
274 // Display progress
275 u32 oldfraction = (u32)(1000 * offset / filesize);
276 u32 newfraction = (u32)(1000 * (offset + want) / filesize);
277 if (oldfraction != newfraction) {
278#if WANT_CONCURRENT_PAR2_FILE_OPENING
279 last_cout = now;
280 tbb::mutex::scoped_lock l(cout_mutex);
281#endif
282 cout << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
283 }
284#if WANT_CONCURRENT_PAR2_FILE_OPENING
285 }
286#endif
287 }
288 }
289
290 // Did we finish the last block
291 if (need > 0)
292 {
293 blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, (size_t)need);
294 blockcontext.Update((size_t)need);
295
296 MD5Hash blockhash;
297 blockcontext.Final(blockhash);
298
299 // Store the block hash and block crc in the file verification packet.
300 verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
301
302 blocknumber++;
303
304 need = 0;
305 }
306
307 // Finish computing the file hash.
308 MD5Hash filehash;
309 filecontext.Final(filehash);
310
311 // Store the file hash in the file description packet.
312 descriptionpacket->HashFull(filehash);
313
314 // Did we compute the 16k hash.
315 if (offset < 16384)
316 {
317 // Store the 16k hash in the file description packet.
318 descriptionpacket->Hash16k(filehash);
319 }
320
321 delete [] buffer;
322
323 // Compute the fileid and store it in the verification packet.
324 descriptionpacket->ComputeFileId();
325 verificationpacket->FileId(descriptionpacket->FileId());
326 }
327
328 return true;
329}
330
331void Par2CreatorSourceFile::Close(void)
332{
333 diskfile->Close();
334}
335
336
337void Par2CreatorSourceFile::RecordCriticalPackets(list<CriticalPacket*> &criticalpackets)
338{
339 // Add the file description packet and file verification packet to
340 // the critical packet list.
341 criticalpackets.push_back(descriptionpacket);
342 criticalpackets.push_back(verificationpacket);
343}
344
345bool Par2CreatorSourceFile::CompareLess(const Par2CreatorSourceFile* const &left, const Par2CreatorSourceFile* const &right)
346{
347 // Sort source files based on fileid
348 return left->descriptionpacket->FileId() < right->descriptionpacket->FileId();
349}
350
351const MD5Hash& Par2CreatorSourceFile::FileId(void) const
352{
353 // Get the file id hash
354 return descriptionpacket->FileId();
355}
356
357void Par2CreatorSourceFile::InitialiseSourceBlocks(vector<DataBlock>::iterator &sourceblock, u64 blocksize)
358{
359 for (u32 blocknum=0; blocknum<blockcount; blocknum++)
360 {
361 // Configure each source block to an appropriate offset and length within the source file.
362 sourceblock->SetLocation(diskfile, // file
363 blocknum * blocksize); // offset
364 sourceblock->SetLength(min(blocksize, filesize - (u64)blocknum * blocksize)); // length
365 sourceblock++;
366 }
367}
368
369void Par2CreatorSourceFile::UpdateHashes(u32 blocknumber, const void *buffer, size_t length)
370{
371 // Compute the crc and hash of the data
372 u32 blockcrc = ~0 ^ CRCUpdateBlock(~0, length, buffer);
373 MD5Context blockcontext;
374 blockcontext.Update(buffer, length);
375 MD5Hash blockhash;
376 blockcontext.Final(blockhash);
377
378 // Store the results in the verification packet
379 verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
380
381
382 // Update the full file hash, but don't go beyond the end of the file
383#if 1 // 20070926 - bugfix for bad MD5 hashes when input file is >= 4GB in size
384 const u64 len = filesize - (u64) blocknumber * (u64) length;
385 if ((u64) length > len)
386 length = (size_t) len; // cast is safe because len must be <= 0xFFFFFFFF
387#else
388 if (length > filesize - blocknumber * length)
389 {
390 length = (size_t)(filesize - blocknumber * (u64)length);
391 }
392#endif
393 assert(contextfull != 0);
394
395 contextfull->Update(buffer, length);
396}
397
398void Par2CreatorSourceFile::FinishHashes(void)
399{
400 assert(contextfull != 0);
401
402 // Finish computation of the full file hash
403 MD5Hash hash;
404 contextfull->Final(hash);
405
406 // Store it in the description packet
407 descriptionpacket->HashFull(hash);
408#if 0 // 20070926 - used to debug above bug:
409 cout << "final MD5 hash for file '" << diskfilename << "' is " << hash << endl;
410#endif
411}