PageRenderTime 50ms CodeModel.GetById 29ms app.highlight 17ms RepoModel.GetById 0ms app.codeStats 0ms

/par2cmdline-0.4-tbb-20100203/par2creatorsourcefile.cpp

#
C++ | 411 lines | 267 code | 62 blank | 82 comment | 39 complexity | 9d422ef0ef7dc49caa07c2571c431f07 MD5 | raw file
Possible License(s): GPL-2.0
  1//  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
  2//  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
  3//
  4//  Copyright (c) 2003 Peter Brian Clements
  5//
  6//  par2cmdline is free software; you can redistribute it and/or modify
  7//  it under the terms of the GNU General Public License as published by
  8//  the Free Software Foundation; either version 2 of the License, or
  9//  (at your option) any later version.
 10//
 11//  par2cmdline is distributed in the hope that it will be useful,
 12//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 13//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14//  GNU General Public License for more details.
 15//
 16//  You should have received a copy of the GNU General Public License
 17//  along with this program; if not, write to the Free Software
 18//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 19//
  20//  Modifications for concurrent processing, Unicode support, and hierarchical
 21//  directory support are Copyright (c) 2007-2008 Vincent Tan.
 22//  Search for "#if WANT_CONCURRENT" for concurrent code.
 23//  Concurrent processing utilises Intel Thread Building Blocks 2.0,
 24//  Copyright (c) 2007 Intel Corp.
 25
 26#include "par2cmdline.h"
 27
 28/* **TMP**
 29	extern void
 30	dump_utf8_as_utf16(const string& name);
 31// **TMP** */
 32
 33#ifdef _MSC_VER
 34#ifdef _DEBUG
 35#undef THIS_FILE
 36static char THIS_FILE[]=__FILE__;
 37#define new DEBUG_NEW
 38#endif
 39#endif
 40
 41Par2CreatorSourceFile::Par2CreatorSourceFile(void)
 42{
 43  descriptionpacket = 0;
 44  verificationpacket = 0;
 45  diskfile = 0;
 46  blockcount = 0;
 47  //diskfilename;
 48  //parfilename;
 49  contextfull = 0;
 50}
 51
 52Par2CreatorSourceFile::~Par2CreatorSourceFile(void)
 53{
 54  delete descriptionpacket;
 55  delete verificationpacket;
 56  delete diskfile;
 57  delete contextfull;
 58}
 59
 60// Open the source file, compute the MD5 Hash of the whole file and the first
 61// 16k of the file, and then compute the FileId and store the results
 62// in a file description packet and a file verification packet.
 63
 64bool Par2CreatorSourceFile::Open(CommandLine::NoiseLevel noiselevel, const CommandLine::ExtraFile &extrafile, u64 blocksize, bool deferhashcomputation
 65#if WANT_CONCURRENT_PAR2_FILE_OPENING
 66  , tbb::mutex& cout_mutex, tbb::tick_count& last_cout
 67#endif
 68  )
 69{
 70  // Get the filename and filesize
 71  diskfilename = extrafile.FileName();
 72  filesize = extrafile.FileSize();
 73
 74  // Work out how many blocks the file will be sliced into
 75  blockcount = (u32)((filesize + blocksize-1) / blocksize);
 76
 77  // Determine what filename to record in the PAR2 files
 78  CommandLine* cl = CommandLine::get();
 79  if (!cl) {
 80#if WANT_CONCURRENT_PAR2_FILE_OPENING
 81    tbb::mutex::scoped_lock l(cout_mutex);
 82#endif
 83    cerr << "error: missing cmd line - this should not happen!" << endl;
 84    return false; // something is wrong
 85  }
 86
 87  const string& bd = cl->GetBaseDirectory();
 88  if (bd.empty()) {
 89    string::size_type where;
 90    if (string::npos != (where = diskfilename.find_last_of('\\')) ||
 91        string::npos != (where = diskfilename.find_last_of('/')))
 92    {
 93      parfilename = diskfilename.substr(where+1);
 94    }
 95    else
 96    {
 97      parfilename = diskfilename;
 98    }
 99  } else {
100    string s(DiskFile::GetCanonicalPathname(diskfilename));
101#if defined(WIN32) || defined(__APPLE_CC__)
102    if (0 != stricmp(s.substr(0, bd.length()).c_str(), bd.c_str()))
103#else
104    if (s.substr(0, bd.length()) != bd)
105#endif
106    {
107#if WANT_CONCURRENT_PAR2_FILE_OPENING
108      tbb::mutex::scoped_lock l(cout_mutex);
109#endif
110      cerr << "error: file '" << s << "' is not in the base directory '" << bd << "'" << endl;
111      return false;
112    }
113    s.erase(0, bd.length()); // remove base_dir -> sub-path
114    if (s.empty()) {
115#if WANT_CONCURRENT_PAR2_FILE_OPENING
116      tbb::mutex::scoped_lock l(cout_mutex);
117#endif
118      cerr << "error: file name missing after removing base directory (" << bd << ") from path (" <<
119              DiskFile::GetCanonicalPathname(diskfilename) << ")" << endl;
120      return false; // a file name is needed
121    }
122    parfilename = s;
123  }
124//printf("about to store this in packet:\n"); dump_utf8_as_utf16(parfilename);
125  // Create the Description and Verification packets
126  descriptionpacket = new DescriptionPacket;
127  descriptionpacket->Create(parfilename, filesize);
128
129  verificationpacket = new VerificationPacket;
130  verificationpacket->Create(blockcount);
131
132  // Create the diskfile object
133  diskfile  = new DiskFile;
134  diskfile->SetBlockCount(blockcount);
135
136  // Open the source file
137  if (!diskfile->Open(diskfilename, filesize))
138    return false;
139
140  // Do we want to defer the computation of the full file hash, and 
141  // the block crc and hashes. This is only permitted if there
142  // is sufficient memory available to create all recovery blocks
143  // in one pass of the source files (i.e. chunksize == blocksize)
144  if (deferhashcomputation)
145  {
146    // Initialise a buffer to read the first 16k of the source file
147    size_t buffersize = 16 * 1024;
148    if (buffersize > filesize)
149      buffersize = (size_t)filesize;
150    char *buffer = new char[buffersize];
151
152    // Read the data from the file
153    if (!cl->GetCreateDummyParFiles() && !diskfile->Read(0, buffer, buffersize))
154    {
155      diskfile->Close();
156      delete [] buffer;
157      return false;
158    }
159
160    // Compute the hash of the data read from the file
161    MD5Context context;
162    context.Update(buffer, buffersize);
163    delete [] buffer;
164    MD5Hash hash;
165    context.Final(hash);
166
167    // Store the hash in the descriptionpacket and compute the file id
168    descriptionpacket->Hash16k(hash);
169
170    // Compute the fileid and store it in the verification packet.
171    descriptionpacket->ComputeFileId();
172    verificationpacket->FileId(descriptionpacket->FileId());
173
174    // Allocate an MD5 context for computing the file hash
175    // during the recovery data generation phase
176    contextfull = new MD5Context;
177  }
178  else
179  {
180    // Initialise a buffer to read the source file
181    size_t buffersize = 1024*1024;
182    if (buffersize > min(blocksize,filesize))
183      buffersize = (size_t)min(blocksize,filesize);
184    char *buffer = new char[buffersize];
185
186    // Get ready to start reading source file to compute the hashes and crcs
187    u64 offset = 0;
188    u32 blocknumber = 0;
189    u64 need = blocksize;
190
191    MD5Context filecontext;
192    MD5Context blockcontext;
193    u32        blockcrc = 0;
194
195    // Whilst we have not reached the end of the file
196    for (size_t want; offset < filesize; offset += want)
197    {
198      // Work out how much we can read
199      want = (size_t)min(filesize-offset, (u64)buffersize);
200
201      // Read some data from the file into the buffer
202      if (!cl->GetCreateDummyParFiles() && !diskfile->Read(offset, buffer, want))
203      {
204        diskfile->Close();
205        delete [] buffer;
206        return false;
207      }
208
209      // If the new data passes the 16k boundary, compute the 16k hash for the file
210      if (offset < 16384 && offset + want >= 16384)
211      {
212        filecontext.Update(buffer, (size_t)(16384-offset));
213
214        MD5Context temp = filecontext;
215        MD5Hash hash;
216        temp.Final(hash);
217
218        // Store the 16k hash in the file description packet
219        descriptionpacket->Hash16k(hash);
220
221        if (offset + want > 16384)
222        {
223          filecontext.Update(&buffer[16384-offset], (size_t)(offset+want)-16384);
224        }
225      }
226      else
227      {
228        filecontext.Update(buffer, want);
229      }
230
231      // Get ready to update block hashes and crcs
232      u32 used = 0;
233
234      // Whilst we have not used all of the data we just read
235      while (used < want)
236      {
237        // How much of it can we use for the current block
238        u32 use = (u32)min(need, (u64)(want-used));
239
240        blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, use, &buffer[used]);
241        blockcontext.Update(&buffer[used], use);
242
243        used += use;
244        need -= use;
245
246        // Have we finished the current block
247        if (need == 0)
248        {
249          MD5Hash blockhash;
250          blockcontext.Final(blockhash);
251
252          // Store the block hash and block crc in the file verification packet.
253          verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
254
255          blocknumber++;
256
257          // More blocks
258          if (blocknumber < blockcount)
259          {
260            need = blocksize;
261
262            blockcontext.Reset();
263            blockcrc = 0;
264          }
265        }
266      }
267
268      if (noiselevel > CommandLine::nlQuiet)
269      {
270#if WANT_CONCURRENT_PAR2_FILE_OPENING
271        tbb::tick_count now = tbb::tick_count::now();
272        if ((now - last_cout).seconds() >= 0.1) { // only update every 0.1 seconds
273#endif
274          // Display progress
275          u32 oldfraction = (u32)(1000 * offset / filesize);
276          u32 newfraction = (u32)(1000 * (offset + want) / filesize);
277          if (oldfraction != newfraction) {
278#if WANT_CONCURRENT_PAR2_FILE_OPENING
279            last_cout = now;
280            tbb::mutex::scoped_lock l(cout_mutex);
281#endif
282            cout << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
283          }
284#if WANT_CONCURRENT_PAR2_FILE_OPENING
285        }
286#endif
287      }
288    }
289
290    // Did we finish the last block
291    if (need > 0)
292    {
293      blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, (size_t)need);
294      blockcontext.Update((size_t)need);
295
296      MD5Hash blockhash;
297      blockcontext.Final(blockhash);
298
299      // Store the block hash and block crc in the file verification packet.
300      verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
301
302      blocknumber++;
303
304      need = 0;
305    }
306
307    // Finish computing the file hash.
308    MD5Hash filehash;
309    filecontext.Final(filehash);
310
311    // Store the file hash in the file description packet.
312    descriptionpacket->HashFull(filehash);
313
314    // Did we compute the 16k hash.
315    if (offset < 16384)
316    {
317      // Store the 16k hash in the file description packet.
318      descriptionpacket->Hash16k(filehash);
319    }
320
321    delete [] buffer;
322
323    // Compute the fileid and store it in the verification packet.
324    descriptionpacket->ComputeFileId();
325    verificationpacket->FileId(descriptionpacket->FileId());
326  }
327
328  return true;
329}
330
331void Par2CreatorSourceFile::Close(void)
332{
333  diskfile->Close();
334}
335
336
337void Par2CreatorSourceFile::RecordCriticalPackets(list<CriticalPacket*> &criticalpackets)
338{
339  // Add the file description packet and file verification packet to
340  // the critical packet list.
341  criticalpackets.push_back(descriptionpacket);
342  criticalpackets.push_back(verificationpacket);
343}
344
345bool Par2CreatorSourceFile::CompareLess(const Par2CreatorSourceFile* const &left, const Par2CreatorSourceFile* const &right)
346{
347  // Sort source files based on fileid
348  return left->descriptionpacket->FileId() < right->descriptionpacket->FileId();
349}
350
351const MD5Hash& Par2CreatorSourceFile::FileId(void) const
352{
353  // Get the file id hash
354  return descriptionpacket->FileId();
355}
356
357void Par2CreatorSourceFile::InitialiseSourceBlocks(vector<DataBlock>::iterator &sourceblock, u64 blocksize)
358{
359  for (u32 blocknum=0; blocknum<blockcount; blocknum++)
360  {
361    // Configure each source block to an appropriate offset and length within the source file.
362    sourceblock->SetLocation(diskfile,                                       // file
363                             blocknum * blocksize);                          // offset
364    sourceblock->SetLength(min(blocksize, filesize - (u64)blocknum * blocksize)); // length
365    sourceblock++;
366  }
367}
368
369void Par2CreatorSourceFile::UpdateHashes(u32 blocknumber, const void *buffer, size_t length)
370{
371  // Compute the crc and hash of the data
372  u32 blockcrc = ~0 ^ CRCUpdateBlock(~0, length, buffer);
373  MD5Context blockcontext;
374  blockcontext.Update(buffer, length);
375  MD5Hash blockhash;
376  blockcontext.Final(blockhash);
377
378  // Store the results in the verification packet
379  verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
380
381
382  // Update the full file hash, but don't go beyond the end of the file
383#if 1 // 20070926 - bugfix for bad MD5 hashes when input file is >= 4GB in size
384  const u64 len = filesize - (u64) blocknumber * (u64) length;
385  if ((u64) length > len)
386    length = (size_t) len; // cast is safe because len must be <= 0xFFFFFFFF
387#else
388  if (length > filesize - blocknumber * length)
389  {
390    length = (size_t)(filesize - blocknumber * (u64)length);
391  }
392#endif
393  assert(contextfull != 0);
394
395  contextfull->Update(buffer, length);
396}
397
398void Par2CreatorSourceFile::FinishHashes(void)
399{
400  assert(contextfull != 0);
401
402  // Finish computation of the full file hash
403  MD5Hash hash;
404  contextfull->Final(hash);
405
406  // Store it in the description packet
407  descriptionpacket->HashFull(hash);
408#if 0 // 20070926 - used to debug above bug:
409  cout << "final MD5 hash for file '" << diskfilename << "' is " << hash << endl;
410#endif
411}