PageRenderTime 57ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/par2cmdline-0.4-tbb-20100203/par2creatorsourcefile.cpp

#
C++ | 411 lines | 267 code | 62 blank | 82 comment | 39 complexity | 9d422ef0ef7dc49caa07c2571c431f07 MD5 | raw file
Possible License(s): GPL-2.0
  1. // This file is part of par2cmdline (a PAR 2.0 compatible file verification and
  2. // repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
  3. //
  4. // Copyright (c) 2003 Peter Brian Clements
  5. //
  6. // par2cmdline is free software; you can redistribute it and/or modify
  7. // it under the terms of the GNU General Public License as published by
  8. // the Free Software Foundation; either version 2 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // par2cmdline is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU General Public License
  17. // along with this program; if not, write to the Free Software
  18. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. //
  20. // Modifications for concurrent processing, Unicode support, and hierarchical
  21. // directory support are Copyright (c) 2007-2008 Vincent Tan.
  22. // Search for "#if WANT_CONCURRENT" for concurrent code.
  23. // Concurrent processing utilises Intel Thread Building Blocks 2.0,
  24. // Copyright (c) 2007 Intel Corp.
  25. #include "par2cmdline.h"
  26. /* **TMP**
  27. extern void
  28. dump_utf8_as_utf16(const string& name);
  29. // **TMP** */
  // Debug-build bookkeeping, active only for Microsoft compilers with _DEBUG
  // defined: record this translation unit's file name in THIS_FILE and route
  // every subsequent use of `new` through DEBUG_NEW.
  #if defined(_MSC_VER) && defined(_DEBUG)
  #undef THIS_FILE
  static char THIS_FILE[]=__FILE__;
  #define new DEBUG_NEW
  #endif
  37. Par2CreatorSourceFile::Par2CreatorSourceFile(void)
  38. {
  39. descriptionpacket = 0;
  40. verificationpacket = 0;
  41. diskfile = 0;
  42. blockcount = 0;
  43. //diskfilename;
  44. //parfilename;
  45. contextfull = 0;
  46. }
  47. Par2CreatorSourceFile::~Par2CreatorSourceFile(void)
  48. {
  49. delete descriptionpacket;
  50. delete verificationpacket;
  51. delete diskfile;
  52. delete contextfull;
  53. }
  54. // Open the source file, compute the MD5 Hash of the whole file and the first
  55. // 16k of the file, and then compute the FileId and store the results
  56. // in a file description packet and a file verification packet.
  57. bool Par2CreatorSourceFile::Open(CommandLine::NoiseLevel noiselevel, const CommandLine::ExtraFile &extrafile, u64 blocksize, bool deferhashcomputation
  58. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  59. , tbb::mutex& cout_mutex, tbb::tick_count& last_cout
  60. #endif
  61. )
  62. {
  63. // Get the filename and filesize
  64. diskfilename = extrafile.FileName();
  65. filesize = extrafile.FileSize();
  66. // Work out how many blocks the file will be sliced into
  67. blockcount = (u32)((filesize + blocksize-1) / blocksize);
  68. // Determine what filename to record in the PAR2 files
  69. CommandLine* cl = CommandLine::get();
  70. if (!cl) {
  71. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  72. tbb::mutex::scoped_lock l(cout_mutex);
  73. #endif
  74. cerr << "error: missing cmd line - this should not happen!" << endl;
  75. return false; // something is wrong
  76. }
  77. const string& bd = cl->GetBaseDirectory();
  78. if (bd.empty()) {
  79. string::size_type where;
  80. if (string::npos != (where = diskfilename.find_last_of('\\')) ||
  81. string::npos != (where = diskfilename.find_last_of('/')))
  82. {
  83. parfilename = diskfilename.substr(where+1);
  84. }
  85. else
  86. {
  87. parfilename = diskfilename;
  88. }
  89. } else {
  90. string s(DiskFile::GetCanonicalPathname(diskfilename));
  91. #if defined(WIN32) || defined(__APPLE_CC__)
  92. if (0 != stricmp(s.substr(0, bd.length()).c_str(), bd.c_str()))
  93. #else
  94. if (s.substr(0, bd.length()) != bd)
  95. #endif
  96. {
  97. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  98. tbb::mutex::scoped_lock l(cout_mutex);
  99. #endif
  100. cerr << "error: file '" << s << "' is not in the base directory '" << bd << "'" << endl;
  101. return false;
  102. }
  103. s.erase(0, bd.length()); // remove base_dir -> sub-path
  104. if (s.empty()) {
  105. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  106. tbb::mutex::scoped_lock l(cout_mutex);
  107. #endif
  108. cerr << "error: file name missing after removing base directory (" << bd << ") from path (" <<
  109. DiskFile::GetCanonicalPathname(diskfilename) << ")" << endl;
  110. return false; // a file name is needed
  111. }
  112. parfilename = s;
  113. }
  114. //printf("about to store this in packet:\n"); dump_utf8_as_utf16(parfilename);
  115. // Create the Description and Verification packets
  116. descriptionpacket = new DescriptionPacket;
  117. descriptionpacket->Create(parfilename, filesize);
  118. verificationpacket = new VerificationPacket;
  119. verificationpacket->Create(blockcount);
  120. // Create the diskfile object
  121. diskfile = new DiskFile;
  122. diskfile->SetBlockCount(blockcount);
  123. // Open the source file
  124. if (!diskfile->Open(diskfilename, filesize))
  125. return false;
  126. // Do we want to defer the computation of the full file hash, and
  127. // the block crc and hashes. This is only permitted if there
  128. // is sufficient memory available to create all recovery blocks
  129. // in one pass of the source files (i.e. chunksize == blocksize)
  130. if (deferhashcomputation)
  131. {
  132. // Initialise a buffer to read the first 16k of the source file
  133. size_t buffersize = 16 * 1024;
  134. if (buffersize > filesize)
  135. buffersize = (size_t)filesize;
  136. char *buffer = new char[buffersize];
  137. // Read the data from the file
  138. if (!cl->GetCreateDummyParFiles() && !diskfile->Read(0, buffer, buffersize))
  139. {
  140. diskfile->Close();
  141. delete [] buffer;
  142. return false;
  143. }
  144. // Compute the hash of the data read from the file
  145. MD5Context context;
  146. context.Update(buffer, buffersize);
  147. delete [] buffer;
  148. MD5Hash hash;
  149. context.Final(hash);
  150. // Store the hash in the descriptionpacket and compute the file id
  151. descriptionpacket->Hash16k(hash);
  152. // Compute the fileid and store it in the verification packet.
  153. descriptionpacket->ComputeFileId();
  154. verificationpacket->FileId(descriptionpacket->FileId());
  155. // Allocate an MD5 context for computing the file hash
  156. // during the recovery data generation phase
  157. contextfull = new MD5Context;
  158. }
  159. else
  160. {
  161. // Initialise a buffer to read the source file
  162. size_t buffersize = 1024*1024;
  163. if (buffersize > min(blocksize,filesize))
  164. buffersize = (size_t)min(blocksize,filesize);
  165. char *buffer = new char[buffersize];
  166. // Get ready to start reading source file to compute the hashes and crcs
  167. u64 offset = 0;
  168. u32 blocknumber = 0;
  169. u64 need = blocksize;
  170. MD5Context filecontext;
  171. MD5Context blockcontext;
  172. u32 blockcrc = 0;
  173. // Whilst we have not reached the end of the file
  174. for (size_t want; offset < filesize; offset += want)
  175. {
  176. // Work out how much we can read
  177. want = (size_t)min(filesize-offset, (u64)buffersize);
  178. // Read some data from the file into the buffer
  179. if (!cl->GetCreateDummyParFiles() && !diskfile->Read(offset, buffer, want))
  180. {
  181. diskfile->Close();
  182. delete [] buffer;
  183. return false;
  184. }
  185. // If the new data passes the 16k boundary, compute the 16k hash for the file
  186. if (offset < 16384 && offset + want >= 16384)
  187. {
  188. filecontext.Update(buffer, (size_t)(16384-offset));
  189. MD5Context temp = filecontext;
  190. MD5Hash hash;
  191. temp.Final(hash);
  192. // Store the 16k hash in the file description packet
  193. descriptionpacket->Hash16k(hash);
  194. if (offset + want > 16384)
  195. {
  196. filecontext.Update(&buffer[16384-offset], (size_t)(offset+want)-16384);
  197. }
  198. }
  199. else
  200. {
  201. filecontext.Update(buffer, want);
  202. }
  203. // Get ready to update block hashes and crcs
  204. u32 used = 0;
  205. // Whilst we have not used all of the data we just read
  206. while (used < want)
  207. {
  208. // How much of it can we use for the current block
  209. u32 use = (u32)min(need, (u64)(want-used));
  210. blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, use, &buffer[used]);
  211. blockcontext.Update(&buffer[used], use);
  212. used += use;
  213. need -= use;
  214. // Have we finished the current block
  215. if (need == 0)
  216. {
  217. MD5Hash blockhash;
  218. blockcontext.Final(blockhash);
  219. // Store the block hash and block crc in the file verification packet.
  220. verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
  221. blocknumber++;
  222. // More blocks
  223. if (blocknumber < blockcount)
  224. {
  225. need = blocksize;
  226. blockcontext.Reset();
  227. blockcrc = 0;
  228. }
  229. }
  230. }
  231. if (noiselevel > CommandLine::nlQuiet)
  232. {
  233. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  234. tbb::tick_count now = tbb::tick_count::now();
  235. if ((now - last_cout).seconds() >= 0.1) { // only update every 0.1 seconds
  236. #endif
  237. // Display progress
  238. u32 oldfraction = (u32)(1000 * offset / filesize);
  239. u32 newfraction = (u32)(1000 * (offset + want) / filesize);
  240. if (oldfraction != newfraction) {
  241. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  242. last_cout = now;
  243. tbb::mutex::scoped_lock l(cout_mutex);
  244. #endif
  245. cout << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
  246. }
  247. #if WANT_CONCURRENT_PAR2_FILE_OPENING
  248. }
  249. #endif
  250. }
  251. }
  252. // Did we finish the last block
  253. if (need > 0)
  254. {
  255. blockcrc = ~0 ^ CRCUpdateBlock(~0 ^ blockcrc, (size_t)need);
  256. blockcontext.Update((size_t)need);
  257. MD5Hash blockhash;
  258. blockcontext.Final(blockhash);
  259. // Store the block hash and block crc in the file verification packet.
  260. verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
  261. blocknumber++;
  262. need = 0;
  263. }
  264. // Finish computing the file hash.
  265. MD5Hash filehash;
  266. filecontext.Final(filehash);
  267. // Store the file hash in the file description packet.
  268. descriptionpacket->HashFull(filehash);
  269. // Did we compute the 16k hash.
  270. if (offset < 16384)
  271. {
  272. // Store the 16k hash in the file description packet.
  273. descriptionpacket->Hash16k(filehash);
  274. }
  275. delete [] buffer;
  276. // Compute the fileid and store it in the verification packet.
  277. descriptionpacket->ComputeFileId();
  278. verificationpacket->FileId(descriptionpacket->FileId());
  279. }
  280. return true;
  281. }
  282. void Par2CreatorSourceFile::Close(void)
  283. {
  284. diskfile->Close();
  285. }
  286. void Par2CreatorSourceFile::RecordCriticalPackets(list<CriticalPacket*> &criticalpackets)
  287. {
  288. // Add the file description packet and file verification packet to
  289. // the critical packet list.
  290. criticalpackets.push_back(descriptionpacket);
  291. criticalpackets.push_back(verificationpacket);
  292. }
  293. bool Par2CreatorSourceFile::CompareLess(const Par2CreatorSourceFile* const &left, const Par2CreatorSourceFile* const &right)
  294. {
  295. // Sort source files based on fileid
  296. return left->descriptionpacket->FileId() < right->descriptionpacket->FileId();
  297. }
  298. const MD5Hash& Par2CreatorSourceFile::FileId(void) const
  299. {
  300. // Get the file id hash
  301. return descriptionpacket->FileId();
  302. }
  303. void Par2CreatorSourceFile::InitialiseSourceBlocks(vector<DataBlock>::iterator &sourceblock, u64 blocksize)
  304. {
  305. for (u32 blocknum=0; blocknum<blockcount; blocknum++)
  306. {
  307. // Configure each source block to an appropriate offset and length within the source file.
  308. sourceblock->SetLocation(diskfile, // file
  309. blocknum * blocksize); // offset
  310. sourceblock->SetLength(min(blocksize, filesize - (u64)blocknum * blocksize)); // length
  311. sourceblock++;
  312. }
  313. }
  314. void Par2CreatorSourceFile::UpdateHashes(u32 blocknumber, const void *buffer, size_t length)
  315. {
  316. // Compute the crc and hash of the data
  317. u32 blockcrc = ~0 ^ CRCUpdateBlock(~0, length, buffer);
  318. MD5Context blockcontext;
  319. blockcontext.Update(buffer, length);
  320. MD5Hash blockhash;
  321. blockcontext.Final(blockhash);
  322. // Store the results in the verification packet
  323. verificationpacket->SetBlockHashAndCRC(blocknumber, blockhash, blockcrc);
  324. // Update the full file hash, but don't go beyond the end of the file
  325. #if 1 // 20070926 - bugfix for bad MD5 hashes when input file is >= 4GB in size
  326. const u64 len = filesize - (u64) blocknumber * (u64) length;
  327. if ((u64) length > len)
  328. length = (size_t) len; // cast is safe because len must be <= 0xFFFFFFFF
  329. #else
  330. if (length > filesize - blocknumber * length)
  331. {
  332. length = (size_t)(filesize - blocknumber * (u64)length);
  333. }
  334. #endif
  335. assert(contextfull != 0);
  336. contextfull->Update(buffer, length);
  337. }
  338. void Par2CreatorSourceFile::FinishHashes(void)
  339. {
  340. assert(contextfull != 0);
  341. // Finish computation of the full file hash
  342. MD5Hash hash;
  343. contextfull->Final(hash);
  344. // Store it in the description packet
  345. descriptionpacket->HashFull(hash);
  346. #if 0 // 20070926 - used to debug above bug:
  347. cout << "final MD5 hash for file '" << diskfilename << "' is " << hash << endl;
  348. #endif
  349. }