PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/system/jlib/jfcmp.hpp

http://github.com/hpcc-systems/HPCC-Platform
C++ Header | 477 lines | 420 code | 40 blank | 17 comment | 52 complexity | 4ff418599681e09e4898fb8d28657a23 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, MIT
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2015 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "jlzw.hpp"
  15. #define COMMITTED ((size32_t)-1) // sentinel stored in CFcmpCompressor::inlenblk when no startblock() transaction is open
  16. #define FCMP_BUFFER_SIZE (0x100000) // 1MB: default size / growth step for the compressor buffers and the block size used by CFcmpStream::write
  17. class jlib_decl CFcmpCompressor : public CSimpleInterfaceOf<ICompressor>
  18. {
  19. protected:
  20. size32_t blksz;
  21. size32_t bufalloc;
  22. MemoryBuffer inma; // equals blksize len
  23. MemoryBuffer *outBufMb; // used when dynamic output buffer (when open() used)
  24. size32_t outBufStart;
  25. byte *inbuf;
  26. size32_t inmax; // remaining
  27. size32_t inlen;
  28. size32_t inlenblk; // set to COMMITTED when so
  29. bool trailing;
  30. byte *outbuf;
  31. size32_t outlen;
  32. size32_t wrmax;
  33. size32_t dynamicOutSz;
  34. virtual void setinmax() = 0;
  35. virtual void flushcommitted() = 0;
  36. void initCommon(size32_t initialSize)
  37. {
  38. blksz = initialSize;
  39. *(size32_t *)outbuf = 0;
  40. outlen = sizeof(size32_t);
  41. inlen = 0;
  42. inlenblk = COMMITTED;
  43. setinmax();
  44. }
  45. public:
  46. CFcmpCompressor()
  47. {
  48. outlen = 0;
  49. outbuf = NULL; // only set on close
  50. bufalloc = 0;
  51. wrmax = 0; // set at open
  52. dynamicOutSz = 0;
  53. outBufMb = NULL;
  54. outBufStart = 0;
  55. inbuf = NULL;
  56. }
  57. virtual ~CFcmpCompressor()
  58. {
  59. if (bufalloc)
  60. free(outbuf);
  61. }
  62. virtual void open(void *buf,size32_t max)
  63. {
  64. if (max<1024)
  65. throw MakeStringException(-1,"CFcmpCompressor::open - block size (%d) not large enough", max);
  66. wrmax = max;
  67. if (buf)
  68. {
  69. if (bufalloc)
  70. free(outbuf);
  71. bufalloc = 0;
  72. outbuf = (byte *)buf;
  73. }
  74. else if (max>bufalloc)
  75. {
  76. if (bufalloc)
  77. free(outbuf);
  78. outbuf = (byte *)malloc(max);
  79. if (!outbuf)
  80. throw MakeStringException(-1,"CFcmpCompressor::open - out of memory, requesting %d bytes", max);
  81. bufalloc = max;
  82. }
  83. outBufMb = NULL;
  84. outBufStart = 0;
  85. dynamicOutSz = 0;
  86. inbuf = (byte *)inma.ensureCapacity(max);
  87. initCommon(max);
  88. }
  89. virtual void open(MemoryBuffer &mb, size32_t initialSize)
  90. {
  91. if (!initialSize)
  92. initialSize = FCMP_BUFFER_SIZE; // 1MB
  93. if (initialSize<1024)
  94. throw MakeStringException(-1,"CFcmpCompressor::open - block size (%d) not large enough", initialSize);
  95. wrmax = initialSize;
  96. if (bufalloc)
  97. {
  98. free(outbuf);
  99. bufalloc = 0;
  100. }
  101. inbuf = (byte *)inma.ensureCapacity(initialSize);
  102. outBufMb = &mb;
  103. outBufStart = mb.length();
  104. outbuf = (byte *)outBufMb->ensureCapacity(initialSize);
  105. dynamicOutSz = outBufMb->capacity();
  106. initCommon(initialSize);
  107. }
  108. virtual void close()
  109. {
  110. if (inlenblk!=COMMITTED)
  111. {
  112. inlen = inlenblk; // transaction failed
  113. inlenblk = COMMITTED;
  114. }
  115. flushcommitted();
  116. size32_t totlen = outlen+sizeof(size32_t)+inlen;
  117. assertex(blksz>=totlen);
  118. size32_t *tsize = (size32_t *)(outbuf+outlen);
  119. *tsize = inlen;
  120. memcpy(tsize+1,inbuf,inlen);
  121. outlen = totlen;
  122. *(size32_t *)outbuf += inlen;
  123. inbuf = NULL;
  124. if (outBufMb)
  125. {
  126. outBufMb->setWritePos(outBufStart+outlen);
  127. outBufMb = NULL;
  128. }
  129. }
  130. size32_t write(const void *buf,size32_t len)
  131. {
  132. // no more than wrmax per write (unless dynamically sizing)
  133. size32_t lenb = wrmax;
  134. byte *b = (byte *)buf;
  135. size32_t written = 0;
  136. while (len)
  137. {
  138. if (len < lenb)
  139. lenb = len;
  140. if (lenb+inlen>inmax)
  141. {
  142. if (trailing)
  143. return written;
  144. flushcommitted();
  145. if (lenb+inlen>inmax)
  146. {
  147. if (outBufMb) // sizing input buffer, but outBufMb!=NULL is condition of whether in use or not
  148. {
  149. blksz += len > FCMP_BUFFER_SIZE ? len : FCMP_BUFFER_SIZE;
  150. verifyex(inma.ensureCapacity(blksz));
  151. blksz = inma.capacity();
  152. inbuf = (byte *)inma.bufferBase();
  153. wrmax = blksz;
  154. setinmax();
  155. }
  156. lenb = inmax-inlen;
  157. if (len < lenb)
  158. lenb = len;
  159. }
  160. }
  161. if (lenb == 0)
  162. return written;
  163. memcpy(inbuf+inlen,b,lenb);
  164. b += lenb;
  165. inlen += lenb;
  166. len -= lenb;
  167. written += lenb;
  168. }
  169. return written;
  170. }
  171. void * bufptr()
  172. {
  173. assertex(!inbuf); // i.e. closed
  174. return outbuf;
  175. }
  176. size32_t buflen()
  177. {
  178. assertex(!inbuf); // i.e. closed
  179. return outlen;
  180. }
  181. void startblock()
  182. {
  183. inlenblk = inlen;
  184. }
  185. void commitblock()
  186. {
  187. inlenblk = COMMITTED;
  188. }
  189. };
  190. class jlib_decl CFcmpExpander : public CSimpleInterfaceOf<IExpander>
  191. {
  192. protected:
  193. byte *outbuf;
  194. size32_t outlen;
  195. size32_t bufalloc;
  196. const size32_t *in;
  197. public:
  198. CFcmpExpander()
  199. {
  200. outbuf = NULL;
  201. outlen = 0;
  202. bufalloc = 0;
  203. }
  204. virtual ~CFcmpExpander()
  205. {
  206. if (bufalloc)
  207. free(outbuf);
  208. }
  209. virtual size32_t init(const void *blk)
  210. {
  211. const size32_t *expsz = (const size32_t *)blk;
  212. outlen = *expsz;
  213. in = (expsz+1);
  214. return outlen;
  215. }
  216. virtual void expand(void *buf)
  217. {
  218. if (!outlen)
  219. return;
  220. if (buf)
  221. {
  222. if (bufalloc)
  223. free(outbuf);
  224. bufalloc = 0;
  225. outbuf = (unsigned char *)buf;
  226. }
  227. else if (outlen>bufalloc)
  228. {
  229. if (bufalloc)
  230. free(outbuf);
  231. bufalloc = outlen;
  232. outbuf = (unsigned char *)malloc(bufalloc);
  233. if (!outbuf)
  234. throw MakeStringException(MSGAUD_operator,0, "Out of memory in FcmpExpander::expand, requesting %d bytes", bufalloc);
  235. }
  236. size32_t done = 0;
  237. for (;;)
  238. {
  239. const size32_t szchunk = *in;
  240. in++;
  241. if (szchunk+done<outlen)
  242. {
  243. memcpy((byte *)buf+done, in, szchunk);
  244. size32_t written = szchunk;
  245. done += written;
  246. if (!written||(done>outlen))
  247. throw MakeStringException(0, "FcmpExpander - corrupt data(1) %d %d",written,szchunk);
  248. }
  249. else
  250. {
  251. if (szchunk+done!=outlen)
  252. throw MakeStringException(0, "FcmpExpander - corrupt data(2) %d %d",szchunk,outlen);
  253. memcpy((byte *)buf+done,in,szchunk);
  254. break;
  255. }
  256. in = (const size32_t *)(((const byte *)in)+szchunk);
  257. }
  258. }
  259. virtual void *bufptr() { return outbuf;}
  260. virtual size32_t buflen() { return outlen;}
  261. };
  // Fixed-size record CFcmpStream::flush() writes after the last data block and
  // CFcmpStream::attach() reads back from the final sizeof(trailer) bytes of the file.
  // flush() zero-fills the whole struct before setting expandedSize and compressedType.
  262. struct FcmpCompressedFileTrailer
  263. {
  264. offset_t zfill1; // must be first; left zero by flush() - presumably so a reader treating the trailer as a block header sees a zero size (load() stops on that)
  265. offset_t expandedSize; // total uncompressed bytes written to the stream (expOffset at flush time)
  266. __int64 compressedType; // format tag; attach() accepts the file only when this equals the stream's compType
  267. unsigned zfill2; // must be last; left zero by flush()
  268. };
  269. class CFcmpStream : public CSimpleInterfaceOf<IFileIOStream>
  270. {
  271. protected:
  272. Linked<IFileIO> baseio;
  273. offset_t expOffset; // expanded offset
  274. offset_t cmpOffset; // compressed offset in file
  275. bool reading;
  276. MemoryAttr ma;
  277. size32_t bufsize;
  278. size32_t bufpos; // reading only
  279. offset_t expSize;
  280. __int64 compType;
  281. public:
  282. CFcmpStream()
  283. {
  284. expOffset = 0;
  285. cmpOffset = 0;
  286. reading = true;
  287. bufpos = 0;
  288. bufsize = 0;
  289. }
  290. virtual ~CFcmpStream() { flush(); }
  291. virtual bool load()
  292. {
  293. bufpos = 0;
  294. bufsize = 0;
  295. if (expOffset==expSize)
  296. return false;
  297. size32_t sz[2];
  298. if (baseio->read(cmpOffset,sizeof(size32_t)*2,&sz)!=sizeof(size32_t)*2)
  299. return false;
  300. bufsize = sz[0];
  301. if (!bufsize)
  302. return false;
  303. cmpOffset += sizeof(size32_t)*2;
  304. if (ma.length()<bufsize)
  305. ma.allocate(bufsize);
  306. MemoryAttr cmpma;
  307. byte *cmpbuf = (byte *)cmpma.allocate(sz[1]);
  308. if (baseio->read(cmpOffset,sz[1],cmpbuf)!=sz[1])
  309. throw MakeStringException(-1,"CFcmpStream: file corrupt.1");
  310. memcpy(ma.bufferBase(), cmpbuf, sz[1]);
  311. size32_t amnt = sz[1];
  312. if (amnt!=bufsize)
  313. throw MakeStringException(-1,"CFcmpStream: file corrupt.2");
  314. cmpOffset += sz[1];
  315. return true;
  316. }
  317. virtual void save()
  318. {
  319. if (bufsize)
  320. {
  321. MemoryAttr dstma;
  322. byte *dst = (byte *)dstma.allocate(sizeof(size32_t)*2+bufsize);
  323. memcpy((sizeof(size32_t)*2+dst), ma.get(), bufsize);
  324. size32_t sz = bufsize;
  325. memcpy(dst,&bufsize,sizeof(size32_t));
  326. memcpy(dst+sizeof(size32_t),&sz,sizeof(size32_t));
  327. baseio->write(cmpOffset,sz+sizeof(size32_t)*2,dst);
  328. cmpOffset += sz+sizeof(size32_t)*2;
  329. }
  330. bufsize = 0;
  331. }
  332. virtual bool attach(IFileIO *_baseio)
  333. {
  334. baseio.set(_baseio);
  335. expOffset = 0;
  336. cmpOffset = 0;
  337. reading = true;
  338. bufpos = 0;
  339. bufsize = 0;
  340. FcmpCompressedFileTrailer trailer;
  341. offset_t filesize = baseio->size();
  342. if (filesize<sizeof(trailer))
  343. return false;
  344. baseio->read(filesize-sizeof(trailer),sizeof(trailer),&trailer);
  345. expSize = trailer.expandedSize;
  346. return trailer.compressedType==compType;
  347. }
  348. virtual void create(IFileIO *_baseio)
  349. {
  350. baseio.set(_baseio);
  351. expOffset = 0;
  352. cmpOffset = 0;
  353. reading = false;
  354. bufpos = 0;
  355. bufsize = 0;
  356. ma.allocate(FCMP_BUFFER_SIZE);
  357. expSize = (offset_t)-1;
  358. }
  359. virtual void seek(offset_t pos, IFSmode origin)
  360. {
  361. if ((origin==IFScurrent)&&(pos==0))
  362. return;
  363. if ((origin==IFSbegin)||(pos!=0))
  364. throw MakeStringException(-1,"CFcmpStream seek not supported");
  365. expOffset = 0;
  366. bufpos = 0;
  367. bufsize = 0;
  368. }
  369. virtual offset_t size()
  370. {
  371. return (expSize==(offset_t)-1)?0:expSize;
  372. }
  373. virtual offset_t tell()
  374. {
  375. return expOffset;
  376. }
  377. virtual size32_t read(size32_t len, void * data)
  378. {
  379. if (!reading)
  380. throw MakeStringException(-1,"CFcmpStream read to stream being written");
  381. size32_t ret=0;
  382. while (len)
  383. {
  384. size32_t cpy = bufsize-bufpos;
  385. if (!cpy)
  386. {
  387. if (!load())
  388. break;
  389. cpy = bufsize-bufpos;
  390. }
  391. if (cpy>len)
  392. cpy = len;
  393. memcpy(data,(const byte *)ma.get()+bufpos,cpy);
  394. bufpos += cpy;
  395. len -= cpy;
  396. ret += cpy;
  397. }
  398. expOffset += ret;
  399. return ret;
  400. }
  401. virtual size32_t write(size32_t len, const void * data)
  402. {
  403. if (reading)
  404. throw MakeStringException(-1,"CFcmpStream write to stream being read");
  405. size32_t ret = len;
  406. while (len+bufsize>FCMP_BUFFER_SIZE)
  407. {
  408. size32_t cpy = FCMP_BUFFER_SIZE-bufsize;
  409. memcpy((byte *)ma.bufferBase()+bufsize,data,cpy);
  410. data = (const byte *)data+cpy;
  411. len -= cpy;
  412. bufsize = FCMP_BUFFER_SIZE;
  413. save();
  414. }
  415. memcpy((byte *)ma.bufferBase()+bufsize,data,len);
  416. bufsize += len;
  417. expOffset += len;
  418. return ret;
  419. }
  420. virtual void flush()
  421. {
  422. if (!reading&&(expSize!=expOffset))
  423. {
  424. save();
  425. FcmpCompressedFileTrailer trailer;
  426. memset(&trailer,0,sizeof(trailer));
  427. trailer.compressedType = compType;
  428. trailer.expandedSize = expOffset;
  429. baseio->write(cmpOffset,sizeof(trailer),&trailer);
  430. expSize = expOffset;
  431. }
  432. }
  433. };