/tags/jet3d_dev_msvc2003/source/Engine/JetEngine/Bitmap/Compression/TBlock.c

# · C · 809 lines · 523 code · 130 blank · 156 comment · 47 complexity · 138067608100b6880f8f759cd668a340 MD5 · raw file

  1. /****************************************************************************************/
  2. /* TBLOCK.C */
  3. /* */
  4. /* Author: */
  5. /* Description: */
  6. /* */
  7. /* The contents of this file are subject to the Jet3D Public License */
  8. /* Version 1.02 (the "License"); you may not use this file except in */
  9. /* compliance with the License. You may obtain a copy of the License at */
  10. /* http://www.jet3d.com */
  11. /* */
  12. /* Software distributed under the License is distributed on an "AS IS" */
  13. /* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See */
  14. /* the License for the specific language governing rights and limitations */
  15. /* under the License. */
  16. /* */
  17. /* The Original Code is Jet3D, released December 12, 1999. */
  18. /* Copyright (C) 1996-1999 Eclipse Entertainment, L.L.C. All Rights Reserved */
  19. /* */
  20. /****************************************************************************************/
  21. /*{**** BOF ****/
  22. #include "TBlock.h"
  23. #include "codeimage.h"
  24. #include "Tsc.h"
  25. #include "Log.h"
  26. #include "Timer.h"
  /*
   * Profiling slots, one per timed region in this file.
   * Each name is bracketed by TIMER_P / TIMER_Q where it is measured and is
   * printed by TBlock_DoReport.  (TIMER_VARS comes from Timer.h; presumably it
   * declares the storage for one named timer -- confirm against Timer.h.)
   */

  /* untransformBlocked as a whole, its allocate/free, and the HL transpose */
  TIMER_VARS(TBlock_All);
  TIMER_VARS(TBlock_Ram);
  TIMER_VARS(TBlock_Transpose);

  /* the four row/column passes */
  TIMER_VARS(TBlock_H);
  TIMER_VARS(TBlock_HB);
  TIMER_VARS(TBlock_V);
  TIMER_VARS(TBlock_VB);

  /* finer-grained breakdown inside the H and V passes */
  TIMER_VARS(TBlock_H_SpinUpDown);
  TIMER_VARS(TBlock_H_Waver);
  TIMER_VARS(TBlock_H_Block);
  TIMER_VARS(TBlock_V_UnBlock);
  TIMER_VARS(TBlock_V_Waver);
  39. //#define cachetouch_w(x,y)
  40. //#define cachetouch_r(x,y)
  41. /**
  42. with cachetouch disabled:
  43. TBlock_All : 0.039409 : 48.0 %
  44. TBlock_Ram : 0.000285 : 0.3 %
  45. TBlock_Transpose : 0.000000 : 0.0 %
  46. TBlock_H : 0.012152 : 14.8 % // these four numbers vary pretty wildly:
  47. TBlock_HB : 0.009154 : 11.2 %
  48. TBlock_V : 0.013977 : 17.0 %
  49. TBlock_VB : 0.003804 : 4.6 %
  50. with cachetouch:
  51. TBlock_All : 0.043376 : 48.9 %
  52. TBlock_Ram : 0.000291 : 0.3 %
  53. TBlock_Transpose : 0.000000 : 0.0 %
  54. TBlock_H : 0.012161 : 13.7 %
  55. TBlock_HB : 0.010536 : 11.9 %
  56. TBlock_V : 0.016425 : 18.5 %
  57. TBlock_VB : 0.003923 : 4.4 %
  58. **/
  59. typedef struct {
  60. jeWaveletFunc waver;
  61. int * blocks;
  62. int * trows[9];
  63. int stride8;
  64. int ** rows;
  65. } tblockInfo;
  /*
   * DO8(x) : repeat the statement(s) x eight times back to back (manual unroll).
   * x may itself contain several ';'-separated statements.
   */
  #define DO8(x) \
      do { \
          x; x; x; x; \
          x; x; x; x; \
      } while(0)
  67. /*}{**** row <-> block copiers ****/
  /*
   * rowtoblock : scatter one scanline into the 8x8-blocked buffer.
   *
   *  bptr : where this scanline starts inside the first block of its block row
   *  row  : source scanline; 8*w8 ints are read sequentially
   *  w8   : number of 8-int groups to copy (<= 0 copies nothing)
   *
   * Group k of the row is written to bptr + 64*k, i.e. to the same line of the
   * k'th 8x8 block.
   *
   * static : a non-static inline definition supplies no external definition
   * under C99/C11, so any call that is not inlined fails to link on compilers
   * other than MSVC.  This helper is only used inside this file.
   */
  static __inline void rowtoblock(int * bptr,const int *row,int w8)
  {
      int blk,i;

      for(blk=0;blk<w8;blk++)
      {
          int *dst = bptr + 64*blk;
          const int *src = row + 8*blk;

          for(i=0;i<8;i++)
              dst[i] = src[i];
      }
  }
  /*
   * blocktorow : gather the first x ints of one scanline out of the blocked buffer.
   *
   *  row  : destination scanline, written sequentially (x ints)
   *  bptr : where this scanline starts inside the first block of its block row
   *  x    : number of ints to fetch (<= 0 fetches nothing)
   *
   * Full groups of 8 come from bptr + 64*k; the remaining (x & 7) ints come
   * from the start of the following block's line.
   *
   * static : a non-static inline definition supplies no external definition
   * under C99/C11, so non-inlined calls would not link outside MSVC.
   */
  static __inline void blocktorow(int * row,const int *bptr,int x)
  {
      int full,tail,blk,i;

      if ( x <= 0 )
          return;

      full = x>>3;
      tail = x&7;

      for(blk=0;blk<full;blk++)
      {
          for(i=0;i<8;i++)
              row[8*blk + i] = bptr[64*blk + i];
      }

      row  += 8*full;
      bptr += 64*full;
      for(i=0;i<tail;i++)
          row[i] = bptr[i];
  }
  /*
   * blockvtorow : gather one column of the blocked buffer into a contiguous row.
   *
   *  row     : destination, written sequentially (y ints)
   *  bptr    : address of the column's first element inside the top block
   *  y       : number of ints to fetch (<= 0 fetches nothing)
   *  stride8 : ints between vertically adjacent blocks
   *
   * Inside a block successive column entries are 8 ints apart; after 8 entries
   * the walk continues in the block stride8 ints further on.
   *
   * static : a non-static inline definition supplies no external definition
   * under C99/C11, so non-inlined calls would not link outside MSVC.
   */
  static __inline void blockvtorow(int * row,const int *bptr,int y,int stride8)
  {
      int full,tail,blk,i;

      if ( y <= 0 )
          return;

      full = y>>3;
      tail = y&7;

      for(blk=0;blk<full;blk++)
      {
          const int *src = bptr + stride8*blk;

          for(i=0;i<8;i++)
              row[8*blk + i] = src[8*i];
      }

      row  += 8*full;
      bptr += stride8*full;
      for(i=0;i<tail;i++)
          row[i] = bptr[8*i];
  }
  /*
   * rowtoblockv : scatter a contiguous row into one column of the blocked buffer
   * (the inverse of blockvtorow).
   *
   *  bptr    : address of the column's first element inside the top block
   *  row     : source, read sequentially (y ints)
   *  y       : number of ints to store (<= 0 stores nothing)
   *  stride8 : ints between vertically adjacent blocks
   *
   * static : a non-static inline definition supplies no external definition
   * under C99/C11, so non-inlined calls would not link outside MSVC.
   */
  static __inline void rowtoblockv(int *bptr,const int *row,int y,int stride8)
  {
      int full,tail,blk,i;

      if ( y <= 0 )
          return;

      full = y>>3;
      tail = y&7;

      for(blk=0;blk<full;blk++)
      {
          int *dst = bptr + stride8*blk;

          for(i=0;i<8;i++)
              dst[8*i] = row[8*blk + i];
      }

      row  += 8*full;
      bptr += stride8*full;
      for(i=0;i<tail;i++)
          bptr[8*i] = row[i];
  }
  122. /***
  123. void rowtoblock8(int * bptr,const int **inrows,int w8)
  124. {
  125. int x8;
  126. int * rows[8];
  127. memcpy(rows,inrows,32);
  128. for(x8=w8;x8--;)
  129. {
  130. DO8(*bptr++ = *rows[0]++);
  131. DO8(*bptr++ = *rows[1]++);
  132. DO8(*bptr++ = *rows[2]++);
  133. DO8(*bptr++ = *rows[3]++);
  134. DO8(*bptr++ = *rows[4]++);
  135. DO8(*bptr++ = *rows[5]++);
  136. DO8(*bptr++ = *rows[6]++);
  137. DO8(*bptr++ = *rows[7]++);
  138. }
  139. }
  140. ***/
  /*
   * rowtoblock8 : copy eight consecutive scanlines into one row of 8x8 blocks.
   *
   *  inbptr : start of the destination block row
   *  inrows : the eight source scanlines (inrows[0..7]); the caller's array is
   *           not modified, a private copy of the pointers is advanced instead
   *  w8     : number of 8x8 blocks to fill
   *
   * Per block, copy32_8 is handed the destination block and the eight row
   * cursors; afterwards every cursor moves on 8 ints and the destination one
   * block (64 ints).  With 4-byte ints that is the 32 / 256 byte step of the
   * original code.
   *
   * The cursors are kept as real pointers: holding them in 32-bit integers
   * (and copying them with a fixed 32-byte memcpy) truncates addresses on any
   * target whose pointers are wider than 32 bits.
   */
  void rowtoblock8(int * inbptr,const int **inrows,int w8)
  {
      const int *rows[8];
      int *bptr;
      int i;

      bptr = inbptr;
      for(i=0;i<8;i++)
          rows[i] = inrows[i];

      while(w8-- > 0)
      {
          copy32_8((char *)bptr,(char **)rows);

          for(i=0;i<8;i++)
              rows[i] += 8;
          bptr += 64;
      }
  }
  /*
   * blockvtorow8 : read eight adjacent columns of the blocked buffer out into
   * eight contiguous rows.
   *
   *  rows    : rows[0..7] receive columns 0..7 of the strip, h ints each
   *  bptr    : top of the 8-column strip inside the blocked buffer
   *  h       : number of entries per column
   *  stride8 : ints between vertically adjacent blocks
   *
   * Each destination row is passed to cachetouch_w before the copy starts
   * (presumably a write-prefetch hint; see its definition).
   */
  void blockvtorow8(int ** rows,const int *bptr,int h,int stride8)
  {
      int *out[8];
      int groups,tail,line,k;

      for(k=0;k<8;k++)
      {
          out[k] = rows[k];
          cachetouch_w(out[k],h>>3);
      }

      // whole blocks : 8 lines of 8 ints, one int for each output row
      for(groups = h>>3; groups--; )
      {
          for(line=0;line<8;line++)
          {
              for(k=0;k<8;k++)
                  *out[k]++ = bptr[k];
              bptr += 8;
          }
          bptr += stride8 - 64;   // on to the block below
      }

      // leftover lines of the last, partial block
      for(tail = h&7; tail--; )
      {
          for(k=0;k<8;k++)
              *out[k]++ = bptr[k];
          bptr += 8;
      }
  }
  /*
   * rowtoblockv8 : write eight contiguous rows back as eight adjacent columns
   * of the blocked buffer (the inverse of blockvtorow8).
   *
   *  bptr    : top of the 8-column strip inside the blocked buffer
   *  rows    : rows[0..7] supply columns 0..7 of the strip, h ints each
   *  h       : number of entries per column
   *  stride8 : ints between vertically adjacent blocks
   */
  void rowtoblockv8(int *bptr,const int ** rows,int h,int stride8)
  {
      const int *src[8];
      int groups,tail,line,k;

      for(k=0;k<8;k++)
          src[k] = rows[k];

      // whole blocks
      for(groups = h>>3; groups--; )
      {
          cachetouch_w(bptr,1);

          for(line=0;line<8;line++)
          {
              for(k=0;k<8;k++)
                  bptr[k] = *src[k]++;
              bptr += 8;
          }
          bptr += stride8 - 64;   // on to the block below
      }

      // leftover lines of the last, partial block
      for(tail = h&7; tail--; )
      {
          for(k=0;k<8;k++)
              bptr[k] = *src[k]++;
          bptr += 8;
      }
  }
  221. /*}{**** transformers ; row <-> block **************/
  222. void untH(int starty,int endy,int w,tblockInfo * tbi)
  223. {
  224. int *workrow;
  225. int y8,yi,y,w8;
  226. int * bptr;
  227. int stride8,**rows,*blocks;
  228. jeWaveletFunc waver;
  229. TIMER_P(TBlock_H);
  230. waver = tbi->waver;
  231. stride8 = tbi->stride8;
  232. blocks = tbi->blocks;
  233. rows = tbi->rows;
  234. // (row+row) -> (brow)
  235. // <> we should make a roll-8 version, but then we need
  236. // a spin-up and a spin-down loop
  237. w8 = (w+7)>>3;
  238. y8 = (starty>>3);
  239. yi = (starty&7);
  240. bptr = blocks + stride8*y8 + 8*yi;
  241. for(y=starty;y<endy;y++)
  242. {
  243. workrow = rows[y-1];
  244. waver(workrow,rows[y],w); // workrow <- rows[y]
  245. rowtoblock(bptr,workrow,w8); // block <- workrow
  246. bptr += 8; // point to next line in blocks !
  247. yi++;
  248. if ( yi == 8 )
  249. {
  250. yi = 0; y8 ++;
  251. bptr = blocks + stride8*y8;
  252. }
  253. assert(y8 == ((y+1)>>3));
  254. }
  255. TIMER_Q(TBlock_H);
  256. }
  257. void untH2(int starty,int endy,int w,tblockInfo * tbi)
  258. {
  259. int y8,nexty,y,w8,i;
  260. int * bptr;
  261. int stride8,**rows,*blocks;
  262. jeWaveletFunc waver;
  263. TIMER_P(TBlock_H);
  264. waver = tbi->waver;
  265. stride8 = tbi->stride8;
  266. blocks = tbi->blocks;
  267. rows = tbi->rows;
  268. // (row+row) -> (brow)
  269. // a roll-8 version
  270. // with a spin-up and a spin-down loop
  271. y = starty;
  272. w8 = (w+7)>>3;
  273. bptr = blocks + stride8*(starty>>3) + 8*(starty&7);
  274. nexty = ((starty+7)&(~7));
  275. if ( nexty > endy ) nexty = endy;
  276. TIMER_P(TBlock_H_SpinUpDown);
  277. for(;y<nexty;y++)
  278. {
  279. waver(rows[y-1],rows[y],w); // workrow <- rows[y]
  280. rowtoblock(bptr,rows[y-1],w8); // block <- workrow
  281. bptr += 8; // point to next line in blocks !
  282. }
  283. TIMER_Q(TBlock_H_SpinUpDown);
  284. bptr = blocks + stride8*(y>>3);
  285. y8 = (endy - y)>>3;
  286. nexty = y + (y8<<3);
  287. /**
  288. ----
  289. separated :
  290. TBlock_H_Waver : 0.005721 : 6.7 %
  291. TBlock_H_Block : 0.008363 : 9.9 %
  292. ----
  293. TIMER_P(TBlock_H_Waver);
  294. for(i=y;i<nexty;i++)
  295. {
  296. // this is cache optimal ; read a row, then write it
  297. waver(rows[i-1],rows[i],w); // workrow <- rows[y]
  298. }
  299. TIMER_Q(TBlock_H_Waver);
  300. TIMER_P(TBlock_H_Block);
  301. while(y8--)
  302. {
  303. rowtoblock8(bptr,rows + y-1,w8); // blocks <- rows
  304. y += 8;
  305. bptr += stride8;
  306. }
  307. TIMER_Q(TBlock_H_Block);
  308. ----
  309. merged:
  310. TBlock_H_Waver : 0.005636 : 6.8 %
  311. TBlock_H_Block : 0.006693 : 8.1 %
  312. ----
  313. copy32_8 assembly:
  314. TBlock_H_Waver : 0.005728 : 6.4 %
  315. TBlock_H_Block : 0.006458 : 7.2 %
  316. **/
  317. while(y8--)
  318. {
  319. TIMER_P(TBlock_H_Waver);
  320. cachetouch_w(rows[y-1],w8); // this row may not be in cache yet
  321. //cachetouch_r(rows[y],w8);
  322. for(i=0;i<8;i++)
  323. {
  324. // this is cache optimal ; read a row, then write it
  325. waver(rows[y+i-1],rows[y+i],w); // workrow <- rows[y]
  326. }
  327. TIMER_Q(TBlock_H_Waver);
  328. TIMER_P(TBlock_H_Block);
  329. // all rows should be in cache now
  330. rowtoblock8(bptr,(const int **)(rows + y-1),w8); // blocks <- rows
  331. TIMER_Q(TBlock_H_Block);
  332. y += 8;
  333. bptr += stride8;
  334. }
  335. TIMER_P(TBlock_H_SpinUpDown);
  336. for(;y<endy;y++)
  337. {
  338. waver(rows[y-1],rows[y],w); // workrow <- rows[y]
  339. rowtoblock(bptr,rows[y-1],w8); // block <- workrow
  340. bptr += 8; // point to next line in blocks !
  341. }
  342. TIMER_Q(TBlock_H_SpinUpDown);
  343. TIMER_Q(TBlock_H);
  344. }
  345. void untHb(int starty,int endy,int w,tblockInfo * tbi)
  346. {
  347. int *workrow,*row;
  348. int y8,yi,y,w8;
  349. int * bptr;
  350. int stride8,**rows,*blocks;
  351. jeWaveletFunc waver;
  352. TIMER_P(TBlock_HB);
  353. waver = tbi->waver;
  354. stride8 = tbi->stride8;
  355. blocks = tbi->blocks;
  356. rows = tbi->rows;
  357. // (brow+row) -> (block row)
  358. // <> we should make a roll-8 version,
  359. w8 = (w+7)>>3;
  360. y8 = (starty>>3);
  361. yi = (starty&7);
  362. bptr = blocks + stride8*y8 + 8*yi;
  363. for(y=starty;y<endy;y++)
  364. {
  365. row = rows[y];
  366. workrow = rows[y-1];
  367. cachetouch_w(row,w8);
  368. blocktorow(row,bptr,w>>1); // get the LL out of blocks
  369. // the LH part is already in row[]
  370. waver(workrow,row,w); // workrow <- row ; write to the row we just read from
  371. rowtoblock(bptr,workrow,w8);// block <- workrow ; back in the blocks
  372. yi++;
  373. bptr += 8; // point to next line in blocks !
  374. if ( yi == 8 )
  375. {
  376. yi = 0; y8 ++;
  377. bptr = blocks + stride8*y8;
  378. }
  379. assert(y8 == ((y+1)>>3));
  380. }
  381. TIMER_Q(TBlock_HB);
  382. }
  383. void untV2(int w,int h,tblockInfo * tbi)
  384. {
  385. int x8,xi,y;
  386. int * bptr;
  387. int stride8,**rows,*blocks;
  388. jeWaveletFunc waver;
  389. // this is just bad:
  390. // TBlock_V_UnBlock : 0.009621 : 11.4 %
  391. // TBlock_V_Waver : 0.007168 : 8.5 %
  392. TIMER_P(TBlock_V);
  393. waver = tbi->waver;
  394. stride8 = tbi->stride8;
  395. blocks = tbi->blocks;
  396. rows = tbi->rows;
  397. // this is only done once, at the very end
  398. // at this point all our data is in the blocks,
  399. // so we can trash anything in the rows
  400. // (bcolumn) -> (row)
  401. x8 = w>>3;
  402. xi = w&7;
  403. bptr = blocks;
  404. y = -1;
  405. TIMER_P(TBlock_V_UnBlock);
  406. while(x8--)
  407. {
  408. blockvtorow8(rows+y,bptr,h,stride8);
  409. bptr += 64; // step past 8 columns in blocks !
  410. y += 8;
  411. }
  412. TIMER_Q(TBlock_V_UnBlock);
  413. while(xi--)
  414. {
  415. cachetouch_w(rows[y],h>>3);
  416. blockvtorow(rows[y],bptr,h,stride8);
  417. bptr ++; // point to next column in blocks !
  418. y++;
  419. }
  420. TIMER_P(TBlock_V_Waver);
  421. cachetouch_w(rows[w-1],h>>3);
  422. for(y = w - 1;(y>=0);y--)
  423. {
  424. // this is cache-optimal : we read from row (y) then write to row (y)
  425. waver(rows[y],rows[y-1],h);
  426. }
  427. TIMER_Q(TBlock_V_Waver);
  428. TIMER_Q(TBlock_V);
  429. }
  430. void untV3(int w,int h,tblockInfo * tbi)
  431. {
  432. int x8,xi,y,i;
  433. int * bptr;
  434. int stride8,**rows,**trows,*blocks;
  435. jeWaveletFunc waver;
  436. // The Waver is slow cuz we're writing to memory not in cache at all
  437. // on a K7 or P3, we the cachetouch_w fixes everything
  438. // TBlock_V_UnBlock : 0.006368 : 7.4 %
  439. // TBlock_V_Waver : 0.007712 : 9.0 %
  440. TIMER_P(TBlock_V);
  441. waver = tbi->waver;
  442. stride8 = tbi->stride8;
  443. blocks = tbi->blocks;
  444. rows = tbi->rows;
  445. trows = tbi->trows;
  446. // this is only done once, at the very end
  447. // at this point all our data is in the blocks,
  448. // so we can trash anything in the rows
  449. // (bcolumn) -> (row)
  450. x8 = w>>3;
  451. xi = w&7;
  452. bptr = blocks;
  453. y = 0;
  454. while(x8--)
  455. {
  456. TIMER_P(TBlock_V_UnBlock);
  457. blockvtorow8(trows,bptr,h,stride8);
  458. TIMER_Q(TBlock_V_UnBlock);
  459. TIMER_P(TBlock_V_Waver);
  460. for(i=0;i<8;i++)
  461. {
  462. cachetouch_w(rows[y+i],h>>3);
  463. waver(rows[y+i],trows[i],h);
  464. }
  465. TIMER_Q(TBlock_V_Waver);
  466. bptr += 64; // step past 8 columns in blocks !
  467. y += 8;
  468. }
  469. cachetouch_w(trows[0],h>>3);
  470. while(xi--)
  471. {
  472. blockvtorow(trows[0],bptr,h,stride8);
  473. waver(rows[y],trows[0],h);
  474. bptr ++; // point to next column in blocks !
  475. y++;
  476. }
  477. TIMER_Q(TBlock_V);
  478. }
  479. void untV4(int w,int h,tblockInfo * tbi)
  480. {
  481. int x8,xi,y,i;
  482. int * bptr;
  483. int stride8,**rows,*workrow,*blocks;
  484. int *zrows[8];
  485. jeWaveletFunc waver;
  486. // well, we sped up the Waver, but the UnBlock still hurts
  487. // TBlock_V_UnBlock : 0.009015 : 11.1 %
  488. // TBlock_V_Waver : 0.004652 : 5.7 %
  489. TIMER_P(TBlock_V);
  490. waver = tbi->waver;
  491. stride8 = tbi->stride8;
  492. blocks = tbi->blocks;
  493. rows = tbi->rows;
  494. workrow = tbi->rows[-1];
  495. // this is only done once, at the very end
  496. // at this point all our data is in the blocks,
  497. // so we can trash anything in the rows
  498. // (bcolumn) -> (row)
  499. x8 = w>>3;
  500. xi = w&7;
  501. bptr = blocks;
  502. y = 0;
  503. zrows[0] = workrow;
  504. cachetouch_w(workrow,h>>3);
  505. while(x8--)
  506. {
  507. for(i=1;i<8;i++)
  508. zrows[i] = rows[y+i-1];
  509. TIMER_P(TBlock_V_UnBlock);
  510. blockvtorow8(zrows,bptr,h,stride8);
  511. TIMER_Q(TBlock_V_UnBlock);
  512. TIMER_P(TBlock_V_Waver);
  513. for(i=7;i>=0;i--)
  514. {
  515. cachetouch_w(rows[y+i],h>>3);
  516. // write to i, read from (i-1), step backwards; this is cache-optimal
  517. waver(rows[y+i],zrows[i],h);
  518. }
  519. TIMER_Q(TBlock_V_Waver);
  520. bptr += 64; // step past 8 columns in blocks !
  521. y += 8;
  522. }
  523. cachetouch_w(workrow,h>>3);
  524. while(xi--)
  525. {
  526. blockvtorow(workrow,bptr,h,stride8);
  527. waver(rows[y],workrow,h);
  528. bptr ++; // point to next column in blocks !
  529. y++;
  530. }
  531. TIMER_Q(TBlock_V);
  532. }
  533. void untVb3(int w,int h,tblockInfo * tbi)
  534. {
  535. int x8,xi,y;
  536. int * bptr;
  537. int stride8,**trows,*blocks;
  538. jeWaveletFunc waver;
  539. TIMER_P(TBlock_VB);
  540. waver = tbi->waver;
  541. stride8 = tbi->stride8;
  542. blocks = tbi->blocks;
  543. trows = tbi->trows;
  544. // (bcolumn) -> (bcolumn)
  545. // read the whole set of bcolumns out to rows,
  546. // then wavelet all the rows
  547. // then read 'em back to bcolumns
  548. x8 = w>>3;
  549. bptr = blocks;
  550. while(x8--)
  551. {
  552. // read 8 columns out to rows
  553. blockvtorow8(trows+1,bptr,h,stride8);
  554. // wave 'em, shifting down one
  555. cachetouch_w(trows[0],h>>3);
  556. for(y=1;y<9;y++)
  557. waver(trows[y-1],trows[y],h);
  558. // now put 'em back in blocks :
  559. rowtoblockv8(bptr,(const int **)trows,h,stride8);
  560. bptr += 64; // step past 8 columns in blocks !
  561. }
  562. // spin down:
  563. xi = w&7;
  564. cachetouch_w(trows[1],h>>3);
  565. for(y=0;y<xi;y++)
  566. {
  567. blockvtorow(trows[0],bptr,h,stride8);
  568. waver(trows[1],trows[0],h);
  569. rowtoblockv(bptr,trows[1],h,stride8);
  570. bptr ++; // point to next column in blocks !
  571. }
  572. TIMER_Q(TBlock_VB);
  573. }
  574. /*}{*** IT ********/
  575. void untransformBlocked(image *im,int levels,jeWaveletFunc waver,jeBoolean doLHs)
  576. {
  577. int p,l;
  578. tblockInfo tbi;
  579. int * blocks;
  580. int width8,height8,stride8,w,h;
  581. int ** rows;
  582. int ** trows;
  583. int imw,imh,ims;
  584. Log_Printf("Doing untransformBlocked\n");
  585. TIMER_P(TBlock_All);
  586. imw = im->width;
  587. imh = im->height;
  588. ims = im->stride;
  589. width8 = (imw + 7)>>3;
  590. height8 = (imh + 7)>>3;
  591. stride8 = (((ims + 7)>>3)<<6) + 3;
  592. w = (ims + 7)>>3;
  593. h = w<<3;
  594. TIMER_P(TBlock_Ram);
  595. blocks = (int *)jeRam_Allocate(sizeof(int)*(stride8*w + 9*h));
  596. assert(blocks);
  597. TIMER_Q(TBlock_Ram);
  598. trows = tbi.trows;
  599. trows[0] = blocks + stride8*w;
  600. for(l=1;l<9;l++)
  601. trows[l] = trows[l-1] + h;
  602. tbi.blocks = blocks;
  603. tbi.stride8 = stride8;
  604. tbi.waver = waver;
  605. pushTSC();
  606. for(p=0;p<(im->planes);p++)
  607. {
  608. rows = im->data[p];
  609. tbi.rows = rows;
  610. for (l = levels-1; l >= 0; l--)
  611. {
  612. w = imw >> l;
  613. h = imh >> l;
  614. /* untransform into blocks */
  615. //<> seems a shame not to use the blocks to transpose
  616. if ( doLHs )
  617. {
  618. TIMER_P(TBlock_Transpose);
  619. transposeHL(im,p,l);
  620. TIMER_Q(TBlock_Transpose);
  621. }
  622. if ( l == (levels - 1) )
  623. {
  624. untH2(0,h,w,&tbi);
  625. }
  626. else
  627. {
  628. untHb(0,h>>1,w,&tbi);
  629. untH2(h>>1,h,w,&tbi);
  630. }
  631. /* Columns */
  632. if ( l == 0 )
  633. {
  634. untV4(w,h,&tbi);
  635. }
  636. else
  637. {
  638. untVb3(w,h,&tbi);
  639. }
  640. assert(jeRam_IsValidPtr(blocks));
  641. }
  642. }
  643. showPopTSC("untrans blocked");
  644. TIMER_P(TBlock_Ram);
  645. jeRam_Free(blocks);
  646. TIMER_Q(TBlock_Ram);
  647. // we did a transpose !
  648. swapints(im->width,im->height);
  649. TIMER_Q(TBlock_All);
  650. }
  651. void TBlock_DoReport(void)
  652. {
  653. TIMER_REPORT(TBlock_All);
  654. TIMER_REPORT(TBlock_Ram);
  655. TIMER_REPORT(TBlock_Transpose);
  656. TIMER_REPORT(TBlock_H);
  657. TIMER_REPORT(TBlock_HB);
  658. TIMER_REPORT(TBlock_V);
  659. TIMER_REPORT(TBlock_VB);
  660. TIMER_REPORT(TBlock_V_UnBlock);
  661. TIMER_REPORT(TBlock_V_Waver);
  662. TIMER_REPORT(TBlock_H_SpinUpDown);
  663. TIMER_REPORT(TBlock_H_Waver);
  664. TIMER_REPORT(TBlock_H_Block);
  665. }
  666. /*}*** EOF ********/