PageRenderTime 116ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/Show/avc/itrans.cpp

http://github.com/mbebenita/Broadway
C++ | 307 lines | 243 code | 39 blank | 25 comment | 25 complexity | a076dc897a4fc8b5b74a85a399d44c49 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /* ------------------------------------------------------------------
  2. * Copyright (C) 1998-2009 PacketVideo
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  13. * express or implied.
  14. * See the License for the specific language governing permissions
  15. * and limitations under the License.
  16. * -------------------------------------------------------------------
  17. */
  18. #include "avclib_common.h"
  19. /* input are in the first 16 elements of block,
  20. output must be in the location specified in Figure 8-6. */
  21. /* subclause 8.5.6 */
  22. void Intra16DCTrans(int16 *block, int Qq, int Rq)
  23. {
  24. int m0, m1, m2, m3;
  25. int j, offset;
  26. int16 *inout;
  27. int scale = dequant_coefres[Rq][0];
  28. inout = block;
  29. for (j = 0; j < 4; j++)
  30. {
  31. m0 = inout[0] + inout[4];
  32. m1 = inout[0] - inout[4];
  33. m2 = inout[8] + inout[12];
  34. m3 = inout[8] - inout[12];
  35. inout[0] = m0 + m2;
  36. inout[4] = m0 - m2;
  37. inout[8] = m1 - m3;
  38. inout[12] = m1 + m3;
  39. inout += 64;
  40. }
  41. inout = block;
  42. if (Qq >= 2) /* this way should be faster than JM */
  43. { /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
  44. Qq -= 2;
  45. for (j = 0; j < 4; j++)
  46. {
  47. m0 = inout[0] + inout[64];
  48. m1 = inout[0] - inout[64];
  49. m2 = inout[128] + inout[192];
  50. m3 = inout[128] - inout[192];
  51. inout[0] = ((m0 + m2) * scale) << Qq;
  52. inout[64] = ((m0 - m2) * scale) << Qq;
  53. inout[128] = ((m1 - m3) * scale) << Qq;
  54. inout[192] = ((m1 + m3) * scale) << Qq;
  55. inout += 4;
  56. }
  57. }
  58. else
  59. {
  60. Qq = 2 - Qq;
  61. offset = 1 << (Qq - 1);
  62. for (j = 0; j < 4; j++)
  63. {
  64. m0 = inout[0] + inout[64];
  65. m1 = inout[0] - inout[64];
  66. m2 = inout[128] + inout[192];
  67. m3 = inout[128] - inout[192];
  68. inout[0] = (((m0 + m2) * scale + offset) >> Qq);
  69. inout[64] = (((m0 - m2) * scale + offset) >> Qq);
  70. inout[128] = (((m1 - m3) * scale + offset) >> Qq);
  71. inout[192] = (((m1 + m3) * scale + offset) >> Qq);
  72. inout += 4;
  73. }
  74. }
  75. return ;
  76. }
  77. /* see subclase 8.5.8 */
  78. void itrans(int16 *block, uint8 *pred, uint8 *cur, int width)
  79. {
  80. int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
  81. /* shall never exceed 16bit sign value, but we don't check */
  82. int i; /* to save the cycles. */
  83. int16 *inout;
  84. inout = block;
  85. for (i = 4; i > 0; i--)
  86. {
  87. e0 = inout[0] + inout[2];
  88. e1 = inout[0] - inout[2];
  89. e2 = (inout[1] >> 1) - inout[3];
  90. e3 = inout[1] + (inout[3] >> 1);
  91. inout[0] = e0 + e3;
  92. inout[1] = e1 + e2;
  93. inout[2] = e1 - e2;
  94. inout[3] = e0 - e3;
  95. inout += 16;
  96. }
  97. for (i = 4; i > 0; i--)
  98. {
  99. e0 = block[0] + block[32];
  100. e1 = block[0] - block[32];
  101. e2 = (block[16] >> 1) - block[48];
  102. e3 = block[16] + (block[48] >> 1);
  103. e0 += e3;
  104. e3 = (e0 - (e3 << 1)); /* e0-e3 */
  105. e1 += e2;
  106. e2 = (e1 - (e2 << 1)); /* e1-e2 */
  107. e0 += 32;
  108. e1 += 32;
  109. e2 += 32;
  110. e3 += 32;
  111. #ifdef USE_PRED_BLOCK
  112. e0 = pred[0] + (e0 >> 6);
  113. if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */
  114. e1 = pred[20] + (e1 >> 6);
  115. if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */
  116. e2 = pred[40] + (e2 >> 6);
  117. if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */
  118. e3 = pred[60] + (e3 >> 6);
  119. if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */
  120. *cur = e0;
  121. *(cur += width) = e1;
  122. *(cur += width) = e2;
  123. cur[width] = e3;
  124. cur -= (width << 1);
  125. cur++;
  126. pred++;
  127. #else
  128. OSCL_UNUSED_ARG(pred);
  129. e0 = *cur + (e0 >> 6);
  130. if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */
  131. *cur = e0;
  132. e1 = *(cur += width) + (e1 >> 6);
  133. if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */
  134. *cur = e1;
  135. e2 = *(cur += width) + (e2 >> 6);
  136. if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */
  137. *cur = e2;
  138. e3 = cur[width] + (e3 >> 6);
  139. if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */
  140. cur[width] = e3;
  141. cur -= (width << 1);
  142. cur++;
  143. #endif
  144. block++;
  145. }
  146. return ;
  147. }
  148. /* see subclase 8.5.8 */
  149. void ictrans(int16 *block, uint8 *pred, uint8 *cur, int width)
  150. {
  151. int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
  152. /* shall never exceed 16bit sign value, but we don't check */
  153. int i; /* to save the cycles. */
  154. int16 *inout;
  155. inout = block;
  156. for (i = 4; i > 0; i--)
  157. {
  158. e0 = inout[0] + inout[2];
  159. e1 = inout[0] - inout[2];
  160. e2 = (inout[1] >> 1) - inout[3];
  161. e3 = inout[1] + (inout[3] >> 1);
  162. inout[0] = e0 + e3;
  163. inout[1] = e1 + e2;
  164. inout[2] = e1 - e2;
  165. inout[3] = e0 - e3;
  166. inout += 16;
  167. }
  168. for (i = 4; i > 0; i--)
  169. {
  170. e0 = block[0] + block[32];
  171. e1 = block[0] - block[32];
  172. e2 = (block[16] >> 1) - block[48];
  173. e3 = block[16] + (block[48] >> 1);
  174. e0 += e3;
  175. e3 = (e0 - (e3 << 1)); /* e0-e3 */
  176. e1 += e2;
  177. e2 = (e1 - (e2 << 1)); /* e1-e2 */
  178. e0 += 32;
  179. e1 += 32;
  180. e2 += 32;
  181. e3 += 32;
  182. #ifdef USE_PRED_BLOCK
  183. e0 = pred[0] + (e0 >> 6);
  184. if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */
  185. e1 = pred[12] + (e1 >> 6);
  186. if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */
  187. e2 = pred[24] + (e2 >> 6);
  188. if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */
  189. e3 = pred[36] + (e3 >> 6);
  190. if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */
  191. *cur = e0;
  192. *(cur += width) = e1;
  193. *(cur += width) = e2;
  194. cur[width] = e3;
  195. cur -= (width << 1);
  196. cur++;
  197. pred++;
  198. #else
  199. OSCL_UNUSED_ARG(pred);
  200. e0 = *cur + (e0 >> 6);
  201. if ((uint)e0 > 0xFF) e0 = 0xFF & (~(e0 >> 31)); /* clip */
  202. *cur = e0;
  203. e1 = *(cur += width) + (e1 >> 6);
  204. if ((uint)e1 > 0xFF) e1 = 0xFF & (~(e1 >> 31)); /* clip */
  205. *cur = e1;
  206. e2 = *(cur += width) + (e2 >> 6);
  207. if ((uint)e2 > 0xFF) e2 = 0xFF & (~(e2 >> 31)); /* clip */
  208. *cur = e2;
  209. e3 = cur[width] + (e3 >> 6);
  210. if ((uint)e3 > 0xFF) e3 = 0xFF & (~(e3 >> 31)); /* clip */
  211. cur[width] = e3;
  212. cur -= (width << 1);
  213. cur++;
  214. #endif
  215. block++;
  216. }
  217. return ;
  218. }
  219. /* see subclause 8.5.7 */
  220. void ChromaDCTrans(int16 *block, int Qq, int Rq)
  221. {
  222. int c00, c01, c10, c11;
  223. int f0, f1, f2, f3;
  224. int scale = dequant_coefres[Rq][0];
  225. c00 = block[0] + block[4];
  226. c01 = block[0] - block[4];
  227. c10 = block[64] + block[68];
  228. c11 = block[64] - block[68];
  229. f0 = c00 + c10;
  230. f1 = c01 + c11;
  231. f2 = c00 - c10;
  232. f3 = c01 - c11;
  233. if (Qq >= 1)
  234. {
  235. Qq -= 1;
  236. block[0] = (f0 * scale) << Qq;
  237. block[4] = (f1 * scale) << Qq;
  238. block[64] = (f2 * scale) << Qq;
  239. block[68] = (f3 * scale) << Qq;
  240. }
  241. else
  242. {
  243. block[0] = (f0 * scale) >> 1;
  244. block[4] = (f1 * scale) >> 1;
  245. block[64] = (f2 * scale) >> 1;
  246. block[68] = (f3 * scale) >> 1;
  247. }
  248. return ;
  249. }
  250. void copy_block(uint8 *pred, uint8 *cur, int width, int pred_pitch)
  251. {
  252. uint32 temp;
  253. temp = *((uint32*)pred);
  254. pred += pred_pitch;
  255. *((uint32*)cur) = temp;
  256. cur += width;
  257. temp = *((uint32*)pred);
  258. pred += pred_pitch;
  259. *((uint32*)cur) = temp;
  260. cur += width;
  261. temp = *((uint32*)pred);
  262. pred += pred_pitch;
  263. *((uint32*)cur) = temp;
  264. cur += width;
  265. temp = *((uint32*)pred);
  266. *((uint32*)cur) = temp;
  267. return ;
  268. }