PageRenderTime 950ms CodeModel.GetById 684ms app.highlight 141ms RepoModel.GetById 121ms app.codeStats 0ms

/Show/avc/itrans.cpp

http://github.com/mbebenita/Broadway
C++ | 307 lines | 243 code | 39 blank | 25 comment | 25 complexity | a076dc897a4fc8b5b74a85a399d44c49 MD5 | raw file
  1/* ------------------------------------------------------------------
  2 * Copyright (C) 1998-2009 PacketVideo
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *      http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 13 * express or implied.
 14 * See the License for the specific language governing permissions
 15 * and limitations under the License.
 16 * -------------------------------------------------------------------
 17 */
#include "avclib_common.h"
#include <string.h>
 19
 20/* input are in the first 16 elements of block,
 21   output must be in the location specified in Figure 8-6. */
 22/* subclause 8.5.6 */
 23void Intra16DCTrans(int16 *block, int Qq, int Rq)
 24{
 25    int m0, m1, m2, m3;
 26    int j, offset;
 27    int16 *inout;
 28    int scale = dequant_coefres[Rq][0];
 29
 30    inout = block;
 31    for (j = 0; j < 4; j++)
 32    {
 33        m0 = inout[0] + inout[4];
 34        m1 = inout[0] - inout[4];
 35        m2 = inout[8] + inout[12];
 36        m3 = inout[8] - inout[12];
 37
 38
 39        inout[0] = m0 + m2;
 40        inout[4] = m0 - m2;
 41        inout[8] = m1 - m3;
 42        inout[12] = m1 + m3;
 43        inout += 64;
 44    }
 45
 46    inout = block;
 47
 48    if (Qq >= 2)  /* this way should be faster than JM */
 49    {           /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
 50        Qq -= 2;
 51        for (j = 0; j < 4; j++)
 52        {
 53            m0 = inout[0] + inout[64];
 54            m1 = inout[0] - inout[64];
 55            m2 = inout[128] + inout[192];
 56            m3 = inout[128] - inout[192];
 57
 58            inout[0] = ((m0 + m2) * scale) << Qq;
 59            inout[64] = ((m0 - m2) * scale) << Qq;
 60            inout[128] = ((m1 - m3) * scale) << Qq;
 61            inout[192] = ((m1 + m3) * scale) << Qq;
 62            inout += 4;
 63        }
 64    }
 65    else
 66    {
 67        Qq = 2 - Qq;
 68        offset = 1 << (Qq - 1);
 69
 70        for (j = 0; j < 4; j++)
 71        {
 72            m0 = inout[0] + inout[64];
 73            m1 = inout[0] - inout[64];
 74            m2 = inout[128] + inout[192];
 75            m3 = inout[128] - inout[192];
 76
 77            inout[0] = (((m0 + m2) * scale + offset) >> Qq);
 78            inout[64] = (((m0 - m2) * scale + offset) >> Qq);
 79            inout[128] = (((m1 - m3) * scale + offset) >> Qq);
 80            inout[192] = (((m1 + m3) * scale + offset) >> Qq);
 81            inout += 4;
 82        }
 83    }
 84
 85    return ;
 86}
 87
 88/* see subclase 8.5.8 */
 89void itrans(int16 *block, uint8 *pred, uint8 *cur, int width)
 90{
 91    int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
 92    /* shall never exceed 16bit sign value, but we don't check */
 93    int i;           /* to save the cycles. */
 94    int16 *inout;
 95
 96    inout = block;
 97
 98    for (i = 4; i > 0; i--)
 99    {
100        e0 = inout[0] + inout[2];
101        e1 = inout[0] - inout[2];
102        e2 = (inout[1] >> 1) - inout[3];
103        e3 = inout[1] + (inout[3] >> 1);
104
105        inout[0] = e0 + e3;
106        inout[1] = e1 + e2;
107        inout[2] = e1 - e2;
108        inout[3] = e0 - e3;
109
110        inout += 16;
111    }
112
113    for (i = 4; i > 0; i--)
114    {
115        e0 = block[0] + block[32];
116        e1 = block[0] - block[32];
117        e2 = (block[16] >> 1) - block[48];
118        e3 = block[16] + (block[48] >> 1);
119
120        e0 += e3;
121        e3 = (e0 - (e3 << 1)); /* e0-e3 */
122        e1 += e2;
123        e2 = (e1 - (e2 << 1)); /* e1-e2 */
124        e0 += 32;
125        e1 += 32;
126        e2 += 32;
127        e3 += 32;
128#ifdef USE_PRED_BLOCK
129        e0 = pred[0] + (e0 >> 6);
130        if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
131        e1 = pred[20] + (e1 >> 6);
132        if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
133        e2 = pred[40] + (e2 >> 6);
134        if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
135        e3 = pred[60] + (e3 >> 6);
136        if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
137        *cur = e0;
138        *(cur += width) = e1;
139        *(cur += width) = e2;
140        cur[width] = e3;
141        cur -= (width << 1);
142        cur++;
143        pred++;
144#else
145        OSCL_UNUSED_ARG(pred);
146
147        e0 = *cur + (e0 >> 6);
148        if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
149        *cur = e0;
150        e1 = *(cur += width) + (e1 >> 6);
151        if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
152        *cur = e1;
153        e2 = *(cur += width) + (e2 >> 6);
154        if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
155        *cur = e2;
156        e3 = cur[width] + (e3 >> 6);
157        if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
158        cur[width] = e3;
159        cur -= (width << 1);
160        cur++;
161#endif
162        block++;
163    }
164
165    return ;
166}
167
168/* see subclase 8.5.8 */
169void ictrans(int16 *block, uint8 *pred, uint8 *cur, int width)
170{
171    int e0, e1, e2, e3; /* note, at every step of the calculation, these values */
172    /* shall never exceed 16bit sign value, but we don't check */
173    int i;           /* to save the cycles. */
174    int16 *inout;
175
176    inout = block;
177
178    for (i = 4; i > 0; i--)
179    {
180        e0 = inout[0] + inout[2];
181        e1 = inout[0] - inout[2];
182        e2 = (inout[1] >> 1) - inout[3];
183        e3 = inout[1] + (inout[3] >> 1);
184
185        inout[0] = e0 + e3;
186        inout[1] = e1 + e2;
187        inout[2] = e1 - e2;
188        inout[3] = e0 - e3;
189
190        inout += 16;
191    }
192
193    for (i = 4; i > 0; i--)
194    {
195        e0 = block[0] + block[32];
196        e1 = block[0] - block[32];
197        e2 = (block[16] >> 1) - block[48];
198        e3 = block[16] + (block[48] >> 1);
199
200        e0 += e3;
201        e3 = (e0 - (e3 << 1)); /* e0-e3 */
202        e1 += e2;
203        e2 = (e1 - (e2 << 1)); /* e1-e2 */
204        e0 += 32;
205        e1 += 32;
206        e2 += 32;
207        e3 += 32;
208#ifdef USE_PRED_BLOCK
209        e0 = pred[0] + (e0 >> 6);
210        if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
211        e1 = pred[12] + (e1 >> 6);
212        if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
213        e2 = pred[24] + (e2 >> 6);
214        if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
215        e3 = pred[36] + (e3 >> 6);
216        if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
217        *cur = e0;
218        *(cur += width) = e1;
219        *(cur += width) = e2;
220        cur[width] = e3;
221        cur -= (width << 1);
222        cur++;
223        pred++;
224#else
225        OSCL_UNUSED_ARG(pred);
226
227        e0 = *cur + (e0 >> 6);
228        if ((uint)e0 > 0xFF)   e0 = 0xFF & (~(e0 >> 31));  /* clip */
229        *cur = e0;
230        e1 = *(cur += width) + (e1 >> 6);
231        if ((uint)e1 > 0xFF)   e1 = 0xFF & (~(e1 >> 31));  /* clip */
232        *cur = e1;
233        e2 = *(cur += width) + (e2 >> 6);
234        if ((uint)e2 > 0xFF)   e2 = 0xFF & (~(e2 >> 31));  /* clip */
235        *cur = e2;
236        e3 = cur[width] + (e3 >> 6);
237        if ((uint)e3 > 0xFF)   e3 = 0xFF & (~(e3 >> 31));  /* clip */
238        cur[width] = e3;
239        cur -= (width << 1);
240        cur++;
241#endif
242        block++;
243    }
244
245    return ;
246}
247
248/* see subclause 8.5.7 */
249void ChromaDCTrans(int16 *block, int Qq, int Rq)
250{
251    int c00, c01, c10, c11;
252    int f0, f1, f2, f3;
253    int scale = dequant_coefres[Rq][0];
254
255    c00 = block[0] + block[4];
256    c01 = block[0] - block[4];
257    c10 = block[64] + block[68];
258    c11 = block[64] - block[68];
259
260    f0 = c00 + c10;
261    f1 = c01 + c11;
262    f2 = c00 - c10;
263    f3 = c01 - c11;
264
265    if (Qq >= 1)
266    {
267        Qq -= 1;
268        block[0] = (f0 * scale) << Qq;
269        block[4] = (f1 * scale) << Qq;
270        block[64] = (f2 * scale) << Qq;
271        block[68] = (f3 * scale) << Qq;
272    }
273    else
274    {
275        block[0] = (f0 * scale) >> 1;
276        block[4] = (f1 * scale) >> 1;
277        block[64] = (f2 * scale) >> 1;
278        block[68] = (f3 * scale) >> 1;
279    }
280
281    return ;
282}
283
284
285void copy_block(uint8 *pred, uint8 *cur, int width, int pred_pitch)
286{
287    uint32 temp;
288
289    temp = *((uint32*)pred);
290    pred += pred_pitch;
291    *((uint32*)cur) = temp;
292    cur += width;
293    temp = *((uint32*)pred);
294    pred += pred_pitch;
295    *((uint32*)cur) = temp;
296    cur += width;
297    temp = *((uint32*)pred);
298    pred += pred_pitch;
299    *((uint32*)cur) = temp;
300    cur += width;
301    temp = *((uint32*)pred);
302    *((uint32*)cur) = temp;
303
304    return ;
305}
306
307