144void FDCT32(
int *buf,
int *dest,
int offset,
int oddBlock,
int gb)
147 const int *cptr = dcttab;
148 int a0, a1, a2, a3, a4, a5, a6, a7;
149 int b0, b1, b2, b3, b4, b5, b6, b7;
159 for (i = 0; i < 32; i++)
174 for (i = 4; i > 0; i--) {
175 a0 = buf[0]; a7 = buf[7]; a3 = buf[3]; a4 = buf[4];
176 b0 = a0 + a7; b7 = MULSHIFT32(*cptr++, a0 - a7) << 1;
177 b3 = a3 + a4; b4 = MULSHIFT32(*cptr++, a3 - a4) << 3;
178 a0 = b0 + b3; a3 = MULSHIFT32(*cptr, b0 - b3) << 1;
179 a4 = b4 + b7; a7 = MULSHIFT32(*cptr++, b7 - b4) << 1;
181 a1 = buf[1]; a6 = buf[6]; a2 = buf[2]; a5 = buf[5];
182 b1 = a1 + a6; b6 = MULSHIFT32(*cptr++, a1 - a6) << 1;
183 b2 = a2 + a5; b5 = MULSHIFT32(*cptr++, a2 - a5) << 1;
184 a1 = b1 + b2; a2 = MULSHIFT32(*cptr, b1 - b2) << 2;
185 a5 = b5 + b6; a6 = MULSHIFT32(*cptr++, b6 - b5) << 2;
187 b0 = a0 + a1; b1 = MULSHIFT32(
COS4_0, a0 - a1) << 1;
188 b2 = a2 + a3; b3 = MULSHIFT32(
COS4_0, a3 - a2) << 1;
189 buf[0] = b0; buf[1] = b1;
190 buf[2] = b2 + b3; buf[3] = b3;
192 b4 = a4 + a5; b5 = MULSHIFT32(
COS4_0, a4 - a5) << 1;
193 b6 = a6 + a7; b7 = MULSHIFT32(
COS4_0, a7 - a6) << 1;
195 buf[4] = b4 + b6; buf[5] = b5 + b7;
196 buf[6] = b5 + b6; buf[7] = b7;
203 d = dest + 64*16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 :
VBUF_LENGTH);
204 s = buf[ 0]; d[0] = d[8] = s;
209 s = buf[ 1]; d[0] = d[8] = s; d += 64;
211 tmp = buf[25] + buf[29];
212 s = buf[17] + tmp; d[0] = d[8] = s; d += 64;
213 s = buf[ 9] + buf[13]; d[0] = d[8] = s; d += 64;
214 s = buf[21] + tmp; d[0] = d[8] = s; d += 64;
216 tmp = buf[29] + buf[27];
217 s = buf[ 5]; d[0] = d[8] = s; d += 64;
218 s = buf[21] + tmp; d[0] = d[8] = s; d += 64;
219 s = buf[13] + buf[11]; d[0] = d[8] = s; d += 64;
220 s = buf[19] + tmp; d[0] = d[8] = s; d += 64;
222 tmp = buf[27] + buf[31];
223 s = buf[ 3]; d[0] = d[8] = s; d += 64;
224 s = buf[19] + tmp; d[0] = d[8] = s; d += 64;
225 s = buf[11] + buf[15]; d[0] = d[8] = s; d += 64;
226 s = buf[23] + tmp; d[0] = d[8] = s; d += 64;
229 s = buf[ 7]; d[0] = d[8] = s; d += 64;
230 s = buf[23] + tmp; d[0] = d[8] = s; d += 64;
231 s = buf[15]; d[0] = d[8] = s; d += 64;
232 s = tmp; d[0] = d[8] = s;
235 d = dest + 16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 :
VBUF_LENGTH);
237 s = buf[ 1]; d[0] = d[8] = s; d += 64;
239 tmp = buf[30] + buf[25];
240 s = buf[17] + tmp; d[0] = d[8] = s; d += 64;
241 s = buf[14] + buf[ 9]; d[0] = d[8] = s; d += 64;
242 s = buf[22] + tmp; d[0] = d[8] = s; d += 64;
243 s = buf[ 6]; d[0] = d[8] = s; d += 64;
245 tmp = buf[26] + buf[30];
246 s = buf[22] + tmp; d[0] = d[8] = s; d += 64;
247 s = buf[10] + buf[14]; d[0] = d[8] = s; d += 64;
248 s = buf[18] + tmp; d[0] = d[8] = s; d += 64;
249 s = buf[ 2]; d[0] = d[8] = s; d += 64;
251 tmp = buf[28] + buf[26];
252 s = buf[18] + tmp; d[0] = d[8] = s; d += 64;
253 s = buf[12] + buf[10]; d[0] = d[8] = s; d += 64;
254 s = buf[20] + tmp; d[0] = d[8] = s; d += 64;
255 s = buf[ 4]; d[0] = d[8] = s; d += 64;
257 tmp = buf[24] + buf[28];
258 s = buf[20] + tmp; d[0] = d[8] = s; d += 64;
259 s = buf[ 8] + buf[12]; d[0] = d[8] = s; d += 64;
260 s = buf[16] + tmp; d[0] = d[8] = s;
267 d = dest + 64*16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 :
VBUF_LENGTH);
268 s = d[0];
CLIP_2N(s, 31 - es); d[0] = d[8] = (s << es);
271 for (i = 16; i <= 31; i++) {
272 s = d[0];
CLIP_2N(s, 31 - es); d[0] = d[8] = (s << es); d += 64;
275 d = dest + 16 + ((offset - oddBlock) & 7) + (oddBlock ? 0 :
VBUF_LENGTH);
276 for (i = 15; i >= 0; i--) {
277 s = d[0];
CLIP_2N(s, 31 - es); d[0] = d[8] = (s << es); d += 64;