PS2SDK
PS2 Homebrew Libraries
Loading...
Searching...
No Matches
libmpeg_core_c.c
1/*
2# _____ ___ ____ ___ ____
3# ____| | ____| | | |____|
4# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
5#-----------------------------------------------------------------------
6# Copyright (c) 2006-2007 Eugene Plotnikov <e-plotnikov@operamail.com>
7# Licenced under Academic Free License version 2.0
8# Review ps2sdk README & LICENSE files for further details.
9# Based on reference software of MSSG
10*/
11
12#include <stdlib.h>
13#include <stdint.h>
14#include <stdio.h>
15#include <string.h>
16#include <kernel.h>
17#include <ee_regs.h>
18
19#include "libmpeg.h"
20#include "libmpeg_internal.h"
21
22static u8 s_DMAPack[128];
23static u32 s_DataBuf[2];
24static int ( * s_SetDMA_func) ( void* );
25static void * s_SetDMA_arg;
26static u32 s_IPUState[8];
27static int* s_pEOF;
28static int s_Sema;
29static u32 s_CSCParam[3];
30static int s_CSCID;
31static u8 s_CSCFlag;
32
33extern s32 _mpeg_dmac_handler( s32 channel, void *arg, void *addr );
34
35void _MPEG_Initialize ( _MPEGContext* arg0, int ( * arg1) ( void* ), void* arg2, int* arg3)
36{
37 (void)arg0;
38
39 *R_EE_IPU_CTRL = 0x40000000;
40 while ((s32)*R_EE_IPU_CTRL < 0);
41 *R_EE_IPU_CMD = 0;
42 while ((s32)*R_EE_IPU_CTRL < 0);
43 *R_EE_IPU_CTRL |= 0x800000;
44 *R_EE_D3_QWC = 0;
45 *R_EE_D4_QWC = 0;
46 s_SetDMA_func = arg1;
47 s_SetDMA_arg = arg2;
48 s_pEOF = arg3;
49 *s_pEOF = 0;
50 // TODO: check if this is the correct options for the semaphore
51 ee_sema_t sema;
52 memset(&sema, 0, sizeof(sema));
53 sema.init_count = 0;
54 sema.max_count = 1;
55 sema.option = 0;
56 s_Sema = CreateSema(&sema);
57 s_CSCID = AddDmacHandler2(3, _mpeg_dmac_handler, 0, &s_CSCParam);
58 s_DataBuf[0] = 0;
59 s_DataBuf[1] = 0;
60}
61
62void _MPEG_Destroy ( void )
63{
64 while (s_CSCFlag != 0);
65 RemoveDmacHandler(3, s_CSCID);
66 DeleteSema(s_Sema);
67}
68
69void _ipu_suspend ( void )
70{
71 int eie;
72 do
73 {
74 DI();
75 EE_SYNCP();
76 asm volatile ("mfc0\t%0, $12" : "=r" (eie));
77 eie &= 0x10000;
78 }
79 while (eie != 0);
80 *R_EE_D4_CHCR &= ~0x100;
81 *R_EE_D_ENABLEW = *R_EE_D_ENABLER & ~0x100;
82 EI();
83 s_IPUState[0] = *R_EE_D4_CHCR;
84 s_IPUState[1] = *R_EE_D4_MADR;
85 s_IPUState[2] = *R_EE_D4_QWC;
86 while ((*R_EE_IPU_CTRL & 0xf0) != 0);
87 do
88 {
89 DI();
90 EE_SYNCP();
91 asm volatile ("mfc0\t%0, $12" : "=r" (eie));
92 eie &= 0x10000;
93 }
94 while (eie != 0);
95 *R_EE_D3_CHCR &= ~0x100;
96 *R_EE_D_ENABLEW = *R_EE_D_ENABLER & ~0x100;
97 EI();
98 s_IPUState[3] = *R_EE_D3_CHCR;
99 s_IPUState[4] = *R_EE_D3_MADR;
100 s_IPUState[5] = *R_EE_D3_QWC;
101 s_IPUState[6] = *R_EE_IPU_CTRL;
102 s_IPUState[7] = *R_EE_IPU_BP;
103}
104
105void _MPEG_Suspend ( void )
106{
107 while (s_CSCFlag != 0);
108 return _ipu_suspend();
109}
110
111void _ipu_resume ( void )
112{
113 if (s_IPUState[5] != 0)
114 {
115 *R_EE_D3_MADR = s_IPUState[4];
116 *R_EE_D3_QWC = s_IPUState[5];
117 *R_EE_D3_CHCR = s_IPUState[3] | 0x100;
118 }
119 u32 var2 = (s_IPUState[7] >> 0x10 & 3) + (s_IPUState[7] >> 8 & 0xf);
120 u32 var3 = (s_IPUState[2]) + var2;
121 if (var3 != 0)
122 {
123 *R_EE_IPU_CMD = (s_IPUState[7]) & 0x7f;
124 while (((*R_EE_IPU_CTRL) & 0x80000000) != 0);
125 *R_EE_IPU_CTRL = s_IPUState[6];
126 *R_EE_D4_MADR = (s_IPUState[1]) - var2 * 0x10;
127 *R_EE_D4_QWC = var3;
128 *R_EE_D4_CHCR = s_IPUState[0] | 0x100;
129 }
130}
131
/* Public resume entry point; restores what _ipu_suspend saved. */
void _MPEG_Resume ( void )
{
    _ipu_resume();
}
136
137s32 _mpeg_dmac_handler( s32 channel, void *arg, void *addr )
138{
139 (void)channel;
140 (void)addr;
141
142 u32 *carg = arg;
143 u32 var1 = carg[2];
144 if (var1 == 0)
145 {
146 iDisableDmac(3);
147 iSignalSema(s_Sema);
148 s_CSCFlag = 0;
149 return ~0;
150 }
151 u32 var2 = var1;
152 if (0x3fe < (int)var1)
153 {
154 var2 = 0x3ff;
155 }
156 *R_EE_D3_MADR = carg[1];
157 *R_EE_D4_MADR = carg[0];
158 carg[0] += var2 * 0x180;
159 carg[1] += var2 * 0x400;
160 carg[2] = var1 - var2;
161 *R_EE_D3_QWC = var2 * 0x400 >> 4;
162 *R_EE_D4_QWC = var2 * 0x180 >> 4;
163 *R_EE_D4_CHCR = 0x101;
164 *R_EE_IPU_CMD = var2 | 0x70000000;
165 *R_EE_D3_CHCR = 0x100;
166 return ~0;
167}
168
169int _MPEG_CSCImage ( void* arg0, void* arg1, int arg2 )
170{
171 _ipu_suspend();
172 *R_EE_IPU_CMD = 0;
173 *R_EE_D_STAT = 8;
174 int var1 = arg2;
175 if (0x3fe < var1)
176 {
177 var1 = 0x3ff;
178 }
179 s_CSCParam[2] = arg2 - var1;
180 *R_EE_D3_MADR = (u32)arg1;
181 *R_EE_D4_MADR = (u32)arg0;
182 s_CSCParam[0] = (int)arg0 + var1 * 0x180;
183 s_CSCParam[1] = (int)arg1 + var1 * 0x400;
184 *R_EE_D4_QWC = var1 * 0x180;
185 *R_EE_D3_QWC = var1 * 0x400;
186 EnableDmac(3);
187 var1 |= 0x70000000;
188 *R_EE_D4_CHCR = 0x101;
189 *R_EE_IPU_CMD = var1;
190 *R_EE_D3_CHCR = 0x100;
191 s_CSCFlag = 68; // TODO: validate this
192 WaitSema(s_Sema);
193 _ipu_resume();
194 return var1;
195}
196
197void _ipu_sync( void )
198{
199 if ((s32)*R_EE_IPU_CTRL >= 0)
200 {
201 return;
202 }
203 while (1)
204 {
205 while (1)
206 {
207 if ((*R_EE_IPU_CTRL & 0x4000) != 0)
208 {
209 return;
210 }
211 u32 var0 = *R_EE_IPU_BP;
212 if ((int)((((var0 & 0xff00) >> 1) + ((var0 & 0x30000) >> 9)) - (var0 & 0x7f)) < 0x20)
213 {
214 break;
215 }
216LAB_0001041c:
217 if ((u32)(-1) < *R_EE_IPU_CTRL)
218 {
219 return;
220 }
221 }
222
223 if ((int)*R_EE_D4_QWC < 1)
224 {
225 if (s_SetDMA_func(s_SetDMA_arg) == 0)
226 {
227 *s_pEOF = 0x20;
228 s_DataBuf[0] = 0x20;
229 s_DataBuf[1] = 0x1b7;
230 }
231 goto LAB_0001041c;
232 }
233 }
234}
235
236u32 _ipu_sync_data( void )
237{
238 if ((u64)(-1) < *R_EE_IPU_CMD)
239 {
240 return *R_EE_IPU_BP;
241 }
242 u32 var3 = *R_EE_IPU_BP;
243 do
244 {
245 while (0x1f < (((var3 & 0xff00) >> 1) + ((var3 & 0x30000) >> 9)) - (var3 & 0x7f))
246 {
247LAB_000104b8:
248 if ((u64)(-1) < *R_EE_IPU_CMD)
249 {
250 return var3;
251 }
252 var3 = *R_EE_IPU_BP;
253 }
254 if (*R_EE_D4_QWC < 1)
255 {
256 if (s_SetDMA_func(s_SetDMA_arg) == 0)
257 {
258 *s_pEOF = 0x20;
259 return var3;
260 }
261 goto LAB_000104b8;
262 }
263 var3 = *R_EE_IPU_BP;
264 }
265 while (1);
266}
267
268unsigned int _ipu_get_bits( unsigned int arg0 )
269{
270 _ipu_sync();
271 if (s_DataBuf[0] < arg0)
272 {
273 *R_EE_IPU_CMD = 0x40000000;
274 s_DataBuf[1] = _ipu_sync_data();
275 s_DataBuf[0] = 0x20;
276 }
277 *R_EE_IPU_CMD = arg0 | 0x40000000;
278 u32 var3 = s_DataBuf[1] >> (-arg0 & 0x1f);
279 s_DataBuf[0] = s_DataBuf[0] - arg0;
280 s_DataBuf[1] = s_DataBuf[1] << (arg0 & 0x1f);
281 return var3;
282}
283
/* Public wrapper: reads and consumes arg0 bits from the stream. */
unsigned int _MPEG_GetBits ( unsigned int arg0 )
{
    const unsigned int bits = _ipu_get_bits(arg0);
    return bits;
}
288
289unsigned int _ipu_show_bits ( unsigned int arg0 )
290{
291 if (s_DataBuf[0] < arg0)
292 {
293 _ipu_sync();
294 *R_EE_IPU_CMD = 0x40000000;
295 s_DataBuf[1] = _ipu_sync_data();
296 s_DataBuf[0] = 0x20;
297 }
298 return s_DataBuf[1] >> (-arg0 & 0x1f);
299}
300
/* Public wrapper: peeks at the next arg0 bits without consuming them. */
unsigned int _MPEG_ShowBits ( unsigned int arg0 )
{
    const unsigned int bits = _ipu_show_bits(arg0);
    return bits;
}
305
306void _ipu_align_bits( void )
307{
308 _ipu_sync();
309 u32 var3 = -(*R_EE_IPU_BP & 7) & 7;
310 if (var3 != 0)
311 {
312 _MPEG_GetBits(var3);
313 }
314}
315
/* Public wrapper: advances the stream to the next byte boundary. */
void _MPEG_AlignBits ( void )
{
    _ipu_align_bits();
}
320
/*
 * Byte-aligns the stream, then discards one byte at a time until the next 24
 * bits equal 0x000001.  Returns the next 32 bits without consuming them.
 */
unsigned int _MPEG_NextStartCode ( void )
{
    _MPEG_AlignBits();
    for (;;)
    {
        if (_MPEG_ShowBits(0x18) == 1)
        {
            break;
        }
        _MPEG_GetBits(8);
    }
    return _MPEG_ShowBits(0x20);
}
330
331void _MPEG_SetDefQM ( int arg0 )
332{
333 (void)arg0;
334
335 _ipu_suspend();
336 *R_EE_IPU_CMD = 0;
337 while (((*R_EE_IPU_CTRL) & 0x80000000) != 0);
338 R_EE_IPU_in_FIFO[0] = 0x13101008;
339 R_EE_IPU_in_FIFO[1] = 0x16161310;
340 R_EE_IPU_in_FIFO[2] = 0x16161616;
341 R_EE_IPU_in_FIFO[3] = 0x1B1A181A;
342 R_EE_IPU_in_FIFO[0] = 0x1A1A1B1B;
343 R_EE_IPU_in_FIFO[1] = 0x1B1B1A1A;
344 R_EE_IPU_in_FIFO[2] = 0x1D1D1D1B;
345 R_EE_IPU_in_FIFO[3] = 0x1D222222;
346 R_EE_IPU_in_FIFO[0] = 0x1B1B1D1D;
347 R_EE_IPU_in_FIFO[1] = 0x20201D1D;
348 R_EE_IPU_in_FIFO[2] = 0x26252222;
349 R_EE_IPU_in_FIFO[3] = 0x22232325;
350 R_EE_IPU_in_FIFO[0] = 0x28262623;
351 R_EE_IPU_in_FIFO[1] = 0x30302828;
352 R_EE_IPU_in_FIFO[2] = 0x38382E2E;
353 R_EE_IPU_in_FIFO[3] = 0x5345453A;
354 *R_EE_IPU_CMD = 0x50000000;
355 while (((*R_EE_IPU_CTRL) & 0x80000000) != 0);
356 R_EE_IPU_in_FIFO[0] = 0x10101010;
357 R_EE_IPU_in_FIFO[1] = 0x10101010;
358 R_EE_IPU_in_FIFO[2] = 0x10101010;
359 R_EE_IPU_in_FIFO[3] = 0x10101010;
360 R_EE_IPU_in_FIFO[0] = 0x10101010;
361 R_EE_IPU_in_FIFO[1] = 0x10101010;
362 R_EE_IPU_in_FIFO[2] = 0x10101010;
363 R_EE_IPU_in_FIFO[3] = 0x10101010;
364 R_EE_IPU_in_FIFO[0] = 0x10101010;
365 R_EE_IPU_in_FIFO[1] = 0x10101010;
366 R_EE_IPU_in_FIFO[2] = 0x10101010;
367 R_EE_IPU_in_FIFO[3] = 0x10101010;
368 R_EE_IPU_in_FIFO[0] = 0x10101010;
369 R_EE_IPU_in_FIFO[1] = 0x10101010;
370 R_EE_IPU_in_FIFO[2] = 0x10101010;
371 R_EE_IPU_in_FIFO[3] = 0x10101010;
372 *R_EE_IPU_CMD = 0x58000000;
373 while (((*R_EE_IPU_CTRL) & 0x80000000) != 0);
374 _MPEG_Resume();
375}
376
377void _MPEG_SetQM ( int arg0 )
378{
379 _ipu_sync();
380 *R_EE_IPU_CMD = arg0 << 0x1b | 0x50000000;
381 s_DataBuf[0] = 0;
382}
383
384int _MPEG_GetMBAI ( void )
385{
386 _ipu_sync();
387 int var5 = 0;
388 u32 var4 = 0;
389 while (1)
390 {
391 *R_EE_IPU_CMD = 0x30000000;
392 var4 = _ipu_sync_data();
393 var4 &= 0xffff;
394 if (var4 == 0)
395 {
396 return 0;
397 }
398 if (var4 < 0x22)
399 {
400 break;
401 }
402 if (var4 == 0x23)
403 {
404 var5 += 0x21;
405 }
406 }
407 s_DataBuf[0] = 0x20;
408 s_DataBuf[1] = *R_EE_IPU_TOP;
409 return var5 + (int)var4;
410}
411
412int _MPEG_GetMBType ( void )
413{
414 _ipu_sync();
415 *R_EE_IPU_CMD = 0x34000000;
416 u32 var4 = _ipu_sync_data();
417 if (var4 != 0)
418 {
419 var4 &= 0xffff;
420 s_DataBuf[0] = 0x20;
421 s_DataBuf[1] = *R_EE_IPU_TOP;
422 }
423 return (int)var4;
424}
425
426int _MPEG_GetMotionCode ( void )
427{
428 _ipu_sync();
429 *R_EE_IPU_CMD = 0x38000000;
430 u32 var4 = _ipu_sync_data();
431 if (var4 == 0)
432 {
433 var4 = 0x8000;
434 }
435 else
436 {
437 var4 &= 0xffff;
438 s_DataBuf[0] = 0x20;
439 s_DataBuf[1] = *R_EE_IPU_TOP;
440 }
441 return (int)var4;
442}
443
444int _MPEG_GetDMVector ( void )
445{
446 _ipu_sync();
447 *R_EE_IPU_CMD = 0x3c000000;
448 u32 var4 = _ipu_sync_data();
449 var4 &= 0xffff;
450 s_DataBuf[0] = 0x20;
451 s_DataBuf[1] = *R_EE_IPU_TOP;
452 return (int)var4;
453}
454
455void _MPEG_SetIDCP ( void )
456{
457 unsigned int var1 = _MPEG_GetBits(2);
458 *R_EE_IPU_CTRL = (*R_EE_IPU_CTRL & ~0x30000) | var1 << 0x10;
459}
460
461void _MPEG_SetQSTIVFAS ( void )
462{
463 unsigned int var1 = _MPEG_GetBits(1);
464 unsigned int var2 = _MPEG_GetBits(1);
465 unsigned int var3 = _MPEG_GetBits(1);
466 *R_EE_IPU_CTRL = (*R_EE_IPU_CTRL & ~0x700000) | var1 << 0x16 | var2 << 0x15 | var3 << 0x14;
467}
468
469void _MPEG_SetPCT ( unsigned int arg0 )
470{
471 u32 var3 = *R_EE_IPU_CTRL;
472 if (-1 < (int)var3)
473 {
474 *R_EE_IPU_CTRL = (var3 & ~0x7000000) | arg0 << 0x18;
475 return;
476 }
477 // TODO: validate. Bugged and in wrong place?
478 _ipu_sync();
479}
480
481void _MPEG_BDEC ( int arg0, int arg1, int arg2, int arg3, void* arg4 )
482{
483 *R_EE_D3_MADR = ((uint32_t)arg4 & ~0xf0000000) | 0x80000000;
484 *R_EE_D3_QWC = 0x30;
485 *R_EE_D3_CHCR = 0x100;
486 _ipu_sync();
487 *R_EE_IPU_CMD = arg0 << 0x1b | 0x20000000 | arg1 << 0x1a | arg2 << 0x19 | arg3 << 0x10;
488}
489
490int _MPEG_WaitBDEC ( void )
491{
492 while (1)
493 {
494 _ipu_sync();
495 if ((*s_pEOF != 0))
496 {
497 break;
498 }
499 u32 var1 = *R_EE_D3_QWC;
500 if ((*R_EE_IPU_CTRL & 0x4000) != 0)
501 {
502 break;
503 }
504 if (var1 == 0)
505 {
506 s_DataBuf[0] = 0x20;
507 s_DataBuf[1] = *R_EE_IPU_TOP;
508 return 1;
509 }
510 }
511 _ipu_suspend();
512 // XXX: $t1 is not set in this function, so probably from another function?
513 *R_EE_IPU_CTRL = 0x40000000;
514 _ipu_resume();
515 int eie;
516 do
517 {
518 DI();
519 EE_SYNCP();
520 asm volatile ("mfc0\t%0, $12" : "=r" (eie));
521 eie &= 0x10000;
522 }
523 while (eie != 0);
524 *R_EE_D_ENABLEW = *R_EE_D_ENABLER | 0x10000;
525 *R_EE_D3_CHCR = 0;
526 *R_EE_D_ENABLEW = *R_EE_D_ENABLER & ~0x10000;
527 EI();
528 *R_EE_D3_QWC = 0;
529 s_DataBuf[0] = 0;
530 s_DataBuf[1] = 0;
531 return 0;
532}
533
534void _MPEG_dma_ref_image ( _MPEGMacroBlock8* arg0, _MPEGMotion* arg1, s64 arg2, int arg3 )
535{
536 u8* var00 = (u8*)arg0;
537 _MPEGMotion* var01 = (_MPEGMotion*)arg1;
538 u32 var3 = 4;
539 if (arg2 < 5)
540 {
541 var3 = arg2;
542 }
543 u64 var5 = (uint64_t)var3;
544 if (arg2 >> 0x1f < 1)
545 {
546 // TODO: correct implementation of CONCAT44?
547 var5 = ((u64)(arg2 >> 0x1f) << 32) | var3;
548 }
549 if (0 < var5)
550 {
551 while ((*R_EE_D9_CHCR & 0x100) != 0);
552 *R_EE_D9_QWC = 0;
553 *R_EE_D9_SADR = (u32)arg0 & ~0xf0000000;
554 *R_EE_D9_SADR = (u32)&s_DMAPack;
555 u32 *var2 = (u32 *)((u32)&s_DMAPack | 0x20000000);
556 u32 *var6;
557 do
558 {
559 var6 = var2;
560 u8 * var1 = var01->m_pSrc;
561 var5 -= 1;
562 var6[0] = 0x30000030;
563 var6[1] = (u32)var1;
564 var6[4] = 0x30000030;
565 var6[5] = (u32)var1 + arg3 * 0x180;
566 var01->m_pSrc = var00;
567 var00 += 4;
568 var2 = var6 + 2; // + 8 bytewise
569 var01 += 1;
570 }
571 while (var5 != 0);
572 var6[4] = 0x30;
573 var01 += 1;
574 var01->MC_Luma = (void *)0x0;
575 EE_SYNCL();
576 *R_EE_D9_CHCR = 0x105;
577 }
578}
579
580void _MPEG_put_block_fr(_MPEGMotions *a1)
581{
582 u8 *m_pMBDstY;
583 u8 *m_pSrc;
584 int count;
585 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
586
587 __asm__
588 (
589 "pnor %[reg1], $zero, $zero\n"
590 : [reg1] "=r"(reg1)
591 );
592 m_pMBDstY = a1->m_pMBDstY;
593 m_pSrc = a1->m_pSrc;
594 count = 6;
595 __asm__
596 (
597 "psrlh %[reg1], %[reg1], 8\n"
598 : [reg1] "+r"(reg1)
599 );
600 do
601 {
602 reg2 = ((u128 *)m_pSrc)[0];
603 reg9 = ((u128 *)m_pSrc)[1];
604 reg8 = ((u128 *)m_pSrc)[2];
605 reg7 = ((u128 *)m_pSrc)[3];
606 count -= 1;
607 reg3 = ((u128 *)m_pSrc)[4];
608 reg4 = ((u128 *)m_pSrc)[5];
609 reg5 = ((u128 *)m_pSrc)[6];
610 reg6 = ((u128 *)m_pSrc)[7];
611 m_pSrc += 128;
612 __asm__
613 (
614 "pmaxh %[reg2], $zero, %[reg2]\n"
615 "pmaxh %[reg9], $zero, %[reg9]\n"
616 "pmaxh %[reg8], $zero, %[reg8]\n"
617 "pmaxh %[reg7], $zero, %[reg7]\n"
618 "pmaxh %[reg3], $zero, %[reg3]\n"
619 "pmaxh %[reg4], $zero, %[reg4]\n"
620 "pmaxh %[reg5], $zero, %[reg5]\n"
621 "pmaxh %[reg6], $zero, %[reg6]\n"
622 "pminh %[reg2], %[reg1], %[reg2]\n"
623 "pminh %[reg9], %[reg1], %[reg9]\n"
624 "pminh %[reg8], %[reg1], %[reg8]\n"
625 "pminh %[reg7], %[reg1], %[reg7]\n"
626 "pminh %[reg3], %[reg1], %[reg3]\n"
627 "pminh %[reg4], %[reg1], %[reg4]\n"
628 "pminh %[reg5], %[reg1], %[reg5]\n"
629 "pminh %[reg6], %[reg1], %[reg6]\n"
630 "ppacb %[reg2], %[reg9], %[reg2]\n"
631 "ppacb %[reg8], %[reg7], %[reg8]\n"
632 "ppacb %[reg3], %[reg4], %[reg3]\n"
633 "ppacb %[reg5], %[reg6], %[reg5]\n"
634 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
635 : [reg1] "r"(reg1)
636 );
637 ((u128 *)m_pMBDstY)[0] = reg2;
638 ((u128 *)m_pMBDstY)[1] = reg8;
639 ((u128 *)m_pMBDstY)[2] = reg3;
640 ((u128 *)m_pMBDstY)[3] = reg5;
641 m_pMBDstY += 64;
642 }
643 while ( count > 0 );
644}
645
646void _MPEG_put_block_fl(_MPEGMotions *a1)
647{
648 u8 *m_pMBDstY;
649 u8 *m_pSrc;
650 int count;
651 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
652
653 __asm__
654 (
655 "pnor %[reg1], $zero, $zero\n"
656 : [reg1] "=r"(reg1)
657 );
658 m_pMBDstY = a1->m_pMBDstY;
659 m_pSrc = a1->m_pSrc;
660 count = 4;
661 __asm__
662 (
663 "psrlh %[reg1], %[reg1], 8\n"
664 : [reg1] "+r"(reg1)
665 );
666 do
667 {
668 reg2 = ((u128 *)m_pSrc)[0x00];
669 reg9 = ((u128 *)m_pSrc)[0x01];
670 reg8 = ((u128 *)m_pSrc)[0x02];
671 reg7 = ((u128 *)m_pSrc)[0x03];
672 count -= 1;
673 reg3 = ((u128 *)m_pSrc)[0x10];
674 reg4 = ((u128 *)m_pSrc)[0x11];
675 reg5 = ((u128 *)m_pSrc)[0x12];
676 reg6 = ((u128 *)m_pSrc)[0x13];
677 m_pSrc += 64;
678 __asm__
679 (
680 "pmaxh %[reg2], $zero, %[reg2]\n"
681 "pmaxh %[reg9], $zero, %[reg9]\n"
682 "pmaxh %[reg8], $zero, %[reg8]\n"
683 "pmaxh %[reg7], $zero, %[reg7]\n"
684 "pmaxh %[reg3], $zero, %[reg3]\n"
685 "pmaxh %[reg4], $zero, %[reg4]\n"
686 "pmaxh %[reg5], $zero, %[reg5]\n"
687 "pmaxh %[reg6], $zero, %[reg6]\n"
688 "pminh %[reg2], %[reg1], %[reg2]\n"
689 "pminh %[reg9], %[reg1], %[reg9]\n"
690 "pminh %[reg8], %[reg1], %[reg8]\n"
691 "pminh %[reg7], %[reg1], %[reg7]\n"
692 "pminh %[reg3], %[reg1], %[reg3]\n"
693 "pminh %[reg4], %[reg1], %[reg4]\n"
694 "pminh %[reg5], %[reg1], %[reg5]\n"
695 "pminh %[reg6], %[reg1], %[reg6]\n"
696 "ppacb %[reg2], %[reg9], %[reg2]\n"
697 "ppacb %[reg8], %[reg7], %[reg8]\n"
698 "ppacb %[reg3], %[reg4], %[reg3]\n"
699 "ppacb %[reg5], %[reg6], %[reg5]\n"
700 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
701 : [reg1] "r"(reg1)
702 );
703 ((u128 *)m_pMBDstY)[0] = reg2;
704 ((u128 *)m_pMBDstY)[1] = reg3;
705 ((u128 *)m_pMBDstY)[2] = reg8;
706 ((u128 *)m_pMBDstY)[3] = reg5;
707 m_pMBDstY += 64;
708 }
709 while ( count > 0 );
710 count += 2;
711 do
712 {
713 reg2 = ((u128 *)m_pSrc)[0x10];
714 reg9 = ((u128 *)m_pSrc)[0x11];
715 reg8 = ((u128 *)m_pSrc)[0x12];
716 reg7 = ((u128 *)m_pSrc)[0x13];
717 count -= 1;
718 reg3 = ((u128 *)m_pSrc)[0x14];
719 reg4 = ((u128 *)m_pSrc)[0x15];
720 reg5 = ((u128 *)m_pSrc)[0x16];
721 reg6 = ((u128 *)m_pSrc)[0x17];
722 m_pSrc += 128;
723 __asm__
724 (
725 "pmaxh %[reg2], $zero, %[reg2]\n"
726 "pmaxh %[reg9], $zero, %[reg9]\n"
727 "pmaxh %[reg8], $zero, %[reg8]\n"
728 "pmaxh %[reg7], $zero, %[reg7]\n"
729 "pmaxh %[reg3], $zero, %[reg3]\n"
730 "pmaxh %[reg4], $zero, %[reg4]\n"
731 "pmaxh %[reg5], $zero, %[reg5]\n"
732 "pmaxh %[reg6], $zero, %[reg6]\n"
733 "pminh %[reg2], %[reg1], %[reg2]\n"
734 "pminh %[reg9], %[reg1], %[reg9]\n"
735 "pminh %[reg8], %[reg1], %[reg8]\n"
736 "pminh %[reg7], %[reg1], %[reg7]\n"
737 "pminh %[reg3], %[reg1], %[reg3]\n"
738 "pminh %[reg4], %[reg1], %[reg4]\n"
739 "pminh %[reg5], %[reg1], %[reg5]\n"
740 "pminh %[reg6], %[reg1], %[reg6]\n"
741 "ppacb %[reg2], %[reg9], %[reg2]\n"
742 "ppacb %[reg8], %[reg7], %[reg8]\n"
743 "ppacb %[reg3], %[reg4], %[reg3]\n"
744 "ppacb %[reg5], %[reg6], %[reg5]\n"
745 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
746 : [reg1] "r"(reg1)
747 );
748 ((u128 *)m_pMBDstY)[0] = reg2;
749 ((u128 *)m_pMBDstY)[1] = reg8;
750 ((u128 *)m_pMBDstY)[2] = reg3;
751 ((u128 *)m_pMBDstY)[3] = reg5;
752 m_pMBDstY += 64;
753 }
754 while ( count > 0 );
755}
756
757void _MPEG_put_block_il(_MPEGMotions *a1)
758{
759 u8 *m_pMBDstY;
760 u8 *m_pSrc;
761 int count;
762 u8 *m_pMBDstCbCr;
763 u8 *v28;
764 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
765
766 __asm__
767 (
768 "pnor %[reg1], $zero, $zero\n"
769 : [reg1] "=r"(reg1)
770 );
771 m_pMBDstY = a1->m_pMBDstY;
772 m_pSrc = a1->m_pSrc;
773 count = 4;
774 v28 = &m_pMBDstY[a1->m_Stride];
775 __asm__
776 (
777 "psrlh %[reg1], %[reg1], 8\n"
778 : [reg1] "+r"(reg1)
779 );
780 do
781 {
782 reg2 = ((u128 *)m_pSrc)[0x00];
783 reg9 = ((u128 *)m_pSrc)[0x01];
784 reg8 = ((u128 *)m_pSrc)[0x02];
785 reg7 = ((u128 *)m_pSrc)[0x03];
786 count -= 1;
787 reg3 = ((u128 *)m_pSrc)[0x10];
788 reg4 = ((u128 *)m_pSrc)[0x11];
789 reg5 = ((u128 *)m_pSrc)[0x12];
790 reg6 = ((u128 *)m_pSrc)[0x13];
791 m_pSrc += 64;
792 __asm__
793 (
794 "pmaxh %[reg2], $zero, %[reg2]\n"
795 "pmaxh %[reg9], $zero, %[reg9]\n"
796 "pmaxh %[reg8], $zero, %[reg8]\n"
797 "pmaxh %[reg7], $zero, %[reg7]\n"
798 "pmaxh %[reg3], $zero, %[reg3]\n"
799 "pmaxh %[reg4], $zero, %[reg4]\n"
800 "pmaxh %[reg5], $zero, %[reg5]\n"
801 "pmaxh %[reg6], $zero, %[reg6]\n"
802 "pminh %[reg2], %[reg1], %[reg2]\n"
803 "pminh %[reg9], %[reg1], %[reg9]\n"
804 "pminh %[reg8], %[reg1], %[reg8]\n"
805 "pminh %[reg7], %[reg1], %[reg7]\n"
806 "pminh %[reg3], %[reg1], %[reg3]\n"
807 "pminh %[reg4], %[reg1], %[reg4]\n"
808 "pminh %[reg5], %[reg1], %[reg5]\n"
809 "pminh %[reg6], %[reg1], %[reg6]\n"
810 "ppacb %[reg2], %[reg9], %[reg2]\n"
811 "ppacb %[reg8], %[reg7], %[reg8]\n"
812 "ppacb %[reg3], %[reg4], %[reg3]\n"
813 "ppacb %[reg5], %[reg6], %[reg5]\n"
814 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
815 : [reg1] "r"(reg1)
816 );
817 ((u128 *)m_pMBDstY)[0x00] = reg2;
818 ((u128 *)m_pMBDstY)[0x02] = reg8;
819 m_pMBDstY += 64;
820 ((u128 *)v28)[0x00] = reg3;
821 ((u128 *)v28)[0x02] = reg5;
822 v28 += 64;
823 }
824 while ( count > 0 );
825 m_pMBDstCbCr = a1->m_pMBDstCbCr;
826 count = 2;
827 v28 = &m_pMBDstCbCr[a1->m_Stride];
828 do
829 {
830 reg2 = ((u128 *)m_pSrc)[0x10];
831 reg9 = ((u128 *)m_pSrc)[0x11];
832 reg8 = ((u128 *)m_pSrc)[0x12];
833 reg7 = ((u128 *)m_pSrc)[0x13];
834 count -= 1;
835 reg3 = ((u128 *)m_pSrc)[0x14];
836 reg4 = ((u128 *)m_pSrc)[0x15];
837 reg5 = ((u128 *)m_pSrc)[0x16];
838 reg6 = ((u128 *)m_pSrc)[0x17];
839 m_pSrc += 128;
840 __asm__
841 (
842 "pmaxh %[reg2], $zero, %[reg2]\n"
843 "pmaxh %[reg9], $zero, %[reg9]\n"
844 "pmaxh %[reg8], $zero, %[reg8]\n"
845 "pmaxh %[reg7], $zero, %[reg7]\n"
846 "pmaxh %[reg3], $zero, %[reg3]\n"
847 "pmaxh %[reg4], $zero, %[reg4]\n"
848 "pmaxh %[reg5], $zero, %[reg5]\n"
849 "pmaxh %[reg6], $zero, %[reg6]\n"
850 "pminh %[reg2], %[reg1], %[reg2]\n"
851 "pminh %[reg9], %[reg1], %[reg9]\n"
852 "pminh %[reg8], %[reg1], %[reg8]\n"
853 "pminh %[reg7], %[reg1], %[reg7]\n"
854 "pminh %[reg3], %[reg1], %[reg3]\n"
855 "pminh %[reg4], %[reg1], %[reg4]\n"
856 "pminh %[reg5], %[reg1], %[reg5]\n"
857 "pminh %[reg6], %[reg1], %[reg6]\n"
858 "ppacb %[reg2], $zero, %[reg2]\n"
859 "ppacb %[reg9], $zero, %[reg9]\n"
860 "ppacb %[reg8], $zero, %[reg8]\n"
861 "ppacb %[reg7], $zero, %[reg7]\n"
862 "ppacb %[reg3], $zero, %[reg3]\n"
863 "ppacb %[reg4], $zero, %[reg4]\n"
864 "ppacb %[reg5], $zero, %[reg5]\n"
865 "ppacb %[reg6], $zero, %[reg6]\n"
866 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
867 : [reg1] "r"(reg1)
868 );
869 ((u64 *)m_pMBDstCbCr)[0x00] = reg2;
870 ((u64 *)m_pMBDstCbCr)[0x02] = reg9;
871 ((u64 *)m_pMBDstCbCr)[0x04] = reg8;
872 ((u64 *)m_pMBDstCbCr)[0x06] = reg7;
873 ((u64 *)v28)[0x00] = reg3;
874 ((u64 *)v28)[0x02] = reg4;
875 ((u64 *)v28)[0x04] = reg5;
876 ((u64 *)v28)[0x06] = reg6;
877 m_pMBDstCbCr += 64;
878 v28 += 64;
879 }
880 while ( count > 0 );
881}
882
883void _MPEG_add_block_frfr(_MPEGMotions *a1)
884{
885 u8 *m_pMBDstY;
886 u8 *m_pSPRBlk;
887 u8 *m_pSPRRes;
888 int count;
889 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
890
891 __asm__
892 (
893 "pnor %[reg1], $zero, $zero\n"
894 : [reg1] "=r"(reg1)
895 );
896 m_pMBDstY = a1->m_pMBDstY;
897 m_pSPRBlk = a1->m_pSPRBlk;
898 m_pSPRRes = a1->m_pSPRRes;
899 count = 6;
900 __asm__
901 (
902 "psrlh %[reg1], %[reg1], 8\n"
903 : [reg1] "+r"(reg1)
904 );
905 do
906 {
907 reg2 = ((u128 *)m_pSPRBlk)[0x00];
908 reg9 = ((u128 *)m_pSPRBlk)[0x01];
909 reg8 = ((u128 *)m_pSPRBlk)[0x02];
910 reg7 = ((u128 *)m_pSPRBlk)[0x03];
911 count -= 1;
912 reg3 = ((u128 *)m_pSPRRes)[0x00];
913 reg4 = ((u128 *)m_pSPRRes)[0x01];
914 reg5 = ((u128 *)m_pSPRRes)[0x02];
915 reg6 = ((u128 *)m_pSPRRes)[0x03];
916 __asm__
917 (
918 "paddh %[reg2], %[reg2], %[reg3]\n"
919 "paddh %[reg9], %[reg9], %[reg4]\n"
920 "paddh %[reg8], %[reg8], %[reg5]\n"
921 "paddh %[reg7], %[reg7], %[reg6]\n"
922 "pmaxh %[reg2], $zero, %[reg2]\n"
923 "pmaxh %[reg9], $zero, %[reg9]\n"
924 "pmaxh %[reg8], $zero, %[reg8]\n"
925 "pmaxh %[reg7], $zero, %[reg7]\n"
926 "pminh %[reg2], %[reg1], %[reg2]\n"
927 "pminh %[reg9], %[reg1], %[reg9]\n"
928 "pminh %[reg8], %[reg1], %[reg8]\n"
929 "pminh %[reg7], %[reg1], %[reg7]\n"
930 "ppacb %[reg2], %[reg9], %[reg2]\n"
931 "ppacb %[reg8], %[reg7], %[reg8]\n"
932 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
933 : [reg1] "r"(reg1)
934 );
935 ((u128 *)m_pMBDstY)[0x00] = reg2;
936 ((u128 *)m_pMBDstY)[0x01] = reg8;
937 reg3 = ((u128 *)m_pSPRBlk)[0x04];
938 reg4 = ((u128 *)m_pSPRBlk)[0x05];
939 reg5 = ((u128 *)m_pSPRBlk)[0x06];
940 reg6 = ((u128 *)m_pSPRBlk)[0x07];
941 m_pSPRBlk += 128;
942 reg2 = ((u128 *)m_pSPRRes)[0x04];
943 reg9 = ((u128 *)m_pSPRRes)[0x05];
944 reg8 = ((u128 *)m_pSPRRes)[0x06];
945 reg7 = ((u128 *)m_pSPRRes)[0x07];
946 m_pSPRRes += 128;
947 __asm__
948 (
949 "paddh %[reg3], %[reg3], %[reg2]\n"
950 "paddh %[reg4], %[reg4], %[reg9]\n"
951 "paddh %[reg5], %[reg5], %[reg8]\n"
952 "paddh %[reg6], %[reg6], %[reg7]\n"
953 "pmaxh %[reg3], $zero, %[reg3]\n"
954 "pmaxh %[reg4], $zero, %[reg4]\n"
955 "pmaxh %[reg5], $zero, %[reg5]\n"
956 "pmaxh %[reg6], $zero, %[reg6]\n"
957 "pminh %[reg3], %[reg1], %[reg3]\n"
958 "pminh %[reg4], %[reg1], %[reg4]\n"
959 "pminh %[reg5], %[reg1], %[reg5]\n"
960 "pminh %[reg6], %[reg1], %[reg6]\n"
961 "ppacb %[reg3], %[reg4], %[reg3]\n"
962 "ppacb %[reg5], %[reg6], %[reg5]\n"
963 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
964 : [reg1] "r"(reg1)
965 );
966 ((u128 *)m_pMBDstY)[0x02] = reg3;
967 ((u128 *)m_pMBDstY)[0x03] = reg5;
968 m_pMBDstY += 64;
969 }
970 while ( count > 0 );
971}
972
973void _MPEG_add_block_ilfl(_MPEGMotions *a1)
974{
975 u8 *m_pMBDstY;
976 u8 *m_pSPRBlk;
977 u8 *m_pSPRRes;
978 int count;
979 u8 *v6;
980 u8 *m_pMBDstCbCr;
981 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
982
983 __asm__
984 (
985 "pnor %[reg1], $zero, $zero\n"
986 : [reg1] "=r"(reg1)
987 );
988 m_pMBDstY = a1->m_pMBDstY;
989 m_pSPRBlk = a1->m_pSPRBlk;
990 m_pSPRRes = a1->m_pSPRRes;
991 count = 4;
992 __asm__
993 (
994 "psrlh %[reg1], %[reg1], 8\n"
995 : [reg1] "+r"(reg1)
996 );
997 v6 = &m_pMBDstY[a1->m_Stride];
998 do
999 {
1000 reg2 = ((u128 *)m_pSPRBlk)[0x00];
1001 reg9 = ((u128 *)m_pSPRBlk)[0x01];
1002 reg8 = ((u128 *)m_pSPRBlk)[0x02];
1003 reg7 = ((u128 *)m_pSPRBlk)[0x03];
1004 count -= 1;
1005 reg3 = ((u128 *)m_pSPRRes)[0x00];
1006 reg4 = ((u128 *)m_pSPRRes)[0x01];
1007 reg5 = ((u128 *)m_pSPRRes)[0x02];
1008 reg6 = ((u128 *)m_pSPRRes)[0x03];
1009 __asm__
1010 (
1011 "paddh %[reg2], %[reg2], %[reg3]\n"
1012 "paddh %[reg9], %[reg9], %[reg4]\n"
1013 "paddh %[reg8], %[reg8], %[reg5]\n"
1014 "paddh %[reg7], %[reg7], %[reg6]\n"
1015 "pmaxh %[reg2], $zero, %[reg2]\n"
1016 "pmaxh %[reg9], $zero, %[reg9]\n"
1017 "pmaxh %[reg8], $zero, %[reg8]\n"
1018 "pmaxh %[reg7], $zero, %[reg7]\n"
1019 "pminh %[reg2], %[reg1], %[reg2]\n"
1020 "pminh %[reg9], %[reg1], %[reg9]\n"
1021 "pminh %[reg8], %[reg1], %[reg8]\n"
1022 "pminh %[reg7], %[reg1], %[reg7]\n"
1023 "ppacb %[reg2], %[reg9], %[reg2]\n"
1024 "ppacb %[reg8], %[reg7], %[reg8]\n"
1025 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1026 : [reg1] "r"(reg1)
1027 );
1028 ((u128 *)m_pMBDstY)[0] = reg2;
1029 ((u128 *)m_pMBDstY)[2] = reg8;
1030 reg3 = ((u128 *)m_pSPRBlk)[0x10];
1031 reg4 = ((u128 *)m_pSPRBlk)[0x11];
1032 reg5 = ((u128 *)m_pSPRBlk)[0x12];
1033 reg6 = ((u128 *)m_pSPRBlk)[0x13];
1034 m_pSPRBlk += 64;
1035 reg2 = ((u128 *)m_pSPRRes)[0x10];
1036 reg9 = ((u128 *)m_pSPRRes)[0x11];
1037 reg8 = ((u128 *)m_pSPRRes)[0x12];
1038 reg7 = ((u128 *)m_pSPRRes)[0x13];
1039 __asm__
1040 (
1041 "paddh %[reg3], %[reg3], %[reg2]\n"
1042 "paddh %[reg4], %[reg4], %[reg9]\n"
1043 "paddh %[reg5], %[reg5], %[reg8]\n"
1044 "paddh %[reg6], %[reg6], %[reg7]\n"
1045 "pmaxh %[reg3], $zero, %[reg3]\n"
1046 "pmaxh %[reg4], $zero, %[reg4]\n"
1047 "pmaxh %[reg5], $zero, %[reg5]\n"
1048 "pmaxh %[reg6], $zero, %[reg6]\n"
1049 "pminh %[reg3], %[reg1], %[reg3]\n"
1050 "pminh %[reg4], %[reg1], %[reg4]\n"
1051 "pminh %[reg5], %[reg1], %[reg5]\n"
1052 "pminh %[reg6], %[reg1], %[reg6]\n"
1053 "ppacb %[reg3], %[reg4], %[reg3]\n"
1054 "ppacb %[reg5], %[reg6], %[reg5]\n"
1055 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1056 : [reg1] "r"(reg1)
1057 );
1058 ((u128 *)v6)[0x00] = reg3;
1059 ((u128 *)v6)[0x02] = reg5;
1060 v6 += 64;
1061 m_pSPRRes += 64;
1062 }
1063 while ( count > 0 );
1064 m_pMBDstCbCr = a1->m_pMBDstCbCr;
1065 count = 2;
1066 v6 = &m_pMBDstCbCr[a1->m_Stride];
1067 do
1068 {
1069 reg2 = ((u128 *)m_pSPRBlk)[0x10];
1070 reg9 = ((u128 *)m_pSPRBlk)[0x11];
1071 reg8 = ((u128 *)m_pSPRBlk)[0x12];
1072 reg7 = ((u128 *)m_pSPRBlk)[0x13];
1073 count -= 1;
1074 reg3 = ((u128 *)m_pSPRRes)[0x10];
1075 reg4 = ((u128 *)m_pSPRRes)[0x11];
1076 reg5 = ((u128 *)m_pSPRRes)[0x12];
1077 reg6 = ((u128 *)m_pSPRRes)[0x13];
1078 __asm__
1079 (
1080 "paddh %[reg2], %[reg2], %[reg3]\n"
1081 "paddh %[reg9], %[reg9], %[reg4]\n"
1082 "paddh %[reg8], %[reg8], %[reg5]\n"
1083 "paddh %[reg7], %[reg7], %[reg6]\n"
1084 "pmaxh %[reg2], $zero, %[reg2]\n"
1085 "pmaxh %[reg9], $zero, %[reg9]\n"
1086 "pmaxh %[reg8], $zero, %[reg8]\n"
1087 "pmaxh %[reg7], $zero, %[reg7]\n"
1088 "pminh %[reg2], %[reg1], %[reg2]\n"
1089 "pminh %[reg9], %[reg1], %[reg9]\n"
1090 "pminh %[reg8], %[reg1], %[reg8]\n"
1091 "pminh %[reg7], %[reg1], %[reg7]\n"
1092 "ppacb %[reg2], $zero, %[reg2]\n"
1093 "ppacb %[reg9], $zero, %[reg9]\n"
1094 "ppacb %[reg8], $zero, %[reg8]\n"
1095 "ppacb %[reg7], $zero, %[reg7]\n"
1096 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1097 : [reg1] "r"(reg1)
1098 );
1099 ((u64 *)m_pMBDstCbCr)[0] = reg2;
1100 ((u64 *)m_pMBDstCbCr)[2] = reg9;
1101 ((u64 *)m_pMBDstCbCr)[4] = reg8;
1102 ((u64 *)m_pMBDstCbCr)[6] = reg7;
1103 reg3 = ((u128 *)m_pSPRBlk)[0x14];
1104 reg4 = ((u128 *)m_pSPRBlk)[0x15];
1105 reg5 = ((u128 *)m_pSPRBlk)[0x16];
1106 reg6 = ((u128 *)m_pSPRBlk)[0x17];
1107 m_pSPRBlk += 128;
1108 reg2 = ((u128 *)m_pSPRRes)[0x14];
1109 reg9 = ((u128 *)m_pSPRRes)[0x15];
1110 reg8 = ((u128 *)m_pSPRRes)[0x16];
1111 reg7 = ((u128 *)m_pSPRRes)[0x17];
1112 __asm__
1113 (
1114 "paddh %[reg3], %[reg3], %[reg2]\n"
1115 "paddh %[reg4], %[reg4], %[reg9]\n"
1116 "paddh %[reg5], %[reg5], %[reg8]\n"
1117 "paddh %[reg6], %[reg6], %[reg7]\n"
1118 "pmaxh %[reg3], $zero, %[reg3]\n"
1119 "pmaxh %[reg4], $zero, %[reg4]\n"
1120 "pmaxh %[reg5], $zero, %[reg5]\n"
1121 "pmaxh %[reg6], $zero, %[reg6]\n"
1122 "pminh %[reg3], %[reg1], %[reg3]\n"
1123 "pminh %[reg4], %[reg1], %[reg4]\n"
1124 "pminh %[reg5], %[reg1], %[reg5]\n"
1125 "pminh %[reg6], %[reg1], %[reg6]\n"
1126 "ppacb %[reg3], $zero, %[reg3]\n"
1127 "ppacb %[reg4], $zero, %[reg4]\n"
1128 "ppacb %[reg5], $zero, %[reg5]\n"
1129 "ppacb %[reg6], $zero, %[reg6]\n"
1130 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1131 : [reg1] "r"(reg1)
1132 );
1133 ((u64 *)v6)[0x00] = reg3;
1134 ((u64 *)v6)[0x02] = reg4;
1135 ((u64 *)v6)[0x04] = reg5;
1136 ((u64 *)v6)[0x06] = reg6;
1137 m_pMBDstCbCr += 64;
1138 v6 += 64;
1139 }
1140 while ( count > 0 );
1141}
1142
1143void _MPEG_add_block_frfl(_MPEGMotions *a1)
1144{
1145 u8 *m_pSPRBlk;
1146 u8 *m_pSPRRes;
1147 int count;
1148 u8 *m_pMBDstCbCr;
1149 u8 *m_pMBDstY;
1150 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
1151
1152 __asm__
1153 (
1154 "pnor %[reg1], $zero, $zero\n"
1155 : [reg1] "=r"(reg1)
1156 );
1157 m_pSPRBlk = a1->m_pSPRBlk;
1158 m_pSPRRes = a1->m_pSPRRes;
1159 m_pMBDstY = a1->m_pMBDstY;
1160 count = 4;
1161 __asm__
1162 (
1163 "psrlh %[reg1], %[reg1], 8\n"
1164 : [reg1] "+r"(reg1)
1165 );
1166 do
1167 {
1168 reg2 = ((u128 *)m_pSPRBlk)[0x00];
1169 reg9 = ((u128 *)m_pSPRBlk)[0x01];
1170 reg8 = ((u128 *)m_pSPRBlk)[0x02];
1171 reg7 = ((u128 *)m_pSPRBlk)[0x03];
1172 count -= 1;
1173 reg3 = ((u128 *)m_pSPRRes)[0x00];
1174 reg4 = ((u128 *)m_pSPRRes)[0x01];
1175 reg5 = ((u128 *)m_pSPRRes)[0x10];
1176 reg6 = ((u128 *)m_pSPRRes)[0x11];
1177 __asm__
1178 (
1179 "paddh %[reg2], %[reg2], %[reg3]\n"
1180 "paddh %[reg9], %[reg9], %[reg4]\n"
1181 "paddh %[reg8], %[reg8], %[reg5]\n"
1182 "paddh %[reg7], %[reg7], %[reg6]\n"
1183 "pmaxh %[reg2], $zero, %[reg2]\n"
1184 "pmaxh %[reg9], $zero, %[reg9]\n"
1185 "pmaxh %[reg8], $zero, %[reg8]\n"
1186 "pmaxh %[reg7], $zero, %[reg7]\n"
1187 "pminh %[reg2], %[reg1], %[reg2]\n"
1188 "pminh %[reg9], %[reg1], %[reg9]\n"
1189 "pminh %[reg8], %[reg1], %[reg8]\n"
1190 "pminh %[reg7], %[reg1], %[reg7]\n"
1191 "ppacb %[reg2], %[reg9], %[reg2]\n"
1192 "ppacb %[reg8], %[reg7], %[reg8]\n"
1193 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1194 : [reg1] "r"(reg1)
1195 );
1196 ((u128 *)m_pMBDstY)[0x00] = reg2;
1197 ((u128 *)m_pMBDstY)[0x01] = reg8;
1198 reg3 = ((u128 *)m_pSPRBlk)[0x04];
1199 reg4 = ((u128 *)m_pSPRBlk)[0x05];
1200 reg5 = ((u128 *)m_pSPRBlk)[0x06];
1201 reg6 = ((u128 *)m_pSPRBlk)[0x07];
1202 m_pSPRBlk += 128;
1203 reg2 = ((u128 *)m_pSPRRes)[0x02];
1204 reg9 = ((u128 *)m_pSPRRes)[0x03];
1205 reg8 = ((u128 *)m_pSPRRes)[0x12];
1206 reg7 = ((u128 *)m_pSPRRes)[0x13];
1207 __asm__
1208 (
1209 "paddh %[reg3], %[reg3], %[reg2]\n"
1210 "paddh %[reg4], %[reg4], %[reg9]\n"
1211 "paddh %[reg5], %[reg5], %[reg8]\n"
1212 "paddh %[reg6], %[reg6], %[reg7]\n"
1213 "pmaxh %[reg3], $zero, %[reg3]\n"
1214 "pmaxh %[reg4], $zero, %[reg4]\n"
1215 "pmaxh %[reg5], $zero, %[reg5]\n"
1216 "pmaxh %[reg6], $zero, %[reg6]\n"
1217 "pminh %[reg3], %[reg1], %[reg3]\n"
1218 "pminh %[reg4], %[reg1], %[reg4]\n"
1219 "pminh %[reg5], %[reg1], %[reg5]\n"
1220 "pminh %[reg6], %[reg1], %[reg6]\n"
1221 "ppacb %[reg3], %[reg4], %[reg3]\n"
1222 "ppacb %[reg5], %[reg6], %[reg5]\n"
1223 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1224 : [reg1] "r"(reg1)
1225 );
1226 ((u128 *)m_pMBDstY)[0x02] = reg3;
1227 ((u128 *)m_pMBDstY)[0x03] = reg5;
1228 m_pSPRRes += 64;
1229 m_pMBDstY += 64;
1230 }
1231 while ( count > 0 );
1232 m_pMBDstCbCr = a1->m_pMBDstCbCr;
1233 count = 2;
1234 do
1235 {
1236 reg2 = ((u128 *)m_pSPRBlk)[0x00];
1237 reg9 = ((u128 *)m_pSPRBlk)[0x01];
1238 reg8 = ((u128 *)m_pSPRBlk)[0x02];
1239 reg7 = ((u128 *)m_pSPRBlk)[0x03];
1240 count -= 1;
1241 reg3 = ((u128 *)m_pSPRRes)[0x10];
1242 reg4 = ((u128 *)m_pSPRRes)[0x14];
1243 reg5 = ((u128 *)m_pSPRRes)[0x11];
1244 reg6 = ((u128 *)m_pSPRRes)[0x15];
1245 __asm__
1246 (
1247 "paddh %[reg2], %[reg2], %[reg3]\n"
1248 "paddh %[reg9], %[reg9], %[reg4]\n"
1249 "paddh %[reg8], %[reg8], %[reg5]\n"
1250 "paddh %[reg7], %[reg7], %[reg6]\n"
1251 "pmaxh %[reg2], $zero, %[reg2]\n"
1252 "pmaxh %[reg9], $zero, %[reg9]\n"
1253 "pmaxh %[reg8], $zero, %[reg8]\n"
1254 "pmaxh %[reg7], $zero, %[reg7]\n"
1255 "pminh %[reg2], %[reg1], %[reg2]\n"
1256 "pminh %[reg9], %[reg1], %[reg9]\n"
1257 "pminh %[reg8], %[reg1], %[reg8]\n"
1258 "pminh %[reg7], %[reg1], %[reg7]\n"
1259 "ppacb %[reg2], %[reg9], %[reg2]\n"
1260 "ppacb %[reg8], %[reg7], %[reg8]\n"
1261 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1262 : [reg1] "r"(reg1)
1263 );
1264 ((u128 *)m_pMBDstCbCr)[0x00] = reg2;
1265 ((u128 *)m_pMBDstCbCr)[0x01] = reg8;
1266 reg3 = ((u128 *)m_pSPRBlk)[0x04];
1267 reg4 = ((u128 *)m_pSPRBlk)[0x05];
1268 reg5 = ((u128 *)m_pSPRBlk)[0x06];
1269 reg6 = ((u128 *)m_pSPRBlk)[0x07];
1270 m_pSPRBlk += 128;
1271 reg2 = ((u128 *)m_pSPRRes)[0x12];
1272 reg9 = ((u128 *)m_pSPRRes)[0x16];
1273 reg8 = ((u128 *)m_pSPRRes)[0x13];
1274 reg7 = ((u128 *)m_pSPRRes)[0x17];
1275 __asm__
1276 (
1277 "paddh %[reg3], %[reg3], %[reg2]\n"
1278 "paddh %[reg4], %[reg4], %[reg9]\n"
1279 "paddh %[reg5], %[reg5], %[reg8]\n"
1280 "paddh %[reg6], %[reg6], %[reg7]\n"
1281 "pmaxh %[reg3], $zero, %[reg3]\n"
1282 "pmaxh %[reg4], $zero, %[reg4]\n"
1283 "pmaxh %[reg5], $zero, %[reg5]\n"
1284 "pmaxh %[reg6], $zero, %[reg6]\n"
1285 "pminh %[reg3], %[reg1], %[reg3]\n"
1286 "pminh %[reg4], %[reg1], %[reg4]\n"
1287 "pminh %[reg5], %[reg1], %[reg5]\n"
1288 "pminh %[reg6], %[reg1], %[reg6]\n"
1289 "ppacb %[reg3], %[reg4], %[reg3]\n"
1290 "ppacb %[reg5], %[reg6], %[reg5]\n"
1291 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1292 : [reg1] "r"(reg1)
1293 );
1294 ((u128 *)m_pMBDstCbCr)[0x02] = reg3;
1295 ((u128 *)m_pMBDstCbCr)[0x03] = reg5;
1296 m_pMBDstCbCr += 64;
1297 m_pSPRRes += 128;
1298 }
1299 while ( count > 0 );
1300}
1301
1302// TODO: verify delay slots
1303void _MPEG_do_mc ( _MPEGMotion* arg0 )
1304{
1305 int var0 = 16; // addiu $v0, $zero, 16
1306 u8* arg1 = arg0->m_pSrc; // lw $a1, 0($a0)
1307 // addiu $sp, $sp, -16
1308 u16* arg2 = (u16 *)arg0->m_pDstY; // lw $a2, 4($a0)
1309 int arg3 = arg0->m_X; // lw $a3, 12($a0)
1310 int tmp0 = arg0->m_Y; // lw $t0, 16($a0)
1311 int tmp1 = arg0->m_H; // lw $t1, 20($a0)
1312 int tmp2 = arg0->m_fInt; // lw $t2, 24($a0)
1313 int tmp4 = arg0->m_Field; // lw $t4, 28($a0)
1314 // lw $t5, 32($a0) <-- MC_Luma
1315 tmp0 -= tmp4; // subu $t0, $t0, $t4
1316 tmp4 <<= 4; // sll $t4, $t4, 4
1317 arg1 += tmp4; // addu $a1, $a1, $t4
1318 int var1 = var0 - tmp0; // subu $v1, $v0, $t0
1319 int tmp3 = var0 << tmp2; // sllv $t3, $v0, $t2
1320 var1 >>= tmp2; // srlv $v1, $v1, $t2
1321 int ta = tmp0 << 4; // sll $at, $t0, 4
1322 // sw $ra, 0($sp)
1323 arg1 += ta; // addu $a1, $a1, $at
1324 ta = tmp1 - var1; // subu $at, $t1, $v1
1325 arg0->MC_Luma(arg1, arg2, arg3, tmp3, var1, ta); // jalr $t5
1326 arg1 = arg0->m_pSrc; // lw $a1, 0($a0)
1327 arg2 = (u16 *)arg0->m_pDstCbCr; // lw $a2, 8($a0)
1328 // lw $t5, 36($a0) <-- MC_Chroma
1329 arg1 += 256; // addiu $a1, $a1, 256
1330 tmp4 >>= 1; // srl $t4, $t4, 1
1331 arg3 >>= 1; // srl $a3, $a3, 1
1332 tmp0 >>= 1; // srl $t0, $t0, 1
1333 tmp1 >>= 1; // srl $t1, $t1, 1
1334 // lw $ra, 0($sp)
1335 tmp0 >>= tmp2; // srlv $t0, $t0, $t2
1336 arg1 += tmp4; // addu $a1, $a1, $t4
1337 var0 = 8; // addiu $v0, $zero, 8
1338 tmp0 <<= tmp2; // sllv $t0, $t0, $t2
1339 var1 = var0 - tmp0; // subu $v1, $v0, $t0
1340 tmp3 = var0 << tmp2; // sllv $t3, $v0, $t2
1341 var1 >>= tmp2; // srlv $v1, $v1, $t2
1342 ta = tmp0 << 3; // sll $at, $t0, 3
1343 arg1 += ta; // addu $a1, $a1, $at
1344 ta = tmp1 - var1; // subu $at, $t1, $v1
1345 arg0->MC_Chroma(arg1, arg2, arg3, tmp3, var1, ta); // jr $t5
1346 // addiu $sp, $sp, 16
1347}
1348
1349
/*
 * Programs the EE shift-amount (SA) register, which later qfsrv
 * instructions use, with a byte count.
 *
 * A value of exactly 1 is encoded in the immediate field
 * ("mtsab $zero, 1"); every other value is passed in a register
 * ("mtsab reg, 0").
 * NOTE(review): the two encodings are presumably equivalent for value == 1
 * (the original carried an "XXX: mtsab difference?" remark) - confirm
 * against the EE Core instruction manual.
 */
static inline void set_mtsab_to_value(int value)
{
	if (value != 1)
	{
		__asm__ volatile
		(
			"mtsab %[tmp], 0\n"
			:
			: [tmp] "r"(value)
		);
		return;
	}

	__asm__ volatile
	(
		"mtsab $zero, 1\n"
	);
}
1381
1382void _MPEG_put_luma(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1383{
1384 int count;
1385 int count2;
1386 u8 *m_pSrc;
1387 u16 *m_pDstY;
1388 u128 reg1, reg2;
1389
1390 count = var1;
1391 count2 = ta;
1392 m_pSrc = a1;
1393 m_pDstY = a2;
1394 set_mtsab_to_value(a3);
1395 do
1396 {
1397 do
1398 {
1399 reg1 = ((u128 *)m_pSrc)[0x00];
1400 reg2 = ((u128 *)m_pSrc)[0x18];
1401 m_pSrc += a4;
1402 count -= 1;
1403 __asm__
1404 (
1405 "qfsrv %[reg1], %[reg2], %[reg1]\n"
1406 "pextlb %[reg2], $zero, %[reg1]\n"
1407 "pextub %[reg1], $zero, %[reg1]\n"
1408 : [reg1] "+r"(reg1), [reg2] "+r"(reg2)
1409 );
1410 ((u128 *)m_pDstY)[0x00] = reg2;
1411 ((u128 *)m_pDstY)[0x01] = reg1;
1412 }
1413 while ( count > 0 );
1414 m_pDstY += 16;
1415 count = count2;
1416 m_pSrc += 512;
1417 }
1418 while ( count2 > 0 );
1419}
1420
1421void _MPEG_put_chroma(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1422{
1423 int count;
1424 int count2;
1425 u8 *m_pSrc;
1426 u16 *m_pDstCbCr;
1427 u128 reg1, reg2, reg3, reg4;
1428
1429 count = var1;
1430 count2 = ta;
1431 m_pSrc = a1;
1432 m_pDstCbCr = a2;
1433 set_mtsab_to_value(a3);
1434 do
1435 {
1436 do
1437 {
1438 reg1 = ((u64 *)m_pSrc)[0x00];
1439 reg2 = ((u64 *)m_pSrc)[0x01];
1440 reg3 = ((u64 *)m_pSrc)[0x06];
1441 reg4 = ((u64 *)m_pSrc)[0x07];
1442 m_pSrc += a4;
1443 count -= 1;
1444 __asm__
1445 (
1446 "pcpyld %[reg1], %[reg3], %[reg1]\n"
1447 "pcpyld %[reg2], %[reg4], %[reg2]\n"
1448 "qfsrv %[reg1], %[reg1], %[reg1]\n"
1449 "qfsrv %[reg2], %[reg2], %[reg2]\n"
1450 "pextlb %[reg1], $zero, %[reg1]\n"
1451 "pextlb %[reg2], $zero, %[reg2]\n"
1452 : [reg1] "+r"(reg1), [reg2] "+r"(reg2)
1453 : [reg3] "r"(reg3), [reg4] "r"(reg4)
1454 );
1455 ((u128 *)m_pDstCbCr)[0x00] = reg1;
1456 ((u128 *)m_pDstCbCr)[0x08] = reg2;
1457 }
1458 while ( count > 0 );
1459 m_pDstCbCr += 8;
1460 count = count2;
1461 m_pSrc += 704;
1462 }
1463 while ( count2 > 0 );
1464}
1465
1466void _MPEG_put_luma_X(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1467{
1468 int count;
1469 int count2;
1470 u8 *m_pSrc;
1471 u16 *m_pDstY;
1472 u128 reg1, reg2, reg3, reg4, reg5;
1473
1474 count = var1;
1475 count2 = ta;
1476 m_pSrc = a1;
1477 m_pDstY = a2;
1478 __asm__
1479 (
1480 "pnor %[reg1], $zero, $zero\n"
1481 "psrlh %[reg1], %[reg1], 0xF\n"
1482 : [reg1] "=r"(reg1)
1483 );
1484 do
1485 {
1486 do
1487 {
1488 reg2 = ((u128 *)m_pSrc)[0x00];
1489 reg3 = ((u128 *)m_pSrc)[0x18];
1490 set_mtsab_to_value(a3);
1491 __asm__
1492 (
1493 "qfsrv %[reg4], %[reg3], %[reg2]\n"
1494 "qfsrv %[reg5], %[reg2], %[reg3]\n"
1495 "pextlb %[reg2], $zero, %[reg4]\n"
1496 "pextub %[reg3], $zero, %[reg4]\n"
1497 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "=r"(reg4), [reg5] "=r"(reg5)
1498 );
1499 m_pSrc += a4;
1500 set_mtsab_to_value(1);
1501 count -= 1;
1502 __asm__
1503 (
1504 "qfsrv %[reg5], %[reg5], %[reg4]\n"
1505 "pextlb %[reg4], $zero, %[reg5]\n"
1506 "pextub %[reg5], $zero, %[reg5]\n"
1507 "paddh %[reg2], %[reg2], %[reg4]\n"
1508 "paddh %[reg3], %[reg3], %[reg5]\n"
1509 "paddh %[reg2], %[reg2], %[reg1]\n"
1510 "paddh %[reg3], %[reg3], %[reg1]\n"
1511 "psrlh %[reg2], %[reg2], 1\n"
1512 "psrlh %[reg3], %[reg3], 1\n"
1513 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5)
1514 : [reg1] "r"(reg1)
1515 );
1516 ((u128 *)m_pDstY)[0x00] = reg2;
1517 ((u128 *)m_pDstY)[0x01] = reg3;
1518 }
1519 while ( count > 0 );
1520 m_pDstY += 16;
1521 count = count2;
1522 m_pSrc += 512;
1523 }
1524 while ( count2 > 0 );
1525}
1526
1527void _MPEG_put_chroma_X(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1528{
1529 int count;
1530 int count2;
1531 u8 *m_pSrc;
1532 u16 *m_pDstCbCr;
1533 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7;
1534
1535 count = var1;
1536 count2 = ta;
1537 m_pSrc = a1;
1538 m_pDstCbCr = a2;
1539 __asm__
1540 (
1541 "pnor %[reg1], $zero, $zero\n"
1542 "psrlh %[reg1], %[reg1], 0xF\n"
1543 : [reg1] "=r"(reg1)
1544 );
1545 do
1546 {
1547 do
1548 {
1549 reg2 = ((u64 *)m_pSrc)[0x00];
1550 reg3 = ((u64 *)m_pSrc)[0x01];
1551 reg4 = ((u64 *)m_pSrc)[0x06];
1552 reg5 = ((u64 *)m_pSrc)[0x07];
1553 __asm__
1554 (
1555 "pcpyld %[reg2], %[reg4], %[reg2]\n"
1556 "pcpyld %[reg3], %[reg5], %[reg3]\n"
1557 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5)
1558 );
1559 set_mtsab_to_value(a3);
1560 __asm__
1561 (
1562 "qfsrv %[reg2], %[reg2], %[reg2]\n"
1563 "qfsrv %[reg3], %[reg3], %[reg3]\n"
1564 : [reg2] "+r"(reg2), [reg3] "+r"(reg3)
1565 );
1566 m_pSrc += a4;
1567 count -= 1;
1568 set_mtsab_to_value(1);
1569 __asm__
1570 (
1571 "qfsrv %[reg7], %[reg2], %[reg2]\n"
1572 "qfsrv %[reg6], %[reg3], %[reg3]\n"
1573 "pextlb %[reg2], $zero, %[reg2]\n"
1574 "pextlb %[reg3], $zero, %[reg3]\n"
1575 "pextlb %[reg7], $zero, %[reg7]\n"
1576 "pextlb %[reg6], $zero, %[reg6]\n"
1577 "paddh %[reg2], %[reg2], %[reg7]\n"
1578 "paddh %[reg3], %[reg3], %[reg6]\n"
1579 "paddh %[reg2], %[reg2], %[reg1]\n"
1580 "paddh %[reg3], %[reg3], %[reg1]\n"
1581 "psrlh %[reg2], %[reg2], 1\n"
1582 "psrlh %[reg3], %[reg3], 1\n"
1583 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg6] "=r"(reg6), [reg7] "=r"(reg7)
1584 : [reg1] "r"(reg1)
1585 );
1586 ((u128 *)m_pDstCbCr)[0x00] = reg2;
1587 ((u128 *)m_pDstCbCr)[0x08] = reg3;
1588 }
1589 while ( count > 0 );
1590 m_pDstCbCr += 8;
1591 count = count2;
1592 m_pSrc += 704;
1593 }
1594 while ( count2 > 0 );
1595}
1596
1597void _MPEG_put_luma_Y(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1598{
1599 int count;
1600 int count2;
1601 u8 *m_pSrc;
1602 u16 *m_pDstY;
1603 u128 reg1, reg2, reg3, reg4, reg5, reg6;
1604
1605 count = var1;
1606 count2 = ta;
1607 m_pSrc = a1;
1608 m_pDstY = a2;
1609 set_mtsab_to_value(a3);
1610 reg5 = ((u128 *)m_pSrc)[0x00];
1611 reg6 = ((u128 *)m_pSrc)[0x18];
1612 m_pSrc = &m_pSrc[a4];
1613 count -= 1;
1614 __asm__
1615 (
1616 "qfsrv %[reg5], %[reg6], %[reg5]\n"
1617 "pextub %[reg6], $zero, %[reg5]\n"
1618 "pextlb %[reg5], $zero, %[reg5]\n"
1619 : [reg5] "+r"(reg5), [reg6] "+r"(reg6)
1620 );
1621 if ( !count )
1622 goto LABEL_5;
1623 count2 += 1;
1624 do
1625 {
1626 do
1627 {
1628 reg3 = ((u128 *)m_pSrc)[0x00];
1629 reg4 = ((u128 *)m_pSrc)[0x18];
1630 m_pSrc += a4;
1631 count -= 1;
1632 __asm__
1633 (
1634 "qfsrv %[reg3], %[reg4], %[reg3]\n"
1635 "pextub %[reg4], $zero, %[reg3]\n"
1636 "pextlb %[reg3], $zero, %[reg3]\n"
1637 "paddh %[reg2], %[reg4], %[reg6]\n"
1638 "pnor %[reg6], $zero, $zero\n"
1639 "paddh %[reg1], %[reg3], %[reg5]\n"
1640 "psrlh %[reg6], %[reg6], 0xF\n"
1641 "por %[reg5], $zero, %[reg3]\n"
1642 "paddh %[reg1], %[reg1], %[reg6]\n"
1643 "paddh %[reg2], %[reg2], %[reg6]\n"
1644 "por %[reg6], $zero, %[reg4]\n"
1645 "psrlh %[reg1], %[reg1], 1\n"
1646 "psrlh %[reg2], %[reg2], 1\n"
1647 : [reg1] "=r"(reg1), [reg2] "=r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
1648 );
1649 ((u128 *)m_pDstY)[0x00] = reg1;
1650 ((u128 *)m_pDstY)[0x01] = reg2;
1651 }
1652 while ( count > 0 );
1653 m_pDstY += 16;
1654LABEL_5:
1655 count = count2;
1656 m_pSrc += 512;
1657 }
1658 while ( count2 > 0 );
1659}
1660
1661void _MPEG_put_chroma_Y(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1662{
1663 int count;
1664 int count2;
1665 u8 *m_pSrc;
1666 u16 *m_pDstCbCr;
1667 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9, regA;
1668
1669 count = var1;
1670 count2 = ta;
1671 m_pSrc = a1;
1672 m_pDstCbCr = a2;
1673 set_mtsab_to_value(a3);
1674 reg2 = ((u64 *)m_pSrc)[0x00];
1675 reg3 = ((u64 *)m_pSrc)[0x01];
1676 regA = ((u64 *)m_pSrc)[0x06];
1677 reg9 = ((u64 *)m_pSrc)[0x07];
1678 __asm__
1679 (
1680 "pnor %[reg1], $zero, $zero\n"
1681 : [reg1] "=r"(reg1)
1682 );
1683 m_pSrc = &m_pSrc[a4];
1684 count -= 1;
1685 __asm__
1686 (
1687 "psrlh %[reg1], %[reg1], 0xF\n"
1688 "pcpyld %[reg2], %[regA], %[reg2]\n"
1689 "pcpyld %[reg3], %[reg9], %[reg3]\n"
1690 "qfsrv %[reg2], %[reg2], %[reg2]\n"
1691 "qfsrv %[reg3], %[reg3], %[reg3]\n"
1692 "pextlb %[reg2], $zero, %[reg2]\n"
1693 "pextlb %[reg3], $zero, %[reg3]\n"
1694 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg9] "+r"(reg9), [regA] "+r"(regA)
1695 );
1696 if ( !count )
1697 goto LABEL_5;
1698 count2 += 1;
1699 do
1700 {
1701 do
1702 {
1703 reg4 = ((u64 *)m_pSrc)[0x00];
1704 reg5 = ((u64 *)m_pSrc)[0x01];
1705 reg6 = ((u64 *)m_pSrc)[0x06];
1706 reg7 = ((u64 *)m_pSrc)[0x07];
1707 m_pSrc += a4;
1708 count -= 1;
1709 __asm__
1710 (
1711 "pcpyld %[reg4], %[reg6], %[reg4]\n"
1712 "pcpyld %[reg5], %[reg7], %[reg5]\n"
1713 "qfsrv %[reg4], %[reg4], %[reg4]\n"
1714 "qfsrv %[reg5], %[reg5], %[reg5]\n"
1715 "pextlb %[reg4], $zero, %[reg4]\n"
1716 "pextlb %[reg5], $zero, %[reg5]\n"
1717 "paddh %[reg9], %[reg4], %[reg2]\n"
1718 "paddh %[reg8], %[reg5], %[reg3]\n"
1719 "por %[reg2], $zero, %[reg4]\n"
1720 "por %[reg3], $zero, %[reg5]\n"
1721 "paddh %[reg9], %[reg9], %[reg1]\n"
1722 "paddh %[reg8], %[reg8], %[reg1]\n"
1723 "psrlh %[reg9], %[reg9], 1\n"
1724 "psrlh %[reg8], %[reg8], 1\n"
1725 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg8] "=r"(reg8), [reg9] "=r"(reg9)
1726 : [reg1] "r"(reg1), [reg6] "r"(reg6), [reg7] "r"(reg7)
1727 );
1728 ((u128 *)m_pDstCbCr)[0x00] = reg9;
1729 ((u128 *)m_pDstCbCr)[0x08] = reg8;
1730 }
1731 while ( count > 0 );
1732 m_pDstCbCr += 8;
1733LABEL_5:
1734 count = count2;
1735 m_pSrc += 704;
1736 }
1737 while ( count2 > 0 );
1738}
1739
1740void _MPEG_put_luma_XY(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1741{
1742 int count;
1743 int count2;
1744 u8 *m_pSrc;
1745 u16 *m_pDstY;
1746 u128 reg1, reg2, reg3, reg4, reg5, reg6;
1747
1748 count = var1;
1749 count2 = ta;
1750 m_pSrc = a1;
1751 m_pDstY = a2;
1752 set_mtsab_to_value(a3);
1753 reg2 = ((u128 *)m_pSrc)[0x00];
1754 reg5 = ((u128 *)m_pSrc)[0x18];
1755 m_pSrc = &m_pSrc[a4];
1756 __asm__
1757 (
1758 "qfsrv %[reg6], %[reg5], %[reg2]\n"
1759 "qfsrv %[reg1], %[reg2], %[reg5]\n"
1760 : [reg1] "=r"(reg1), [reg6] "=r"(reg6)
1761 : [reg2] "r"(reg2), [reg5] "r"(reg5)
1762 );
1763 count -= 1;
1764 __asm__
1765 (
1766 "pextlb %[reg2], $zero, %[reg6]\n"
1767 "pextub %[reg5], $zero, %[reg6]\n"
1768 : [reg2] "+r"(reg2), [reg5] "+r"(reg5)
1769 : [reg6] "r"(reg6)
1770 );
1771 set_mtsab_to_value(1);
1772 __asm__
1773 (
1774 "qfsrv %[reg1], %[reg1], %[reg6]\n"
1775 "pextlb %[reg6], $zero, %[reg1]\n"
1776 "pextub %[reg1], $zero, %[reg1]\n"
1777 "paddh %[reg2], %[reg2], %[reg6]\n"
1778 "paddh %[reg5], %[reg5], %[reg1]\n"
1779 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
1780 );
1781 if ( !count )
1782 goto LABEL_5;
1783 count2 += 1;
1784 do
1785 {
1786 do
1787 {
1788 reg3 = ((u128 *)m_pSrc)[0x00];
1789 reg4 = ((u128 *)m_pSrc)[0x18];
1790 set_mtsab_to_value(a3);
1791 m_pSrc += a4;
1792 __asm__
1793 (
1794 "qfsrv %[reg6], %[reg4], %[reg3]\n"
1795 "qfsrv %[reg1], %[reg3], %[reg4]\n"
1796 : [reg1] "+r"(reg1), [reg6] "+r"(reg6)
1797 : [reg3] "r"(reg3), [reg4] "r"(reg4)
1798 );
1799 count -= 1;
1800 __asm__
1801 (
1802 "pextlb %[reg3], $zero, %[reg6]\n"
1803 "pextub %[reg4], $zero, %[reg6]\n"
1804 : [reg3] "=r"(reg3), [reg4] "=r"(reg4)
1805 : [reg6] "r"(reg6)
1806 );
1807 set_mtsab_to_value(1);
1808 __asm__
1809 (
1810 "qfsrv %[reg1], %[reg1], %[reg6]\n"
1811 "pextlb %[reg6], $zero, %[reg1]\n"
1812 "pextub %[reg1], $zero, %[reg1]\n"
1813 "paddh %[reg3], %[reg3], %[reg6]\n"
1814 "paddh %[reg4], %[reg4], %[reg1]\n"
1815 "paddh %[reg6], %[reg2], %[reg3]\n"
1816 "paddh %[reg1], %[reg5], %[reg4]\n"
1817 "por %[reg2], $zero, %[reg3]\n"
1818 "pnor %[reg3], $zero, $zero\n"
1819 "por %[reg5], $zero, %[reg4]\n"
1820 "psrlh %[reg3], %[reg3], 0xF\n"
1821 "psllh %[reg3], %[reg3], 1\n"
1822 "paddh %[reg6], %[reg6], %[reg3]\n"
1823 "paddh %[reg1], %[reg1], %[reg3]\n"
1824 "psrlh %[reg6], %[reg6], 2\n"
1825 "psrlh %[reg1], %[reg1], 2\n"
1826 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
1827 );
1828 ((u128 *)m_pDstY)[0x00] = reg6;
1829 ((u128 *)m_pDstY)[0x01] = reg1;
1830 }
1831 while ( count > 0 );
1832 m_pDstY += 16;
1833LABEL_5:
1834 count = count2;
1835 m_pSrc += 512;
1836 }
1837 while ( count2 > 0 );
1838}
1839
1840void _MPEG_put_chroma_XY(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1841{
1842 int count;
1843 int count2;
1844 u8 *m_pSrc;
1845 u16 *m_pDstCbCr;
1846 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
1847
1848 count = var1;
1849 count2 = ta;
1850 m_pSrc = a1;
1851 m_pDstCbCr = a2;
1852 set_mtsab_to_value(a3);
1853 __asm__
1854 (
1855 "pnor %[reg1], $zero, $zero\n"
1856 : [reg1] "=r"(reg1)
1857 );
1858 reg3 = ((u64 *)m_pSrc)[0x00];
1859 reg2 = ((u64 *)m_pSrc)[0x01];
1860 set_mtsab_to_value(1);
1861 reg9 = ((u64 *)m_pSrc)[0x06];
1862 reg8 = ((u64 *)m_pSrc)[0x07];
1863 __asm__
1864 (
1865 "pcpyld %[reg3], %[reg9], %[reg3]\n"
1866 "pcpyld %[reg2], %[reg8], %[reg2]\n"
1867 "qfsrv %[reg3], %[reg3], %[reg3]\n"
1868 "qfsrv %[reg2], %[reg2], %[reg2]\n"
1869 "psrlh %[reg1], %[reg1], 0xF\n"
1870 "psllh %[reg1], %[reg1], 1\n"
1871 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3)
1872 : [reg8] "r"(reg8), [reg9] "r"(reg9)
1873 );
1874 m_pSrc = &m_pSrc[a4];
1875 count -= 1;
1876 __asm__
1877 (
1878 "qfsrv %[reg9], %[reg3], %[reg3]\n"
1879 "qfsrv %[reg8], %[reg2], %[reg2]\n"
1880 "pextlb %[reg3], $zero, %[reg3]\n"
1881 "pextlb %[reg2], $zero, %[reg2]\n"
1882 "pextlb %[reg9], $zero, %[reg9]\n"
1883 "pextlb %[reg8], $zero, %[reg8]\n"
1884 "paddh %[reg3], %[reg3], %[reg9]\n"
1885 "paddh %[reg9], %[reg2], %[reg8]\n"
1886 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
1887 );
1888 if ( !count )
1889 goto LABEL_5;
1890 count2 += 1;
1891 do
1892 {
1893 do
1894 {
1895 reg4 = ((u64 *)m_pSrc)[0x00];
1896 reg6 = ((u64 *)m_pSrc)[0x01];
1897 set_mtsab_to_value(a3);
1898 reg5 = ((u64 *)m_pSrc)[0x06];
1899 reg7 = ((u64 *)m_pSrc)[0x07];
1900 __asm__
1901 (
1902 "pcpyld %[reg4], %[reg5], %[reg4]\n"
1903 "pcpyld %[reg6], %[reg7], %[reg6]\n"
1904 "qfsrv %[reg4], %[reg4], %[reg4]\n"
1905 "qfsrv %[reg6], %[reg6], %[reg6]\n"
1906 : [reg4] "+r"(reg4), [reg6] "+r"(reg6)
1907 : [reg5] "r"(reg5), [reg7] "r"(reg7)
1908 );
1909 m_pSrc += a4;
1910 count -= 1;
1911 set_mtsab_to_value(1);
1912 __asm__
1913 (
1914 "qfsrv %[reg5], %[reg4], %[reg4]\n"
1915 "qfsrv %[reg7], %[reg6], %[reg6]\n"
1916 "pextlb %[reg4], $zero, %[reg4]\n"
1917 "pextlb %[reg6], $zero, %[reg6]\n"
1918 "pextlb %[reg5], $zero, %[reg5]\n"
1919 "pextlb %[reg7], $zero, %[reg7]\n"
1920 "paddh %[reg4], %[reg4], %[reg5]\n"
1921 "paddh %[reg5], %[reg6], %[reg7]\n"
1922 "paddh %[reg6], %[reg3], %[reg4]\n"
1923 "paddh %[reg7], %[reg9], %[reg5]\n"
1924 "por %[reg3], $zero, %[reg4]\n"
1925 "por %[reg9], $zero, %[reg5]\n"
1926 "paddh %[reg6], %[reg6], %[reg1]\n"
1927 "paddh %[reg7], %[reg7], %[reg1]\n"
1928 "psrlh %[reg6], %[reg6], 2\n"
1929 "psrlh %[reg7], %[reg7], 2\n"
1930 : [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg9] "+r"(reg9)
1931 : [reg1] "r"(reg1)
1932 );
1933 ((u128 *)m_pDstCbCr)[0x00] = reg6;
1934 ((u128 *)m_pDstCbCr)[0x08] = reg7;
1935 }
1936 while ( count > 0 );
1937 m_pDstCbCr += 8;
1938LABEL_5:
1939 count = count2;
1940 m_pSrc += 704;
1941 }
1942 while ( count2 > 0 );
1943}
1944
1945void _MPEG_avg_luma(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
1946{
1947 int count;
1948 int count2;
1949 u8 *m_pSrc;
1950 u16 *m_pDstY;
1951 u128 reg1, reg2, reg3, reg4, reg5, reg6;
1952
1953 count = var1;
1954 count2 = ta;
1955 m_pSrc = a1;
1956 m_pDstY = a2;
1957 set_mtsab_to_value(a3);
1958 do
1959 {
1960 do
1961 {
1962 reg3 = ((u128 *)m_pSrc)[0x00];
1963 reg4 = ((u128 *)m_pSrc)[0x18];
1964 m_pSrc += a4;
1965 count -= 1;
1966 __asm__
1967 (
1968 "qfsrv %[reg3], %[reg4], %[reg3]\n"
1969 "pextlb %[reg4], $zero, %[reg3]\n"
1970 "pextub %[reg3], $zero, %[reg3]\n"
1971 : [reg3] "+r"(reg3), [reg4] "+r"(reg4)
1972 );
1973 reg6 = ((u128 *)m_pDstY)[0x00];
1974 reg1 = ((u128 *)m_pDstY)[0x01];
1975 __asm__
1976 (
1977 "paddh %[reg4], %[reg4], %[reg6]\n"
1978 "paddh %[reg3], %[reg3], %[reg1]\n"
1979 "pcgth %[reg6], %[reg4], $zero\n"
1980 "pcgth %[reg1], %[reg3], $zero\n"
1981 "pceqh %[reg2], %[reg4], $zero\n"
1982 "pceqh %[reg5], %[reg3], $zero\n"
1983 "psrlh %[reg6], %[reg6], 0xF\n"
1984 "psrlh %[reg1], %[reg1], 0xF\n"
1985 "psrlh %[reg2], %[reg2], 0xF\n"
1986 "psrlh %[reg5], %[reg5], 0xF\n"
1987 "por %[reg6], %[reg6], %[reg2]\n"
1988 "por %[reg1], %[reg1], %[reg5]\n"
1989 "paddh %[reg4], %[reg4], %[reg6]\n"
1990 "paddh %[reg3], %[reg3], %[reg1]\n"
1991 "psrlh %[reg4], %[reg4], 1\n"
1992 "psrlh %[reg3], %[reg3], 1\n"
1993 : [reg1] "+r"(reg1), [reg2] "=r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "=r"(reg5), [reg6] "+r"(reg6)
1994 );
1995 ((u128 *)m_pDstY)[0x00] = reg4;
1996 ((u128 *)m_pDstY)[0x01] = reg3;
1997 }
1998 while ( count > 0 );
1999 m_pDstY += 16;
2000 count = count2;
2001 m_pSrc += 512;
2002 }
2003 while ( count2 > 0 );
2004}
2005
2006void _MPEG_avg_chroma(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2007{
2008 int count;
2009 int count2;
2010 u8 *m_pSrc;
2011 u16 *m_pDstCbCr;
2012 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8;
2013
2014 count = var1;
2015 count2 = ta;
2016 m_pSrc = a1;
2017 m_pDstCbCr = a2;
2018 set_mtsab_to_value(a3);
2019 do
2020 {
2021 do
2022 {
2023 reg3 = ((u64 *)m_pSrc)[0x00];
2024 reg4 = ((u64 *)m_pSrc)[0x01];
2025 count -= 1;
2026 reg5 = ((u64 *)m_pSrc)[0x06];
2027 reg6 = ((u64 *)m_pSrc)[0x07];
2028 m_pSrc += a4;
2029 __asm__
2030 (
2031 "pcpyld %[reg3], %[reg5], %[reg3]\n"
2032 "pcpyld %[reg4], %[reg6], %[reg4]\n"
2033 "qfsrv %[reg3], %[reg3], %[reg3]\n"
2034 "qfsrv %[reg4], %[reg4], %[reg4]\n"
2035 "pextlb %[reg3], $zero, %[reg3]\n"
2036 "pextlb %[reg4], $zero, %[reg4]\n"
2037 : [reg3] "+r"(reg3), [reg4] "+r"(reg4)
2038 : [reg5] "r"(reg5), [reg6] "r"(reg6)
2039 );
2040 reg8 = ((u128 *)m_pDstCbCr)[0x00];
2041 reg7 = ((u128 *)m_pDstCbCr)[0x08];
2042 __asm__
2043 (
2044 "paddh %[reg3], %[reg3], %[reg8]\n"
2045 "paddh %[reg4], %[reg4], %[reg7]\n"
2046 "pcgth %[reg8], %[reg3], $zero\n"
2047 "pcgth %[reg7], %[reg4], $zero\n"
2048 "pceqh %[reg2], %[reg3], $zero\n"
2049 "pceqh %[reg1], %[reg4], $zero\n"
2050 "psrlh %[reg8], %[reg8], 0xF\n"
2051 "psrlh %[reg7], %[reg7], 0xF\n"
2052 "psrlh %[reg2], %[reg2], 0xF\n"
2053 "psrlh %[reg1], %[reg1], 0xF\n"
2054 "por %[reg8], %[reg8], %[reg2]\n"
2055 "por %[reg7], %[reg7], %[reg1]\n"
2056 "paddh %[reg3], %[reg3], %[reg8]\n"
2057 "paddh %[reg4], %[reg4], %[reg7]\n"
2058 "psrlh %[reg3], %[reg3], 1\n"
2059 "psrlh %[reg4], %[reg4], 1\n"
2060 : [reg1] "=r"(reg1), [reg2] "=r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg7] "+r"(reg7), [reg8] "+r"(reg8)
2061 );
2062 ((u128 *)m_pDstCbCr)[0x00] = reg4;
2063 ((u128 *)m_pDstCbCr)[0x08] = reg3;
2064 }
2065 while ( count > 0 );
2066 m_pDstCbCr += 8;
2067 count = count2;
2068 m_pSrc += 704;
2069 }
2070 while ( count2 > 0 );
2071}
2072
2073void _MPEG_avg_luma_X(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2074{
2075 int count;
2076 int count2;
2077 u8 *m_pSrc;
2078 u16 *m_pDstY;
2079 u128 reg1, reg2, reg3, reg4, reg5, reg6;
2080
2081 count = var1;
2082 count2 = ta;
2083 m_pSrc = a1;
2084 m_pDstY = a2;
2085 __asm__
2086 (
2087 "pnor %[reg2], $zero, $zero\n"
2088 "psrlh %[reg2], %[reg2], 0xF\n"
2089 : [reg2] "=r"(reg2)
2090 );
2091 do
2092 {
2093 do
2094 {
2095 reg3 = ((u128 *)m_pSrc)[0x00];
2096 reg4 = ((u128 *)m_pSrc)[0x18];
2097 set_mtsab_to_value(a3);
2098 __asm__
2099 (
2100 "qfsrv %[reg5], %[reg4], %[reg3]\n"
2101 "qfsrv %[reg6], %[reg3], %[reg4]\n"
2102 "pextlb %[reg3], $zero, %[reg5]\n"
2103 "pextub %[reg4], $zero, %[reg5]\n"
2104 : [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "=r"(reg5), [reg6] "=r"(reg6)
2105 );
2106 m_pSrc += a4;
2107 set_mtsab_to_value(1);
2108 count -= 1;
2109 __asm__
2110 (
2111 "qfsrv %[reg6], %[reg6], %[reg5]\n"
2112 "pextlb %[reg5], $zero, %[reg6]\n"
2113 "pextub %[reg6], $zero, %[reg6]\n"
2114 "paddh %[reg3], %[reg3], %[reg5]\n"
2115 "paddh %[reg4], %[reg4], %[reg6]\n"
2116 "paddh %[reg3], %[reg3], %[reg2]\n"
2117 "paddh %[reg4], %[reg4], %[reg2]\n"
2118 "psrlh %[reg3], %[reg3], 1\n"
2119 "psrlh %[reg4], %[reg4], 1\n"
2120 : [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
2121 : [reg2] "r"(reg2)
2122 );
2123 reg6 = ((u128 *)m_pDstY)[0x00];
2124 reg1 = ((u128 *)m_pDstY)[0x01];
2125 __asm__
2126 (
2127 "paddh %[reg3], %[reg3], %[reg6]\n"
2128 "paddh %[reg4], %[reg4], %[reg1]\n"
2129 "pcgth %[reg6], %[reg3], $zero\n"
2130 "pceqh %[reg1], %[reg3], $zero\n"
2131 "psrlh %[reg6], %[reg6], 0xF\n"
2132 "psrlh %[reg1], %[reg1], 0xF\n"
2133 "por %[reg6], %[reg6], %[reg1]\n"
2134 "paddh %[reg3], %[reg3], %[reg6]\n"
2135 "pcgth %[reg6], %[reg4], $zero\n"
2136 "pceqh %[reg1], %[reg4], $zero\n"
2137 "psrlh %[reg6], %[reg6], 0xF\n"
2138 "psrlh %[reg1], %[reg1], 0xF\n"
2139 "por %[reg6], %[reg6], %[reg1]\n"
2140 "paddh %[reg4], %[reg4], %[reg6]\n"
2141 "psrlh %[reg3], %[reg3], 1\n"
2142 "psrlh %[reg4], %[reg4], 1\n"
2143 : [reg1] "+r"(reg1), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg6] "+r"(reg6)
2144 );
2145 ((u128 *)m_pDstY)[0x00] = reg3;
2146 ((u128 *)m_pDstY)[0x01] = reg4;
2147 }
2148 while ( count > 0 );
2149 m_pDstY += 16;
2150 count = count2;
2151 m_pSrc += 512;
2152 }
2153 while ( count2 > 0 );
2154}
2155
2156void _MPEG_avg_chroma_X(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2157{
2158 int count;
2159 int count2;
2160 u8 *m_pSrc;
2161 u16 *m_pDstCbCr;
2162 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
2163
2164 count = var1;
2165 count2 = ta;
2166 m_pSrc = a1;
2167 m_pDstCbCr = a2;
2168 __asm__
2169 (
2170 "pnor %[reg2], $zero, $zero\n"
2171 "psrlh %[reg2], %[reg2], 0xF\n"
2172 : [reg2] "=r"(reg2)
2173 );
2174 do
2175 {
2176 do
2177 {
2178 reg4 = ((u64 *)m_pSrc)[0x00];
2179 reg5 = ((u64 *)m_pSrc)[0x01];
2180 set_mtsab_to_value(a3);
2181 reg6 = ((u64 *)m_pSrc)[0x06];
2182 reg7 = ((u64 *)m_pSrc)[0x07];
2183 __asm__
2184 (
2185 "pcpyld %[reg4], %[reg6], %[reg4]\n"
2186 "pcpyld %[reg5], %[reg7], %[reg5]\n"
2187 "qfsrv %[reg4], %[reg4], %[reg4]\n"
2188 "qfsrv %[reg5], %[reg5], %[reg5]\n"
2189 : [reg4] "+r"(reg4), [reg5] "+r"(reg5)
2190 : [reg6] "r"(reg6), [reg7] "r"(reg7)
2191 );
2192 m_pSrc += a4;
2193 count -= 1;
2194 set_mtsab_to_value(1);
2195 __asm__
2196 (
2197 "qfsrv %[reg9], %[reg4], %[reg4]\n"
2198 "qfsrv %[reg8], %[reg5], %[reg5]\n"
2199 "pextlb %[reg4], $zero, %[reg4]\n"
2200 "pextlb %[reg5], $zero, %[reg5]\n"
2201 "pextlb %[reg9], $zero, %[reg9]\n"
2202 "pextlb %[reg8], $zero, %[reg8]\n"
2203 "paddh %[reg4], %[reg4], %[reg9]\n"
2204 "paddh %[reg5], %[reg5], %[reg8]\n"
2205 "paddh %[reg4], %[reg4], %[reg2]\n"
2206 "paddh %[reg5], %[reg5], %[reg2]\n"
2207 "psrlh %[reg4], %[reg4], 1\n"
2208 "psrlh %[reg5], %[reg5], 1\n"
2209 : [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg8] "=r"(reg8), [reg9] "=r"(reg9)
2210 : [reg2] "r"(reg2)
2211 );
2212 reg9 = ((u128 *)m_pDstCbCr)[0x00];
2213 reg8 = ((u128 *)m_pDstCbCr)[0x08];
2214 __asm__
2215 (
2216 "paddh %[reg4], %[reg4], %[reg9]\n"
2217 "paddh %[reg5], %[reg5], %[reg8]\n"
2218 "pcgth %[reg9], %[reg4], $zero\n"
2219 "pcgth %[reg8], %[reg5], $zero\n"
2220 "pceqh %[reg1], %[reg4], $zero\n"
2221 "pceqh %[reg3], %[reg5], $zero\n"
2222 "psrlh %[reg9], %[reg9], 0xF\n"
2223 "psrlh %[reg8], %[reg8], 0xF\n"
2224 "psrlh %[reg1], %[reg1], 0xF\n"
2225 "psrlh %[reg3], %[reg3], 0xF\n"
2226 "por %[reg9], %[reg9], %[reg1]\n"
2227 "por %[reg8], %[reg8], %[reg3]\n"
2228 "paddh %[reg4], %[reg4], %[reg9]\n"
2229 "paddh %[reg5], %[reg5], %[reg8]\n"
2230 "psrlh %[reg4], %[reg4], 1\n"
2231 "psrlh %[reg5], %[reg5], 1\n"
2232 : [reg1] "=r"(reg1), [reg3] "=r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
2233 );
2234 ((u128 *)m_pDstCbCr)[0x00] = reg4;
2235 ((u128 *)m_pDstCbCr)[0x08] = reg5;
2236 }
2237 while ( count > 0 );
2238 m_pDstCbCr += 8;
2239 count = count2;
2240 m_pSrc += 704;
2241 }
2242 while ( count2 > 0 );
2243}
2244
2245void _MPEG_avg_luma_Y(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2246{
2247 int count;
2248 int count2;
2249 u8 *m_pSrc;
2250 u16 *m_pDstY;
2251 u128 reg1, reg2, reg3, reg4, reg5, reg6;
2252
2253 count = var1;
2254 count2 = ta;
2255 m_pSrc = a1;
2256 m_pDstY = a2;
2257 set_mtsab_to_value(a3);
2258 reg5 = ((u128 *)m_pSrc)[0x00];
2259 reg6 = ((u128 *)m_pSrc)[0x18];
2260 m_pSrc = &m_pSrc[a4];
2261 count -= 1;
2262 __asm__
2263 (
2264 "qfsrv %[reg5], %[reg6], %[reg5]\n"
2265 "pextub %[reg6], $zero, %[reg5]\n"
2266 "pextlb %[reg5], $zero, %[reg5]\n"
2267 : [reg5] "+r"(reg5), [reg6] "+r"(reg6)
2268 );
2269 if ( !count )
2270 goto LABEL_5;
2271 count2 += 1;
2272 do
2273 {
2274 do
2275 {
2276 reg3 = ((u128 *)m_pSrc)[0x00];
2277 reg4 = ((u128 *)m_pSrc)[0x18];
2278 m_pSrc += a4;
2279 count -= 1;
2280 __asm__
2281 (
2282 "qfsrv %[reg3], %[reg4], %[reg3]\n"
2283 "pextub %[reg4], $zero, %[reg3]\n"
2284 "pextlb %[reg3], $zero, %[reg3]\n"
2285 "paddh %[reg2], %[reg4], %[reg6]\n"
2286 "pnor %[reg6], $zero, $zero\n"
2287 "paddh %[reg1], %[reg3], %[reg5]\n"
2288 "psrlh %[reg6], %[reg6], 0xF\n"
2289 "por %[reg5], $zero, %[reg3]\n"
2290 "paddh %[reg1], %[reg1], %[reg6]\n"
2291 "paddh %[reg2], %[reg2], %[reg6]\n"
2292 "por %[reg6], $zero, %[reg4]\n"
2293 "psrlh %[reg1], %[reg1], 1\n"
2294 "psrlh %[reg2], %[reg2], 1\n"
2295 : [reg1] "=r"(reg1), [reg2] "=r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
2296 );
2297 reg3 = ((u128 *)m_pDstY)[0x00];
2298 reg4 = ((u128 *)m_pDstY)[0x01];
2299 __asm__
2300 (
2301 "paddh %[reg1], %[reg1], %[reg3]\n"
2302 "paddh %[reg2], %[reg2], %[reg4]\n"
2303 "pcgth %[reg3], %[reg1], $zero\n"
2304 "pceqh %[reg4], %[reg1], $zero\n"
2305 "psrlh %[reg3], %[reg3], 0xF\n"
2306 "psrlh %[reg4], %[reg4], 0xF\n"
2307 "por %[reg3], %[reg3], %[reg4]\n"
2308 "paddh %[reg1], %[reg1], %[reg3]\n"
2309 "pcgth %[reg3], %[reg2], $zero\n"
2310 "pceqh %[reg4], %[reg2], $zero\n"
2311 "psrlh %[reg3], %[reg3], 0xF\n"
2312 "psrlh %[reg4], %[reg4], 0xF\n"
2313 "por %[reg3], %[reg3], %[reg4]\n"
2314 "paddh %[reg2], %[reg2], %[reg3]\n"
2315 "psrlh %[reg1], %[reg1], 1\n"
2316 "psrlh %[reg2], %[reg2], 1\n"
2317 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4)
2318 );
2319 ((u128 *)m_pDstY)[0x00] = reg1;
2320 ((u128 *)m_pDstY)[0x01] = reg2;
2321 }
2322 while ( count > 0 );
2323 m_pDstY += 16;
2324LABEL_5:
2325 count = count2;
2326 m_pSrc += 512;
2327 }
2328 while ( count2 > 0 );
2329}
2330
2331void _MPEG_avg_chroma_Y(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2332{
2333 int count;
2334 int count2;
2335 u8 *m_pSrc;
2336 u16 *m_pDstCbCr;
2337 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9, regA;
2338
2339 count = var1;
2340 count2 = ta;
2341 m_pSrc = a1;
2342 m_pDstCbCr = a2;
2343 set_mtsab_to_value(a3);
2344 reg2 = ((u64 *)m_pSrc)[0x00];
2345 reg3 = ((u64 *)m_pSrc)[0x01];
2346 regA = ((u64 *)m_pSrc)[0x06];
2347 reg9 = ((u64 *)m_pSrc)[0x07];
2348 __asm__
2349 (
2350 "pnor %[reg1], $zero, $zero\n"
2351 : [reg1] "=r"(reg1)
2352 );
2353 m_pSrc = &m_pSrc[a4];
2354 count -= 1;
2355 __asm__
2356 (
2357 "psrlh %[reg1], %[reg1], 0xF\n"
2358 "pcpyld %[reg2], %[regA], %[reg2]\n"
2359 "pcpyld %[reg3], %[reg9], %[reg3]\n"
2360 "qfsrv %[reg2], %[reg2], %[reg2]\n"
2361 "qfsrv %[reg3], %[reg3], %[reg3]\n"
2362 "pextlb %[reg2], $zero, %[reg2]\n"
2363 "pextlb %[reg3], $zero, %[reg3]\n"
2364 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3)
2365 : [reg9] "r"(reg9), [regA] "r"(regA)
2366 );
2367 if ( !count )
2368 goto LABEL_5;
2369 count2 += 1;
2370 do
2371 {
2372 do
2373 {
2374 reg4 = ((u64 *)m_pSrc)[0x00];
2375 reg5 = ((u64 *)m_pSrc)[0x01];
2376 count -= 1;
2377 reg6 = ((u64 *)m_pSrc)[0x06];
2378 reg7 = ((u64 *)m_pSrc)[0x07];
2379 m_pSrc += a4;
2380 __asm__
2381 (
2382 "pcpyld %[reg4], %[reg6], %[reg4]\n"
2383 "pcpyld %[reg5], %[reg7], %[reg5]\n"
2384 "qfsrv %[reg4], %[reg4], %[reg4]\n"
2385 "qfsrv %[reg5], %[reg5], %[reg5]\n"
2386 "pextlb %[reg4], $zero, %[reg4]\n"
2387 "pextlb %[reg5], $zero, %[reg5]\n"
2388 "paddh %[reg9], %[reg4], %[reg2]\n"
2389 "paddh %[reg8], %[reg5], %[reg3]\n"
2390 "por %[reg2], $zero, %[reg4]\n"
2391 "por %[reg3], $zero, %[reg5]\n"
2392 "paddh %[reg9], %[reg9], %[reg1]\n"
2393 "paddh %[reg8], %[reg8], %[reg1]\n"
2394 "psrlh %[reg9], %[reg9], 1\n"
2395 "psrlh %[reg8], %[reg8], 1\n"
2396 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg8] "=r"(reg8), [reg9] "=r"(reg9)
2397 : [reg1] "r"(reg1), [reg6] "r"(reg6), [reg7] "r"(reg7)
2398 );
2399 reg4 = ((u128 *)m_pDstCbCr)[0x00];
2400 reg5 = ((u128 *)m_pDstCbCr)[0x80];
2401 __asm__
2402 (
2403 "paddh %[reg9], %[reg9], %[reg4]\n"
2404 "paddh %[reg8], %[reg8], %[reg5]\n"
2405 "pcgth %[reg4], %[reg9], $zero\n"
2406 "pceqh %[reg5], %[reg9], $zero\n"
2407 "psrlh %[reg4], %[reg4], 0xF\n"
2408 "psrlh %[reg5], %[reg5], 0xF\n"
2409 "por %[reg4], %[reg4], %[reg5]\n"
2410 "paddh %[reg9], %[reg9], %[reg4]\n"
2411 "pcgth %[reg4], %[reg8], $zero\n"
2412 "pceqh %[reg5], %[reg8], $zero\n"
2413 "psrlh %[reg4], %[reg4], 0xF\n"
2414 "psrlh %[reg5], %[reg5], 0xF\n"
2415 "por %[reg4], %[reg4], %[reg5]\n"
2416 "paddh %[reg8], %[reg8], %[reg4]\n"
2417 "psrlh %[reg9], %[reg9], 1\n"
2418 "psrlh %[reg8], %[reg8], 1\n"
2419 : [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg8] "+r"(reg8), [reg9] "+r"(reg9)
2420 );
2421 ((u128 *)m_pDstCbCr)[0x00] = reg9;
2422 ((u128 *)m_pDstCbCr)[0x08] = reg8;
2423 }
2424 while ( count > 0 );
2425 m_pDstCbCr += 8;
2426LABEL_5:
2427 count = count2;
2428 m_pSrc += 704;
2429 }
2430 while ( count2 > 0 );
2431}
2432
2433void _MPEG_avg_luma_XY(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2434{
2435 int count;
2436 int count2;
2437 u8 *m_pSrc;
2438 u16 *m_pDstY;
2439 u128 reg1, reg2, reg3, reg4, reg5, reg6;
2440
2441 count = var1;
2442 count2 = ta;
2443 m_pSrc = a1;
2444 m_pDstY = a2;
2445 set_mtsab_to_value(a3);
2446 reg2 = ((u128 *)m_pSrc)[0x00];
2447 reg5 = ((u128 *)m_pSrc)[0x18];
2448 m_pSrc = &m_pSrc[a4];
2449 __asm__
2450 (
2451 "qfsrv %[reg6], %[reg5], %[reg2]\n"
2452 "qfsrv %[reg1], %[reg2], %[reg5]\n"
2453 : [reg1] "=r"(reg1), [reg6] "=r"(reg6)
2454 : [reg2] "r"(reg2), [reg5] "r"(reg5)
2455 );
2456 count -= 1;
2457 __asm__
2458 (
2459 "pextlb %[reg2], $zero, %[reg6]\n"
2460 "pextub %[reg5], $zero, %[reg6]\n"
2461 : [reg2] "+r"(reg2), [reg5] "+r"(reg5)
2462 : [reg6] "r"(reg6)
2463 );
2464 set_mtsab_to_value(1);
2465 __asm__
2466 (
2467 "qfsrv %[reg1], %[reg1], %[reg6]\n"
2468 "pextlb %[reg6], $zero, %[reg1]\n"
2469 "pextub %[reg1], $zero, %[reg1]\n"
2470 "paddh %[reg2], %[reg2], %[reg6]\n"
2471 "paddh %[reg5], %[reg5], %[reg1]\n"
2472 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
2473 );
2474 if ( !count )
2475 goto LABEL_5;
2476 count2 += 1;
2477 do
2478 {
2479 do
2480 {
2481 reg3 = ((u128 *)m_pSrc)[0x00];
2482 reg4 = ((u128 *)m_pSrc)[0x18];
2483 set_mtsab_to_value(a3);
2484 m_pSrc += a4;
2485 __asm__
2486 (
2487 "qfsrv %[reg6], %[reg4], %[reg3]\n"
2488 "qfsrv %[reg1], %[reg3], %[reg4]\n"
2489 : [reg1] "=r"(reg1), [reg6] "=r"(reg6)
2490 : [reg3] "r"(reg3), [reg4] "r"(reg4)
2491 );
2492 count -= 1;
2493 __asm__
2494 (
2495 "pextlb %[reg3], $zero, %[reg6]\n"
2496 "pextub %[reg4], $zero, %[reg6]\n"
2497 : [reg3] "=r"(reg3), [reg4] "=r"(reg4)
2498 : [reg6] "r"(reg6)
2499 );
2500 set_mtsab_to_value(1);
2501 __asm__
2502 (
2503 "qfsrv %[reg1], %[reg1], %[reg6]\n"
2504 "pextlb %[reg6], $zero, %[reg1]\n"
2505 "pextub %[reg1], $zero, %[reg1]\n"
2506 "paddh %[reg3], %[reg3], %[reg6]\n"
2507 "paddh %[reg4], %[reg4], %[reg1]\n"
2508 "paddh %[reg6], %[reg2], %[reg3]\n"
2509 "paddh %[reg1], %[reg5], %[reg4]\n"
2510 "por %[reg2], $zero, %[reg3]\n"
2511 "pnor %[reg3], $zero, $zero\n"
2512 "por %[reg5], $zero, %[reg4]\n"
2513 "psrlh %[reg3], %[reg3], 0xF\n"
2514 "psllh %[reg3], %[reg3], 1\n"
2515 "paddh %[reg6], %[reg6], %[reg3]\n"
2516 "paddh %[reg1], %[reg1], %[reg3]\n"
2517 "psrlh %[reg6], %[reg6], 2\n"
2518 "psrlh %[reg1], %[reg1], 2\n"
2519 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6)
2520 );
2521 reg3 = ((u128 *)m_pDstY)[0x00];
2522 reg4 = ((u128 *)m_pDstY)[0x01];
2523 __asm__
2524 (
2525 "paddh %[reg6], %[reg6], %[reg3]\n"
2526 "paddh %[reg1], %[reg1], %[reg4]\n"
2527 "pcgth %[reg3], %[reg6], $zero\n"
2528 "pceqh %[reg4], %[reg6], $zero\n"
2529 "psrlh %[reg3], %[reg3], 0xF\n"
2530 "psrlh %[reg4], %[reg4], 0xF\n"
2531 "por %[reg3], %[reg3], %[reg4]\n"
2532 "paddh %[reg6], %[reg6], %[reg3]\n"
2533 "pcgth %[reg3], %[reg1], $zero\n"
2534 "pceqh %[reg4], %[reg1], $zero\n"
2535 "psrlh %[reg3], %[reg3], 0xF\n"
2536 "psrlh %[reg4], %[reg4], 0xF\n"
2537 "por %[reg3], %[reg3], %[reg4]\n"
2538 "paddh %[reg1], %[reg1], %[reg3]\n"
2539 "psrlh %[reg6], %[reg6], 1\n"
2540 "psrlh %[reg1], %[reg1], 1\n"
2541 : [reg1] "+r"(reg1), [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg6] "+r"(reg6)
2542 );
2543 ((u128 *)m_pDstY)[0x00] = reg6;
2544 ((u128 *)m_pDstY)[0x01] = reg1;
2545 }
2546 while ( count > 0 );
2547 m_pDstY += 16;
2548LABEL_5:
2549 count = count2;
2550 m_pSrc += 512;
2551 }
2552 while ( count2 > 0 );
2553}
2554
2555void _MPEG_avg_chroma_XY(u8 *a1, u16 *a2, int a3, int a4, int var1, int ta)
2556{
2557 int count;
2558 int count2;
2559 u8 *m_pSrc;
2560 u16 *m_pDstCbCr;
2561 u128 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
2562
2563 count = var1;
2564 count2 = ta;
2565 m_pSrc = a1;
2566 m_pDstCbCr = a2;
2567 set_mtsab_to_value(a3);
2568 __asm__
2569 (
2570 "pnor %[reg1], $zero, $zero\n"
2571 : [reg1] "=r"(reg1)
2572 );
2573 reg3 = ((u64 *)m_pSrc)[0x00];
2574 reg2 = ((u64 *)m_pSrc)[0x01];
2575 set_mtsab_to_value(1);
2576 reg9 = ((u64 *)m_pSrc)[0x06];
2577 reg8 = ((u64 *)m_pSrc)[0x07];
2578 __asm__
2579 (
2580 "pcpyld %[reg3], %[reg9], %[reg3]\n"
2581 "pcpyld %[reg2], %[reg8], %[reg2]\n"
2582 "qfsrv %[reg3], %[reg3], %[reg3]\n"
2583 "qfsrv %[reg2], %[reg2], %[reg2]\n"
2584 "psrlh %[reg1], %[reg1], 0xF\n"
2585 "psllh %[reg1], %[reg1], 1\n"
2586 : [reg1] "+r"(reg1), [reg2] "+r"(reg2), [reg3] "+r"(reg3)
2587 : [reg8] "r"(reg8), [reg9] "r"(reg9)
2588 );
2589 m_pSrc = &m_pSrc[a4];
2590 count -= 1;
2591 __asm__
2592 (
2593 "qfsrv %[reg9], %[reg3], %[reg3]\n"
2594 "qfsrv %[reg8], %[reg2], %[reg2]\n"
2595 "pextlb %[reg3], $zero, %[reg3]\n"
2596 "pextlb %[reg2], $zero, %[reg2]\n"
2597 "pextlb %[reg9], $zero, %[reg9]\n"
2598 "pextlb %[reg8], $zero, %[reg8]\n"
2599 "paddh %[reg3], %[reg3], %[reg9]\n"
2600 "paddh %[reg9], %[reg2], %[reg8]\n"
2601 : [reg2] "+r"(reg2), [reg3] "+r"(reg3), [reg8] "=r"(reg8), [reg9] "=r"(reg9)
2602 );
2603 if ( !count )
2604 goto LABEL_5;
2605 count2 += 1;
2606 do
2607 {
2608 do
2609 {
2610 reg4 = ((u64 *)m_pSrc)[0x00];
2611 reg6 = ((u64 *)m_pSrc)[0x01];
2612 set_mtsab_to_value(a3);
2613 reg5 = ((u64 *)m_pSrc)[0x06];
2614 reg7 = ((u64 *)m_pSrc)[0x07];
2615 __asm__
2616 (
2617 "pcpyld %[reg4], %[reg5], %[reg4]\n"
2618 "pcpyld %[reg6], %[reg7], %[reg6]\n"
2619 "qfsrv %[reg4], %[reg4], %[reg4]\n"
2620 "qfsrv %[reg6], %[reg6], %[reg6]\n"
2621 : [reg4] "+r"(reg4), [reg6] "+r"(reg6)
2622 : [reg5] "r"(reg5), [reg7] "r"(reg7)
2623 );
2624 m_pSrc += a4;
2625 count -= 1;
2626 set_mtsab_to_value(1);
2627 __asm__
2628 (
2629 "qfsrv %[reg5], %[reg4], %[reg4]\n"
2630 "qfsrv %[reg7], %[reg6], %[reg6]\n"
2631 "pextlb %[reg4], $zero, %[reg4]\n"
2632 "pextlb %[reg6], $zero, %[reg6]\n"
2633 "pextlb %[reg5], $zero, %[reg5]\n"
2634 "pextlb %[reg7], $zero, %[reg7]\n"
2635 "paddh %[reg4], %[reg4], %[reg5]\n"
2636 "paddh %[reg5], %[reg6], %[reg7]\n"
2637 "paddh %[reg6], %[reg3], %[reg4]\n"
2638 "paddh %[reg7], %[reg9], %[reg5]\n"
2639 "por %[reg3], $zero, %[reg4]\n"
2640 "por %[reg9], $zero, %[reg5]\n"
2641 "paddh %[reg6], %[reg6], %[reg1]\n"
2642 "paddh %[reg7], %[reg7], %[reg1]\n"
2643 "psrlh %[reg6], %[reg6], 2\n"
2644 "psrlh %[reg7], %[reg7], 2\n"
2645 : [reg3] "+r"(reg3), [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7), [reg9] "+r"(reg9)
2646 : [reg1] "r"(reg1)
2647 );
2648 reg4 = ((u128 *)m_pDstCbCr)[0x00];
2649 reg5 = ((u128 *)m_pDstCbCr)[0x08];
2650 __asm__
2651 (
2652 "paddh %[reg6], %[reg6], %[reg4]\n"
2653 "paddh %[reg7], %[reg7], %[reg5]\n"
2654 "pcgth %[reg4], %[reg6], $zero\n"
2655 "pceqh %[reg5], %[reg6], $zero\n"
2656 "psrlh %[reg4], %[reg4], 0xF\n"
2657 "psrlh %[reg5], %[reg5], 0xF\n"
2658 "por %[reg4], %[reg4], %[reg5]\n"
2659 "paddh %[reg6], %[reg6], %[reg4]\n"
2660 "pcgth %[reg4], %[reg7], $zero\n"
2661 "pceqh %[reg5], %[reg7], $zero\n"
2662 "psrlh %[reg4], %[reg4], 0xF\n"
2663 "psrlh %[reg5], %[reg5], 0xF\n"
2664 "por %[reg4], %[reg4], %[reg5]\n"
2665 "paddh %[reg7], %[reg7], %[reg4]\n"
2666 "psrlh %[reg6], %[reg6], 1\n"
2667 "psrlh %[reg7], %[reg7], 1\n"
2668 : [reg4] "+r"(reg4), [reg5] "+r"(reg5), [reg6] "+r"(reg6), [reg7] "+r"(reg7)
2669 );
2670 ((u128 *)m_pDstCbCr)[0x00] = reg6;
2671 ((u128 *)m_pDstCbCr)[0x08] = reg7;
2672 }
2673 while ( count > 0 );
2674 m_pDstCbCr += 8;
2675LABEL_5:
2676 count = count2;
2677 m_pSrc += 704;
2678 }
2679 while ( count2 > 0 );
2680}
2681
#define R_EE_D9_CHCR
Definition ee_regs.h:438
#define R_EE_D9_SADR
Definition ee_regs.h:446
#define R_EE_D_ENABLER
Definition ee_regs.h:560
#define R_EE_IPU_CMD
Definition ee_regs.h:238
#define R_EE_D4_MADR
Definition ee_regs.h:404
#define R_EE_IPU_in_FIFO
Definition ee_regs.h:358
#define R_EE_D3_QWC
Definition ee_regs.h:400
#define R_EE_D3_MADR
Definition ee_regs.h:398
#define R_EE_D4_CHCR
Definition ee_regs.h:402
#define R_EE_IPU_TOP
Definition ee_regs.h:244
#define R_EE_D3_CHCR
Definition ee_regs.h:396
#define R_EE_IPU_CTRL
Definition ee_regs.h:240
#define R_EE_D4_QWC
Definition ee_regs.h:406
#define R_EE_D_ENABLEW
Definition ee_regs.h:562
#define R_EE_D9_QWC
Definition ee_regs.h:442
#define R_EE_D_STAT
Definition ee_regs.h:450
#define R_EE_IPU_BP
Definition ee_regs.h:242
u32 count
start sector of fragmented bd/file