22 __asm__ __volatile__ (
24 "lqc2 $vf1, 0x00(%2) \n"
25 "lqc2 $vf2, 0x10(%2) \n"
26 "lqc2 $vf3, 0x20(%2) \n"
27 "lqc2 $vf4, 0x30(%2) \n"
28 "lqc2 $vf5, 0x00(%1) \n"
29 "vmulaw $ACC, $vf4, $vf0\n"
30 "vmaddax $ACC, $vf1, $vf5\n"
31 "vmadday $ACC, $vf2, $vf5\n"
32 "vmaddz $vf6, $vf3, $vf5\n"
33 "sqc2 $vf6, 0x00(%0) \n"
35 "lqc2 vf1, 0x00(%2) \n"
36 "lqc2 vf2, 0x10(%2) \n"
37 "lqc2 vf3, 0x20(%2) \n"
38 "lqc2 vf4, 0x30(%2) \n"
39 "lqc2 vf5, 0x00(%1) \n"
40 "vmulaw ACC, vf4, vf0 \n"
41 "vmaddax ACC, vf1, vf5 \n"
42 "vmadday ACC, vf2, vf5 \n"
43 "vmaddz vf6, vf3, vf5 \n"
44 "sqc2 vf6, 0x00(%0) \n"
46 : :
"r" (output),
"r" (input0),
"r" (input1)
51 memset(output, 0,
sizeof(VECTOR));
52 for (i = 0; i < 4; i += 1)
55 for (j = 0; j < 4; j += 1)
57 output[j] += input1[(4 * i) + j] * (i != 3 ? input0[i] : 1.0f);
63 void vector_clamp(VECTOR output, VECTOR input0,
float min,
float max) {
70 if (work[0] < min) { work[0] = min; }
71 if (work[1] < min) { work[1] = min; }
72 if (work[2] < min) { work[2] = min; }
73 if (work[3] < min) { work[3] = min; }
76 if (work[0] > max) { work[0] = max; }
77 if (work[1] > max) { work[1] = max; }
78 if (work[2] > max) { work[2] = max; }
79 if (work[3] > max) { work[3] = max; }
88 __asm__ __volatile__ (
90 "lqc2 $vf1, 0x00(%1) \n"
91 "sqc2 $vf1, 0x00(%0) \n"
93 "lqc2 vf1, 0x00(%1) \n"
94 "sqc2 vf1, 0x00(%0) \n"
96 : :
"r" (output),
"r" (input0)
100 memcpy(output, input0,
sizeof(VECTOR));
108 work0[0] = (input0[0] / input0[3]);
109 work0[1] = (input0[1] / input0[3]);
110 work0[2] = (input0[2] / input0[3]);
114 work1[0] = (input1[0] / input1[3]);
115 work1[1] = (input1[1] / input1[3]);
116 work1[2] = (input1[2] / input1[3]);
120 return (work0[0] * work1[0]) + (work0[1] * work1[1]) + (work0[2] * work1[2]);
128 work[0] = input0[0] * input1[0];
129 work[1] = input0[1] * input1[1];
130 work[2] = input0[2] * input1[2];
131 work[3] = input0[3] * input1[3];
140 __asm__ __volatile__ (
142 "lqc2 $vf1, 0x00(%1) \n"
143 "vmul.xyz $vf2, $vf1, $vf1\n"
144 "vmulax.w $ACC, $vf0, $vf2\n"
145 "vmadday.w $ACC, $vf0, $vf2\n"
146 "vmaddz.w $vf2, $vf0, $vf2\n"
147 "vrsqrt $Q, $vf0w, $vf2w\n"
148 "vsub.w $vf1, $vf0, $vf0\n"
150 "vmulq.xyz $vf1, $vf1, $Q \n"
151 "sqc2 $vf1, 0x00(%0) \n"
153 "lqc2 vf1, 0x00(%1) \n"
154 "vmul.xyz vf2, vf1, vf1 \n"
155 "vmulax.w ACC, vf0, vf2 \n"
156 "vmadday.w ACC, vf0, vf2 \n"
157 "vmaddz.w vf2, vf0, vf2 \n"
158 "vrsqrt Q, vf0w, vf2w \n"
159 "vsub.w vf1, vf0, vf0 \n"
161 "vmulq.xyz vf1, vf1, Q \n"
162 "sqc2 vf1, 0x00(%0) \n"
164 : :
"r" (output),
"r" (input0)
169 q = 1.0f / sqrtf((input0[0] * input0[0]) + (input0[1] * input0[1]) + (input0[2] * input0[2]));
170 output[0] = input0[0] * q;
171 output[1] = input0[1] * q;
172 output[2] = input0[2] * q;
179 __asm__ __volatile__ (
181 "lqc2 $vf1, 0x00(%1) \n"
182 "lqc2 $vf2, 0x00(%2) \n"
183 "vopmula.xyz $ACC, $vf1, $vf2\n"
184 "vopmsub.xyz $vf2, $vf2, $vf1\n"
185 "vsub.w $vf2, $vf0, $vf0\n"
186 "sqc2 $vf2, 0x00(%0) \n"
188 "lqc2 vf1, 0x00(%1) \n"
189 "lqc2 vf2, 0x00(%2) \n"
190 "vopmula.xyz ACC, vf1, vf2 \n"
191 "vopmsub.xyz vf2, vf2, vf1 \n"
192 "vsub.w vf2, vf0, vf0 \n"
193 "sqc2 vf2, 0x00(%0) \n"
195 : :
"r" (output),
"r" (input0),
"r" (input1)
199 output[0] = input0[1] * input1[2] - input1[1] * input0[2];
200 output[1] = input0[2] * input1[0] - input1[2] * input0[0];
201 output[2] = input0[0] * input1[1] - input1[0] * input0[1];
208 work[0] = addend[0]+summand[0];
209 work[1] = addend[1]+summand[1];
210 work[2] = addend[2]+summand[2];
211 work[3] = addend[3]+summand[3];
217 work[0] = multiplicand[1] * multiplier[2] - multiplicand[2] * multiplier[1];
218 work[1] = multiplicand[2] * multiplier[0] - multiplicand[0] * multiplier[2];
219 work[2] = multiplicand[0] * multiplier[1] - multiplicand[1] * multiplier[0];
228 VECTOR triangle_normal;
242 __asm__ __volatile__ (
244 "lqc2 $vf1, 0x00(%1) \n"
245 "lqc2 $vf2, 0x10(%1) \n"
246 "lqc2 $vf3, 0x20(%1) \n"
247 "lqc2 $vf4, 0x30(%1) \n"
248 "sqc2 $vf1, 0x00(%0) \n"
249 "sqc2 $vf2, 0x10(%0) \n"
250 "sqc2 $vf3, 0x20(%0) \n"
251 "sqc2 $vf4, 0x30(%0) \n"
253 "lqc2 vf1, 0x00(%1) \n"
254 "lqc2 vf2, 0x10(%1) \n"
255 "lqc2 vf3, 0x20(%1) \n"
256 "lqc2 vf4, 0x30(%1) \n"
257 "sqc2 vf1, 0x00(%0) \n"
258 "sqc2 vf2, 0x10(%0) \n"
259 "sqc2 vf3, 0x20(%0) \n"
260 "sqc2 vf4, 0x30(%0) \n"
262 : :
"r" (output),
"r" (input0)
266 memcpy(output, input0,
sizeof(MATRIX));
278 work[0x0C] = -(input0[0x0C] * work[0x00] + input0[0x0D] * work[0x04] + input0[0x0E] * work[0x08]);
279 work[0x0D] = -(input0[0x0C] * work[0x01] + input0[0x0D] * work[0x05] + input0[0x0E] * work[0x09]);
280 work[0x0E] = -(input0[0x0C] * work[0x02] + input0[0x0D] * work[0x06] + input0[0x0E] * work[0x0A]);
290 __asm__ __volatile__ (
292 "lqc2 $vf1, 0x00(%1) \n"
293 "lqc2 $vf2, 0x10(%1) \n"
294 "lqc2 $vf3, 0x20(%1) \n"
295 "lqc2 $vf4, 0x30(%1) \n"
296 "lqc2 $vf5, 0x00(%2) \n"
297 "lqc2 $vf6, 0x10(%2) \n"
298 "lqc2 $vf7, 0x20(%2) \n"
299 "lqc2 $vf8, 0x30(%2) \n"
300 "vmulax.xyzw $ACC, $vf5, $vf1\n"
301 "vmadday.xyzw $ACC, $vf6, $vf1\n"
302 "vmaddaz.xyzw $ACC, $vf7, $vf1\n"
303 "vmaddw.xyzw $vf1, $vf8, $vf1\n"
304 "vmulax.xyzw $ACC, $vf5, $vf2\n"
305 "vmadday.xyzw $ACC, $vf6, $vf2\n"
306 "vmaddaz.xyzw $ACC, $vf7, $vf2\n"
307 "vmaddw.xyzw $vf2, $vf8, $vf2\n"
308 "vmulax.xyzw $ACC, $vf5, $vf3\n"
309 "vmadday.xyzw $ACC, $vf6, $vf3\n"
310 "vmaddaz.xyzw $ACC, $vf7, $vf3\n"
311 "vmaddw.xyzw $vf3, $vf8, $vf3\n"
312 "vmulax.xyzw $ACC, $vf5, $vf4\n"
313 "vmadday.xyzw $ACC, $vf6, $vf4\n"
314 "vmaddaz.xyzw $ACC, $vf7, $vf4\n"
315 "vmaddw.xyzw $vf4, $vf8, $vf4\n"
316 "sqc2 $vf1, 0x00(%0) \n"
317 "sqc2 $vf2, 0x10(%0) \n"
318 "sqc2 $vf3, 0x20(%0) \n"
319 "sqc2 $vf4, 0x30(%0) \n"
321 "lqc2 vf1, 0x00(%1) \n"
322 "lqc2 vf2, 0x10(%1) \n"
323 "lqc2 vf3, 0x20(%1) \n"
324 "lqc2 vf4, 0x30(%1) \n"
325 "lqc2 vf5, 0x00(%2) \n"
326 "lqc2 vf6, 0x10(%2) \n"
327 "lqc2 vf7, 0x20(%2) \n"
328 "lqc2 vf8, 0x30(%2) \n"
329 "vmulax.xyzw ACC, vf5, vf1 \n"
330 "vmadday.xyzw ACC, vf6, vf1 \n"
331 "vmaddaz.xyzw ACC, vf7, vf1 \n"
332 "vmaddw.xyzw vf1, vf8, vf1 \n"
333 "vmulax.xyzw ACC, vf5, vf2 \n"
334 "vmadday.xyzw ACC, vf6, vf2 \n"
335 "vmaddaz.xyzw ACC, vf7, vf2 \n"
336 "vmaddw.xyzw vf2, vf8, vf2 \n"
337 "vmulax.xyzw ACC, vf5, vf3 \n"
338 "vmadday.xyzw ACC, vf6, vf3 \n"
339 "vmaddaz.xyzw ACC, vf7, vf3 \n"
340 "vmaddw.xyzw vf3, vf8, vf3 \n"
341 "vmulax.xyzw ACC, vf5, vf4 \n"
342 "vmadday.xyzw ACC, vf6, vf4 \n"
343 "vmaddaz.xyzw ACC, vf7, vf4 \n"
344 "vmaddw.xyzw vf4, vf8, vf4 \n"
345 "sqc2 vf1, 0x00(%0) \n"
346 "sqc2 vf2, 0x10(%0) \n"
347 "sqc2 vf3, 0x20(%0) \n"
348 "sqc2 vf4, 0x30(%0) \n"
350 : :
"r" (output),
"r" (input0),
"r" (input1)
/* Plain-C 4x4 product: zero the destination, then sum over j so that
 * output[4*i + k] = sum_j input0[4*i + j] * input1[4*j + k].
 * NOTE(review): output is cleared before the inputs are read, so this path
 * presumably requires output not to alias input0/input1 - confirm against callers. */
355 memset(output, 0,
sizeof(MATRIX));
356 for (i = 0; i < 4; i += 1)
359 for (j = 0; j < 4; j += 1)
362 for (k = 0; k < 4; k += 1)
/* Must accumulate: plain assignment kept only the j == 3 term of each sum. */
364 output[(4 * i) + k] += input1[(4 * j) + k] * input0[(4 * i) + j];
376 work[0x00] = cosf(input1[2]);
377 work[0x01] = sinf(input1[2]);
378 work[0x04] = -sinf(input1[2]);
379 work[0x05] = cosf(input1[2]);
384 work[0x00] = cosf(input1[1]);
385 work[0x02] = -sinf(input1[1]);
386 work[0x08] = sinf(input1[1]);
387 work[0x0A] = cosf(input1[1]);
392 work[0x05] = cosf(input1[0]);
393 work[0x06] = sinf(input1[0]);
394 work[0x09] = -sinf(input1[0]);
395 work[0x0A] = cosf(input1[0]);
405 work[0x00] = input1[0];
406 work[0x05] = input1[1];
407 work[0x0A] = input1[2];
417 work[0x0C] = input1[0];
418 work[0x0D] = input1[1];
419 work[0x0E] = input1[2];
428 work[0x00] = input0[0x00];
429 work[0x01] = input0[0x04];
430 work[0x02] = input0[0x08];
431 work[0x03] = input0[0x0C];
432 work[0x04] = input0[0x01];
433 work[0x05] = input0[0x05];
434 work[0x06] = input0[0x09];
435 work[0x07] = input0[0x0D];
436 work[0x08] = input0[0x02];
437 work[0x09] = input0[0x06];
438 work[0x0A] = input0[0x0A];
439 work[0x0B] = input0[0x0E];
440 work[0x0C] = input0[0x03];
441 work[0x0D] = input0[0x07];
442 work[0x0E] = input0[0x0B];
443 work[0x0F] = input0[0x0F];
453 memset(output, 0,
sizeof(MATRIX));
454 output[0x00] = 1.00f;
455 output[0x05] = 1.00f;
456 output[0x0A] = 1.00f;
457 output[0x0F] = 1.00f;
484 work0[0] = -translation[0];
485 work0[1] = -translation[1];
486 work0[2] = -translation[2];
487 work0[3] = translation[3];
490 work1[0] = -rotation[0];
491 work1[1] = -rotation[1];
492 work1[2] = -rotation[2];
493 work1[3] = rotation[3];
502 void create_view_screen(MATRIX view_screen,
float aspect,
float left,
float right,
float bottom,
float top,
float near,
float far) {
505 left = (left * aspect); right = (right * aspect);
509 view_screen[0x00] = (2 * near) / (right - left);
510 view_screen[0x05] = (2 * near) / (top - bottom);
511 view_screen[0x08] = (right + left) / (right - left);
512 view_screen[0x09] = (top + bottom) / (top - bottom);
513 view_screen[0x0A] = (far + near) / (far - near);
514 view_screen[0x0B] = -1.00f;
515 view_screen[0x0E] = (2 * far * near) / (far - near);
516 view_screen[0x0F] = 0.00f;
520 void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen) {
534 __asm__ __volatile__ (
536 "lqc2 $vf1, 0x00(%3) \n"
537 "lqc2 $vf2, 0x10(%3) \n"
538 "lqc2 $vf3, 0x20(%3) \n"
539 "lqc2 $vf4, 0x30(%3) \n"
541 "lqc2 $vf6, 0x00(%2) \n"
542 "vmulaw $ACC, $vf4, $vf0\n"
543 "vmaddax $ACC, $vf1, $vf6\n"
544 "vmadday $ACC, $vf2, $vf6\n"
545 "vmaddz $vf7, $vf3, $vf6\n"
546 "vdiv $Q, $vf0w, $vf7w\n"
548 "vmulq.xyzw $vf7, $vf7, $Q \n"
549 "sqc2 $vf7, 0x00(%0) \n"
555 "lqc2 vf1, 0x00(%3) \n"
556 "lqc2 vf2, 0x10(%3) \n"
557 "lqc2 vf3, 0x20(%3) \n"
558 "lqc2 vf4, 0x30(%3) \n"
560 "lqc2 vf6, 0x00(%2) \n"
561 "vmulaw ACC, vf4, vf0 \n"
562 "vmaddax ACC, vf1, vf6 \n"
563 "vmadday ACC, vf2, vf6 \n"
564 "vmaddz vf7, vf3, vf6 \n"
565 "vdiv Q, vf0w, vf7w \n"
567 "vmulq.xyzw vf7, vf7, Q \n"
568 "sqc2 vf7, 0x00(%0) \n"
574 :
"+r" (output),
"+r" (
count),
"+r" (normals) :
"r" (local_light)
579 for (i = 0; i <
count; i += 1)
582 memset(output[i], 0,
sizeof(output[i]));
583 for (j = 0; j < 4; j += 1)
586 for (k = 0; k < 4; k += 1)
588 output[i][k] += local_light[(4 * j) + k] * (j != 3 ? normals[i][j] : 1.0f);
591 for (j = 0; j < 4; j += 1)
593 output[i][j] *= 1.0f / output[i][3];
599 void calculate_lights(VECTOR *output,
int count, VECTOR *normals, VECTOR *light_direction, VECTOR *light_colour,
const int *light_type,
int light_count) {
600 int loop0, loop1;
float intensity;
603 memset(output, 0,
sizeof(VECTOR) *
count);
606 for (loop0=0;loop0<
count;loop0++) {
609 for (loop1=0;loop1<light_count;loop1++) {
624 if (intensity < 0.00f) { intensity = 0.00f; }
627 }
else { intensity = 0.00f; }
630 if (intensity > 0.00f) {
633 output[loop0][0] += (light_colour[loop1][0] * intensity);
634 output[loop0][1] += (light_colour[loop1][1] * intensity);
635 output[loop0][2] += (light_colour[loop1][2] * intensity);
636 output[loop0][3] = 1.00f;
650 for (loop0=0;loop0<
count;loop0++) {
653 output[loop0][0] = (colours[loop0][0] * lights[loop0][0]);
654 output[loop0][1] = (colours[loop0][1] * lights[loop0][1]);
655 output[loop0][2] = (colours[loop0][2] * lights[loop0][2]);
658 vector_clamp(output[loop0], output[loop0], 0.00f, 1.99f);
666 __asm__ __volatile__ (
668 "lqc2 $vf1, 0x00(%3) \n"
669 "lqc2 $vf2, 0x10(%3) \n"
670 "lqc2 $vf3, 0x20(%3) \n"
671 "lqc2 $vf4, 0x30(%3) \n"
673 "lqc2 $vf6, 0x00(%2) \n"
674 "vmulaw $ACC, $vf4, $vf0\n"
675 "vmaddax $ACC, $vf1, $vf6\n"
676 "vmadday $ACC, $vf2, $vf6\n"
677 "vmaddz $vf7, $vf3, $vf6\n"
678 "vclipw.xyz $vf7, $vf7 \n"
686 "sqc2 $0, 0x00(%0) \n"
689 "vdiv $Q, $vf0w, $vf7w\n"
691 "vmulq.xyz $vf7, $vf7, $Q \n"
692 "sqc2 $vf7, 0x00(%0) \n"
694 "lqc2 vf1, 0x00(%3) \n"
695 "lqc2 vf2, 0x10(%3) \n"
696 "lqc2 vf3, 0x20(%3) \n"
697 "lqc2 vf4, 0x30(%3) \n"
699 "lqc2 vf6, 0x00(%2) \n"
700 "vmulaw ACC, vf4, vf0 \n"
701 "vmaddax ACC, vf1, vf6 \n"
702 "vmadday ACC, vf2, vf6 \n"
703 "vmaddz vf7, vf3, vf6 \n"
704 "vclipw.xyz vf7, vf7 \n"
712 "sqc2 vi00, 0x00(%0) \n"
715 "vdiv Q, vf0w, vf7w \n"
717 "vmulq.xyz vf7, vf7, Q \n"
718 "sqc2 vf7, 0x00(%0) \n"
725 :
"+r" (output),
"+r" (
count),
"+r" (vertices) :
"r" (local_screen)
730 for (i = 0; i <
count; i += 1)
734 memset(output[i], 0,
sizeof(output[i]));
735 for (j = 0; j < 4; j += 1)
738 for (k = 0; k < 4; k += 1)
740 output[i][k] += local_screen[(4 * j) + k] * (j != 3 ? vertices[i][j] : 1.0f);
744 for (j = 0; j < 3; j += 1)
748 if ((output[i][j] > fabsf(output[i][3])) || (output[i][j] < -fabsf(output[i][3])))
/* Zero x, y and z by assignment: multiplying by 0.0f would leave NaN for NaN or
 * infinite components and -0.0f for negative ones instead of a clean zero. */
756 for (j = 0; j < 3; j += 1)
758 output[i][j] = 0.0f;
764 for (j = 0; j < 3; j += 1)
766 output[i][j] *= 1.0f / output[i][3];