21 __asm__ __volatile__ (
23 "lqc2 $vf1, 0x00(%2) \n"
24 "lqc2 $vf2, 0x10(%2) \n"
25 "lqc2 $vf3, 0x20(%2) \n"
26 "lqc2 $vf4, 0x30(%2) \n"
27 "lqc2 $vf5, 0x00(%1) \n"
28 "vmulaw $ACC, $vf4, $vf0\n"
29 "vmaddax $ACC, $vf1, $vf5\n"
30 "vmadday $ACC, $vf2, $vf5\n"
31 "vmaddz $vf6, $vf3, $vf5\n"
32 "sqc2 $vf6, 0x00(%0) \n"
34 "lqc2 vf1, 0x00(%2) \n"
35 "lqc2 vf2, 0x10(%2) \n"
36 "lqc2 vf3, 0x20(%2) \n"
37 "lqc2 vf4, 0x30(%2) \n"
38 "lqc2 vf5, 0x00(%1) \n"
39 "vmulaw ACC, vf4, vf0 \n"
40 "vmaddax ACC, vf1, vf5 \n"
41 "vmadday ACC, vf2, vf5 \n"
42 "vmaddz vf6, vf3, vf5 \n"
43 "sqc2 vf6, 0x00(%0) \n"
45 : :
"r" (output),
"r" (input0),
"r" (input1)
/*
 * vector_clamp: the visible lines limit each of the four components of a
 * scratch array named work to the closed range [min, max].
 *
 * Parameters (from the signature):
 *   output - VECTOR argument; not referenced in the lines visible here.
 *   input0 - VECTOR argument; not referenced in the lines visible here.
 *   min    - lower bound applied to every component.
 *   max    - upper bound applied to every component.
 *
 * NOTE(review): the declaration of work, its initialisation and the write
 * to output fall in elided lines; presumably work starts as a copy of
 * input0 and is copied to output afterwards - confirm.
 */
50 void vector_clamp(VECTOR output, VECTOR input0,
float min,
float max) {
/* Raise any component that is below min up to min. */
57 if (work[0] < min) { work[0] = min; }
58 if (work[1] < min) { work[1] = min; }
59 if (work[2] < min) { work[2] = min; }
60 if (work[3] < min) { work[3] = min; }
/* Lower any component that is above max down to max. The lower bound is
 * applied first, so if min > max every component ends up equal to max. */
63 if (work[0] > max) { work[0] = max; }
64 if (work[1] > max) { work[1] = max; }
65 if (work[2] > max) { work[2] = max; }
66 if (work[3] > max) { work[3] = max; }
/*
 * Inline assembly fragment: one lqc2 from offset 0x00 of operand %1 into
 * vf1, followed by one sqc2 of vf1 to offset 0x00 of operand %0.
 * NOTE(review): lqc2/sqc2 read as the EE quadword load/store for COP2
 * (VU0) registers - confirm against the EE Core instruction manual.
 *
 * The same two instructions appear twice, once with dollar-prefixed
 * register names and once without. NOTE(review): presumably these are two
 * alternatives selected by a preprocessor conditional that is not visible
 * here, not a sequence that executes twice - confirm.
 */
74 __asm__ __volatile__ (
76 "lqc2 $vf1, 0x00(%1) \n"
77 "sqc2 $vf1, 0x00(%0) \n"
79 "lqc2 vf1, 0x00(%1) \n"
80 "sqc2 vf1, 0x00(%0) \n"
/* The output-operand list is empty; both pointers are passed as inputs in
 * general registers: %0 is output, %1 is input0.
 * NOTE(review): the template stores through %0 and no clobber list is
 * visible in this excerpt - confirm that a memory clobber follows. */
82 : :
"r" (output),
"r" (input0)
/*
 * Fragment: divides the first three components of input0 and of input1 by
 * their own fourth component, then returns the three-term dot product of
 * the two quotient vectors.
 * The declarations of work0 and work1 are not visible here.
 * NOTE(review): no guard against input0[3] or input1[3] being zero is
 * visible in this excerpt - confirm whether callers guarantee that.
 */
91 work0[0] = (input0[0] / input0[3]);
92 work0[1] = (input0[1] / input0[3]);
93 work0[2] = (input0[2] / input0[3]);
97 work1[0] = (input1[0] / input1[3]);
98 work1[1] = (input1[1] / input1[3]);
99 work1[2] = (input1[2] / input1[3]);
/* Sum of the products of matching components 0, 1 and 2. */
103 return (work0[0] * work1[0]) + (work0[1] * work1[1]) + (work0[2] * work1[2]);
/*
 * Fragment: component-wise product of input0 and input1 over all four
 * components, written to the scratch array work.
 * The declaration of work and any copy to a result are not visible here.
 */
111 work[0] = input0[0] * input1[0];
112 work[1] = input0[1] * input1[1];
113 work[2] = input0[2] * input1[2];
114 work[3] = input0[3] * input1[3];
/*
 * Inline assembly fragment operating on one quadword read from operand %1
 * (input0) and written to operand %0 (output).
 *
 * NOTE(review): reading the mnemonics as VU0 macro instructions, and
 * assuming vf0 is the constant register (0, 0, 0, 1), the sequence is:
 *   vmul.xyz                 squares x, y, z of the loaded vector into vf2
 *   vmulax/vmadday/vmaddz .w sums those three squares into vf2.w
 *   vrsqrt                   Q = vf0.w / sqrt(vf2.w), i.e. 1 / length
 *   vsub.w                   vf1.w = vf0.w - vf0.w = 0
 *   vmulq.xyz                scales x, y, z of vf1 by Q
 * Confirm against the VU manual before relying on this description.
 *
 * The residual line numbers jump from 130 to 132 and from 141 to 143, so
 * one line between vsub.w and vmulq.xyz is elided in each variant.
 * NOTE(review): presumably a wait for the Q result - confirm.
 *
 * The sequence appears twice with different register spellings (with and
 * without a dollar prefix). NOTE(review): presumably alternatives chosen
 * by a preprocessor conditional that is not visible here - confirm.
 */
122 __asm__ __volatile__ (
124 "lqc2 $vf1, 0x00(%1) \n"
125 "vmul.xyz $vf2, $vf1, $vf1\n"
126 "vmulax.w $ACC, $vf0, $vf2\n"
127 "vmadday.w $ACC, $vf0, $vf2\n"
128 "vmaddz.w $vf2, $vf0, $vf2\n"
129 "vrsqrt $Q, $vf0w, $vf2w\n"
130 "vsub.w $vf1, $vf0, $vf0\n"
132 "vmulq.xyz $vf1, $vf1, $Q \n"
133 "sqc2 $vf1, 0x00(%0) \n"
135 "lqc2 vf1, 0x00(%1) \n"
136 "vmul.xyz vf2, vf1, vf1 \n"
137 "vmulax.w ACC, vf0, vf2 \n"
138 "vmadday.w ACC, vf0, vf2 \n"
139 "vmaddz.w vf2, vf0, vf2 \n"
140 "vrsqrt Q, vf0w, vf2w \n"
141 "vsub.w vf1, vf0, vf0 \n"
143 "vmulq.xyz vf1, vf1, Q \n"
144 "sqc2 vf1, 0x00(%0) \n"
/* Empty output-operand list; %0 is output and %1 is input0, both passed
 * as input operands in general registers.
 * NOTE(review): no clobber list is visible in this excerpt - confirm. */
146 : :
"r" (output),
"r" (input0)
/*
 * Inline assembly fragment: loads one quadword from operand %1 (input0)
 * into vf1 and one from operand %2 (input1) into vf2, combines them with
 * vopmula.xyz / vopmsub.xyz, clears the w field with vsub.w, and stores
 * vf2 to operand %0 (output).
 *
 * NOTE(review): vopmula followed by vopmsub reads as the VU0 outer
 * product pair, which would make x, y, z of the result the cross product
 * of input0 and input1, and vsub.w vf2, vf0, vf0 would set w to zero -
 * confirm operand order and sign against the VU manual.
 *
 * The sequence appears twice with different register spellings.
 * NOTE(review): presumably alternatives chosen by a preprocessor
 * conditional that is not visible here - confirm.
 */
152 __asm__ __volatile__ (
154 "lqc2 $vf1, 0x00(%1) \n"
155 "lqc2 $vf2, 0x00(%2) \n"
156 "vopmula.xyz $ACC, $vf1, $vf2\n"
157 "vopmsub.xyz $vf2, $vf2, $vf1\n"
158 "vsub.w $vf2, $vf0, $vf0\n"
159 "sqc2 $vf2, 0x00(%0) \n"
161 "lqc2 vf1, 0x00(%1) \n"
162 "lqc2 vf2, 0x00(%2) \n"
163 "vopmula.xyz ACC, vf1, vf2 \n"
164 "vopmsub.xyz vf2, vf2, vf1 \n"
165 "vsub.w vf2, vf0, vf0 \n"
166 "sqc2 vf2, 0x00(%0) \n"
/* Empty output-operand list; all three pointers are input operands.
 * NOTE(review): no clobber list is visible in this excerpt - confirm. */
168 : :
"r" (output),
"r" (input0),
"r" (input1)
/*
 * Fragment: component-wise sum of addend and summand over all four
 * components, written to the scratch array work.
 * The enclosing function header and the declaration of work are not
 * visible here.
 */
175 work[0] = addend[0]+summand[0];
176 work[1] = addend[1]+summand[1];
177 work[2] = addend[2]+summand[2];
178 work[3] = addend[3]+summand[3];
/*
 * Fragment: the three components of the cross product
 * multiplicand x multiplier, written to work[0..2].
 * work[3] is not assigned in the lines visible here, and the enclosing
 * function header is not visible.
 */
184 work[0] = multiplicand[1] * multiplier[2] - multiplicand[2] * multiplier[1];
185 work[1] = multiplicand[2] * multiplier[0] - multiplicand[0] * multiplier[2];
186 work[2] = multiplicand[0] * multiplier[1] - multiplicand[1] * multiplier[0];
/* Declares a VECTOR named triangle_normal. Its enclosing scope and every
 * use of it fall in lines that are not visible in this excerpt. */
195 VECTOR triangle_normal;
/*
 * Inline assembly fragment: loads four consecutive quadwords (offsets
 * 0x00, 0x10, 0x20, 0x30) from operand %1 (input0) into vf1..vf4 and
 * stores them to the same offsets of operand %0 (output), i.e. a 64-byte
 * copy routed through the vector registers.
 * All four loads are issued before the first store.
 *
 * The sequence appears twice with different register spellings.
 * NOTE(review): presumably alternatives chosen by a preprocessor
 * conditional that is not visible here - confirm.
 */
208 __asm__ __volatile__ (
210 "lqc2 $vf1, 0x00(%1) \n"
211 "lqc2 $vf2, 0x10(%1) \n"
212 "lqc2 $vf3, 0x20(%1) \n"
213 "lqc2 $vf4, 0x30(%1) \n"
214 "sqc2 $vf1, 0x00(%0) \n"
215 "sqc2 $vf2, 0x10(%0) \n"
216 "sqc2 $vf3, 0x20(%0) \n"
217 "sqc2 $vf4, 0x30(%0) \n"
219 "lqc2 vf1, 0x00(%1) \n"
220 "lqc2 vf2, 0x10(%1) \n"
221 "lqc2 vf3, 0x20(%1) \n"
222 "lqc2 vf4, 0x30(%1) \n"
223 "sqc2 vf1, 0x00(%0) \n"
224 "sqc2 vf2, 0x10(%0) \n"
225 "sqc2 vf3, 0x20(%0) \n"
226 "sqc2 vf4, 0x30(%0) \n"
/* Empty output-operand list; %0 is output and %1 is input0, both passed
 * as input operands.
 * NOTE(review): no clobber list is visible in this excerpt - confirm. */
228 : :
"r" (output),
"r" (input0)
/*
 * Fragment: fills elements 0x0C..0x0E of work. Each one is the negated
 * sum of input0[0x0C], input0[0x0D], input0[0x0E] multiplied by the
 * elements of one column of the upper-left 3x3 part of work
 * (column 0: 0x00/0x04/0x08, column 1: 0x01/0x05/0x09,
 *  column 2: 0x02/0x06/0x0A).
 * NOTE(review): presumably work already holds a transposed rotation at
 * this point and this is the translation of an inverse transform - the
 * preceding lines are not visible; confirm.
 */
241 work[0x0C] = -(input0[0x0C] * work[0x00] + input0[0x0D] * work[0x04] + input0[0x0E] * work[0x08]);
242 work[0x0D] = -(input0[0x0C] * work[0x01] + input0[0x0D] * work[0x05] + input0[0x0E] * work[0x09]);
243 work[0x0E] = -(input0[0x0C] * work[0x02] + input0[0x0D] * work[0x06] + input0[0x0E] * work[0x0A]);
/*
 * Inline assembly fragment: loads four quadwords from operand %1 (input0)
 * into vf1..vf4 and four from operand %2 (input1) into vf5..vf8, runs the
 * same four-instruction multiply-accumulate group once for each of
 * vf1..vf4, and stores vf1..vf4 to operand %0 (output).
 *
 * NOTE(review): reading the mnemonics as VU0 macro instructions, each
 * group computes, for a register r in vf1..vf4,
 *   r = vf5 * r.x + vf6 * r.y + vf7 * r.z + vf8 * r.w
 * which would make the stored result the 4x4 product input0 * input1 with
 * each quadword treated as one matrix row - confirm against the VU manual.
 *
 * Each group overwrites its own source register only in its final
 * instruction, and all loads precede all stores.
 *
 * The sequence appears twice with different register spellings.
 * NOTE(review): presumably alternatives chosen by a preprocessor
 * conditional that is not visible here - confirm.
 */
252 __asm__ __volatile__ (
254 "lqc2 $vf1, 0x00(%1) \n"
255 "lqc2 $vf2, 0x10(%1) \n"
256 "lqc2 $vf3, 0x20(%1) \n"
257 "lqc2 $vf4, 0x30(%1) \n"
258 "lqc2 $vf5, 0x00(%2) \n"
259 "lqc2 $vf6, 0x10(%2) \n"
260 "lqc2 $vf7, 0x20(%2) \n"
261 "lqc2 $vf8, 0x30(%2) \n"
262 "vmulax.xyzw $ACC, $vf5, $vf1\n"
263 "vmadday.xyzw $ACC, $vf6, $vf1\n"
264 "vmaddaz.xyzw $ACC, $vf7, $vf1\n"
265 "vmaddw.xyzw $vf1, $vf8, $vf1\n"
266 "vmulax.xyzw $ACC, $vf5, $vf2\n"
267 "vmadday.xyzw $ACC, $vf6, $vf2\n"
268 "vmaddaz.xyzw $ACC, $vf7, $vf2\n"
269 "vmaddw.xyzw $vf2, $vf8, $vf2\n"
270 "vmulax.xyzw $ACC, $vf5, $vf3\n"
271 "vmadday.xyzw $ACC, $vf6, $vf3\n"
272 "vmaddaz.xyzw $ACC, $vf7, $vf3\n"
273 "vmaddw.xyzw $vf3, $vf8, $vf3\n"
274 "vmulax.xyzw $ACC, $vf5, $vf4\n"
275 "vmadday.xyzw $ACC, $vf6, $vf4\n"
276 "vmaddaz.xyzw $ACC, $vf7, $vf4\n"
277 "vmaddw.xyzw $vf4, $vf8, $vf4\n"
278 "sqc2 $vf1, 0x00(%0) \n"
279 "sqc2 $vf2, 0x10(%0) \n"
280 "sqc2 $vf3, 0x20(%0) \n"
281 "sqc2 $vf4, 0x30(%0) \n"
283 "lqc2 vf1, 0x00(%1) \n"
284 "lqc2 vf2, 0x10(%1) \n"
285 "lqc2 vf3, 0x20(%1) \n"
286 "lqc2 vf4, 0x30(%1) \n"
287 "lqc2 vf5, 0x00(%2) \n"
288 "lqc2 vf6, 0x10(%2) \n"
289 "lqc2 vf7, 0x20(%2) \n"
290 "lqc2 vf8, 0x30(%2) \n"
291 "vmulax.xyzw ACC, vf5, vf1 \n"
292 "vmadday.xyzw ACC, vf6, vf1 \n"
293 "vmaddaz.xyzw ACC, vf7, vf1 \n"
294 "vmaddw.xyzw vf1, vf8, vf1 \n"
295 "vmulax.xyzw ACC, vf5, vf2 \n"
296 "vmadday.xyzw ACC, vf6, vf2 \n"
297 "vmaddaz.xyzw ACC, vf7, vf2 \n"
298 "vmaddw.xyzw vf2, vf8, vf2 \n"
299 "vmulax.xyzw ACC, vf5, vf3 \n"
300 "vmadday.xyzw ACC, vf6, vf3 \n"
301 "vmaddaz.xyzw ACC, vf7, vf3 \n"
302 "vmaddw.xyzw vf3, vf8, vf3 \n"
303 "vmulax.xyzw ACC, vf5, vf4 \n"
304 "vmadday.xyzw ACC, vf6, vf4 \n"
305 "vmaddaz.xyzw ACC, vf7, vf4 \n"
306 "vmaddw.xyzw vf4, vf8, vf4 \n"
307 "sqc2 vf1, 0x00(%0) \n"
308 "sqc2 vf2, 0x10(%0) \n"
309 "sqc2 vf3, 0x20(%0) \n"
310 "sqc2 vf4, 0x30(%0) \n"
/* Empty output-operand list; output, input0 and input1 are all passed as
 * input operands in general registers.
 * NOTE(review): no clobber list is visible in this excerpt - confirm. */
312 : :
"r" (output),
"r" (input0),
"r" (input1)
/*
 * Fragment: three groups of four assignments that place cos/sin terms
 * into the 16-element scratch array work. The residual line numbers
 * (322-325, 330-333, 338-341) show that other lines sit between the
 * groups. NOTE(review): presumably work is reset to identity before each
 * group and multiplied into a result after it - confirm.
 *
 * Group 1 uses the angle input1[2] and touches elements 0x00, 0x01, 0x04,
 * 0x05 (the plane of axes 0 and 1).
 */
322 work[0x00] = cosf(input1[2]);
323 work[0x01] = sinf(input1[2]);
324 work[0x04] = -sinf(input1[2]);
325 work[0x05] = cosf(input1[2]);
/* Group 2 uses the angle input1[1] and touches elements 0x00, 0x02, 0x08,
 * 0x0A (the plane of axes 0 and 2). The negated sine sits in the upper
 * element here, opposite to groups 1 and 3. */
330 work[0x00] = cosf(input1[1]);
331 work[0x02] = -sinf(input1[1]);
332 work[0x08] = sinf(input1[1]);
333 work[0x0A] = cosf(input1[1]);
/* Group 3 uses the angle input1[0] and touches elements 0x05, 0x06, 0x09,
 * 0x0A (the plane of axes 1 and 2). */
338 work[0x05] = cosf(input1[0]);
339 work[0x06] = sinf(input1[0]);
340 work[0x09] = -sinf(input1[0]);
341 work[0x0A] = cosf(input1[0]);
/*
 * Fragment: writes input1[0], input1[1], input1[2] to elements 0x00,
 * 0x05 and 0x0A of work, the first three diagonal positions of a 16-element
 * array read as 4x4.
 * NOTE(review): presumably work was set to identity beforehand, making
 * this a scale matrix - the preceding lines are not visible; confirm.
 */
351 work[0x00] = input1[0];
352 work[0x05] = input1[1];
353 work[0x0A] = input1[2];
/*
 * Fragment: writes input1[0], input1[1], input1[2] to elements 0x0C,
 * 0x0D and 0x0E of work.
 * NOTE(review): presumably work was set to identity beforehand, making
 * this a translation matrix - the preceding lines are not visible;
 * confirm.
 */
363 work[0x0C] = input1[0];
364 work[0x0D] = input1[1];
365 work[0x0E] = input1[2];
/*
 * Fragment: fills all 16 elements of work from input0 with the index
 * mapping work[4*r + c] = input0[4*c + r] for r, c in 0..3, i.e. the
 * transpose of a 16-element array read as 4x4.
 * Writing into the separate array work means input0 is only read here.
 */
374 work[0x00] = input0[0x00];
375 work[0x01] = input0[0x04];
376 work[0x02] = input0[0x08];
377 work[0x03] = input0[0x0C];
378 work[0x04] = input0[0x01];
379 work[0x05] = input0[0x05];
380 work[0x06] = input0[0x09];
381 work[0x07] = input0[0x0D];
382 work[0x08] = input0[0x02];
383 work[0x09] = input0[0x06];
384 work[0x0A] = input0[0x0A];
385 work[0x0B] = input0[0x0E];
386 work[0x0C] = input0[0x03];
387 work[0x0D] = input0[0x07];
388 work[0x0E] = input0[0x0B];
389 work[0x0F] = input0[0x0F];
/*
 * Fragment: zero-fills sizeof(MATRIX) bytes at output, then sets elements
 * 0x00, 0x05, 0x0A and 0x0F to 1.00f.
 * NOTE(review): assumes MATRIX is an array of 16 floats, in which case
 * these four indices are the diagonal and the result is the identity -
 * the typedef is not visible here; confirm.
 */
399 memset(output, 0,
sizeof(MATRIX));
400 output[0x00] = 1.00f;
401 output[0x05] = 1.00f;
402 output[0x0A] = 1.00f;
403 output[0x0F] = 1.00f;
/*
 * Fragment: builds two scratch vectors.
 * work0 holds translation with its first three components negated and
 * its fourth component copied unchanged.
 */
430 work0[0] = -translation[0];
431 work0[1] = -translation[1];
432 work0[2] = -translation[2];
433 work0[3] = translation[3];
/* work1 holds rotation treated the same way: components 0..2 negated,
 * component 3 copied unchanged. How work0 and work1 are used afterwards is
 * not visible in this excerpt. */
436 work1[0] = -rotation[0];
437 work1[1] = -rotation[1];
438 work1[2] = -rotation[2];
439 work1[3] = rotation[3];
/*
 * create_view_screen: writes projection terms into view_screen from the
 * frustum bounds left/right/bottom/top/near/far.
 *
 * Parameters (from the signature):
 *   view_screen - MATRIX that receives the entries assigned below.
 *   aspect      - factor multiplied into left and right before use.
 *   left, right, bottom, top, near, far - frustum bounds.
 *
 * Only elements 0x00, 0x05, 0x08, 0x09, 0x0A, 0x0B, 0x0E and 0x0F are
 * assigned in the visible lines. The residual line numbers jump from 451
 * to 455. NOTE(review): presumably the elided lines initialise the other
 * elements - confirm.
 * NOTE(review): the divisors (right - left), (top - bottom) and
 * (far - near) have no zero guard in the visible lines - confirm callers
 * never pass equal bounds.
 */
448 void create_view_screen(MATRIX view_screen,
float aspect,
float left,
float right,
float bottom,
float top,
float near,
float far) {
/* Scale the horizontal bounds by the aspect factor; the parameters are
 * overwritten in place. */
451 left = (left * aspect); right = (right * aspect);
/* Elements 0x00 and 0x05: twice the near distance over the width and the
 * height of the frustum. */
455 view_screen[0x00] = (2 * near) / (right - left);
456 view_screen[0x05] = (2 * near) / (top - bottom);
/* Elements 0x08 and 0x09: sum over difference of the horizontal and the
 * vertical bounds. */
457 view_screen[0x08] = (right + left) / (right - left);
458 view_screen[0x09] = (top + bottom) / (top - bottom);
/* Depth terms in elements 0x0A and 0x0E; elements 0x0B and 0x0F are set
 * to the constants -1 and 0. */
459 view_screen[0x0A] = (far + near) / (far - near);
460 view_screen[0x0B] = -1.00f;
461 view_screen[0x0E] = (2 * far * near) / (far - near);
462 view_screen[0x0F] = 0.00f;
/*
 * create_local_screen: only the signature is visible in this excerpt.
 * It takes four MATRIX arguments: local_screen, local_world, world_view
 * and view_screen.
 * NOTE(review): presumably local_screen receives the combination of the
 * other three matrices - the body is not visible; confirm.
 */
466 void create_local_screen(MATRIX local_screen, MATRIX local_world, MATRIX world_view, MATRIX view_screen) {
/*
 * Inline assembly fragment: loads four quadwords from operand %3
 * (local_light) into vf1..vf4 and one quadword from operand %2 (normals)
 * into vf6, combines them into vf7, scales vf7 by Q after a vdiv, and
 * stores vf7 to operand %0 (output).
 *
 * NOTE(review): reading the mnemonics as VU0 macro instructions, and
 * assuming vf0 is the constant register (0, 0, 0, 1), the group is
 *   vf7 = vf4 * vf0.w + vf1 * vf6.x + vf2 * vf6.y + vf3 * vf6.z
 *   Q   = vf0.w / vf7.w
 *   vf7 = vf7 * Q        (all four fields)
 * Confirm against the VU manual.
 *
 * The residual line numbers skip 485, 492 and 495-499 in the first
 * variant and 504, 511 and 514-518 in the second, so several template
 * lines are elided. Operand %1 (count) is not referenced by any visible
 * line. NOTE(review): presumably the elided lines form a loop that
 * advances %0 and %2 and decrements %1 - confirm.
 *
 * The sequence appears twice with different register spellings.
 * NOTE(review): presumably alternatives chosen by a preprocessor
 * conditional that is not visible here - confirm.
 */
479 __asm__ __volatile__ (
481 "lqc2 $vf1, 0x00(%3) \n"
482 "lqc2 $vf2, 0x10(%3) \n"
483 "lqc2 $vf3, 0x20(%3) \n"
484 "lqc2 $vf4, 0x30(%3) \n"
486 "lqc2 $vf6, 0x00(%2) \n"
487 "vmulaw $ACC, $vf4, $vf0\n"
488 "vmaddax $ACC, $vf1, $vf6\n"
489 "vmadday $ACC, $vf2, $vf6\n"
490 "vmaddz $vf7, $vf3, $vf6\n"
491 "vdiv $Q, $vf0w, $vf7w\n"
493 "vmulq.xyzw $vf7, $vf7, $Q \n"
494 "sqc2 $vf7, 0x00(%0) \n"
500 "lqc2 vf1, 0x00(%3) \n"
501 "lqc2 vf2, 0x10(%3) \n"
502 "lqc2 vf3, 0x20(%3) \n"
503 "lqc2 vf4, 0x30(%3) \n"
505 "lqc2 vf6, 0x00(%2) \n"
506 "vmulaw ACC, vf4, vf0 \n"
507 "vmaddax ACC, vf1, vf6 \n"
508 "vmadday ACC, vf2, vf6 \n"
509 "vmaddz vf7, vf3, vf6 \n"
510 "vdiv Q, vf0w, vf7w \n"
512 "vmulq.xyzw vf7, vf7, Q \n"
513 "sqc2 vf7, 0x00(%0) \n"
/* output, count and normals are read-write register operands (%0, %1,
 * %2), so the template may modify them; local_light is a read-only
 * input (%3).
 * NOTE(review): no clobber list is visible in this excerpt - confirm. */
519 :
"+r" (output),
"+r" (
count),
"+r" (normals) :
"r" (local_light)
/*
 * calculate_lights: accumulates per-element light contributions into
 * output.
 *
 * Parameters (from the signature):
 *   output          - array of count VECTORs; zeroed, then accumulated.
 *   count           - number of elements in output (and the outer loop
 *                     bound).
 *   normals         - not referenced in the lines visible here.
 *   light_direction - not referenced in the lines visible here.
 *   light_colour    - per-light colour, indexed by the inner loop.
 *   light_type      - not referenced in the lines visible here.
 *   light_count     - number of lights (the inner loop bound).
 *
 * The code that computes intensity (original lines 535-548) and the
 * closing braces are elided. NOTE(review): presumably intensity depends
 * on light_type, normals and light_direction - confirm.
 */
524 void calculate_lights(VECTOR *output,
int count, VECTOR *normals, VECTOR *light_direction, VECTOR *light_colour,
const int *light_type,
int light_count) {
525 int loop0, loop1;
float intensity;
/* Start every output element at zero so contributions can be summed. */
528 memset(output, 0,
sizeof(VECTOR) *
count);
/* Outer loop: one pass per output element. */
531 for (loop0=0;loop0<
count;loop0++) {
/* Inner loop: one pass per light. */
534 for (loop1=0;loop1<light_count;loop1++) {
/* Negative intensity is clamped to zero. */
549 if (intensity < 0.00f) { intensity = 0.00f; }
/* The else branch belongs to a condition in the elided lines; it also
 * yields zero intensity. */
552 }
else { intensity = 0.00f; }
/* Only strictly positive intensity contributes to the output. */
555 if (intensity > 0.00f) {
/* Add this light's colour scaled by intensity to components 0..2 and
 * set component 3 to 1. Component 3 stays 0 from the memset when no
 * light contributes. */
558 output[loop0][0] += (light_colour[loop1][0] * intensity);
559 output[loop0][1] += (light_colour[loop1][1] * intensity);
560 output[loop0][2] += (light_colour[loop1][2] * intensity);
561 output[loop0][3] = 1.00f;
/*
 * Fragment: for each of count elements, multiplies components 0..2 of
 * colours by the matching components of lights into output, then clamps
 * the output element in place with vector_clamp to the range
 * [0.00, 1.99].
 * Component 3 of output is not assigned in the visible lines before the
 * clamp is applied to it. The original lines 581-582 are elided.
 * NOTE(review): presumably those lines set component 3 - confirm.
 * The enclosing function header and the closing brace of the loop are
 * not visible here.
 */
575 for (loop0=0;loop0<
count;loop0++) {
578 output[loop0][0] = (colours[loop0][0] * lights[loop0][0]);
579 output[loop0][1] = (colours[loop0][1] * lights[loop0][1]);
580 output[loop0][2] = (colours[loop0][2] * lights[loop0][2]);
/* The same element is passed as both destination and source. */
583 vector_clamp(output[loop0], output[loop0], 0.00f, 1.99f);
590 __asm__ __volatile__ (
592 "lqc2 $vf1, 0x00(%3) \n"
593 "lqc2 $vf2, 0x10(%3) \n"
594 "lqc2 $vf3, 0x20(%3) \n"
595 "lqc2 $vf4, 0x30(%3) \n"
597 "lqc2 $vf6, 0x00(%2) \n"
598 "vmulaw $ACC, $vf4, $vf0\n"
599 "vmaddax $ACC, $vf1, $vf6\n"
600 "vmadday $ACC, $vf2, $vf6\n"
601 "vmaddz $vf7, $vf3, $vf6\n"
602 "vclipw.xyz $vf7, $vf7 \n"
610 "sqc2 $0, 0x00(%0) \n"
613 "vdiv $Q, $vf0w, $vf7w\n"
615 "vmulq.xyz $vf7, $vf7, $Q \n"
616 "sqc2 $vf7, 0x00(%0) \n"
618 "lqc2 vf1, 0x00(%3) \n"
619 "lqc2 vf2, 0x10(%3) \n"
620 "lqc2 vf3, 0x20(%3) \n"
621 "lqc2 vf4, 0x30(%3) \n"
623 "lqc2 vf6, 0x00(%2) \n"
624 "vmulaw ACC, vf4, vf0 \n"
625 "vmaddax ACC, vf1, vf6 \n"
626 "vmadday ACC, vf2, vf6 \n"
627 "vmaddz vf7, vf3, vf6 \n"
628 "vclipw.xyz vf7, vf7 \n"
636 "sqc2 vi00, 0x00(%0) \n"
639 "vdiv Q, vf0w, vf7w \n"
641 "vmulq.xyz vf7, vf7, Q \n"
642 "sqc2 vf7, 0x00(%0) \n"
649 :
"+r" (output),
"+r" (
count),
"+r" (vertices) :
"r" (local_screen)