/*
 *	framflt3.c
 *
 *	Single-precision floating-point arithmetic library
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2008 Naoyuki Sawa
 *
 *	* Sun Jul 20 13:26:33 JST 2008 Naoyuki Sawa
 *	- Created.
 *	  Functions for floating-point vectors and floating-point matrices, placed in RAM.
 *	* Sun Aug 17 22:24:29 JST 2008 Naoyuki Sawa
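 *	- Renamed vecf->vec3f, matf->mat3f, vecx->vec3x and matx->mat3x.
 *	  This was done with a global search-and-replace, so the names in comments written before today have all been changed as well.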
 *	* Tue Aug 19 20:48:34 JST 2008 Naoyuki Sawa
 *	- Corrected wrong register names in the comments of mat3f_xform.
 *	  No change to the program itself.
 */
#include "clip.h"

#ifndef PIECE
#define NOASM	/* When this symbol is defined, the C implementations are used instead of the assembly ones. */
#endif /*PIECE*/

/****************************************************************************
 *	
 ****************************************************************************/

#ifdef NOASM
/*
 * vec3f_xform - portable C implementation (built when NOASM is defined).
 *
 * Transforms *vec in place by *mat.  Each output component is the sum of
 * one matrix row's first three elements multiplied by the original
 * (x, y, z), plus that row's fourth element.
 *
 *   mat : matrix supplying a00..a23 (read only)
 *   vec : vector to transform; overwritten with the result
 */
void vec3f_xform(const mat3f* mat, vec3f* vec) {
	/* Work from a snapshot so that storing a component does not disturb the rows still to be computed. */
	const vec3f in = *vec;

	vec->x = mat->a00 * in.x
	       + mat->a01 * in.y
	       + mat->a02 * in.z
	       + mat->a03;

	vec->y = mat->a10 * in.x
	       + mat->a11 * in.y
	       + mat->a12 * in.z
	       + mat->a13;

	vec->z = mat->a20 * in.x
	       + mat->a21 * in.y
	       + mat->a22 * in.z
	       + mat->a23;
}
#else /*NOASM*/
/*
 * vec3f_xform - assembly implementation (built when NOASM is not defined).
 *
 * Performs the same computation as the C version: for each of the three
 * matrix rows it evaluates
 *     row[0] * tmp.x + row[1] * tmp.y + row[2] * tmp.z + row[3]
 * and stores the result to the next component of *vec.
 *
 * Entry:  %r12 = mat, %r13 = vec.
 *
 * Register roles inside the routine:
 *   %r0        read cursor over the matrix; post-incremented one word per
 *              load, so the twelve elements are read in storage order
 *   %r1        write cursor over *vec; post-incremented one word per store
 *   %r2        loop counter, starts at 3 (one pass per output component)
 *   %r4-%r6    original tmp.x / tmp.y / tmp.z, loaded once before the loop
 *   %r7        running sum between helper calls
 *   %r12/%r13  operands passed to __mulsf3 / __addsf3
 *   %r10       result returned by __mulsf3 / __addsf3
 *
 * The instruction following each xcall.d (marked *delay*) is the delay-slot
 * instruction; it loads the second operand before the callee runs.
 *
 * pushn/popn %r2 bracket the routine to save and restore %r0-%r2.
 *
 * NOTE(review): %r4-%r7 are kept live across the calls to __mulsf3 and
 * __addsf3, so this code relies on those helpers leaving them untouched -
 * confirm against the runtime library in use.
 */
asm("
		.code
		.align		1
		.global		vec3f_xform
vec3f_xform:
		pushn		%r2
		;//
		ld.w		%r0, %r12		;// %r0  := mat
		ld.w		%r1, %r13		;// %r1  := vec
		ld.w		%r2, 3			;// %r2  := cnt
		ld.w		%r4, [%r13]+		;// %r4  := tmp.x
		ld.w		%r5, [%r13]+		;// %r5  := tmp.y
		ld.w		%r6, [%r13]		;// %r6  := tmp.z
vec3f_xform_loop:
		ld.w		%r12, [%r0]+		;// %r12 :=        mat->a00        , %r0  := &mat->a01
		xcall.d		__mulsf3		;// %r10 := sum  = mat->a00 * tmp.x
		ld.w		%r13, %r4		;// %r13 :=                   tmp.x			*delay*
		ld.w		%r7, %r10		;// %r7  := sum
		;//
		ld.w		%r12, [%r0]+		;// %r12 :=        mat->a01        , %r0  := &mat->a02
		xcall.d		__mulsf3		;// %r10 :=        mat->a01 * tmp.y
		ld.w		%r13, %r5		;// %r13 :=                   tmp.y			*delay*
		ld.w		%r12, %r10		;// %r12 :=        mat->a01 * tmp.y
		xcall.d		__addsf3		;// %r10 := sum += mat->a01 * tmp.y
		ld.w		%r13, %r7		;// %r13 := sum						*delay*
		ld.w		%r7, %r10		;// %r7  := sum
		;//
		ld.w		%r12, [%r0]+		;// %r12 :=        mat->a02        , %r0  := &mat->a03
		xcall.d		__mulsf3		;// %r10 :=        mat->a02 * tmp.z
		ld.w		%r13, %r6		;// %r13 :=                   tmp.z			*delay*
		ld.w		%r12, %r10		;// %r12 :=        mat->a02 * tmp.z
		xcall.d		__addsf3		;// %r10 := sum += mat->a02 * tmp.z
		ld.w		%r13, %r7		;// %r13 := sum						*delay*
		;//
		ld.w		%r12, [%r0]+		;// %r12 :=        mat->a03        , %r0  := &mat->a10
		xcall.d		__addsf3		;// %r10 := sum += mat->a03
		ld.w		%r13, %r10		;// %r13 := sum						*delay*
		;//
		ld.w		[%r1]+, %r10		;// vec->x = sum
		sub		%r2, 1			;// %r2  := cnt--
		jrne		vec3f_xform_loop
		;//
		popn		%r2
		ret
");
#endif /*NOASM*/

/****************************************************************************
 *	
 ****************************************************************************/

#ifdef NOASM
/*
 * mat3f_xform - portable C implementation (built when NOASM is defined).
 *
 * Replaces *mat1 with the product of the original *mat1 and *mat2.
 * For every row r of the result:
 *     columns 0..2 :  sum over k = 0..2 of  old_mat1[r][k] * mat2[k][c]
 *     column  3    :  the same sum for c = 3, plus old_mat1[r][3]
 *
 *   mat1 : left operand; overwritten with the result
 *   mat2 : right operand (read only)
 */

/* Emits the four assignments for result row `r`; expects `mat1`, `mat2` and the snapshot `src` in scope. */
#define MAT3F_XFORM_ROW(r) \
	mat1->a##r##0 = (src.a##r##0 * mat2->a00) + (src.a##r##1 * mat2->a10) + (src.a##r##2 * mat2->a20); \
	mat1->a##r##1 = (src.a##r##0 * mat2->a01) + (src.a##r##1 * mat2->a11) + (src.a##r##2 * mat2->a21); \
	mat1->a##r##2 = (src.a##r##0 * mat2->a02) + (src.a##r##1 * mat2->a12) + (src.a##r##2 * mat2->a22); \
	mat1->a##r##3 = (src.a##r##0 * mat2->a03) + (src.a##r##1 * mat2->a13) + (src.a##r##2 * mat2->a23) + src.a##r##3

void mat3f_xform(mat3f* mat1, const mat3f* mat2) {
	/* Snapshot the left operand: *mat1 is overwritten element by element below. */
	const mat3f src = *mat1;

	MAT3F_XFORM_ROW(0);
	MAT3F_XFORM_ROW(1);
	MAT3F_XFORM_ROW(2);
}

#undef MAT3F_XFORM_ROW
#else /*NOASM*/
/*
 * mat3f_xform - assembly implementation (built when NOASM is not defined).
 *
 * Performs the same computation as the C version: *mat1 is replaced by the
 * product of the original *mat1 and *mat2, where the fourth column of each
 * result row additionally receives the original fourth element of that row.
 *
 * Entry:  %r12 = mat1, %r13 = mat2.
 *
 * Set-up:
 *   - 48 bytes are reserved on the stack and *mat1 is copied there with
 *     memcpy; this copy is the "tmp" referred to in the line comments.
 *   - %r2 / %r4 are loaded with the addresses of __mulsf3 / __addsf3 so
 *     that the loops can call them through a register (call.d %r2 / %r4).
 *
 * Register roles inside the loops:
 *   %r0   write cursor over *mat1, post-incremented one word per store
 *   %r1   read cursor over *mat2
 *   %r5   outer counter (3 rows), %r6 inner counter (4 columns)
 *   %r7   running sum between helper calls
 *   %sp   points at the current row of tmp: [%sp+0], [%sp+4], [%sp+8] and
 *         [%sp+12] are that row's four elements
 *
 * Inner loop (one result element per pass):
 *   %r1 is advanced by 16 bytes after each of the first two multiplies,
 *   which steps down one column of mat2 (row stride = 4 words).
 *   For the first three columns the sum is stored and %r1 is pulled back
 *   by 28 (32 - 4), leaving it on the next column of mat2's first row.
 *   `jreq 4` skips the three indented instructions once the column
 *   counter reaches zero; the fourth column then adds [%sp+12] before
 *   storing.
 *   The copy of the intermediate sum into %r7 is placed in the delay slot
 *   of the third multiply call (see the arrow drawn in the line comments)
 *   instead of occupying its own slot after the first add.
 *
 * Outer loop:
 *   %sp is advanced by 16 to select the next row of tmp, and %r1 is pulled
 *   back by 44 (the three column steps of 4 plus the two row steps of 16)
 *   to the start of mat2.
 *
 * Exit:
 *   After three rows %sp has been advanced by 3 * 16 = 48, which undoes the
 *   initial 48-byte reservation, so no separate stack adjustment is made
 *   before popn.  (The last line comment inside the loop body says the
 *   same thing; its text is mis-encoded in this file.)
 *
 * NOTE(review): %r4-%r7 are kept live across the calls to __mulsf3 and
 * __addsf3, so this code relies on those helpers leaving them untouched -
 * confirm against the runtime library in use.
 */
asm("
		.code
		.align		1
		.global		mat3f_xform
mat3f_xform:
		pushn		%r2
		xsub		%sp, %sp, 48		;// %sp  := &tmp
		ld.w		%r0, %r12		;// %r0  := mat1
		ld.w		%r1, %r13		;// %r1  := mat2
		ld.w		%r12, %sp		;// %r12 := &tmp
		xld.w		%r14, 48		;// %r14 :=             sizeof(mat3f)
		xcall.d		memcpy			;//  memcpy(&tmp, mat1, sizeof(mat3f))
		ld.w		%r13, %r0		;// %r13 :=       mat1					*delay*
		;//
		xld.w		%r2, __mulsf3		;// %r2  := __mulsf3
		xld.w		%r4, __addsf3		;// %r4  := __addsf3
		ld.w		%r5, 3			;// %r5  := cnt1
		;//-------------------------------------;
mat3f_xform_lp1:
		ld.w		%r6, 4			;// %r6  := cnt2
		;//-------------------------------------;
mat3f_xform_lp2:
		xld.w		%r12, [%sp+0]		;// %r12 :=        tmp.a00
		ld.w		%r13, [%r1]		;// %r13 :=                  mat2->a00
		call.d		%r2			;// %r10 := sum  = tmp.a00 * mat2->a00
		add		%r1, 16			;// %r1  :=                 &mat2->a10			*delay*
		ld.w		%r7, %r10		;// %r7  := sum
		;//
		xld.w		%r12, [%sp+4]		;// %r12 :=        tmp.a01
		ld.w		%r13, [%r1]		;// %r13 :=                  mat2->a10
		call.d		%r2			;// %r10 :=        tmp.a01 * mat2->a10
		add		%r1, 16			;// %r1  :=                 &mat2->a20			*delay*
		;//
		ld.w		%r12, %r10		;// %r12 :=        tmp.a01 * mat2->a10
		call.d		%r4			;// %r10 := sum += tmp.a01 * mat2->a10
		ld.w		%r13, %r7		;// %r13 := sum						*delay*
;//		ld.w		%r7, %r10		;// %r7  := sum -----------------------+
		;//					;//                                    |
		xld.w		%r12, [%sp+8]		;// %r12 :=        tmp.a02             |
		ld.w		%r13, [%r1]		;// %r13 :=                  mat2->a20 |
		call.d		%r2			;// %r10 :=        tmp.a02 * mat2->a20 |
		ld.w		%r7, %r10		;// <----------------------------------+		*delay*
		;//
		ld.w		%r12, %r10		;// %r12 :=        tmp.a02 * mat2->a20
		call.d		%r4			;// %r10 := sum += tmp.a02 * mat2->a20
		ld.w		%r13, %r7		;// %r13 := sum						*delay*
		;//
		sub		%r6, 1			;// %r6  := cnt2--
		jreq		4			;// if(cnt2)
		 ld.w		[%r0]+, %r10		;//   mat1->a00 = sum, %r0  := &mat1->a01
		 jp.d		mat3f_xform_lp2
		 sub		%r1, 28			;//                    %r1  := &mat2->a01		*delay*
		;//-------------------------------------;// else
		xld.w		%r12, [%sp+12]		;//   %r12 :=        tmp.a03
		call.d		%r4			;//   %r10 := sum += tmp.a03
		ld.w		%r13, %r10		;//   %r13 := sum					*delay*
		ld.w		[%r0]+, %r10		;//   mat1->a03 = sum, %r0  := &mat1->a10
		;//
		xadd		%sp, %sp, 16		;//                    %sp  :=   &tmp.a10
		sub		%r5, 1			;//   %r5  := cnt1--
		jrne.d		mat3f_xform_lp1
		sub		%r1, 44			;//                    %r1  := &mat2->a00		*delay*
		;//-------------------------------------;
		;// %sp3+16ĊɌɖ߂Ă܂B
		popn		%r2
		ret
");
#endif /*NOASM*/

/****************************************************************************
 *	
 ****************************************************************************/

