/*	
 *	framfix.c
 *
 *	Fast-RAM resident code (transferred to high-speed RAM): fixed-point arithmetic library
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2003 Naoyuki Sawa
 *
 *	* Tue Apr  2 12:30:00 JST 2003 Naoyuki Sawa
 *	- Moved here from clipfix.c.
 *	* Tue Jul 11 06:51:46 JST 2006 Naoyuki Sawa
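 *	- Sped up the assembler versions of fmul and fdiv.
 *	  Each saves one clock by using the delay slot of the ret instruction.
 *	  fmul became 3.25% faster; fdiv became 1.36% faster.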
 */
#include "clip.h"

/****************************************************************************
 *	fmul
 ****************************************************************************/

#ifdef FIX_NOASM
/*
 * fmul - fixed-point multiply (C version, built when FIX_NOASM is defined).
 *
 * Both operands are widened to long long and multiplied, and the product is
 * shifted right by FRACT_BITS.  If bits 31..63 of the shifted product are
 * neither all clear nor all set (i.e. it is not the sign extension of its
 * own low 32 bits), die() is called with the operands and the 64-bit result.
 * Returns the low 32 bits of the shifted product, converted to fixed.
 */
fixed
fmul(fixed a, fixed b)
{
	long long lhs = (int)a;
	long long rhs = (int)b;
	long long product = (lhs * rhs) >> FRACT_BITS;
	/*
	 * Overflow is detected with a mask rather than a range comparison.
	 * An earlier check of the form
	 *   product < (long long)0xffffffff80000000ll ||
	 *   product > (long long)0x000000007fffffffll
	 * was disabled; the accompanying note (mis-encoded in this copy)
	 * mentions gcc33 and a value such as 0x3f8 being reported as an
	 * error, and appears to advise against range comparisons on
	 * long long using constants that do not fit in a signed 32-bit value.
	 *                                 fedcba9876543210 */
	unsigned long long upper = product & 0xffffffff80000000ll;

	if(upper != 0x0000000000000000ll &&
	   upper != 0xffffffff80000000ll) {
		die("##fmul\n%08x*%08x\n%08x:%08x", (int)lhs, (int)rhs, (int)(product >> 32), (int)(product));
	}
	return (fixed)(int)product;
}
#else /*FIX_NOASM*/
/*
 * fmul (assembly version, built when FIX_NOASM is not defined).
 *
 * The #if below stops the build unless FRACT_BITS is 10 and VALUE_BITS is
 * 22, because both shift counts are hard-coded in the assembly text.
 *
 * As written, the routine:
 *   - multiplies %r12 by %r13 with mlt.w,
 *   - copies %alr into %r10 and %ahr into %r11,
 *   - shifts %r10 right by 10 (xsrl) and %r11 left by 22 (xsll),
 *   - ORs %r11 into %r10 in the delay slot of ret.d.
 * The older "or" followed by a plain "ret" is kept as assembler comments,
 * dated 2006/07/11.
 * Unlike the C version above, no overflow check is performed here.
 * NOTE(review): presumably %r12/%r13 carry a/b and %r10 carries the return
 * value per the S1C33 calling convention - confirm in the compiler manual.
 */
#if (FRACT_BITS != 10) || (VALUE_BITS != 22)
#error Œ菬p[^zlƈقȂ܂B
#endif
	asm("
	.code
	.align 1
	.global fmul
fmul:
	mlt.w %r12, %r13
	ld.w %r10, %alr
	ld.w %r11, %ahr
	xsrl %r10, 10			; 10=FRACT_BITS
	xsll %r11, 22			; 22=VALUE_BITS
	;//or %r10, %r11
	;//ret
	;//2006/07/11 ܂B
	ret.d
	or %r10, %r11			; *delay*
	");
#endif /*FIX_NOASM*/

/****************************************************************************
 *	fdiv
 ****************************************************************************/

#ifdef FIX_NOASM
/*
 * fdiv - fixed-point divide (C version, built when FIX_NOASM is defined).
 *
 * Computes (a * 2^FRACT_BITS) / b in long long arithmetic.
 *
 * A zero divisor is reported through die() instead of reaching the
 * division, which would be undefined behaviour.  If the quotient is not the
 * sign extension of its own low 32 bits (bits 31..63 neither all clear nor
 * all set), die() is called with the operands and the 64-bit result.
 * Returns the low 32 bits of the quotient, converted to fixed.
 */
fixed
fdiv(fixed a, fixed b)
{
	long long aa, bb, cc;
	aa = (int)a;
	bb = (int)b;
	/* Tested on the 32-bit value so no long long comparison is needed. */
	if((int)b == 0) {
		die("##fdiv\n%08x/%08x\ndivide by zero", (int)aa, (int)bb);
		return (fixed)(int)0; /* only reached if die() returns */
	}
	/*
	 * Scale by multiplying rather than shifting: left-shifting a negative
	 * value is undefined in ISO C, while the product is always
	 * representable here (|aa| <= 2^31).
	 */
	cc = (aa * (1ll << FRACT_BITS)) / bb;
	/*
	 * Overflow is detected with a mask rather than a range comparison.
	 * An earlier check of the form
	 *   cc < (long long)0xffffffff80000000ll ||
	 *   cc > (long long)0x000000007fffffffll
	 * was disabled; the accompanying note (mis-encoded in this copy)
	 * mentions gcc33 and a value such as 0x3f8 being reported as an
	 * error, and appears to advise against range comparisons on
	 * long long using constants that do not fit in a signed 32-bit value.
	 *                   fedcba9876543210 */
	if((cc & 0xffffffff80000000ll) != 0x0000000000000000ll &&
	   (cc & 0xffffffff80000000ll) != 0xffffffff80000000ll) {
		die("##fdiv\n%08x/%08x\n%08x:%08x", (int)aa, (int)bb, (int)(cc >> 32), (int)(cc));
	}
	return (fixed)(int)cc;
}
#else /*FIX_NOASM*/
/*
 * fdiv (assembly version, built when FIX_NOASM is not defined).
 *
 * The #if below stops the build unless FRACT_BITS is 10 and ~FRACT_MASK is
 * -1024, because both values are hard-coded in the assembly text.
 *
 * Sequence, as written:
 *   1. %alr is loaded from %r12 and div0s %r13 starts the division.
 *   2. Ten div1 %r13 steps are issued (the tenth is annotated
 *      "10=FRACT_BITS").
 *   3. %psr is copied to %r11, the low 10 bits of %alr are cleared
 *      (xand with -1024 through %r10), and %psr is written back from %r11.
 *      The inline annotations name the N flag of %psr.
 *   4. Thirty-two more div1 %r13 steps follow, then div2s %r13 and div3s.
 *   5. %alr is copied to %r10 in the delay slot of ret.d; the annotation
 *      marks this delay-slot use as undocumented ("undoc'd").
 * The older "ld.w" followed by a plain "ret" is kept as assembler comments,
 * dated 2006/07/11.
 * Unlike the C version above, no range check of the result is performed.
 * NOTE(review): the ten leading div1 steps look like a way to pre-scale the
 * dividend by 2^FRACT_BITS using the divider's shifting, with step 3
 * discarding the quotient bits produced so far - confirm against the CPU
 * manual before changing anything here.
 * NOTE(review): presumably %r12/%r13 carry a/b and %r10 carries the return
 * value per the S1C33 calling convention - confirm in the compiler manual.
 */
#if (FRACT_BITS != 10) || (~FRACT_MASK != -1024)
#error Œ菬p[^zlƈقȂ܂B
#endif
	asm("
	.code
	.align 1
	.global fdiv
fdiv:
	ld.w %alr, %r12
	div0s %r13
	div1 %r13			;  1
	div1 %r13			;  2
	div1 %r13			;  3
	div1 %r13			;  4
	div1 %r13			;  5
	div1 %r13			;  6
	div1 %r13			;  7
	div1 %r13			;  8
	div1 %r13			;  9
	div1 %r13			; 10=FRACT_BITS
	ld.w %r11, %psr			; %psr(N)ۑ
	ld.w %r10, %alr
	xand %r10, %r10, -1024		; -1024=~FRACT_MASK(-1023Ȃ!)
	ld.w %alr, %r10
	ld.w %psr, %r11			; %psr(N)
	div1 %r13			;  1
	div1 %r13			;  2
	div1 %r13			;  3
	div1 %r13			;  4
	div1 %r13			;  5
	div1 %r13			;  6
	div1 %r13			;  7
	div1 %r13			;  8
	div1 %r13			;  9
	div1 %r13			; 10
	div1 %r13			; 11
	div1 %r13			; 12
	div1 %r13			; 13
	div1 %r13			; 14
	div1 %r13			; 15
	div1 %r13			; 16
	div1 %r13			; 17
	div1 %r13			; 18
	div1 %r13			; 19
	div1 %r13			; 20
	div1 %r13			; 21
	div1 %r13			; 22
	div1 %r13			; 23
	div1 %r13			; 24
	div1 %r13			; 25
	div1 %r13			; 26
	div1 %r13			; 27
	div1 %r13			; 28
	div1 %r13			; 29
	div1 %r13			; 30
	div1 %r13			; 31
	div1 %r13			; 32
	div2s %r13
	div3s
	;//ld.w %r10, %alr
	;//ret
	;//2006/07/11 ܂B
	ret.d
	ld.w %r10, %alr			; *delay* (undoc'd)
	");
#endif /*FIX_NOASM*/

/****************************************************************************
 *	fsqrt
 ****************************************************************************/

#ifdef FIX_NOASM
/*
 * fsqrt - fixed-point square root (C version, built when FIX_NOASM is defined).
 *
 * Calls DIE() when the argument is negative.  Otherwise searches for a
 * value whose square equals a scaled up by FRACT_BITS: the estimate starts
 * at 0 and is moved up or down by a step that starts at 2^31 and is halved
 * on every pass.  The search ends on an exact match or when the step
 * reaches 0; the estimate at that point is returned.
 */
fixed
fsqrt(fixed a)
{
	long long target, root, square, step;
	if((int)a < 0) DIE();
	target = (long long)(int)a << FRACT_BITS;
	root = 0; /* a zero target matches root*root at once and ends the loop */
	step = 1ll << 31; /* the 'll' suffix is required */
	while(step != 0) {
		square = root * root;
		if(square == target) {
			break;
		}
		if(square < target) {
			root += step;
		} else {
			root -= step;
		}
		step >>= 1;
	}
	return (fixed)(int)root;
}
#else /*FIX_NOASM*/
/*
 * fsqrt (assembly version, built when FIX_NOASM is not defined).
 *
 * The #if below stops the build unless FRACT_BITS is 10 and VALUE_BITS is
 * 22, because both shift counts are hard-coded in the assembly text.
 *
 * As written, the routine:
 *   - builds the 64-bit target a<<FRACT_BITS in %r13:%r12 (%r12 shifted
 *     left by 10, a copy in %r13 shifted right by 22),
 *   - starts with the estimate %r10 = 0 and the step %r11 = 0x80000000,
 *   - on each pass squares %r10 with mltu.w and compares %ahr (through
 *     %r15) with %r13, then %alr (through %r14) with %r12,
 *   - adds the step to %r10 when the square is below the target, or
 *     subtracts it when above, then shifts the step right by one and loops
 *     while the step is non-zero,
 *   - returns when the square equals the target or the step reaches zero.
 * Unlike the C version above, there is no check for a negative argument.
 * The inline TODO on the initial step is mis-encoded in this copy; it
 * appears to suggest that a smaller starting value would do.
 * NOTE(review): presumably %r12 carries a and %r10 carries the return value
 * per the S1C33 calling convention - confirm in the compiler manual.
 */
#if (FRACT_BITS != 10) || (VALUE_BITS != 22)
#error Œ菬p[^zlƈقȂ܂B
#endif
	asm("
	.code
	.align 1
	.global fsqrt
fsqrt:
	ld.w %r13, %r12		; %r13:%r12 = a<<FRACT_BITS
	xsll %r12, 10		; 10=FRACT_BITS
	xsrl %r13, 22		; 22=VALUE_BITS
	;
	xld.w %r10, 0
	xld.w %r11, 0x80000000	; TODO:ƏȐn߂Ă
	;
__fsqrt_loop:
	mltu.w %r10, %r10	; %ahr:%alr = %r10*%r10
	ld.w %r15, %ahr
	cmp %r15, %r13
	xjrult __fsqrt_ult
	xjrugt __fsqrt_ugt
	ld.w %r14, %alr		; (ld.w %rd,%ss delays!!) -> 2006/07/11ǋL undoc'ddelayA̍sdelayɂӖB
	cmp %r14, %r12
	xjrult __fsqrt_ult
	xjrugt __fsqrt_ugt
	ret			; %r10*%r10 == a<<FRACT_SCALE
	;
__fsqrt_ult:			; %r10*%r10 <  a<<FRACT_SCALE
	add %r10, %r11
	xsrl %r11, 1
	xjrne __fsqrt_loop
	ret
	;
__fsqrt_ugt:			; %r10*%r10 >  a<<FRACT_SCALE
	sub %r10, %r11
	xsrl %r11, 1
	xjrne __fsqrt_loop
	ret
	");
#endif /*FIX_NOASM*/

/****************************************************************************
 *	vxform
 ****************************************************************************/

#ifdef FIX_NOASM
/*
 * vxform - transform vector b by matrix a (C version, built when FIX_NOASM
 * is defined).
 *
 * Each output component is the sum of the three products of one matrix row
 * (Ai0, Ai1, Ai2) with (Bx, By, Bz), plus the row's fourth element Ai3.
 * Every product goes through fmul and every sum through fadd, in the same
 * association order for all three rows.
 */
vector
vxform(matrix a, vector b)
{
	fixed x, y, z;

	x = fadd(fadd(fadd(fmul(A00, Bx), fmul(A01, By)), fmul(A02, Bz)), A03);
	y = fadd(fadd(fadd(fmul(A10, Bx), fmul(A11, By)), fmul(A12, Bz)), A13);
	z = fadd(fadd(fadd(fmul(A20, Bx), fmul(A21, By)), fmul(A22, Bz)), A23);

	return (vector){ x, y, z };
}
#else /*FIX_NOASM*/
/*
 * vxform (assembly version, built when FIX_NOASM is not defined).
 *
 * In the C routine a shift to align the fractional bits happens on every
 * multiplication.  In the asm routine the shift is done once, after the
 * products of a row have been summed, for speed.
 *
 * As written, the routine:
 *   - copies the result pointer from %r12 to %r10.  The inline annotation
 *     (mis-encoded in this copy) cites S5U1C33000C_J.pdf p.85 and appears to
 *     say that the caller's result pointer passed in %r12 must be returned
 *     unchanged in %r10,
 *   - loads Bx, By, Bz from [%sp+52], [%sp+56], [%sp+60] into %r4..%r6,
 *   - for each of the three rows, reads the row's elements from the stack
 *     ([%sp+4].. for row 0, [%sp+20].. for row 1, [%sp+36].. for row 2),
 *     accumulates the three mlt.w products as a 64-bit sum in %r13:%r12
 *     (add / adc), combines the halves as (low >> 10) | (high << 22), adds
 *     the row's fourth element and stores the word at [%r10+0], [%r10+4]
 *     or [%r10+8].
 * No overflow check is performed.
 * NOTE(review): the shift counts 10 and 22 are hard-coded but, unlike the
 * scalar routines, no #if guards FRACT_BITS / VALUE_BITS in this block.
 */
	asm("
	.code
	.align 1
	.global vxform
vxform:
	ld.w	%r10, %r12		; %r10=ʊi[|C^߂liS5U1C33000C_J.pdf p.85 \̂Ԃ֐ւ̈n QƁj
	;				; %r12ɓnꂽB̌ʊi[p|C^A%r10ɂ̂܂ܕԂȂ΂ȂKłB
	;
	xld.w	%r4, [%sp+52]		; %r4=Bx
	xld.w	%r5, [%sp+56]		; %r5=By
	xld.w	%r6, [%sp+60]		; %r6=Bz
	;
	xld.w	%r11, [%sp+ 4]		; %r13:12=A00*Bx
	mlt.w	%r11, %r4
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+ 8]		; %r13:%r12+=A01*By
	mlt.w	%r11, %r5
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+12]		; %r13:%r12+=A02*Bz
	mlt.w	%r11, %r6
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	%r11, [%sp+16]		; %r12+=A03
	add	%r12, %r11
	xld.w	[%r10+ 0], %r12
	;
	xld.w	%r11, [%sp+20]		; %r13:12=A10*Bx
	mlt.w	%r11, %r4
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+24]		; %r13:%r12+=A11*By
	mlt.w	%r11, %r5
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+28]		; %r13:%r12+=A12*Bz
	mlt.w	%r11, %r6
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	%r11, [%sp+32]		; %r12+=A13
	add	%r12, %r11
	xld.w	[%r10+ 4], %r12
	;
	xld.w	%r11, [%sp+36]		; %r13:12=A20*Bx
	mlt.w	%r11, %r4
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+40]		; %r13:%r12+=A21*By
	mlt.w	%r11, %r5
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+44]		; %r13:%r12+=A22*Bz
	mlt.w	%r11, %r6
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	%r11, [%sp+48]		; %r12+=A23
	add	%r12, %r11
	xld.w	[%r10+ 8], %r12
	;
	ret
	");
#endif /*FIX_NOASM*/

/****************************************************************************
 *	mxform
 ****************************************************************************/

#ifdef FIX_NOASM
/*
 * mxform - combine matrix a with matrix b (C version, built when FIX_NOASM
 * is defined).
 *
 * Produces twelve elements, three rows of four.  For columns 0..2 each
 * element is the sum of the three products of a row of a (Ai0, Ai1, Ai2)
 * with a column of b (B0j, B1j, B2j).  For column 3 the row's own fourth
 * element Ai3 is added on top of that sum.  Every product goes through fmul
 * and every sum through fadd.
 */
matrix
mxform(matrix a, matrix b)
{
	fixed e00, e01, e02, e03;
	fixed e10, e11, e12, e13;
	fixed e20, e21, e22, e23;

	/* Row 0 of a. */
	e00 = fadd(fadd(fmul(A00, B00), fmul(A01, B10)), fmul(A02, B20));
	e01 = fadd(fadd(fmul(A00, B01), fmul(A01, B11)), fmul(A02, B21));
	e02 = fadd(fadd(fmul(A00, B02), fmul(A01, B12)), fmul(A02, B22));
	e03 = fadd(fadd(fadd(fmul(A00, B03), fmul(A01, B13)), fmul(A02, B23)), A03);

	/* Row 1 of a. */
	e10 = fadd(fadd(fmul(A10, B00), fmul(A11, B10)), fmul(A12, B20));
	e11 = fadd(fadd(fmul(A10, B01), fmul(A11, B11)), fmul(A12, B21));
	e12 = fadd(fadd(fmul(A10, B02), fmul(A11, B12)), fmul(A12, B22));
	e13 = fadd(fadd(fadd(fmul(A10, B03), fmul(A11, B13)), fmul(A12, B23)), A13);

	/* Row 2 of a. */
	e20 = fadd(fadd(fmul(A20, B00), fmul(A21, B10)), fmul(A22, B20));
	e21 = fadd(fadd(fmul(A20, B01), fmul(A21, B11)), fmul(A22, B21));
	e22 = fadd(fadd(fmul(A20, B02), fmul(A21, B12)), fmul(A22, B22));
	e23 = fadd(fadd(fadd(fmul(A20, B03), fmul(A21, B13)), fmul(A22, B23)), A23);

	return (matrix){
		e00, e01, e02, e03,
		e10, e11, e12, e13,
		e20, e21, e22, e23};
}
#else /*FIX_NOASM*/
/*
 * mxform (assembly version, built when FIX_NOASM is not defined).
 *
 * In the C routine a shift to align the fractional bits happens on every
 * multiplication.  In the asm routine the shift is done once, after the
 * products of an element have been summed, for speed.
 *
 * As written, the routine:
 *   - copies the result pointer from %r12 to %r10.  The inline annotation
 *     (mis-encoded in this copy) cites S5U1C33000C_J.pdf p.85 and appears to
 *     say that the caller's result pointer passed in %r12 must be returned
 *     unchanged in %r10,
 *   - for each of the three rows of a, loads the row's four elements into
 *     %r4..%r7 (from [%sp+4].., [%sp+20].. and [%sp+36]..),
 *   - for each of the four columns of b (elements read from [%sp+52]
 *     through [%sp+96]), accumulates the three mlt.w products as a 64-bit
 *     sum in %r13:%r12 (add / adc) and combines the halves as
 *     (low >> 10) | (high << 22),
 *   - for the fourth column only, adds %r7 (the row's fourth element),
 *   - stores the twelve result words at [%r10+0] through [%r10+44].
 * No overflow check is performed.
 * NOTE(review): the shift counts 10 and 22 are hard-coded but, unlike the
 * scalar routines, no #if guards FRACT_BITS / VALUE_BITS in this block.
 */
	asm("
	.code
	.align 1
	.global mxform
mxform:
	ld.w	%r10, %r12		; %r10=ʊi[|C^߂liS5U1C33000C_J.pdf p.85 \̂Ԃ֐ւ̈n QƁj
	;				; %r12ɓnꂽB̌ʊi[p|C^A%r10ɂ̂܂ܕԂȂ΂ȂKłB
	;
	xld.w	%r4, [%sp+ 4]		; %r4=A00
	xld.w	%r5, [%sp+ 8]		; %r5=A01
	xld.w	%r6, [%sp+12]		; %r6=A02
	xld.w	%r7, [%sp+16]		; %r7=A03
	;
	xld.w	%r11, [%sp+52]		; %r13:%r12=A00*B00
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+68]		; %r13:%r12+=A01*B10
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+84]		; %r13:%r12+=A02*B20
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+ 0], %r12
	;
	xld.w	%r11, [%sp+56]		; %r13:%r12=A00*B01
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+72]		; %r13:%r12+=A01*B11
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+88]		; %r13:%r12+=A02*B21
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+ 4], %r12
	;
	xld.w	%r11, [%sp+60]		; %r13:%r12=A00*B02
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+76]		; %r13:%r12+=A01*B12
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+92]		; %r13:%r12+=A02*B22
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+ 8], %r12
	;
	xld.w	%r11, [%sp+64]		; %r13:%r12=A00*B03
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+80]		; %r13:%r12+=A01*B13
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+96]		; %r13:%r12+=A02*B23
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	add	%r12, %r7		; %r12+=A03
	xld.w	[%r10+12], %r12
	;
	xld.w	%r4, [%sp+20]		; %r4=A10
	xld.w	%r5, [%sp+24]		; %r5=A11
	xld.w	%r6, [%sp+28]		; %r6=A12
	xld.w	%r7, [%sp+32]		; %r7=A13
	;
	xld.w	%r11, [%sp+52]		; %r13:%r12=A10*B00
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+68]		; %r13:%r12+=A11*B10
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+84]		; %r13:%r12+=A12*B20
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+16], %r12
	;
	xld.w	%r11, [%sp+56]		; %r13:%r12=A10*B01
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+72]		; %r13:%r12+=A11*B11
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+88]		; %r13:%r12+=A12*B21
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+20], %r12
	;
	xld.w	%r11, [%sp+60]		; %r13:%r12=A10*B02
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+76]		; %r13:%r12+=A11*B12
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+92]		; %r13:%r12+=A12*B22
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+24], %r12
	;
	xld.w	%r11, [%sp+64]		; %r13:%r12=A10*B03
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+80]		; %r13:%r12+=A11*B13
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+96]		; %r13:%r12+=A12*B23
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	add	%r12, %r7		; %r12+=A13
	xld.w	[%r10+28], %r12
	;
	xld.w	%r4, [%sp+36]		; %r4=A20
	xld.w	%r5, [%sp+40]		; %r5=A21
	xld.w	%r6, [%sp+44]		; %r6=A22
	xld.w	%r7, [%sp+48]		; %r7=A23
	;
	xld.w	%r11, [%sp+52]		; %r13:%r12=A20*B00
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+68]		; %r13:%r12+=A21*B10
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+84]		; %r13:%r12+=A22*B20
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+32], %r12
	;
	xld.w	%r11, [%sp+56]		; %r13:%r12=A20*B01
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+72]		; %r13:%r12+=A21*B11
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+88]		; %r13:%r12+=A22*B21
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+36], %r12
	;
	xld.w	%r11, [%sp+60]		; %r13:%r12=A20*B02
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+76]		; %r13:%r12+=A21*B12
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+92]		; %r13:%r12+=A22*B22
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	xld.w	[%r10+40], %r12
	;
	xld.w	%r11, [%sp+64]		; %r13:%r12=A20*B03
	mlt.w	%r4, %r11
	ld.w	%r12, %alr
	ld.w	%r13, %ahr
	xld.w	%r11, [%sp+80]		; %r13:%r12+=A21*B13
	mlt.w	%r5, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xld.w	%r11, [%sp+96]		; %r13:%r12+=A22*B23
	mlt.w	%r6, %r11
	ld.w	%r11, %alr
	add	%r12, %r11
	ld.w	%r11, %ahr
	adc	%r13, %r11
	xsrl	%r12, 10		; 10=FRACT_BITS
	xsll	%r13, 22		; 22=VALUE_BITS
	or	%r12, %r13
	add	%r12, %r7		; %r12+=A23
	xld.w	[%r10+44], %r12
	;
	ret
	");
#endif /*FIX_NOASM*/

