;**********************************************
;*	Copyright (C), 1996 SEIKO EPSON Corp.
;*		ALL RIGHTS RESERVED
;*
;*	filename : addsf3.s
;*
;*	Single floating point add function
;*						  subtract function					
;*		input: %r12, %r13
;*		output: %r10
;*
;*	Begin					1996/09/12	V. Chan
;*  Fixed sign bug			1997/02/17	V. Chan
;*  Fixed a precision bug	1997/02/24	V. Chan
;*  Fixed endless loop bug  2001/07/11 T.Tazaki
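;*  Merged the Ver4.7 fix   2006/07/10 Naoyuki Sawa
;*  - See the archive "keep/PIECE...fp_lib...-20060710.zip" (the Japanese part of
;*    the file name was lost to a character-encoding error in this comment).
;*    The Ver4.7 code was not copied verbatim; it was adapted before being applied.
;*    For the Ver4.7 disassembly, see the document inside the zip file above.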
;*
;**********************************************

.code
.align 1
.global __addsf3
.global __subsf3

;#macro	VARSHIFT $1, $2, $3
	; used in 32-bit variable shifting
	; $1 = input register
	; $2 = shift amount
	; $3 = shift instruction

;$$1:
;	cmp		$2, 8		; if temp <= 8 then goto $$2
;	jrle	$$2

;	$3		$1, 8		; shift input register 8 bits
;	jp.d	$$1
;	sub		$2, 8		; temp = temp - 8

;$$2:
;	$3		$1, $2		; last shift
;#endm

__subsf3:
	; Subtraction entry point: a - b is evaluated as a + (-b).
	; Toggle the sign bit (bit 31) of the second operand in %r13, then
	; fall through into __addsf3, which does the actual work.
	rl		%r13, 1			; rotate sign bit (bit 31) down to bit 0
	xor		%r13, 1			; flip it
	rr		%r13, 1			; rotate it back up to bit 31

;----------------------------------------------------------------------
; __addsf3 -- single precision floating point addition
;             (__subsf3 above flips the sign of %r13 and falls through
;              into this entry point)
;
;   in : %r12 = operand 1, %r13 = operand 2
;        (32-bit layout used below: sign [31], exponent [30:23],
;         mantissa [22:0])
;   out: %r10 = result in the same layout
;
;   Register roles inside the routine:
;     %r0  = result sign (0 or 1 in bit 0 until the final rotate)
;     %r1  = exp1, later the result exponent
;     %r2  = sign2, later the leading-zero count
;     %r3  = exp2, later the shift amount for mantissa1
;     %r4  = scratch (saved sign1, exponent difference, implied bit, counters)
;     %r5  = xflag: 0 exp1 = exp2, 1 exp1 > exp2, 2 exp1 < exp2
;     %r14 = 0x7fffff mantissa mask, %r15 = 0xff exponent limit
;   %r12 and %r13 are overwritten with the working mantissas.
;
;   Behaviour that follows directly from the code below:
;     - If either exponent is 0xff the routine returns 0x7f800000 OR'ed
;       with that operand's sign bit; mantissa bits of such inputs are
;       not looked at.
;     - If the exponents differ by 24 or more, the operand with the
;       larger exponent is returned unchanged.
;     - Bits shifted out during alignment/normalisation are discarded;
;       there is no rounding step.
;----------------------------------------------------------------------
__addsf3:
	pushn	%r3				; save register values (restored by popn %r3 at the end)
	ext	0x3		; 	xld.w	%r15, 0xff		; set overflow comparison value
	ld.w	%r15,0x3f

	ld.w	%r0, %r12		; put sign of input1 (%r12) into %r0
	rl		%r0, 1			; rotate left 1 bit: sign bit lands in bit 0
	and		%r0, 1			; use mask to keep LSB
	ld.w	%r4, %r0		; temp = sign1 (kept because %r0 is reused below)

	ld.w	%r2, %r13		; put sign of input2 (%r13) into %r2
	rl		%r2, 1			; rotate left 1 bit: sign bit lands in bit 0
	and		%r2, 1			; use mask to keep LSB

	ld.w	%r1, %r12		; put exponent1 into %r1
	sll		%r1, 1			; drop the sign bit
	srl	%r1,0x8		; 	xsrl 	%r1, 24 (done as three 8-bit shifts)
	srl	%r1,0x8
	srl	%r1,0x8

	cmp		%r1, %r15		; check exp1 for overflow value (0xff)
	ext	overflow@rm	; 	xjrge	overflow	; %r0 still holds sign1 here
	jrge	overflow@rl

	ld.w	%r3, %r13		; put exponent2 into %r3
	sll 	%r3, 1			; drop the sign bit
	srl	%r3,0x8		; 	xsrl	%r3, 24 (done as three 8-bit shifts)
	srl	%r3,0x8
	srl	%r3,0x8
	ld.w	%r0, %r2		; put sign2 into %r0 so the overflow path uses input2's sign

	cmp		%r3, %r15		; check exp2 for overflow value (0xff)
	ext	overflow@rm	; 	xjrge	overflow
	jrge	overflow@rl
		
	cmp		%r1, %r3		; compare exp1 and exp2
	jrlt.d	ex1ltex2
	ld.w	%r0, %r4		; *delay slot* restore sign1 (executed on both paths)

	; case: exp1 >= exp2
	ld.w	%r4, %r1		; temp = exp1 - exp2 (difference)
	sub		%r4, %r3
	cmp		%r4, 0x18		; difference >= 0x18 (24 bits) is too large to matter
	ld.w	%r10, %r12		; put first input into result register, %r10
							; (the jrge below depends on the flags from the cmp above)
	ext	end@rm		; 	xjrge	end				; return first input
	jrge	end@rl
	jp		continue

ex1ltex2:
	; case: exp2 > exp1
	ld.w 	%r4, %r3		; temp = exp2 - exp1
	sub		%r4, %r1
	cmp	%r4,0x18	; 	xcmp	%r4, 0x18		; difference >= 0x18
	ld.w	%r10, %r13		; put second input into result register, %r10
							; (the jrlt below depends on the flags from the cmp above)
	jrlt 	continue
	ld.w	%r0, %r2		; load second sign into result sign register
	ext	end@rm		; 	xjp		end				; return second input
	jp	end@rl

continue:
	ext	0xf		; 	xld.w	%r14, 0x7fffff	; set mask for isolating mantissa
	ext	0x1fff
	ld.w	%r14,0x3f
	ext	0x10		; 	xld.w	%r4, 0x800000	; set implied bit
	ext	0x0
	ld.w	%r4,0x0

	; isolate mantissa1
	cmp		%r1, 0			; if exp1 = 0
	jreq.d	getman2			; then jump to getman2
	and		%r12, %r14		; *delay slot* clear first 9 bits of %r12 (always executed)
	or		%r12, %r4		; if exp1 != 0 then add implied bit (normal)

getman2:
	; isolate mantissa2
	cmp		%r3, 0			; if exp2 = 0
	jreq.d	cmpexp			; then jump to cmpexp
	and		%r13, %r14		; *delay slot* clear first 9 bits of %r13 (always executed)
	or		%r13, %r4		; if exp2 != 0 then add implied bit (normal)

cmpexp:
	; compare exponents -- %r1 will be result exponent
	; if exp1 > exp2 then mantissa2 is shifted to the right
	; if exp2 > exp1 then mantissa1 is shifted to the right

	; shift both mantissas left by one for one extra bit of precision;
	; this is undone by the "srl %r10, 1" in the delay slot at precount
	sll		%r12, 1
	sll		%r13, 1

	; xflag indicates which input (1 or 2) is the smaller input
	ld.w	%r5, 0			; xflag = 0

	cmp		%r1, %r3
	jreq	negation		; if exp1 = exp2 then jump to negation
	jrgt	man2			; if exp1 > exp2 then jump to man2

	; case: exp1 < exp2
	cmp		%r1, 0
	ld.w	%r4, %r1		; temp = exp1
	jrne.d	shftm1			; if exp1 != 0 then normal
	ld.w	%r1, %r3		; *delay slot* result exp = exp2
	sub 	%r3, 1			; else denormal --> decrement shift

shftm1:
	sub		%r3, %r4		; shift amount = exp2 - temp (exp1)

	;xsrl		%r12, %r3	; man1 >> shift
	; expanded VARSHIFT %r12, %r3, srl:
	; shift in steps of 8 while more than 8 bits remain, then do the rest

__LX0001:				; __L0001:
	cmp		%r3, 8		; if remaining shift <= 8 then do the last shift
	jrle	__LX0002	; 	jrle	__L0002

	srl		%r12, 8		; shift input register 8 bits
	jp.d	__LX0001	; 	jp.d	__L0001
	sub		%r3, 8		; *delay slot* remaining = remaining - 8

__LX0002:				; __L0002:
	srl		%r12, %r3		; last shift
	jp.d	negation
	ld.w	%r5, 2			; *delay slot* xflag = 2

man2:
	; case: exp1 > exp2
	cmp		%r3, 0
	jrne.d	shftm2			; if exp2 != 0 then normal
	ld.w	%r4, %r1		; *delay slot* shift = exp1
	sub		%r4, 1			; else denormal -- decrement shift

shftm2:
	sub		%r4, %r3		; shift = exp1 - exp2

	;srl	%r13, %r4		; man2 >> shift
	; expanded VARSHIFT %r13, %r4, srl:
	; shift in steps of 8 while more than 8 bits remain, then do the rest

__LX0003:				; __L0003:
	cmp		%r4, 8		; if remaining shift <= 8 then do the last shift
	jrle	__LX0004	; 	jrle	__L0004

	srl		%r13, 8		; shift input register 8 bits
	jp.d	__LX0003	; 	jp.d	__L0003
	sub		%r4, 8		; *delay slot* remaining = remaining - 8

__LX0004:				; __L0004:
	srl		%r13, %r4		; last shift

	ld.w	%r5, 1			; xflag = 1

negation:
	; xflag = 0 if exp1 = exp2
	;		  1 if exp1 > exp2
	;		  2 if exp1 < exp2
	; %r0 will now be result sign bit
	;
	; When the signs differ, the mantissa of the negative operand is
	; replaced by its two's complement so the add below subtracts it.

	; if exp1 = exp2 then this is the case
	; sign1	(%r0)  sign2 (%r2)	result sign (%r0)
	;	0			  0				0	<== no change
	;	1			  0				0	<== change to 1 if result < 0
	;	0			  1				0	<== change to 1 if result < 0
	;	1			  1				1	<== no change

	cmp		%r0, %r2		; compare sign bits
	jreq	sign			; if equal then jump to sign
	cmp		%r0, 1
	jrne	negm2			; if sign1 != 1 then jump to negm2

	; case: sign1 = 1
	cmp		%r5, 0			; only change %r0 if xflag = 0
	jrne	negm1
	ld.w	%r0, 0			; %r0 is now temp result sign (positive)

negm1:
	not		%r12, %r12		; negate man1 (two's complement: ~x + 1)
	jp.d	sign			; delayed jump to sign
	add		%r12, 1			; *delay slot*
	
negm2:
	not		%r13, %r13		; negate man2 (two's complement: ~x + 1)
	add		%r13, 1

sign:
	; fix sign, case where exp2 > exp1 and sign2 = 1
	; if xflag = 2 then result sign = sign2 (%r0 = %r2)
	; if xflag = 1 then result sign = sign1	(%r0 = %r0)
	; if xflag = 0 then result sign = 0 or 1
	cmp		%r5, 2
	jrne	addition		; begin addition

	; case: xflag = 2
	ld.w	%r0, %r2		; result sign = sign2

addition:
	; %r0 = result sign, %r1 = result exponent
	; %r12 = mantissa1, %r13 = mantissa2
	; %r10 = result

	add		%r12, %r13		; add man1 and man2
	ld.w 	%r10, %r12		; put result (%r12) in %r10

	cmp		%r10, 0
	jreq.d	end				; jump to end if result = 0 (only the sign bit gets OR'ed in)
	ld.w 	%r4, 0			; *delay slot* clear temp register (loop counter for count)
	jrgt	precount		; if result > 0 then continue with normalize

	; case: result < 0
	not		%r10, %r10		; result = ~result + 1
	add		%r10, 1

	cmp		%r5, 0			; if xflag != 0 then goto precount
	jrne	precount
	ld.w 	%r0, 1			; sign was 0 now sign = 1 

precount:
;;{{BUGFIX
;; Previous code (could shift %r10 == 1 down to 0, see 2001/07/11 history entry):
;;	srl		%r10, 1			; compensate for the 1-bit left shifts at cmpexp
;;	jreq	expchk			; if equal then jump to expchk add 2001.07.11
;;}}BUGFIX{{
	; %r10 != 0 was already established above, so the only value the
	; 1-bit right shift could turn into 0 is %r10 == 1; only that case
	; needs special treatment.
	cmp	%r10,0x1	; 	xcmp	%r10, 1
	jrne.d	bugfix1		; 	xjrne.d	bugfix1			; %r10 != 1: the plain shift is enough
	srl		%r10, 1			; *delay* undo the extra precision shift (always executed)
	sub	%r1,0x1		; 	xsub	%r1, %r1, 1		; %r10 was 1: lower the exponent by one ...
	ld.w	%r10,0x1	; 	xld.w	%r10, 1			; ... and keep the bit instead of losing it
bugfix1:
;;}}BUGFIX
	ld.w	%r12, %r10		; copy new result to %r12 for counting

count:
	; Count the zeros above the leading 1 of the result, 8 bits at a time.
	; NOTE(review): relies on scan1 reporting "no 1 in the top 8 bits" via
	; the flag tested by jruge -- confirm against the S1C33 CPU manual.
	scan1	%r2, %r12		; count = # of 0's before leading 1 in the top 8 bits
	jruge	expchk			; a 1 was found: leave the loop
	add		%r4, 8			; add 8 to loop counter
	jp.d	count
	sll		%r12, 8			; *delay slot* shift 8 leading 0's out of %r12

expchk:
	add		%r2, %r4		; count = count + loop counter

	cmp		%r1, 0			; if exp != 0 then jump to normalize
	jrne	normalize

	; case: denormal + denormal (exponent = 0)
	cmp		%r2, 8			; if count != 8 then jump to finish (result stays denormal)
	jrne	finish
	jp.d	finish
	ld.w 	%r1, 1			; *delay slot* if count = 8 then normal result (exp = 1)

normalize:
	sub		%r2, 8			; count = count - 8
	jreq	finish			; if count was 8 then no normalize needed
	jrlt	shftrght		; if count was < 8 then normal result with carry-over

	; case: count was > 8 (%r2 now holds the left shift needed)
	cmp		%r1, %r2		; if exp <= shift then denormal
	jrle	denormal

	ld.w	%r4, %r2		; %r4 = temp = shift (the loop below consumes %r2)
	; expanded VARSHIFT  %r10, %r2, sll:
	; shift in steps of 8 while more than 8 bits remain, then do the rest

__LX0005:				; __L0005:
	cmp		%r2, 8		; if remaining shift <= 8 then do the last shift
	jrle	__LX0006	; 	jrle	__L0006

	sll		%r10, 8		; shift input register 8 bits
	jp.d	__LX0005	; 	jp.d	__L0005
	sub		%r2, 8		; *delay slot* remaining = remaining - 8

__LX0006:				; __L0006:
	sll		%r10, %r2		; last shift
	;xsll	%r10, %r2		; result << count

	jp.d	finish
	sub		%r1, %r4		; *delay slot* exp = exp - shift

denormal:
	sub 	%r1, 1			; exp = exp - 1 (shift to denormal position)
	; expanded VARSHIFT %r10, %r1, sll:
	; shift in steps of 8 while more than 8 bits remain, then do the rest

__LX0007:				; __L0007:
	cmp		%r1, 8		; if remaining shift <= 8 then do the last shift
	jrle	__LX0008	; 	jrle	__L0008

	sll		%r10, 8		; shift input register 8 bits
	jp.d	__LX0007	; 	jp.d	__L0007
	sub		%r1, 8		; *delay slot* remaining = remaining - 8

__LX0008:				; __L0008:
	sll		%r10, %r1		; last shift
	;xsll 	%r10, %r1		; result << exp
	jp.d	finish
	ld.w	%r1, 0			; *delay slot* exp = 0 (denormal result)

shftrght:
	; the sum carried out above the implied-bit position
	srl		%r10, 1			; result >> 1 (the bit shifted out is dropped)
	add		%r1, 1			; exp = exp + 1

	; overflow check
	cmp		%r1, %r15
	jrlt	finish			; if exp < 0xff then jump to finish

overflow:
	; also reached from the input checks when an exponent is 0xff;
	; %r0 holds the sign that "end" will OR into the result
	ext	0xff0		; 	xld.w	%r10, 0x7f800000	; put infinity into result
	ext	0x0
	ld.w	%r10,0x0
	jp		end

finish:
	; %r0 = sign, %r1 = exponent, %r10 = mantissa

	and		%r10, %r14		; isolate mantissa (drops the implied bit)

	rr	%r1,0x8		; 	xrr		%r1, 9			; position exponent bits to [30:23]
	rr	%r1,0x1
	or		%r10, %r1

end:
	rr		%r0, 1			; position sign bit to MSB
	or		%r10, %r0		; add sign bit

	popn	%r3				; restore register values
	ret
