;	
;	framppua.s
;
;	P/ECE PPU (RICOH RP2C02) Emulator (1/4 resolution)
;
;	CLiP - Common Library for P/ECE
;	Copyright (C) 2001-2005 Naoyuki Sawa
;
;	* Wed Feb 09 04:34:00 JST 2005 Naoyuki Sawa
;	- Started creation.
;
;#include "clipppua.h"
;/	
;/	framppua.h
;/
;/	P/ECE PPU (RICOH RP2C02) Emulator
;/
;/	CLiP - Common Library for P/ECE
;/	Copyright (C) 2001-2005 Naoyuki Sawa
;/
;/	* Sun Jan 30 21:25:00 JST 2005 Naoyuki Sawa
;/	- Started creation.
;/

;/ This file is referenced from both C and assembler sources.
;/ It must not contain C-specific definitions.

;/ If this symbol is defined, the assembler implementation is used.
;/ If this symbol is not defined, the C implementation is used.
;#define PPU_ASM
;#ifdef PPU_ASM

	.code			; everything below is assembled into the code section
	.align 1		; NOTE(review): presumably 2^1 = 2-byte (instruction) alignment - confirm against the assembler manual

;****************************************************************************
;	
;****************************************************************************

;
;	PPU structure (byte offsets of the fields, and their sizes)
;
;#define PPU_CONTROL1	 0	; + 0, 1
;#define PPU_CONTROL2	 1	; + 1, 1
;#define PPU_STATUS	 2	; + 2, 1
;#define SPRRAM_ADDRESS	 3	; + 3, 1
;#define VRAM_ADDRESS1	 4	; + 4, 2
;#define VRAM_ADDRESS2	 6	; + 6, 2
;#define FLAGS		 8	; + 8, 1
;#define VRAM_BUFFER	 9	; + 9, 1
;#define SCAN_START	10	; +10, 1
;#define SCAN_END	11	; +11, 1
;#define VBUFF		12	; +12, 4
;#define CLUT		16	; +16,32

;****************************************************************************
;	
;****************************************************************************

	.global ppu4_reduce
ppu4_reduce:
	ext	0x0		; 	xld.w	%r14, 0x20			; X̃[v   (pɂɎĝŃWX^ɕێ)
	ld.w	%r14,0x20
	ext	0x2		; 	xld.w	%r15, 0x80			; 1C̃ItZbg (pɂɎĝŃWX^ɕێ)
	ld.w	%r15,0x0
	;
	ld.w	%r4,0x8		; 	xld.w	%r4, 0x8			; z = 120/15
ppu4_reduce_DO_Z:				; do {
	;
	ld.w	%r5,0x2		; 	xld.w	%r5, 2				;   i = 2
	ld.w	%r6,0xb		; 	xld.w	%r6, 11				;   y = 11
ppu4_reduce_DO_Y:				;   do {
	;
	ld.w	%r7, %r14			;     x = 128/4
ppu4_reduce_DO_X:				;     do {
	;
	ext	0x80		; 	xld.w	%r10, [%r13+0x80]		;       v = tmp[0] + tmp[128]
	ld.w	%r10,[%r13]
	 ld.w	%r11, [%r13]+			;       tmp += 4
	add	%r11, %r10
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1
	srl	%r10,0x1	; 	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	srl	%r11,0x8	; 	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1
	srl	%r10,0x1	; 	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	srl	%r11,0x8	; 	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1
	srl	%r10,0x1	; 	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	srl	%r11,0x8	; 	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1
	srl	%r10,0x1	; 	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	srl	%r11,0x8	; 	xsrl	%r11, 8				;       v >>= 8
	;
	sub	%r7,0x1		; 	xsub	%r7, %r7, 1			;     } while(--x)
	jrne	ppu4_reduce_DO_X	; 	xjrne	ppu4_reduce_DO_X
	;
	sub	%r5,0x1		; 	xsub	%r5, %r5, 1
	jrne	3				;     if(--i)
	add	%r13, %r15			;     (skip?) tmp += 128*(2-1)
	ld.w	%r5, 3				;     (skip?) i = 3           
	;
	sub	%r6,0x1		; 	xsub	%r6, %r6, 1			;   } while(--y)
	jrne	ppu4_reduce_DO_Y	; 	xjrne	ppu4_reduce_DO_Y
	;
	sub	%r4,0x1		; 	xsub	%r4, %r4, 1			; } while(--z)
	jrne	ppu4_reduce_DO_Z	; 	xjrne	ppu4_reduce_DO_Z
	;
	ret

;****************************************************************************
;	
;****************************************************************************

;
; ppu4_draw_chr
;
; Draws one character pattern at half scale (4x4 pixels: every other row and
; every other column of the 8x8, two-bitplane pattern) into the frame buffer.
; Pixels whose 2-bit value is 0 are transparent and are not written.
; Drawing is clipped vertically to [scan_start/2, scan_end/2) and
; horizontally to [0, 128).
;
; [in]
;	%r12	xpos	(halved on entry)
;	%r13	ypos	(halved on entry)
;	%r14	code	(pattern address = ppu_vram + code*16)
;	%r15	palno	(colour table = ppu+16 + palno*4)
;
; [reads]
;	ppu+10 (scan_start), ppu+11 (scan_end), ppu+12 (vbuff pointer),
;	ppu+16.. (clut), ppu_vram
;
; [clobbers]
;	%r4-%r7, %r10-%r15, %psr
;	(%r0/%r1 are used only in the clipping path and are saved/restored there)
;
; Notes on reading this listing
;	- Immediates are raw instruction fields: "ext N" supplies the upper bits
;	  of the following immediate/displacement, and 0x3c without ext is the
;	  6-bit encoding of -4.  The "x..." text in the comments is the original
;	  extended instruction.
;	- Lines marked *delay* sit in a branch delay slot and are executed
;	  whether or not the branch is taken.
;
	.global	ppu4_draw_chr
ppu4_draw_chr:
	ext	ppu+0x0@h	; 	xld.w	%r7, ppu			; %r7 = address of ppu
	ext	ppu+0x0@m
	ld.w	%r7,ppu+0x0@l
	;
	ext	0xa		; 	xld.ub	%r4, [%r7+0xa]		; %r4 = scan_start
	ld.ub	%r4,[%r7]
	ext	0xb		; 	xld.ub	%r5, [%r7+0xb]		; %r5 = scan_end
	ld.ub	%r5,[%r7]
	;
	; Convert everything to half-resolution coordinates.
	sra	%r4,0x1		; 	xsra	%r4, 1				; scan_start /= 2
	sra	%r5,0x1		; 	xsra	%r5, 1				; scan_end   /= 2
	sra	%r12,0x1	; 	xsra	%r12, 1				; xpos       /= 2
	sra	%r13,0x1	; 	xsra	%r13, 1				; ypos       /= 2
	;
	; Trivial rejection: leave if no part of the 4x4 cell can be visible.
	sub	%r4, 4				; if(ypos <= scan_start-4) goto EXIT
	cmp	%r13, %r4
	jrle.d	ppu4_draw_chr_EXIT	; 	xjrle.d	ppu4_draw_chr_EXIT
	add	%r4, 4				; *delay* restore scan_start on both paths
	cmp	%r13, %r5			; if(ypos >= scan_end) goto EXIT
	jrge	ppu4_draw_chr_EXIT	; 	xjrge	ppu4_draw_chr_EXIT
	cmp	%r12,0x3c	; 	xcmp	%r12, -4			; if(xpos <= 0-4) goto EXIT   (0x3c = -4 as a 6-bit field)
	jrle	ppu4_draw_chr_EXIT	; 	xjrle	ppu4_draw_chr_EXIT
	ext	0x2		; 	xcmp	%r12, 128			; if(xpos >= 128) goto EXIT
	cmp	%r12,0x0
	jrge	ppu4_draw_chr_EXIT	; 	xjrge	ppu4_draw_chr_EXIT
	;
	ext	0xc		; 	xld.w	%r6, [%r7+0xc]		; %r6 = vbuff
	ld.w	%r6,[%r7]
	add	%r7,0x10	; 	xadd	%r7, %r7, 16			; %r7 = clut
	;
	sla	%r13,0x7	; 	xsla	%r13, 7				; vbuff += ypos * 128 + xpos
	add	%r6, %r13
	sra	%r13,0x7	; 	xsra	%r13, 7				; (arithmetic shift back: ypos keeps its sign)
	add	%r6, %r12
	;
	sll	%r15,0x2	; 	xsll	%r15, 2				; clut += palno << 2
	add	%r7, %r15
	;
	sll	%r14,0x4	; 	xsll	%r14, 4				; %r14 = pattern = &vram[0x0000|code<<4]
	ext	ppu_vram+0x0@ah
	ext	ppu_vram+0x0@al
	add	%r14, %r14			; with the ext prefixes: %r14 = %r14 + ppu_vram
	;
	; Choose the path: FAST when the whole 4x4 cell is inside the drawable
	; area, CLIP (per-row / per-pixel tests) otherwise.
	cmp	%r13, %r4			; if(ypos < scan_start) goto CLIP
	jrlt	ppu4_draw_chr_CLIP	; 	xjrlt	ppu4_draw_chr_CLIP
	sub	%r5, 4				; if(ypos > scan_end-4) goto CLIP
	cmp	%r13, %r5
	jrgt.d	ppu4_draw_chr_CLIP	; 	xjrgt.d	ppu4_draw_chr_CLIP
	add	%r5, 4				; *delay* restore scan_end on both paths
	ext	0x1		; 	xcmp	%r12, 0x7c			; if(xpos < 0 || xpos > 128-4) goto CLIP
	cmp	%r12,0x3c
	jrugt	ppu4_draw_chr_CLIP	; 	xjrugt	ppu4_draw_chr_CLIP	; unsigned test: a negative xpos compares as a huge value
	;--------------------------------------------------------------------
	; FAST path: no clipping tests inside the loops.
	; %r12 and %r13 are reused as the x / y loop counters from here on.
ppu4_draw_chr_FAST:
	ld.w	%r13,0x4	; 	xld.w	%r13, 4				; %r13 = y = 4
ppu4_draw_chr_FAST_DO_Y:			; do {
	;
	ld.ub	%r10,[%r14]	; 	xld.ub	%r10, [%r14+0x0]			;   %r10 = pattern[0] (lobit = 00000000 00000000 00000000 a-c-e-g-)
	ext	0x8		; 	xld.ub	%r11, [%r14+0x8]			;   %r11 = pattern[8] (hibit = 00000000 00000000 00000000 A-C-E-G-), second bitplane
	ld.ub	%r11,[%r14]
	swap	%r10, %r10			;   lobit = a-c-e-g- 00000000 00000000 00000000
	swap	%r11, %r11			;   hibit = A-C-E-G- 00000000 00000000 00000000
	;
	ld.w	%r12,0x4	; 	xld.w	%r12, 4				;   %r12 = x = 4
ppu4_draw_chr_FAST_DO_X:			;   do {
	;
	; Shift the next high-plane bit and low-plane bit into the bottom of
	; %r10 through the carry flag; the order of these three is essential.
	add	%r11, %r11			;     hibit = -C-E-G-0 00000000 00000000 00000000, %psr(C) = A
	adc	%r10, %r10			;     lobit = -c-e-g-0 00000000 00000000 0000000A, %psr(C) = a
	adc	%r10, %r10			;     lobit = c-e-g-00 00000000 00000000 000000Aa
	ext	0x3		; 	xand	%r15, %r10, 3			;     %r15  = 00000000 00000000 00000000 000000Aa  (v, the 2-bit pixel value)
	and	%r15,%r10
	jreq.d	ppu4_draw_chr_FAST_SKIP_X	; 	xjreq.d	ppu4_draw_chr_FAST_SKIP_X	;     if(!v) goto SKIP_X  (transparent pixel)
	sll	%r11, 1				;     hibit = C-E-G-00 00000000 00000000 00000000  *delay* (drops the skipped odd column)
	;
	add	%r15, %r7			;     *vbuff = clut[v]
	ld.ub	%r15, [%r15]
	ld.b	[%r6], %r15
	;
ppu4_draw_chr_FAST_SKIP_X:
	sub	%r12,0x1	; 	xsub	%r12, %r12, 1
	jrne.d	ppu4_draw_chr_FAST_DO_X	; 	xjrne.d	ppu4_draw_chr_FAST_DO_X		;   } while(--x)
	add	%r6, 1				;   vbuff++ *delay*
	;
	ext	0x1		; 	xadd	%r6, %r6, 0x7c			;   vbuff += 128-4  (start of the next buffer row)
	add	%r6,0x3c
	;
	sub	%r13,0x1	; 	xsub	%r13, %r13, 1
	jrne.d	ppu4_draw_chr_FAST_DO_Y	; 	xjrne.d	ppu4_draw_chr_FAST_DO_Y		; } while(--y)
	add	%r14, 2				; pattern += 2 *delay* (every other pattern row)
	;
	jp	ppu4_draw_chr_EXIT	; 	xjp	ppu4_draw_chr_EXIT
	;--------------------------------------------------------------------
	; CLIP path: same drawing, but rows outside [scan_start, scan_end) are
	; skipped and each pixel is tested against [0, 128).  %r12/%r13 keep
	; tracking xpos/ypos, so the loop counters live in %r0/%r1.
ppu4_draw_chr_CLIP:
	pushn	%r1				; save %r0-%r1 (restored by popn below)
	;
	ld.w	%r1,0x4		; 	xld.w	%r1, 4				; %r1 = y = 4
ppu4_draw_chr_CLIP_DO_Y:			; do {
	;
	cmp	%r13, %r4			;   if(ypos < scan_start) goto SKIP_Y
	jrlt	ppu4_draw_chr_CLIP_SKIP_Y	; 	xjrlt	ppu4_draw_chr_CLIP_SKIP_Y
	cmp	%r13, %r5			;   if(ypos >= scan_end) goto SKIP_Y
	jrge	ppu4_draw_chr_CLIP_SKIP_Y	; 	xjrge	ppu4_draw_chr_CLIP_SKIP_Y
	;
	ld.ub	%r10,[%r14]	; 	xld.ub	%r10, [%r14+0x0]			;   %r10 = pattern[0] (lobit = 00000000 00000000 00000000 a-c-e-g-)
	ext	0x8		; 	xld.ub	%r11, [%r14+0x8]			;   %r11 = pattern[8] (hibit = 00000000 00000000 00000000 A-C-E-G-), second bitplane
	ld.ub	%r11,[%r14]
	swap	%r10, %r10			;   lobit = a-c-e-g- 00000000 00000000 00000000
	swap	%r11, %r11			;   hibit = A-C-E-G- 00000000 00000000 00000000
	;
	ld.w	%r0,0x4		; 	xld.w	%r0, 4				;   %r0 = x = 4
ppu4_draw_chr_CLIP_DO_X:			;   do {
	;
	; Same carry-chain bit extraction as in the FAST path.
	add	%r11, %r11			;     hibit = -C-E-G-0 00000000 00000000 00000000, %psr(C) = A
	adc	%r10, %r10			;     lobit = -c-e-g-0 00000000 00000000 0000000A, %psr(C) = a
	adc	%r10, %r10			;     lobit = c-e-g-00 00000000 00000000 000000Aa
	ext	0x3		; 	xand	%r15, %r10, 3			;     %r15  = 00000000 00000000 00000000 000000Aa  (v, the 2-bit pixel value)
	and	%r15,%r10
	jreq.d	ppu4_draw_chr_CLIP_SKIP_X	; 	xjreq.d	ppu4_draw_chr_CLIP_SKIP_X	;     if(!v) goto SKIP_X  (transparent pixel)
	sll	%r11, 1				;     hibit = C-E-G-00 00000000 00000000 00000000  *delay* (drops the skipped odd column)
	;
	ext	0x2		; 	xcmp	%r12, 128			;     if(xpos < 0 || xpos >= 128) goto SKIP_X
	cmp	%r12,0x0
	jruge	ppu4_draw_chr_CLIP_SKIP_X	; 	xjruge	ppu4_draw_chr_CLIP_SKIP_X	;     unsigned test covers the negative case too
	;
	add	%r15, %r7			;     *vbuff = clut[v]
	ld.ub	%r15, [%r15]
	ld.b	[%r6], %r15
	;
ppu4_draw_chr_CLIP_SKIP_X:
	add	%r12,0x1	; 	xadd	%r12, %r12, 1			;     xpos++
	;
	sub	%r0,0x1		; 	xsub	%r0, %r0, 1
	jrne.d	ppu4_draw_chr_CLIP_DO_X	; 	xjrne.d	ppu4_draw_chr_CLIP_DO_X		;   } while(--x)
	add	%r6, 1				;   vbuff++ *delay*
	;
	; Rewind to the left edge of the cell; skipped rows jump past this
	; because they never advanced xpos/vbuff.
	sub	%r12,0x4	; 	xsub	%r12, %r12, 4			;   xpos  -= 4
	sub	%r6,0x4		; 	xsub	%r6, %r6, 4			;   vbuff -= 4
ppu4_draw_chr_CLIP_SKIP_Y:
	add	%r13,0x1	; 	xadd	%r13, %r13, 1			;   ypos++
	ext	0x2		; 	xadd	%r6, %r6, 128			;   vbuff += 128
	add	%r6,0x0
	;
	sub	%r1,0x1		; 	xsub	%r1, %r1, 1
	jrne.d	ppu4_draw_chr_CLIP_DO_Y	; 	xjrne.d	ppu4_draw_chr_CLIP_DO_Y		; } while(--y)
	add	%r14, 2				; pattern += 2 *delay* (every other pattern row)
	;
	popn	%r1				; restore %r0-%r1, then fall through to EXIT
	;--------------------------------------------------------------------
	; Common exit; the early-out branches arrive here before anything was pushed.
ppu4_draw_chr_EXIT:
	ret

;****************************************************************************
;	
;****************************************************************************

;#endif /*PPU_ASM*/
