;	
;	framppua.s
;
;	P/ECE PPU (RICOH RP2C02) Emulator (1/4 resolution)
;
;	CLiP - Common Library for P/ECE
;	Copyright (C) 2001-2005 Naoyuki Sawa
;
;	* Wed Feb 09 04:34:00 JST 2005 Naoyuki Sawa
;	- Started development.
;
#include "clipppua.h"
#ifdef PPU_ASM

	.code
	.align 1

;****************************************************************************
;	
;****************************************************************************

;
;	PPU structure member offsets (trailing comment: + byte offset, size in bytes)
;
#define PPU_CONTROL1	 0	; + 0, 1
#define PPU_CONTROL2	 1	; + 1, 1
#define PPU_STATUS	 2	; + 2, 1
#define SPRRAM_ADDRESS	 3	; + 3, 1
#define VRAM_ADDRESS1	 4	; + 4, 2
#define VRAM_ADDRESS2	 6	; + 6, 2
#define FLAGS		 8	; + 8, 1
#define VRAM_BUFFER	 9	; + 9, 1
#define SCAN_START	10	; +10, 1
#define SCAN_END	11	; +11, 1
#define VBUFF		12	; +12, 4
#define CLUT		16	; +16,32

;****************************************************************************
;	ppu4_reduce
;
;	Vertically shrinks an image of 128-byte rows: every group of 15 source
;	rows produces 11 destination rows, and the outer loop runs 120/15 = 8
;	times (120 source rows in, 88 destination rows out).
;	Each destination byte is (src[row][x] + src[row+1][x]) >> 1, i.e. the
;	average of two vertically adjacent source bytes.  Four bytes are
;	processed per inner iteration by adding two 32-bit words.
;
; [in]
;	%r12	dst  - byte pointer written with post-increment
;	%r13	tmp  - source pointer read as 32-bit words with post-increment
;	(presumably the 1st and 2nd arguments from the C caller - confirm)
;
; [out]
;	%r12 has advanced by 88*128 bytes, %r13 by 120*128 bytes.
;
; [clobbers]
;	%r4-%r7, %r10, %r11, %r14, %r15
;
; NOTE(review): the two source words are added as whole 32-bit values, so a
;	per-byte sum above 255 would carry into the neighbouring byte lane.
;	Presumably the source bytes are small enough that this cannot
;	happen - confirm against the producer of tmp.
;****************************************************************************

	.global ppu4_reduce
ppu4_reduce:
	xld.w	%r14, 128/4			; word count of the X loop   (kept in a register because it is used often)
	xld.w	%r15, 128*1			; byte offset of one row     (kept in a register because it is used often)
	;
	xld.w	%r4, 120/15			; z = 120/15  (number of 15-row source groups)
ppu4_reduce_DO_Z:				; do {
	;
	xld.w	%r5, 2				;   i = 2   (rows left until the next extra source row is skipped)
	xld.w	%r6, 11				;   y = 11  (destination rows per group)
ppu4_reduce_DO_Y:				;   do {
	;
	ld.w	%r7, %r14			;     x = 128/4
ppu4_reduce_DO_X:				;     do {
	;
	xld.w	%r10, [%r13+128]		;       v = tmp[0] + tmp[128]   (same 4 bytes of this row and of the next row)
	 ld.w	%r11, [%r13]+			;       tmp += 4
	add	%r11, %r10
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1   (byte lane 0)
	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1   (byte lane 1)
	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1   (byte lane 2)
	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	xsrl	%r11, 8				;       v >>= 8
	;
	ld.ub	%r10, %r11			;       *dst++ = (unsigned char)v >> 1   (byte lane 3)
	xsrl	%r10, 1
	ld.b	[%r12]+, %r10
	xsrl	%r11, 8				;       v >>= 8   NOTE(review): looks redundant - %r11 is reloaded at the top of the loop
	;
	xsub	%r7, %r7, 1			;     } while(--x)
	xjrne	ppu4_reduce_DO_X
	;
	; Row dropping: after the X loop tmp already points at the next source
	; row.  When i reaches zero one additional source row is stepped over.
	; With i = 2 first and i = 3 afterwards this happens after destination
	; rows 2, 5, 8 and 11 of each group: 11 rows written + 4 skipped = 15.
	;
	; CAUTION: "jrne 3" is a numeric relative branch.  It is meant to skip
	; exactly the two single instructions that follow it, so do not insert
	; or expand instructions here without adjusting the displacement.
	;
	xsub	%r5, %r5, 1
	jrne	3				;     if(--i == 0) {       (branch taken when i != 0: skips the next two instructions)
	add	%r13, %r15			;       tmp += 128*(2-1)   (step over one extra source row)
	ld.w	%r5, 3				;       i = 3 }
	;
	xsub	%r6, %r6, 1			;   } while(--y)
	xjrne	ppu4_reduce_DO_Y
	;
	xsub	%r4, %r4, 1			; } while(--z)
	xjrne	ppu4_reduce_DO_Z
	;
	ret

;****************************************************************************
;	ppu4_draw_chr
;
;	Draws one character at half scale: a 4x4 pixel cell taken from an
;	8-row, two-bit-plane pattern (every second row and every second column
;	of the pattern is used).  Pixels whose 2-bit value is 0 are not written
;	(transparent); other pixels are written as clut[palno*4 + value].
;****************************************************************************

;
; [in]
;	%r12	xpos   (halved on entry with an arithmetic shift)
;	%r13	ypos   (halved on entry with an arithmetic shift)
;	%r14	code   (pattern index; pattern address = ppu_vram + 0x0000 + code*16)
;	%r15	palno  (palette number; selects a group of 4 CLUT bytes)
;
; [reads]
;	ppu       - SCAN_START, SCAN_END, VBUFF and CLUT members
;	ppu_vram  - pattern data: low bit plane at +0, high bit plane at +8
;
; [writes]
;	Bytes at vbuff + ypos*128 + xpos (row stride 128), only for rows with
;	scan_start/2 <= ypos < scan_end/2 and columns with 0 <= xpos < 128.
;
; [clobbers]
;	%r4-%r7, %r10-%r15.  %r0 and %r1 are used only on the clipped path
;	and are saved/restored there with pushn/popn.
;
	.global	ppu4_draw_chr
ppu4_draw_chr:
	xld.w	%r7, ppu			; %r7 = ppu
	;
	xld.ub	%r4, [%r7+SCAN_START]		; %r4 = scan_start
	xld.ub	%r5, [%r7+SCAN_END]		; %r5 = scan_end
	;
	xsra	%r4, 1				; scan_start /= 2
	xsra	%r5, 1				; scan_end   /= 2
	xsra	%r12, 1				; xpos       /= 2
	xsra	%r13, 1				; ypos       /= 2
	;
	; Trivial reject: leave if no part of the 4x4 cell can be visible.
	;
	sub	%r4, 4				; if(ypos <= scan_start-4) goto EXIT
	cmp	%r13, %r4
	xjrle.d	ppu4_draw_chr_EXIT
	add	%r4, 4				; *delay*  restore scan_start (runs whether or not the branch is taken)
	cmp	%r13, %r5			; if(ypos >= scan_end) goto EXIT
	xjrge	ppu4_draw_chr_EXIT
	xcmp	%r12, 0-4			; if(xpos <= 0-4) goto EXIT
	xjrle	ppu4_draw_chr_EXIT
	xcmp	%r12, 128			; if(xpos >= 128) goto EXIT
	xjrge	ppu4_draw_chr_EXIT
	;
	xld.w	%r6, [%r7+VBUFF]		; %r6 = vbuff
	xadd	%r7, %r7, CLUT			; %r7 = clut
	;
	xsla	%r13, 7				; vbuff += ypos * 128 + xpos
	add	%r6, %r13
	xsra	%r13, 7				; (undo the shift so %r13 is ypos again)
	add	%r6, %r12
	;
	xsll	%r15, 2				; clut += palno << 2
	add	%r7, %r15
	;
	xsll	%r14, 4				; %r14 = pattern = &vram[0x0000|code<<4]
	ext	ppu_vram+0x0000@ah		;   (the two ext prefixes supply the address of ppu_vram
	ext	ppu_vram+0x0000@al		;    as the immediate operand of the add below)
	add	%r14, %r14
	;
	; Choose the path: FAST when the whole cell is inside the drawable
	; area, CLIP (per-pixel bounds checks) otherwise.
	;
	cmp	%r13, %r4			; if(ypos < scan_start) goto CLIP
	xjrlt	ppu4_draw_chr_CLIP
	sub	%r5, 4				; if(ypos > scan_end-4) goto CLIP
	cmp	%r13, %r5
	xjrgt.d	ppu4_draw_chr_CLIP
	add	%r5, 4				; *delay*  restore scan_end (runs whether or not the branch is taken)
	xcmp	%r12, 128-4			; if(xpos < 0 || xpos > 128-4) goto CLIP  (one unsigned compare covers both)
	xjrugt	ppu4_draw_chr_CLIP
	;--------------------------------------------------------------------
	; FAST path: no clipping.  xpos/ypos are no longer needed, so
	; %r12/%r13 are reused as the x/y loop counters.
	;--------------------------------------------------------------------
ppu4_draw_chr_FAST:
	xld.w	%r13, 4				; %r13 = y = 4
ppu4_draw_chr_FAST_DO_Y:			; do {
	;
	xld.ub	%r10, [%r14+0]			;   %r10 = pattern[0] (lobit = 00000000 00000000 00000000 a-c-e-g-)
	xld.ub	%r11, [%r14+8]			;   %r11 = pattern[1] (hibit = 00000000 00000000 00000000 A-C-E-G-)
	swap	%r10, %r10			;   lobit = a-c-e-g- 00000000 00000000 00000000
	swap	%r11, %r11			;   hibit = A-C-E-G- 00000000 00000000 00000000
	;
	xld.w	%r12, 4				;   %r12 = x = 4
ppu4_draw_chr_FAST_DO_X:			;   do {
	;
	; Shift the top bit of each plane into the low two bits of lobit via
	; the carry flag, then drop the following (unused) pattern column.
	;
	add	%r11, %r11			;     hibit = -C-E-G-0 00000000 00000000 00000000, %psr(C) = A
	adc	%r10, %r10			;     lobit = -c-e-g-0 00000000 00000000 0000000A, %psr(C) = a
	adc	%r10, %r10			;     lobit = c-e-g-00 00000000 00000000 000000Aa
	xand	%r15, %r10, 3			;     %r15  = 00000000 00000000 00000000 000000Aa  (v = 2-bit pixel value)
	xjreq.d	ppu4_draw_chr_FAST_SKIP_X	;     if(!v) goto SKIP_X   (value 0 is transparent)
	sll	%r11, 1				;     hibit = C-E-G-00 00000000 00000000 00000000  *delay*
	;
	add	%r15, %r7			;     *vbuff = clut[v]
	ld.ub	%r15, [%r15]
	ld.b	[%r6], %r15
	;
ppu4_draw_chr_FAST_SKIP_X:
	xsub	%r12, %r12, 1
	xjrne.d	ppu4_draw_chr_FAST_DO_X		;   } while(--x)
	add	%r6, 1				;   vbuff++ *delay*
	;
	xadd	%r6, %r6, 128-4			;   vbuff += 128-4  (start of the next destination row)
	;
	xsub	%r13, %r13, 1
	xjrne.d	ppu4_draw_chr_FAST_DO_Y		; } while(--y)
	add	%r14, 2				; pattern += 2 *delay*  (use every second pattern row)
	;
	xjp	ppu4_draw_chr_EXIT
	;--------------------------------------------------------------------
	; CLIP path: same drawing loop, but %r12/%r13 keep tracking the
	; current xpos/ypos so each row and pixel can be bounds-checked.
	; The loop counters live in %r0/%r1, which are saved here.
	;--------------------------------------------------------------------
ppu4_draw_chr_CLIP:
	pushn	%r1
	;
	xld.w	%r1, 4				; %r1 = y = 4
ppu4_draw_chr_CLIP_DO_Y:			; do {
	;
	cmp	%r13, %r4			;   if(ypos < scan_start) goto SKIP_Y
	xjrlt	ppu4_draw_chr_CLIP_SKIP_Y
	cmp	%r13, %r5			;   if(ypos >= scan_end) goto SKIP_Y
	xjrge	ppu4_draw_chr_CLIP_SKIP_Y
	;
	xld.ub	%r10, [%r14+0]			;   %r10 = pattern[0] (lobit = 00000000 00000000 00000000 a-c-e-g-)
	xld.ub	%r11, [%r14+8]			;   %r11 = pattern[1] (hibit = 00000000 00000000 00000000 A-C-E-G-)
	swap	%r10, %r10			;   lobit = a-c-e-g- 00000000 00000000 00000000
	swap	%r11, %r11			;   hibit = A-C-E-G- 00000000 00000000 00000000
	;
	xld.w	%r0, 4				;   %r0 = x = 4
ppu4_draw_chr_CLIP_DO_X:			;   do {
	;
	add	%r11, %r11			;     hibit = -C-E-G-0 00000000 00000000 00000000, %psr(C) = A
	adc	%r10, %r10			;     lobit = -c-e-g-0 00000000 00000000 0000000A, %psr(C) = a
	adc	%r10, %r10			;     lobit = c-e-g-00 00000000 00000000 000000Aa
	xand	%r15, %r10, 3			;     %r15  = 00000000 00000000 00000000 000000Aa  (v = 2-bit pixel value)
	xjreq.d	ppu4_draw_chr_CLIP_SKIP_X	;     if(!v) goto SKIP_X   (value 0 is transparent)
	sll	%r11, 1				;     hibit = C-E-G-00 00000000 00000000 00000000  *delay*
	;
	xcmp	%r12, 128			;     if(xpos < 0 || xpos >= 128) goto SKIP_X  (one unsigned compare covers both)
	xjruge	ppu4_draw_chr_CLIP_SKIP_X
	;
	add	%r15, %r7			;     *vbuff = clut[v]
	ld.ub	%r15, [%r15]
	ld.b	[%r6], %r15
	;
ppu4_draw_chr_CLIP_SKIP_X:
	xadd	%r12, %r12, 1			;     xpos++
	;
	xsub	%r0, %r0, 1
	xjrne.d	ppu4_draw_chr_CLIP_DO_X		;   } while(--x)
	add	%r6, 1				;   vbuff++ *delay*
	;
	xsub	%r12, %r12, 4			;   xpos  -= 4  (back to the left edge of the cell)
	xsub	%r6, %r6, 4			;   vbuff -= 4
ppu4_draw_chr_CLIP_SKIP_Y:
	xadd	%r13, %r13, 1			;   ypos++
	xadd	%r6, %r6, 128			;   vbuff += 128  (next destination row)
	;
	xsub	%r1, %r1, 1
	xjrne.d	ppu4_draw_chr_CLIP_DO_Y		; } while(--y)
	add	%r14, 2				; pattern += 2 *delay*  (use every second pattern row)
	;
	popn	%r1
	;--------------------------------------------------------------------
ppu4_draw_chr_EXIT:
	ret

;****************************************************************************
;	
;****************************************************************************

#endif /*PPU_ASM*/
