/*	
 *	clipdct2.h
 *
 *	Two-dimensional DCT
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2009 Naoyuki Sawa
 *
 *	* Sat Jan 31 22:35:45 JST 2009 Naoyuki Sawa
 *	- Newly created.
 *	- Created fdct8x8half() and idct8x8half(), and also converted them to assembler.
 */
#include "clip.h"

/****************************************************************************
 *	
 ****************************************************************************/

/*
 * Forward-transform coefficient table: 8 rows x 8 columns, stored row by row.
 *
 *   entry[row][col] = ROUND(c(row) * cos((2*col + 1) * row * PI / 16) * 65536)
 *   c(0) = sqrt(1/8), c(row) = sqrt(2/8) for row > 0
 *
 * i.e. the 8-point DCT basis as fixed-point values with 16 fractional bits.
 * The spreadsheet formula that generated the values is kept below
 * (ROW() and COLUMN() are 1-based there):
 */
/* =ROUND(IF((ROW()-1)=0,SQRT(1/8),SQRT(2/8))*COS(((2*(COLUMN()-1)+1)*(ROW()-1)*PI())/(2*8))*65536,0) */
static const short fdct8x8half_tbl[8 * 8] = {
	23170, 23170, 23170, 23170, 23170, 23170, 23170, 23170,
	32138, 27246, 18205,  6393, -6393,-18205,-27246,-32138,
	30274, 12540,-12540,-30274,-30274,-12540, 12540, 30274,
	27246, -6393,-32138,-18205, 18205, 32138,  6393,-27246,
	23170,-23170,-23170, 23170, 23170,-23170,-23170, 23170,
	18205,-32138,  6393, 27246,-27246, -6393, 32138,-18205,
	12540,-30274, 30274,-12540,-12540, 30274,-30274, 12540,
	 6393,-18205, 27246,-32138, 32138,-27246, 18205, -6393,
};
/*
 * Inverse-transform coefficient table: 8 rows x 8 columns, stored row by row.
 *
 *   entry[row][col] = ROUND(c(col) * cos((2*row + 1) * col * PI / 16) * 65536)
 *   c(0) = sqrt(1/8), c(col) = sqrt(2/8) for col > 0
 *
 * This is the transpose of fdct8x8half_tbl, again as fixed-point values with
 * 16 fractional bits.  The spreadsheet formula that generated the values is
 * kept below (ROW() and COLUMN() are 1-based there):
 */
/* =ROUND(IF((COLUMN()-1)=0,SQRT(1/8),SQRT(2/8))*COS(((2*(ROW()-1)+1)*(COLUMN()-1)*PI())/(2*8))*65536,0) */
static const short idct8x8half_tbl[8 * 8] = {
	23170, 32138, 30274, 27246, 23170, 18205, 12540,  6393,
	23170, 27246, 12540, -6393,-23170,-32138,-30274,-18205,
	23170, 18205,-12540,-32138,-23170,  6393, 30274, 27246,
	23170,  6393,-30274,-18205, 23170, 27246,-12540,-32138,
	23170, -6393,-30274, 18205, 23170,-27246,-12540, 32138,
	23170,-18205,-12540, 32138,-23170, -6393, 30274,-27246,
	23170,-27246, 12540,  6393,-23170, 32138,-30274, 18205,
	23170,-32138, 30274,-27246, 23170,-18205, 12540, -6393,
};

/*--------------------------------------------------------------------------*/

#ifndef PIECE
/*
 * Shared two-pass kernel behind fdct8x8half() and idct8x8half().
 *
 * Viewing src, dst and _tbl as 8x8 matrices stored row by row, every pass
 * computes
 *
 *     out[k * 8 + row] = (32768 + SUM(l = 0..7) in[row * 8 + l] * _tbl[k * 8 + l]) >> 16
 *
 * i.e. each input row is multiplied with every table row (table entries are
 * fixed point with 16 fractional bits, 32768 rounds to nearest) and the
 * result is stored transposed.  Pass 1 reads src and writes a local scratch
 * block, pass 2 reads the scratch block and writes dst, so the two
 * transpositions cancel and dst = _tbl * src * transpose(_tbl).
 *
 * src  : 64 input values.
 * dst  : 64 output values; dst is only written in pass 2, after src has been
 *        fully consumed, so dst may be the same buffer as src.
 * _tbl : 64 coefficients.
 *
 * The sum is accumulated in unsigned arithmetic: partial sums of the
 * short * short products can exceed INT_MAX even when the final sum fits,
 * which would be undefined behaviour with a signed accumulator.  Unsigned
 * wrap-around is well defined and yields the same low 32 bits.
 */
static void _dct8x8half(const short* src/*[8 * 8]*/, short* dst/*[8 * 8]*/, const short* _tbl/*[8 * 8]*/) {
	short tmp[8 * 8];
	const short* in = src;
	short* out = tmp;
	int pass;
	int row;
	int k;
	int l;

	for(pass = 0; pass < 2; pass++) {
		for(row = 0; row < 8; row++) {
			const short* line = in + row * 8;
			for(k = 0; k < 8; k++) {
				const short* coef = _tbl + k * 8;
				unsigned int sum = 32768u; /* rounding offset for the >> 16 below */
				for(l = 0; l < 8; l++) {
					/* A single short * short product always fits in int. */
					sum += (unsigned int)((int)line[l] * (int)coef[l]);
				}
				/* Back to signed for the arithmetic shift; the store keeps the low 16 bits. */
				out[k * 8 + row] = (short)((int)sum >> 16);
			}
		}
		/* Second pass: transform the intermediate block into the caller's buffer. */
		in = tmp;
		out = dst;
	}
}
/*
 * Forward 8x8 transform (portable C build).
 * Runs the shared two-pass kernel _dct8x8half() on the 64 values at src with
 * the forward coefficient table fdct8x8half_tbl and stores 64 results at dst.
 */
void fdct8x8half(const short* src/*[8 * 8]*/, short* dst/*[8 * 8]*/) {
	const short* const coefficients = fdct8x8half_tbl;

	_dct8x8half(src, dst, coefficients);
}
/*
 * Inverse 8x8 transform (portable C build).
 * Runs the shared two-pass kernel _dct8x8half() on the 64 values at src with
 * the inverse coefficient table idct8x8half_tbl and stores 64 results at dst.
 */
void idct8x8half(const short* src/*[8 * 8]*/, short* dst/*[8 * 8]*/) {
	const short* const coefficients = idct8x8half_tbl;

	_dct8x8half(src, dst, coefficients);
}
#else /*PIECE*/
/*
 * P/ECE build: fdct8x8half() and idct8x8half() are provided by the assembly
 * text below instead of C code.
 *
 * Each entry point loads the address of its coefficient table into %r14 and
 * then runs the shared _dct8x8half code (fdct8x8half jumps to it,
 * idct8x8half falls through into it).  According to the inline comments the
 * registers mirror the variables of the C version:
 *   %r12 = src, %r13 = dst, %r5 = out, %r11 = tbl, %r14 = _tbl,
 *   %r6 / %r7 / %r15 = the i / j / k loop counters,
 * and 128 bytes reserved on the stack serve as the intermediate block tmp.
 * The innermost 8-term multiply-accumulate loop is a single `mac`
 * instruction; %alr is preloaded with 32768, and after `mac` only %alr is
 * read back, shifted right by 16 and stored as a halfword.
 *
 * NOTE(review): this assumes the calling convention passes src in %r12 and
 * dst in %r13, and that `mac` advances %r11 and %r12 past the 8 consumed
 * values -- confirm against the S1C33 documentation.
 */
void fdct8x8half(const short* src/*[8 * 8]*/, short* dst/*[8 * 8]*/);
void idct8x8half(const short* src/*[8 * 8]*/, short* dst/*[8 * 8]*/);
asm("
		.code
		.align		1
		.global		fdct8x8half
		.global		idct8x8half
fdct8x8half:
		xld.w		%r14, fdct8x8half_tbl		;// %r14 := _tbl = fdct8x8half_tbl
		jp		_dct8x8half
idct8x8half:
		xld.w		%r14, idct8x8half_tbl		;// %r14 := _tbl = idct8x8half_tbl
_dct8x8half:
		xsub		%sp, %sp, 128			;// %sp  := _tmp
		xld.w		%r4, 32768			;// %r4  := 32768
		;//
		ld.w		%r5, %sp			;// %r5  := out = tmp
		ld.w		%r6, 2				;// %r6  := i = 2
_dct8x8half_I:
		ld.w		%r7, 8				;// %r7  := j = 8
_dct8x8half_J:
		ld.w		%r11, %r14			;// %r11 := tbl = _tbl
		ld.w		%r15, 8				;// %r15 := k = 8
_dct8x8half_K:
		ld.w		%alr, %r4			;// %alr := sum = 32768
		ld.w		%r10, 8				;// %r10 := l = 8
		mac		%r10				;// {%ahr:%alr} += mac{%r10,%r11,%r12}
		ld.w		%r10, %alr			;// %r10 := sum
		xsra		%r10, 16			;// %r10 := sum >>= 16
		ld.h		[%r5], %r10			;// *out := sum
		;//
		add		%r5, 16				;// %r5  := out += 8
		sub		%r15, 1				;// %r15 := k--
		jrne.d		_dct8x8half_K
		sub		%r12, 16			;// %r12 := src -= 8				*delay*
		;//
		xsub		%r5, %r5, 126			;// %r5  := out -= 8 * 8 - 1
		sub		%r7, 1				;// %r7  := j--
		jrne.d		_dct8x8half_J
		add		%r12, 16			;// %r12 := src += 8				*delay*
		;//
		ld.w		%r5, %r13			;// %r5  := out = dst
		sub		%r6, 1				;// %r6  := i--
		jrne.d		_dct8x8half_I
		ld.w		%r12, %sp			;// %r12 := src = tmp				*delay*(undoc'd)
		;//
		xadd		%sp, %sp, 128
		ret
");
#endif /*PIECE*/

