/*	
 *	clipdct.c
 *
 *	Discrete cosine transform (DCT)
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2003 Naoyuki Sawa
 *
 *	* Mon Oct 20 04:14:00 JST 2003 Naoyuki Sawa
 *	- Initial version.
 */
#include "clip.h"

/****************************************************************************
 *	DCT_REAL
 ****************************************************************************/

/* Matrix-vector kernel shared by fdct_real() and idct_real(); defined further down in this file. */
static void _dct_real(int N, const double* T, const double* src, double* dst);

/*
 * Create a double-precision DCT context for transforms of length |N|.
 *
 *   N > 0 : orthonormal DCT. Forward rows are scaled by sqrt(1/N) for k = 0
 *           and sqrt(2/N) for k > 0; the inverse matrix is the transpose of
 *           the forward matrix.
 *   N < 0 : "plain" DCT of length -N. Forward rows are scaled by 1/N (k = 0)
 *           or 2/N (k > 0) and the inverse matrix holds the bare cosines, so
 *           each forward coefficient is the amplitude of its cosine component.
 *
 * Every allocation failure ends in DIE(). The returned context is released
 * with dct_real_free().
 */
DCT_REAL*
dct_real_new(int N)
{
	DCT_REAL* self;
	double* fwd;		/* forward (FDCT) matrix, row-major, row = frequency k */
	double* inv;		/* inverse (IDCT) matrix, row-major, row = sample n    */
	double pi, scale, c;
	int plain, size, row, col;

	/* The sign of N only carries the variant flag; the length is its magnitude. */
	plain = (N < 0);
	size = abs(N);

	/* Context structure itself. */
	self = (DCT_REAL*)calloc(1, sizeof(DCT_REAL));
	if(!self) DIE();
	self->N = size;

	/* Scratch vector used by fdct_real()/idct_real() when src and dst are the same array. */
	self->v = (double*)calloc(size, sizeof(double));
	if(!self->v) DIE();

	/* The two size x size transform matrices. */
	self->T[0] = (double*)calloc(size * size, sizeof(double));
	if(!self->T[0]) DIE();
	self->T[1] = (double*)calloc(size * size, sizeof(double));
	if(!self->T[1]) DIE();
	fwd = self->T[0];
	inv = self->T[1];

	/* Fill both matrices from the same cosine basis. */
	pi = acos(-1);
	for(row = 0; row < size; row++) {
		if(plain) {
			scale = (row == 0) ? 1.0 / size : 2.0 / size;
		} else {
			scale = (row == 0) ? sqrt(1.0 / size) : sqrt(2.0 / size);
		}
		for(col = 0; col < size; col++) {
			c = cos(((2 * col + 1) * row * pi) / (2 * size));
			fwd[row * size + col] = scale * c;		/* FDCT */
			inv[col * size + row] = plain ? c : scale * c;	/* IDCT (transposed position) */
		}
	}

	return self;
}

/*
 * Release a context created by dct_real_new(): the scratch vector, both
 * transform matrices and finally the structure itself.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void
dct_real_free(DCT_REAL* dct)
{
	/* Nothing to release; avoids dereferencing a null context. */
	if(!dct) return;

	/* Scratch vector used for in-place transforms. */
	free(dct->v);

	/* Forward and inverse transform matrices. */
	free(dct->T[0]);
	free(dct->T[1]);

	/* The context structure. */
	free(dct);
}

/*
 * Forward Discrete Cosine Transform.
 *
 * Multiplies the dct->N samples at src by the forward matrix and writes the
 * dct->N coefficients to dst. src and dst may be the very same array: the
 * input is then staged in the context's scratch vector first.
 */
void
fdct_real(DCT_REAL* dct, const double* src, double* dst)
{
	const double* input = src;

	if(input == dst) {
		/* In-place call: read from a private copy so the output cannot clobber unread input. */
		memcpy(dct->v, input, sizeof(double) * dct->N);
		input = dct->v;
	}
	_dct_real(dct->N, dct->T[0], input, dst);
}

/*
 * Inverse Discrete Cosine Transform.
 *
 * Multiplies the dct->N coefficients at src by the inverse matrix and writes
 * the dct->N samples to dst. src and dst may be the very same array: the
 * input is then staged in the context's scratch vector first.
 */
void
idct_real(DCT_REAL* dct, const double* src, double* dst)
{
	const double* input = src;

	if(input == dst) {
		/* In-place call: read from a private copy so the output cannot clobber unread input. */
		memcpy(dct->v, input, sizeof(double) * dct->N);
		input = dct->v;
	}
	_dct_real(dct->N, dct->T[1], input, dst);
}

/*
 * dst = T * src for a row-major N x N matrix T.
 *
 * Each output element is the dot product of one matrix row with src,
 * accumulated left to right. An output is stored only after its whole row
 * has been read.
 */
static void
_dct_real(int N, const double* T, const double* src, double* dst)
{
	const double* row = T;
	int out, in;

	for(out = 0; out < N; out++, row += N) {
		double acc = 0;

		for(in = 0; in < N; in++) {
			acc += row[in] * src[in];
		}
		dst[out] = acc;
	}
}

/****************************************************************************
 *	DCT_HALF
 ****************************************************************************/

#define DCT_HALF_SHIFT	8			/* Number of fractional bits in the fixed-point transform-matrix entries */
#define DCT_HALF_SCALE	(1 << DCT_HALF_SHIFT)	/* Fixed-point scale factor applied to the transform-matrix entries */

/*
 * Fixed-point matrix-vector kernel shared by fdct_half() and idct_half();
 * defined further down in this file.
 * NOTE(review): the reason for the `unused` attribute is not evident from this
 * file, since both wrappers call the function - confirm before removing it.
 */
static void _dct_half(int N, const short* T, const short* src, short* dst) __attribute__((unused));

/*
 * Create a 16-bit fixed-point DCT context for transforms of length |N|.
 *
 * Matrix entries are the real-valued coefficients multiplied by
 * DCT_HALF_SCALE and converted with a (short) cast, i.e. truncated toward
 * zero.
 *
 *   N > 0 : orthonormal DCT. Forward rows are scaled by sqrt(1/N) for k = 0
 *           and sqrt(2/N) for k > 0; the inverse matrix is the transpose of
 *           the forward matrix.
 *   N < 0 : "plain" DCT of length -N. Forward rows are scaled by 1/N (k = 0)
 *           or 2/N (k > 0) and the inverse matrix holds the bare cosines.
 *
 * Every allocation failure ends in DIE(). The returned context is released
 * with dct_half_free().
 */
DCT_HALF*
dct_half_new(int N)
{
	DCT_HALF* self;
	short* fwd;		/* forward (FDCT, DCT-II) matrix, row-major, row = frequency k */
	short* inv;		/* inverse (IDCT, DCT-III) matrix, row-major, row = sample n   */
	double pi, scale, c;
	int plain, size, row, col;

	/* The sign of N only carries the variant flag; the length is its magnitude. */
	plain = (N < 0);
	size = abs(N);

	/* Context structure itself. */
	self = (DCT_HALF*)calloc(1, sizeof(DCT_HALF));
	if(!self) DIE();
	self->N = size;

	/* Scratch vector used by fdct_half()/idct_half() when src and dst are the same array. */
	self->v = (short*)calloc(size, sizeof(short));
	if(!self->v) DIE();

	/* The two size x size transform matrices. */
	self->T[0] = (short*)calloc(size * size, sizeof(short));
	if(!self->T[0]) DIE();
	self->T[1] = (short*)calloc(size * size, sizeof(short));
	if(!self->T[1]) DIE();
	fwd = self->T[0];
	inv = self->T[1];

	/* Fill both matrices from the same cosine basis, converted to fixed point. */
	pi = acos(-1);
	for(row = 0; row < size; row++) {
		if(plain) {
			scale = (row == 0) ? 1.0 / size : 2.0 / size;
		} else {
			scale = (row == 0) ? sqrt(1.0 / size) : sqrt(2.0 / size);
		}
		for(col = 0; col < size; col++) {
			c = cos(((2 * col + 1) * row * pi) / (2 * size));
			fwd[row * size + col] = (short)(scale * c * DCT_HALF_SCALE);	/* FDCT */
			if(plain) {
				inv[col * size + row] = (short)(c * DCT_HALF_SCALE);		/* IDCT, unscaled cosine */
			} else {
				inv[col * size + row] = (short)(scale * c * DCT_HALF_SCALE);	/* IDCT, transposed FDCT entry */
			}
		}
	}

	return self;
}

/*
 * Release a context created by dct_half_new(): the scratch vector, both
 * transform matrices and finally the structure itself.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void
dct_half_free(DCT_HALF* dct)
{
	/* Nothing to release; avoids dereferencing a null context. */
	if(!dct) return;

	/* Scratch vector used for in-place transforms. */
	free(dct->v);

	/* Forward and inverse transform matrices. */
	free(dct->T[0]);
	free(dct->T[1]);

	/* The context structure. */
	free(dct);
}

/*
 * Forward Discrete Cosine Transform, 16-bit fixed-point version.
 *
 * Runs the dct->N samples at src through the forward matrix and writes the
 * dct->N coefficients to dst. src and dst may be the very same array: the
 * input is then staged in the context's scratch vector first.
 */
void
fdct_half(DCT_HALF* dct, const short* src, short* dst)
{
	const short* input = src;

	if(input == dst) {
		/* In-place call: read from a private copy so the output cannot clobber unread input. */
		memcpy(dct->v, input, sizeof(short) * dct->N);
		input = dct->v;
	}
	_dct_half(dct->N, dct->T[0], input, dst);
}

/*
 * Inverse Discrete Cosine Transform, 16-bit fixed-point version.
 *
 * Runs the dct->N coefficients at src through the inverse matrix and writes
 * the dct->N samples to dst. src and dst may be the very same array: the
 * input is then staged in the context's scratch vector first.
 */
void
idct_half(DCT_HALF* dct, const short* src, short* dst)
{
	const short* input = src;

	if(input == dst) {
		/* In-place call: read from a private copy so the output cannot clobber unread input. */
		memcpy(dct->v, input, sizeof(short) * dct->N);
		input = dct->v;
	}
	_dct_half(dct->N, dct->T[1], input, dst);
}

/*
 * Fixed-point matrix-vector kernel behind fdct_half() and idct_half().
 *
 * The disabled C version below states the intended result: for each of the N
 * rows of T, dst[k] = (sum over n of T[k*N + n] * src[n]) >> DCT_HALF_SHIFT.
 *
 * The active implementation is hand-written assembly for the target CPU
 * (presumably the S1C33 used by P/ECE - confirm). Points to keep in mind when
 * touching it:
 *  - It never references the C parameters by name. It reads %r12 (N),
 *    %r13 (T), %r14 (src) and stores through %r15 with post-increment, so it
 *    relies on the arguments arriving in exactly those registers.
 *  - %alr is loaded from %r8 before every `mac`.
 *    NOTE(review): presumably %r8 holds zero by ABI convention, so this clears
 *    the low accumulator word; %ahr is not written here - confirm.
 *  - NOTE(review): `mac %r4` presumably performs %r4 multiply-accumulate steps
 *    over the halfword streams addressed by %r5 (src) and %r6 (T), advancing
 *    both pointers - confirm against the CPU manual. %r5 is reloaded from
 *    %r14 for every row, whereas %r6 is loaded once and keeps advancing
 *    through the matrix.
 *  - The right shift uses the literal 8 rather than DCT_HALF_SHIFT; the two
 *    have to be kept in sync by hand.
 *  - The asm text ends with its own `ret`, so the function returns from
 *    inside the asm statement.
 */
static void
_dct_half(int N, const short* T, const short* src, short* dst)
{
#if 0
//	/* Slow reference version in portable C (disabled) */
//	int k, n;
//	int tmp;
//	const short* v;
//	for(k = 0; k < N; k++) {
//		v = src;
//		tmp = 0;
//		for(n = 0; n < N; n++) {
//			tmp += *T++ * *v++;
//		}
//		*dst++ = tmp >> DCT_HALF_SHIFT;
//	}
#else
	/* Assembly version using the CPU's mac (multiply-accumulate) instruction */
	asm("
	; !!
	; mac߂͎I%ahr:%alrZbg܂!!B
	; macߎsOɖI0[hKv܂B
	;
	ld.w %alr, %r8		; macp%alrZbg
	ld.w %r7, %r12		; %r7 <= N ([vJE^)
	ld.w %r6, %r13		; %r6 <= T
_dct_half_L10:
	;
	ld.w %r5, %r14		; %r5 <= src
	ld.w %r4, %r12		; %r4 <= N
	mac %r4			; ݍ݉Z
	ld.w %r4, %alr
	xsra %r4, 8		; Œ菬킹
	ld.h [%r15]+, %r4	; ʊi[
	;
	xsub %r7, %r7, 1	; [v
	xjrne.d _dct_half_L10
	ld.w %alr, %r8		; (delay) ڈȍ~macp%alrZbg
	;
	ret
	");
#endif
}

