/*
 *	clipncs.c
 *
 *	Curve interpolation
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2009 Naoyuki Sawa
 *
 *	* Tue Sep 22 12:42:29 JST 2009 Naoyuki Sawa
 *	- Initial version.
 *	* Wed Sep 23 12:48:12 JST 2009 Naoyuki Sawa
 *	- Sped up the segment search in NCS_interpolate().
 *	  This is an algorithm-level speed-up, made before converting the routine to assembly.
 *	* Wed Sep 23 15:38:37 JST 2009 Naoyuki Sawa
 *	- Rewrote NCS_interpolate() in assembly.
 *	* Sat Sep 26 16:40:28 JST 2009 Naoyuki Sawa
 *	- Added ferguson_coons_interpolate() and CRS_interpolate().
 *	* Sat Sep 26 23:15:21 JST 2009 Naoyuki Sawa
 *	- Rewrote ferguson_coons_interpolate() in assembly.
 */
#include "clip.h"

#ifndef PIECE
#define NOASM	// When this symbol is defined, the C-language versions are used (it is defined here whenever PIECE is not).
#endif /*PIECE*/

/****************************************************************************
 *	Natural cubic spline (NCS)
 ****************************************************************************/

NCS*
NCS_new(const float* t, int t_stride, const float* p, int p_stride, int n)
{
#define T(i) (*(const float*)((const char*)t + (t_stride * (i))))
#define P(i) (*(const float*)((const char*)p + (p_stride * (i))))

	NCS* self;
	int n_seg;
	float* u/*[n_seg+1]*/; /* u[0]=u[n_seg]=0 */
	int i;

	/* ȂƂ̐ړ_͕Kv */
	if(n <= 0) {
		DIE();
	}

	/* ړ_1ł͕ԂłȂ̂ŁAW2̕ԂƌȂ */
	if(n == 1) {
		static const float _t[2] = { 0.0f, 1.0f };
		t = _t;
		t_stride = sizeof(float);
		p_stride = 0; /* W*pQƂ邱Ƃɂ */
		n = 2;
	}

	/* Ԑ = ړ_ - 1 */
	n_seg = n - 1;

	/* \̂̃m */
	self = calloc(sizeof(NCS) + sizeof(NCS_SEG) * n_seg, 1);
	if(!self) {
		DIE();
	}

	/* Ԑi[ */
	self->n_seg = n_seg;

	/* Ả̃m */
	u = calloc(sizeof(float) * (n_seg + 1), 1); /* u[0]=u[n_seg]=0 */
	if(!u) {
		DIE();
	}
	if(n_seg >= 2) {
		/* A */
		float* m/*[n_seg-1][n_seg-1]*/;
		float* v/*[n_seg-1]*/;
		m = calloc(sizeof(float) * (n_seg - 1) * (n_seg - 1), 1);
		if(!m) {
			DIE();
		}
		v = calloc(sizeof(float) * (n_seg - 1), 1);
		if(!v) {
			DIE();
		}
		for(i = 0; i < n_seg - 1; i++) {
			float h0 = T(i + 1) - T(i    );
			float h1 = T(i + 2) - T(i + 1);
			m[i * (n_seg - 1) + i] = (h0 + h1) * 2.0f;
			if(i < n_seg - 2) {
				m[i * (n_seg - 1) + (i + 1)] = h1;
				m[(i + 1) * (n_seg - 1) + i] = h1;
			}
			v[i] = (((P(i + 2) - P(i + 1)) / h1) - ((P(i + 1) - P(i)) / h0)) * 6.0f;
		}
		solve_simultaneous_equations(m, v, u + 1, n_seg - 1); /* u[0]=u[n_seg]=0 */
		free(m);
		free(v);
	}

	/* eԂ̌Wi[ */
	for(i = 0; i < n_seg; i++) {
		self->seg[i].t = T(i);
		self->seg[i].a = (u[i + 1] - u[i] ) / (T(i + 1) - T(i)) / 6.0f;
		self->seg[i].b = u[i] / 2.0f;
		self->seg[i].c = ((P(i + 1) - P(i)) / (T(i + 1) - T(i))) -
		                 ((T(i + 1) - T(i)) * (u[i + 1] + u[i] * 2.0f) / 6.0f);
		self->seg[i].d = P(i);
	}

	/* Ả̃J */
	free(u);

	return self;

#undef T
#undef P
}

/*--------------------------------------------------------------------------*/

/* Releases a spline object.
 * [in]
 *	self		Spline to release. It is passed straight to free().
 */
void
NCS_free(NCS* self)
{
	free(self);
}

/*--------------------------------------------------------------------------*/

#ifdef NOASM
/* Evaluates a natural cubic spline at parameter t (C-language version, used when NOASM is defined).
 * [in]
 *	self		Spline to evaluate.
 *	t		Parameter value.
 * [out]
 *	return		Value of the cubic of the segment selected for t.
 * [note]
 *	- A t before the first segment is evaluated with the first segment,
 *	  and a t past the last segment is evaluated with the last segment.
 *	- 2009/09/23 (Naoyuki Sawa): the segment search was changed for speed; its result is unchanged.
 *	  The earlier code kept a pointer to the current segment and compared t with the NEXT segment's start.
 *	  The search now advances to the next segment first and steps back once at the end,
 *	  so the limit pointer is "last segment + 1" instead of the last segment.
 */
float
NCS_interpolate(NCS* self, float t)
{
	NCS_SEG* const limit = self->seg + self->n_seg; /* one past the last segment */
	NCS_SEG* cur;
	float dt;

	/* Walk forward while the candidate segment starts at or before t.
	 * The walk begins at the second segment, so stepping back afterwards
	 * never goes before the first one.
	 */
	for(cur = self->seg + 1; (cur != limit) && (cur->t <= t); cur++) {
		/* nothing */
	}
	cur--;

	/* Make the parameter relative to the start of the selected segment. */
	dt = t - cur->t;

	/* Evaluate the segment's cubic in Horner form. */
	return ((cur->a * dt + cur->b) * dt + cur->c) * dt + cur->d;
}
#else /*NOASM*/
/* Assembly version of NCS_interpolate(), used when NOASM is not defined.
 * Per the inline comments: on entry %r12 holds self and %r13 holds t; the segment search
 * advances first and steps back once at the end, then the cubic is evaluated in Horner form
 * with __subsf3/__mulsf3/__addsf3, the last addition being a tail jump (xjp.d) into __addsf3.
 * The code hard-codes 20 as sizeof(NCS_SEG), loads n_seg from offset 0 of *self and takes
 * seg[] to start at the following word.
 * NOTE(review): these offsets must match the NCS / NCS_SEG declarations (presumably in clip.h) - confirm before changing the structs.
 * NOTE(review): %r4-%r6 stay live across the runtime calls; presumably those routines preserve them - confirm.
 */
asm("
		.code
		.align		1
		.global		NCS_interpolate
NCS_interpolate:
		ld.w		%r4, %r13		;// %r4  := t
		ld.w		%r5, [%r12]+		;// %r5  := n_seg, %r12 := seg
		ld.w		%r6, %r12		;// %r6  := seg
		ld.w		%r9, 20			;// %r9  := sizeof(NCS_SEG)
		mlt.h		%r5, %r9		;// %alr := sizeof(NCS_SEG) * n_seg
		ld.w		%r5, %alr		;// %r5  := sizeof(NCS_SEG) * n_seg
		add		%r5, %r6		;// %r5  := end = seg + n_seg
		;//
NCS_interpolate_L1:					;// for(;;) {
		add		%r6, 20			;//   %r6  := seg++
		cmp		%r6, %r5		;//   if(seg == end)
		jreq		NCS_interpolate_L2	;//     break
		ld.w		%r12, [%r6]		;//   %r12 := seg->t
		xcall.d		__fcmps			;//   if(seg->t > t)
		ld.w		%r13, %r4		;//   %r13 := t			*delay*
		jrle		NCS_interpolate_L1	;//     break
NCS_interpolate_L2:					;// }
		sub		%r6, 20			;// seg--
		ld.w		%r13, [%r6]+		;// %r13 :=      seg->t
		xcall.d		__subsf3		;// %r10 := t -= seg->t
		ld.w		%r12, %r4		;// %r12 := t			*delay*
		ld.w		%r4, %r10		;// %r4  := t
		;//-------------------------------------;//
		ld.w		%r12, [%r6]+		;// %r12 :=     seg->a
		xcall.d		__mulsf3		;// %r10 := p = seg->a * t
		ld.w		%r13, %r4		;// %r13 :=              t	*delay*
		;//
		ld.w		%r13, [%r6]+		;// %r13 :=      seg->b
		xcall.d		__addsf3		;// %r10 := p += seg->b
		ld.w		%r12, %r10		;// %r12 := p			*delay*
		;//
		ld.w		%r12, %r10		;// %r12 := p
		xcall.d		__mulsf3		;// %r10 := p *= t
		ld.w		%r13, %r4		;// %r13 :=      t		*delay*
		;//
		ld.w		%r13, [%r6]+		;// %r13 :=      seg->c
		xcall.d		__addsf3		;// %r10 := p += seg->c
		ld.w		%r12, %r10		;// %r12 := p			*delay*
		;//
		ld.w		%r12, %r10		;// %r12 := p
		xcall.d		__mulsf3		;// %r10 := p *= t
		ld.w		%r13, %r4		;// %r13 :=      t		*delay*
		;//
		ld.w		%r13, [%r6]		;// %r13 :=      seg->d
		xjp.d		__addsf3		;// %r10 := p += seg->d
		ld.w		%r12, %r10		;// %r12 := p			*delay*
");
#endif /*NOASM*/

/****************************************************************************
 *	Ferguson/Coons curve
 ****************************************************************************/

#ifdef NOASM
/* Evaluates one Ferguson/Coons curve segment (C-language version, used when NOASM is defined).
 * [in]
 *	p0,p1		Values at the start (t = 0) and at the end (t = 1) of the segment.
 *	v0,v1		Tangents at the start and at the end of the segment.
 *	t		Position within the segment.
 * [out]
 *	return		(2p0-2p1+v0+v1)t^3 + (-3p0+3p1-2v0-v1)t^2 + v0*t + p0
 * [note]
 *	- The code is that formula rearranged so that it needs fewer operations:
 *	    diff  =  p0 - p1
 *	    quad  = (p0 - p1)     +  v0
 *	    cubic = (p0 - p1) * 2 + (v0     + v1)	... the t^3 coefficient
 *	    quad + cubic = (p0 - p1) * 3 + (v0 * 2 + v1)	... the negated t^2 coefficient
 */
float
ferguson_coons_interpolate(float p0, float p1, float v0, float v1, float t)
{
	const float diff  = p0 - p1;
	const float quad  = diff + v0;
	const float cubic = diff + quad + v1;

	/* Horner form; the t^2 coefficient is formed in place as (-quad - cubic). */
	return ((cubic * t - quad - cubic) * t + v0) * t + p0;
}
#else /*NOASM*/
/* Assembly version of ferguson_coons_interpolate(), used when NOASM is not defined.
 * Per the inline comments: p0, p1, v0 and v1 arrive in %r12-%r15 and t is read from [%sp+4].
 * The operations follow the same order as the C version:
 *   a = p0 - p1,  b = a + v0,  c = (a + b) + v1,  result = ((c*t - b - c)*t + v0)*t + p0
 * and the final addition is a tail jump (xjp.d) into __addsf3.
 * NOTE(review): %r4-%r7 stay live across the __subsf3/__addsf3/__mulsf3 calls; presumably those routines preserve them - confirm.
 */
asm("
		.code
		.align		1
		.global		ferguson_coons_interpolate
ferguson_coons_interpolate:
		;// %r12    := p0
		;// %r13    := p1
		;// %r14    := v0
		;// %r15    := v1
		;// [%sp+4] := t
		ld.w		%r4, %r12		;// %r4  := p0
		ld.w		%r5, %r14		;// %r5  := v0
		xcall.d		__subsf3		;// %r10 := a = p0 - p1
		ld.w		%r6, %r15		;// %r6  := v1					*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r6     := v1
		;// %r10    := a
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 :=     a
		ld.w		%r13, %r5		;// %r13 :=         v0
		xcall.d		__addsf3		;// %r10 := b = a + v0
		ld.w		%r7, %r10		;// %r7  := a					*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r6     := v1
		;// %r7     := a
		;// %r10    := b
		;// [%sp+4] := t
		ld.w		%r12, %r7		;// %r12 := a
		ld.w		%r13, %r10		;// %r13 :=     b
		xcall.d		__addsf3		;// %r10 := a + b
		ld.w		%r7, %r10		;// %r7  := b					*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r6     := v1
		;// %r7     := b
		;// %r10    := a + b
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 :=     a + b
		xcall.d		__addsf3		;// %r10 := c = a + b + v1
		ld.w		%r13, %r6		;// %r13 :=             v1			*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r7     := b
		;// %r10    := c
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 := c
		xld.w		%r13, [%sp+4]		;// %r13 :=     t
		xcall.d		__mulsf3		;// %r10 := c * t
		ld.w		%r6, %r10		;// %r6  := c					*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r6     := c
		;// %r7     := b
		;// %r10    := c * t
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 := c * t
		xcall.d		__subsf3		;// %r10 := c * t - b
		ld.w		%r13, %r7		;// %r13 :=         b				*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r6     := c
		;// %r10    := c * t - b
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 := c * t - b
		xcall.d		__subsf3		;// %r10 := c * t - b - c
		ld.w		%r13, %r6		;// %r13 :=             c			*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r10    := c * t - b - c
		;// [%sp+4] := t
		xld.w		%r13, [%sp+4]		;// %r13 :=                   t
		xcall.d		__mulsf3		;// %r10 := (c * t - b - c) * t
		ld.w		%r12, %r10		;// %r12 := (c * t - b - c)			*delay*
		;// %r4     := p0
		;// %r5     := v0
		;// %r10    := (c * t - b - c) * t
		;// [%sp+4] := t
		ld.w		%r12, %r10		;// %r12 := (c * t - b - c) * t
		xcall.d		__addsf3		;// %r10 := (c * t - b - c) * t + v0
		ld.w		%r13, %r5		;// %r13 :=                       v0		*delay*
		;// %r4     := p0
		;// %r10    := (c * t - b - c) * t + v0
		;// [%sp+4] := t
		xld.w		%r13, [%sp+4]		;// %r13 :=                              t
		xcall.d		__mulsf3		;// %r10 := ((c * t - b - c) * t + v0) * t
		ld.w		%r12, %r10		;// %r12 := ((c * t - b - c) * t + v0)		*delay*
		;// %r4     := p0
		;// %r10    := ((c * t - b - c) * t + v0) * t
		ld.w		%r12, %r10		;// %r12 := ((c * t - b - c) * t + v0) * t
		xjp.d		__addsf3		;// %r10 := ((c * t - b - c) * t + v0) * t + p0
		ld.w		%r13, %r4		;// %r13 :=                                  p0	*delay*
");
#endif /*NOASM*/

/****************************************************************************
 *	Catmull-Rom spline (CRS)
 ****************************************************************************/

static float CRS_get_p(const float* p, int p_stride, int n, int i);

/* Evaluates a Catmull-Rom spline through knots placed at the integer parameters 0, 1, 2, ...
 * [in]
 *	p,p_stride	Knot values; knot k is read from (const char*)p + p_stride*k.
 *	n		Number of knots.
 *	t		Parameter. floorf(t) selects the segment and the remainder is the
 *			position inside that segment.
 * [out]
 *	return		Interpolated value.
 * [note]
 *	- Knots whose index falls outside 0..(n-1) are supplied by CRS_get_p().
 *	- The tangent at each end of the segment is half the difference of the two knots around it.
 */
float
CRS_interpolate(const float* p, int p_stride, int n, float t)
{
	const int seg = floorf(t);  /* index of the knot that starts the segment */
	const float frac = t - seg; /* position inside the segment */
	float knot[4];              /* knots seg-1, seg, seg+1, seg+2 */
	float v_start;
	float v_end;
	int k;

	/* Fetch the four knots around the segment, from the one before it to the one after it. */
	for(k = 0; k < 4; k++) {
		knot[k] = CRS_get_p(p, p_stride, n, seg - 1 + k);
	}

	/* Tangent vectors at the start knot and at the end knot of the segment. */
	v_start = (knot[2] - knot[0]) * 0.5f;
	v_end   = (knot[3] - knot[1]) * 0.5f;

	/* Ferguson/Coons curve interpolation between the start and end knots. */
	return ferguson_coons_interpolate(knot[1], knot[2], v_start, v_end, frac);
}

/* Returns the knot at index i, or an estimated knot when i lies outside the supplied knots.
 * [in]
 *	p,p_stride,n	Knot values, byte stride between knots and knot count
 *			(same convention as the matching arguments of NCS_new()).
 *	i		Knot index.
 *			Within 0..(n-1), the supplied knot is returned as is.
 *			Below 0 or above (n-1), the knot position is estimated.
 * [out]
 *	return		Knot value.
 * [note]
 *	English rendering of the author's note (Sat Sep 26 12:13:44 JST 2009, Naoyuki Sawa).
 *	The original text was mis-encoded, so the wording is approximate.
 *	- This explains how knots "outside the end points" are calculated.
 *	- By definition a Catmull-Rom spline draws no curve in the end segments (those touching an end point):
 *	  drawing them needs the tangent at the end point, which needs a knot beyond the end point.
 *	- Drawing the end segments as well is convenient, and many Catmull-Rom routines appear to do so.
 *	  The knot beyond the end point is not given, so it has to be estimated, and no standard method is defined.
 *	  Implementations estimate it differently, so the shape of the end segments varies between them.
 *	- The simplest method mirrors the knot next to the end point across the end point.
 *	  It behaves reasonably, but the curve can look unnatural near the end.
 *	- This implementation estimates the outside knot from three knots: the end point and the two knots before it.
 *	  When those three knots move smoothly, the estimated knot is placed suitably and the end segment looks natural.
 *	  NOTE(review): the note credits an article on the D-Storm web site and mentions an archived copy under keep/ (*.7z);
 *	  the article title is not recoverable from the garbled text.
 *	- The referenced article only describes the first outside knot. Repeating the same step yields the second,
 *	  third, ... outside knots, and the code below includes that extension.
 */

/* Repeats the three-knot estimate `steps` times (steps >= 1) and returns the last estimate.
 * a, b, c are the knot two before the end point, the knot one before it, and the end point itself.
 */
static float
CRS_extrapolate(float a, float b, float c, int steps)
{
	float d;

	for(;;) {
		d = a + (c - b) * 3.0f; /* knot one step outside */
		if(!--steps) {
			return d;
		}
		/* Slide the window outward by one knot. */
		a = b;
		b = c;
		c = d;
	}
}

static float
CRS_get_p(const float* p, int p_stride, int n, int i)
{
#define KNOT(k) (*(const float*)((const char*)p + (p_stride * (k))))

	int over; /* how many knots i lies outside the supplied range */

	/* At least one knot is required. */
	if(n <= 0) {
		DIE();
	}

	/* With a single knot, that knot is returned whatever the index. */
	if(n == 1) {
		return KNOT(0);
	}

	/* Index before the first knot. */
	if(i < 0) {
		over = -i;
		if(n == 2) {
			/* Two knots: continue the straight line, (end point) + (end point - inner knot) * over. */
			return KNOT(0) + (KNOT(0) - KNOT(1)) * over;
		}
		/* Three or more knots: repeat the three-knot estimate `over` times. */
		return CRS_extrapolate(KNOT(2), KNOT(1), KNOT(0), over);
	}

	/* Index after the last knot. */
	if(i > n - 1) {
		over = i - (n - 1);
		if(n == 2) {
			/* Two knots: continue the straight line, (end point) + (end point - inner knot) * over. */
			return KNOT(n - 1) + (KNOT(n - 1) - KNOT(n - 2)) * over;
		}
		/* Three or more knots: repeat the three-knot estimate `over` times. */
		return CRS_extrapolate(KNOT(n - 3), KNOT(n - 2), KNOT(n - 1), over);
	}

	/* Index within the supplied knots. */
	return KNOT(i);

#undef KNOT
}

