/*	
 *	clipwell.c
 *
 *	Random number generation algorithm WELL
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2001-2012 Naoyuki Sawa
 *
 *	* Thu Dec 06 17:08:54 JST 2012 Naoyuki Sawa
 *	- Split out from clipmisc.c.
 */
#include "clip.h"

/****************************************************************************
 *	Random number generation algorithm WELL
 ****************************************************************************/

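/* - Unlike the MT reference implementation, the WELL reference implementation does not seem to
 *   present a standard initialization method (= a way to build the initial state vector).
 *   In principle it works as long as state[0...15] contains at least one 1 bit.
 *   A feature of WELL is that it escapes quickly even from a state vector with many 0 bits.
 * - Even so, if the initial vector has too many 0 bits, the first random values come out close to 0,
 *   so a reasonably scattered initial vector is preferable.
 *   Therefore a simple generator (the rnd32 function = Xorshift method) is used to build the initial vector.
 *   When porting to another platform, it is fine to change this to build the initial vector with
 *   the standard srand/rand functions instead.
 */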
/* Seed a WELL generator.
 *
 * Fills state[0] .. state[15] of `self`, in ascending order, with sixteen
 * successive values returned by rnd32(&s). `s` is the caller-supplied seed;
 * rnd32 receives the address of the local copy, so the caller's own variable
 * is never touched.
 */
void WELL_init(WELL* self, unsigned s) {
	int k;
	for(k = 0; k < 16; k++) {
		self->state[k] = rnd32(&s);
	}
}

/*--------------------------------------------------------------------------*/

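/* - Based on "WELL512a.c", the reference implementation of WELL with period 2^512-1,
 *   slightly rearranged so that it is easy to convert to assembly.
 *   The graduation thesis by Arai (Ritsumeikan University), "Hardware implementation and
 *   evaluation of the ... WELL" (Feb 20, 2008)
 *   (http://www.hpc.se.ritsumei.ac.jp/papers/b08/arai.pdf) was used as a reference.
 */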
#ifndef PIECE
/* Advance the generator by one step and return the new output word.
 *
 * Reads state[0], state[13], state[9] and state[15], derives two new words
 * (next0, next1), then rotates the state array one slot towards higher
 * indices: state[0] = next0, state[1] = next1, state[k] = old state[k-1]
 * for k = 2..15. The previous state[0] and state[15] are consumed.
 *
 * Returns next0, i.e. the value just stored in state[0].
 */
unsigned WELL_next(WELL* self) {
	unsigned* state = self->state;
	int k;

	/* Taps of the recurrence. */
	unsigned v0  = state[ 0];
	unsigned vm1 = state[13];
	unsigned vm2 = state[ 9];

	/* Intermediate words. */
	unsigned z0 = state[15];
	unsigned z1 = (v0 ^ (v0 << 16)) ^ (vm1 ^ (vm1 << 15));
	unsigned z2 = vm2 ^ (vm2 >> 11);

	/* New words. The low nibble '4' of the mask has no effect because the
	 * low five bits of (next1 << 5) are always zero; it is kept as-is to
	 * match the reference implementation. */
	unsigned next1 = z1 ^ z2;
	unsigned next0 = z0 ^ z2
	               ^ (z0 <<  2)
	               ^ (z1 << 18)
	               ^ (z2 << 28)
	               ^ ((next1 << 5) & 0xDA442D24);

	/* Shift state[1..14] up by one slot, working from the top down so no
	 * element is overwritten before it has been copied. */
	for(k = 15; k >= 2; k--) {
		state[k] = state[k - 1];
	}
	state[1] = next1;
	state[0] = next0;

	return next0;
}
#else /*PIECE*/
/* PIECE build: WELL_next() implemented as a top-level assembly block.
 *
 * The instruction sequence follows the portable C version in the other
 * branch of this #ifndef statement by statement; the ";//" annotations on
 * each instruction name the C variable held in each register.
 * According to those annotations, %r12 holds p (= self->state) on entry and
 * %r10 holds newV0 when `ret` executes (presumably the argument and
 * return-value registers of the target ABI - TODO confirm).
 *
 * The final loop runs 15 times (%r14 counts down from 15); the instruction
 * after "jrne.d" is marked *delay* and performs tmp1 = tmp2 for each pass.
 *
 * NOTE(review): the annotations on the six instructions from "xsll %r5, 18"
 * through "xsll %r7, 5" mislabel their operands. At that point %r5 holds z1,
 * %r6 holds z2 and %r7 is a copy of %r11 (newV1), so the operations are
 * z1 << 18, z2 << 28 and newV1 << 5, as in the C version. The instructions
 * themselves are consistent with the C code; only the annotation text is off.
 */
unsigned WELL_next(WELL* self);
asm("
		.code
		.align		1
		.global		WELL_next
WELL_next:
		;// %r12 := p = self->state
		xld.w		%r13, [%r12+ 0]		;// %r13 := V0  = p[ 0]
		xld.w		%r14, [%r12+52]		;// %r14 := VM1 = p[13]
		xld.w		%r15, [%r12+36]		;// %r15 := VM2 = p[ 9]
		;// %r12 := p
		;// %r13 := V0
		;// %r14 := VM1
		;// %r15 := VM2
		xld.w		%r4, [%r12+60]		;// %r4  := z0  = p[15]
		ld.w		%r5, %r13		;// %r5  := z1  = V0
		xsll		%r13, 16		;// %r13 :=       V0  << 16
		xor		%r5, %r13		;// %r5  := z1 ^= V0  << 16
		xor		%r5, %r14		;// %r5  := z1 ^= VM1
		xsll		%r14, 15		;// %r14 :=       VM1 << 15
		xor		%r5, %r14		;// %r5  := z1 ^= VM1 << 15
		ld.w		%r6, %r15		;// %r6  := z2  = VM2
		xsrl		%r15, 11		;// %r15 :=       VM2 >> 11
		xor		%r6, %r15		;// %r6  := z2 ^= VM2 >> 11
		;// %r4  := z0
		;// %r5  := z1
		;// %r6  := z2
		;// %r12 := p
		ld.w		%r10, %r4		;// %r10 := newV0  = z0
		ld.w		%r11, %r5		;// %r11 := newV1  = z1
		xor		%r10, %r6		;// %r10 := newV0 ^= z2
		xor		%r11, %r6		;// %r11 := newV1 ^= z2
		xsll		%r4,  2			;// %r4  :=          z0 <<  2
		xor		%r10, %r4		;// %r10 := newV0 ^= z0 <<  2
		xsll		%r5, 18			;// %r5  :=          z0 << 18
		xor		%r10, %r5		;// %r10 := newV0 ^= z0 << 18
		xsll		%r6, 28			;// %r6  :=          z0 << 28
		xor		%r10, %r6		;// %r10 := newV0 ^= z0 << 28
		ld.w		%r7, %r11		;// %r7  :=           newV0
		xsll		%r7,  5			;// %r7  :=           newV0 << 5
		xand		%r7, %r7, 0xDA442D24	;// %r7  :=          (newV1 << 5) & 0xDA442D24
		xor		%r10, %r7		;// %r10 := newV0 ^= (newV1 << 5) & 0xDA442D24
		;// %r10 := newV0
		;// %r11 := tmp1 = newV1
		;// %r12 := p
		ld.w		[%r12]+, %r10		;// *p++  = newV0
		ld.w		%r14, 15		;// %r14 := i = 15
		 ld.w		%r13, [%r12]		;// %r13 := tmp2 = *p
		 ld.w		[%r12]+, %r11		;// *p++  = tmp1
		 sub		%r14, 1			;// %r14 := i--
		 jrne.d		-3
		 ld.w		%r11, %r13		;// %r11 := tmp1 = tmp2		*delay*
		ret
");
#endif /*PIECE*/

