/*	
 *	cliprenf.c
 *
 *	CLiP - Common Library for P/ECE
 *	Copyright (C) 2009 Naoyuki Sawa
 *
 *	* Sun Aug 10 01:12:48 JST 2008 Naoyuki Sawa
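 *	- 1st release.
 *	- Floating-point 3D rendering module.
 *	  There are not many comments yet, but the algorithm is almost the same as clipren3.c, so please refer to clipren3.c.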
 *	- ȑOɂA_3D`惂W[gĂ܂A(keep/01_ŉZ`R[hۑ/cliprend.c)A
 *	  x߂ɁAŒ菬_3D`惂W[(clipren3.c)ɒuo܂܂B
 *	  ̂сA_ZƂɂAӂсA_3D`惂W[gp\ƂȂ܂B
 *	  ȑO̕_3D`惂W[̂܂܂ł͂ȂAŒ菬_łœꂽHv荞ŁAĂ܂B
 *	  Œ菬_ł̐\ƊrׂƁA_ɂ܂A悻2/3`3/4x̐\͏oĂƎv܂B
 *	* Fri Aug 15 23:16:48 JST 2008 Naoyuki Sawa
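 *	- Added rendf_wire().
 *	  The low-level drawing relies on render_line(), and there is even the overhead of building a temporary RENDER structure,
 *	  but by rewriting vertex clipping etc. in optimized assembly, it reaches a speed comparable to render_wire().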
 *	* Sun Aug 17 22:24:29 JST 2008 Naoyuki Sawa
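 *	- Renamed vecf -> vec3f, matf -> mat3f, vecx -> vec3x, matx -> mat3x.
 *	  This was done with a global search-and-replace, so the names in comments written before this date were changed as well.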
 *	* Fri Aug 22 00:41:54 JST 2008 Naoyuki Sawa
 *	- Added rendf_line(). It is the floating-point version of render_line_3d().
 *	  RENDF is dedicated to 3D drawing, so 3D is implied and the "_3d" suffix was dropped.
 *	* Sun Nov 09 12:46:57 JST 2008 Naoyuki Sawa
 *	- For culling, added fields to the RENDF structure and extra processing to rendf_init().
 *	- Added rendf_vfc_sphere(), meshf_bounding_sphere() and wiref_bounding_sphere().
 *	* Sat Apr 11 05:29:02 JST 2009 Naoyuki Sawa
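 *	- Moved part of the processing done inside rendf_mesh() into select_draw_scanline().
 *	  For the reason behind this change, see the comment of the same date in the clipren3.c module.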
 */
#include "clip.h"

/*****************************************************************************
 *	RENDF
 *****************************************************************************/

/* Vertex used while clipping. The assembly routines in this file hard-code this layout (z at +8, 20-byte stride). */
typedef struct _CLIPVERTEX {
	float x, y, z;		/* + 0,12: vertex coordinates */
	float s, t;		/* +12, 8: texture coordinates */
} CLIPVERTEX;			/* =20 */

/* Layout of the temporary rendering work area that is overlaid on _def_vbuff[]. */
#define XFORMED_VECTORS_MAX	(sizeof _def_vbuff / sizeof(vec3f))	/* 128*88/12=938: maximum vertex count of a WIRE. The maximum vertex count of a MESH is 255 regardless of this, because MESH.nv is an unsigned char. */
typedef struct _RENDF_WORK_VBUFF {
	vec3f xformed_vectors[XFORMED_VECTORS_MAX];			/* 12*938=11256: buffer of transformed vertex coordinates */
} RENDF_WORK_VBUFF;							/* total  =11256: must not exceed the size of _def_vbuff[128*88=11264]!! */
#define _rendf_work_vbuff	((RENDF_WORK_VBUFF*)_def_vbuff)
#define _xformed_vectors	(_rendf_work_vbuff->xformed_vectors)

/* Layout of the temporary rendering work area that is overlaid on the FRAM4 region. */
#define N_VERTICES_MAX		8					/* Maximum number of vertices of one polygon. */
typedef struct _RENDF_WORK_FRAM4 {
	CLIPVERTEX clip_vertices[2][N_VERTICES_MAX];			/* 20*2*8= 320: two vertex buffers used for clipping (source/destination) */
	RENDVERTEX rend_vertices[N_VERTICES_MAX];			/* 10*8  =  80: vertex buffer handed to the drawing code */
} RENDF_WORK_FRAM4;							/* total  = 400: must not exceed the size of the FRAM4 region (FRAM4_SIZE=512)!! */
#define _rendf_work_fram4	((RENDF_WORK_FRAM4*)FRAM4_START)
#define _clip_vertices		(_rendf_work_fram4->clip_vertices)
#define _rend_vertices		(_rendf_work_fram4->rend_vertices)

//#define NOASM	/* Define NOASM to build the plain-C implementations instead of the assembly ones (e.g. to check the algorithm). */

/*
 * Prototypes of the helper routines implemented further down.
 *
 * - Without NOASM the helpers are provided by asm() labels rather than C
 *   definitions, so their prototypes carry no storage class (as before).
 * - With NOASM the helpers are defined in C as `static`.  A non-static
 *   prototype followed by a static definition gives the same identifier both
 *   external and internal linkage, which is undefined behavior in C and a hard
 *   error in current GCC, so the prototypes must be `static` in that build.
 */
#ifdef NOASM
#define CLIPRENF_LOCAL	static
#else /*NOASM*/
#define CLIPRENF_LOCAL	/*static*/
#endif /*NOASM*/
CLIPRENF_LOCAL const void* xform_vectors(const mat3f* m, const vec3f* v_in/*[nv]*/, vec3f* v_out/*[nv]*/, int nv);
CLIPRENF_LOCAL int pick_vertices(CLIPVERTEX* cv, const vec3f* v, const TCOORD* c, const FACE* f);
CLIPRENF_LOCAL void inhomogenize(CLIPVERTEX* cv, int nv);
CLIPRENF_LOCAL int cull_back(const CLIPVERTEX* cv, int nv);
CLIPRENF_LOCAL void init_vertices(RENDF* r, RENDVERTEX* rv, const CLIPVERTEX* cv, int nv);
CLIPRENF_LOCAL int clip_z(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc);
CLIPRENF_LOCAL int clip_xy(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc);
CLIPRENF_LOCAL int clip_z_sub(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc);
CLIPRENF_LOCAL int clip_xy_sub(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc);
#undef CLIPRENF_LOCAL
//{{2008/08/09: Debug aid for detecting clipping bugs caused by floating-point rounding error.
/*static*/ void detect_error(const RENDVERTEX* rv, int nv, int w, int h);
//}}2008/08/09: Debug aid for detecting clipping bugs caused by floating-point rounding error.

/* Selection of the scanline drawing function. */
static DSLPROC select_draw_scanline(RENDF* r, DSLPARAM* param);	/*{{2009/04/11: added}}*/

/*****************************************************************************
 *	
 *****************************************************************************/

/*
 * Initialise a RENDF rendering context.
 *
 *   r             : context to set up; it is cleared to zero first.
 *   surface       : destination surface, stored in r->surface.
 *   x, y          : viewport origin.
 *   width, height : viewport size; a value of 0 selects surface->w / surface->h.
 *   front, back   : stored in r->front / r->back and used by the Z clipping code.
 *   fovy          : field of view; fovy / 2 is passed to tanf().
 *
 * The clipping rectangle (r->left .. r->bottom) is the viewport clamped to the
 * surface, while the matrices are built from the unclamped viewport.
 */
void
rendf_init(RENDF* r, SURFACE* surface, int x, int y, int width, int height, float front, float back, float fovy)
{
	int clip_l;
	int clip_r;
	int clip_t;
	int clip_b;

	memset(r, 0, sizeof *r);

	r->surface = surface;
	r->front   = front;
	r->back    = back;

	/* A zero size means "use the whole surface". */
	if(width  == 0) width  = surface->w;
	if(height == 0) height = surface->h;

	/* Viewport rectangle, clamped to the surface bounds. */
	clip_l = (x < 0) ? 0 : x;
	clip_t = (y < 0) ? 0 : y;
	clip_r = x + width;
	clip_b = y + height;
	if(clip_r > surface->w) clip_r = surface->w;
	if(clip_b > surface->h) clip_b = surface->h;

	r->left   = clip_l;
	r->right  = clip_r;
	r->top    = clip_t;
	r->bottom = clip_b;

	/* Start from mat3f_1 and apply the viewport transform. */
	r->matrix = mat3f_1;
	mat3f_viewportz(&r->matrix, x, y, width, height);

	/*
	 * View Frustum Culling support (added 2008/11/09).
	 * mat3f_perspect(&r->matrix, fovy, (float)width / (float)height, front) was
	 * used here before; the half extents it would need are computed here for
	 * culling anyway, so mat3f_frustum() is now called directly with them.
	 */
	r->vfc_height_2 = front * tanf(fovy / 2.0f);				/* half height of the front clipping plane */
	r->vfc_width_2 = r->vfc_height_2 * ((float)width / (float)height);	/* half width of the front clipping plane */
	mat3f_frustum(&r->matrix, r->vfc_width_2 * 2.0f, r->vfc_height_2 * 2.0f, front);

	/* Keep the inverse of the combined matrix for the culling code. */
	r->vfc_matrix = r->matrix;
	mat3f_inv(&r->vfc_matrix);
}

/*---------------------------------------------------------------------------*/

/*
 * Push the current state onto the context's save stack.
 *
 * The leading sizeof(RENDF_SAVE) bytes of *r are copied into the next free
 * slot of r->save[], and r->save_cnt is incremented.  DIE() is called when the
 * stack already holds RENDF_SAVE_MAX entries.
 */
void
rendf_save(RENDF* r)
{
	int slot;

	if(!(r->save_cnt < RENDF_SAVE_MAX)) {
		DIE();
	}

	/* The counter is bumped before the copy, exactly as a post-increment in the argument would do. */
	slot = r->save_cnt++;
	memcpy(&r->save[slot], r, sizeof(RENDF_SAVE));
}

/*---------------------------------------------------------------------------*/

/*
 * Pop the most recently saved state from the context's save stack.
 *
 * The top entry of r->save[] is copied back over the leading
 * sizeof(RENDF_SAVE) bytes of *r, and r->save_cnt is decremented.
 * DIE() is called when there is nothing to restore.
 */
void
rendf_restore(RENDF* r)
{
	/*
	 * The stack is empty when save_cnt is 0, not only when it is negative:
	 * with the former "< 0" test an unbalanced restore decremented the
	 * counter to -1 and read r->save[-1].
	 */
	if(r->save_cnt <= 0) {
		DIE();
	}
	memcpy(r, &r->save[--r->save_cnt], sizeof(RENDF_SAVE));
}

/*****************************************************************************
 *	
 *****************************************************************************/

/*
 * Draw every face of a mesh through the rendering context r.
 *
 * Mesh data layout as read here: the MESH header is followed by mesh->nv
 * vec3f vertices, then mesh->nc TCOORD entries, then mesh->nf faces.  Each
 * face is a FACE header followed by face->nfv FACEVERTEX entries.
 *
 * Per face: gather the vertices, clip against the front/back planes, divide
 * by z, cull back faces, clip against the viewport rectangle, convert to
 * RENDVERTEX and hand the polygon to draw_primitive().
 *
 * The scanline drawing function is chosen by select_draw_scanline()
 * (split out of this function on 2009/04/11).
 */
void
rendf_mesh(RENDF* r, const MESH* mesh)
{
	int n_vectors;
	int n_tcoords;
	int n_faces;
	int n_vertices;
	const vec3f* vectors;
	const TCOORD* tcoords;
	const FACE *face;

	DSLPARAM param;
	DSLPROC draw_scanline = select_draw_scanline(r, &param);

	n_vectors = mesh->nv;
	n_tcoords = mesh->nc;
	n_faces = mesh->nf;

	/*
	 * Both the vertex transform and the face loop below are "do { } while(--n)"
	 * loops, so a count of zero would underflow the counter and run far past
	 * the mesh data.  An empty mesh simply draws nothing.
	 */
	if(n_vectors <= 0 || n_faces <= 0) {
		return;
	}

	vectors = (const vec3f*)(mesh + 1);
	/* xform_vectors() returns the address just past the input vertices, i.e. the texture coordinates. */
	tcoords = xform_vectors(&r->matrix, vectors, _xformed_vectors, n_vectors);
	face = (const FACE*)(tcoords + n_tcoords);

	do {
		n_vertices = pick_vertices(_clip_vertices[0], _xformed_vectors, tcoords, face);
		if(n_vertices < 3) goto L_SKIP;

		n_vertices = clip_z(r, _clip_vertices[0], _clip_vertices[1], n_vertices);
		if(n_vertices < 3) goto L_SKIP;

		inhomogenize(_clip_vertices[0], n_vertices); /* convert to inhomogeneous coordinates (divide x,y by z) */
		if(cull_back(_clip_vertices[0], n_vertices)) goto L_SKIP; /* back-face culling */

		n_vertices = clip_xy(r, _clip_vertices[0], _clip_vertices[1], n_vertices);
		if(n_vertices < 3) goto L_SKIP;

		init_vertices(r, _rend_vertices, _clip_vertices[0], n_vertices);

		/* 2008/08/09: debug aid for detecting clipping bugs caused by floating-point rounding error. */
		detect_error(_rend_vertices, n_vertices, param.surface_w, param.surface_h);

		/* face->c packs two 4-bit colors: low nibble = foreground, high nibble = background. */
		param.fore_color = face->c      & 15;
		param.back_color = face->c >> 4 & 15;
		draw_primitive(_rend_vertices, n_vertices, &param, draw_scanline);
L_SKIP:
		/* The next face starts right after this face's FACEVERTEX array. */
		face = (const FACE*)((const FACEVERTEX*)(face + 1) + face->nfv);
	} while(--n_faces);
}

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Copy nv vectors from v_in to v_out and transform each copy in place with
 * vec3f_xform(m, ...).  Returns the address just past the last input vector.
 * The loop tests its counter after the body, so nv must be at least 1.
 */
static const void* xform_vectors(const mat3f* m, const vec3f* v_in/*[nv]*/, vec3f* v_out/*[nv]*/, int nv) {
	int remaining = nv;

	for(;;) {
		*v_out = *v_in;
		vec3f_xform(m, v_out);
		v_in++;
		v_out++;
		if(--remaining == 0) break;
	}
	return v_in;
}
#else /*NOASM*/
/*
 * Assembly implementation of xform_vectors(), used when NOASM is not defined.
 * As the inline comments show, the arguments arrive in %r12..%r15
 * (m, v_in, v_out, nv) and the result (the advanced v_in) is returned in %r10.
 * Each iteration copies one three-word vector from v_in to v_out and then calls
 * vec3f_xform(m, v_out) on the copy.  The counter is decremented and tested
 * after the body, so nv must be at least 1.
 */
asm("
		.code
		.align		1
xform_vectors:
		pushn		%r3
		ld.w		%r0, %r12			;// %r0  := m
		ld.w		%r1, %r13			;// %r1  := v_in
		ld.w		%r2, %r14			;// %r2  := v_out
		ld.w		%r3, %r15			;// %r3  := nv
xform_vectors_LOOP:
		ld.w		%r4, [%r1]+			;// %r4  := v_in->x
		ld.w		%r5, [%r1]+			;// %r5  := v_in->y
		ld.w		%r6, [%r1]+			;// %r6  := v_in->z, %r1  := v_in++
		ld.w		%r13, %r2			;// %r13 := v_out
		ld.w		[%r2]+, %r4			;// v_out->x store
		ld.w		[%r2]+, %r5			;// v_out->y store
		ld.w		[%r2]+, %r6			;// v_out->z store, %r2  := v_out++
		xcall.d		vec3f_xform			;// vec3f_xform(m, v_out)
		ld.w		%r12, %r0			;// %r12 := m					*delay*
		sub		%r3, 1				;// %r3  := nv--
		jrne		xform_vectors_LOOP
		ld.w		%r10, %r1			;// %r10 := v_in
		popn		%r3
		ret
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Gather the vertices of face f into the clipping buffer cv.
 *
 *   cv : destination; receives one CLIPVERTEX per face vertex.
 *   v  : transformed vertex positions, indexed by FACEVERTEX.iv.
 *   c  : texture coordinates, indexed by FACEVERTEX.ic.
 *   f  : face header; its FACEVERTEX array follows it directly in memory.
 *
 * Returns the number of vertices written, which is f->nfv limited to
 * N_VERTICES_MAX.  The loop tests its counter after the body, so the face is
 * expected to have at least one vertex.
 */
static int pick_vertices(CLIPVERTEX* cv, const vec3f* v, const TCOORD* c, const FACE* f) {
	const FACEVERTEX* ref = (const FACEVERTEX*)(f + 1);
	int count = f->nfv;
	int remaining;

	if(count > N_VERTICES_MAX) count = N_VERTICES_MAX;

	remaining = count;
	for(;;) {
		const vec3f* pos = &v[ref->iv];
		const TCOORD* tex = &c[ref->ic];

		cv->x = pos->x;
		cv->y = pos->y;
		cv->z = pos->z;
		/* +0.5 makes the coordinate point at the centre of the texel. */
		cv->s = tex->s + 0.5f;
		cv->t = tex->t + 0.5f;

		ref++;
		cv++;
		if(--remaining == 0) break;
	}

	return count;
}
#else /*REND_NOASM*/
/*
 * Assembly implementation of pick_vertices(), used when NOASM is not defined.
 * Arguments arrive in %r12..%r15 (cv, v, c, f); the vertex count is returned
 * in %r10.  The code hard-codes several sizes: the FACEVERTEX array starts 2
 * bytes after f, a vec3f is 12 bytes, the texture coordinate index is scaled
 * by 2, the vertex limit is 8 (N_VERTICES_MAX) and 0x3f000000 is 0.5f.
 * s and t are loaded as unsigned bytes, converted with __floatsisf and offset
 * by 0.5f with __addsf3.
 * NOTE: the inline comment on the "sla %r7, 1" line mentions %alr, but the
 * instruction shifts %r7 itself.
 */
asm("
		.code
		.align	1
pick_vertices:
		pushn		%r3
		;//
		ld.w		%r0, %r12			;// %r0  := cv
		ld.w		%r1, %r13			;// %r1  := v
		ld.w		%r2, %r14			;// %r2  := c
		ld.w		%r3, %r15			;// %r3  := f
		;//
		ld.ub		%r4, [%r3]			;// %r4  := nfv = f->nfv
		xld.w		%r5, 0x3f000000			;// %r5  := 0.5f				*anti-interlock*
		cmp		%r4, 8				;// if(nfv > N_VERTICES_MAX)
		jrle.d		3
		 add		%r3, 2				;// %r3  := fv  = f->fv				*delay*
		 ld.w		%r4, 8				;//   %r4  := nfv = N_VERTICES_MAX
		ld.w		%r6, %r4			;// %r6  := i = nfv
		;//
pick_vertices_LOOP:
		ld.ub		%r12, [%r3]+			;// %r12 := iv = fv->iv
		ld.w		%r13, 12			;// %r13 :=      sizeof(vec3f)
		mlt.h		%r12, %r13			;// %alr := iv * sizeof(vec3f)
		ld.w		%r12, %alr			;// %r12 := iv * sizeof(vec3f)
		add		%r12, %r1			;// %r12 := pv = &v[iv]
		;//
		ld.w		%r13, [%r12]+			;// %r13 := x = pv->x
		ld.w		%r14, [%r12]+			;// %r14 := y = pv->y
		ld.w		%r15, [%r12]			;// %r15 := z = pv->z
		ld.w		[%r0]+, %r13			;// cv->x = x
		ld.w		[%r0]+, %r14			;// cv->y = y
		ld.w		[%r0]+, %r15			;// cv->z = z
		;//
		ld.ub		%r7, [%r3]+			;// %r7  := ic = fv->ic
		sla		%r7, 1				;// %alr := ic * sizeof(TCOORD)
		add		%r7, %r2			;// %r7  := pc = &c[ic]
		;//
		ld.ub		%r12, [%r7]+			;// %r12 :=        s = pc->s
		xcall		__floatsisf			;// %r10 := (float)s
		ld.w		%r12, %r10			;// %r12 := (float)s
		xcall.d		__addsf3			;// %r10 := (float)s + 0.5f
		ld.w		%r13, %r5			;// %r13 :=            0.5f			*delay*
		ld.w		[%r0]+, %r10			;// cv->s = (float)s + 0.5f
		;//
		ld.ub		%r12, [%r7]			;// %r12 :=        t = pc->t
		xcall		__floatsisf			;// %r10 := (float)t
		ld.w		%r12, %r10			;// %r12 := (float)t
		xcall.d		__addsf3			;// %r10 := (float)t + 0.5f
		ld.w		%r13, %r5			;// %r13 :=            0.5f			*delay*
		ld.w		[%r0]+, %r10			;// cv->t = (float)t + 0.5f
		;//
		sub		%r6, 1				;// %r6  := i--
		jrne		pick_vertices_LOOP
		;//
		popn		%r3
		ret.d
		ld.w		%r10, %r4			;// return nfv					*delay*
");
#endif /*REND_NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Convert nv vertices to inhomogeneous coordinates: x and y of every vertex
 * are divided by that vertex's z (z, s and t are left untouched).
 * The loop tests its counter after the body, so nv must be at least 1.
 */
static void inhomogenize(CLIPVERTEX* cv, int nv) {
	CLIPVERTEX* p = cv;
	int remaining = nv;

	for(;;) {
		p->x = p->x / p->z;
		p->y = p->y / p->z;
		p++;
		if(--remaining == 0) break;
	}
}
#else /*NOASM*/
/*
 * Assembly implementation of inhomogenize(), used when NOASM is not defined.
 * Arguments arrive in %r12 (cv) and %r13 (nv).  For each vertex, x and y are
 * divided by z (read from offset +8) with __divsf3 and stored back; the
 * pointer then advances by 4 + 16 = 20 bytes to the next CLIPVERTEX.
 * The counter is tested after the body, so nv must be at least 1.
 */
asm("
		.code
		.align		1
inhomogenize:
		ld.w		%r4, %r12			;// %r4  := cv
		ld.w		%r5, %r13			;// %r5  := nv
inhomogenize_LOOP:
		xld.w		%r6, [%r4+8]			;// %r6  := cv->z
		ld.w		%r12, [%r4]			;// %r12 := cv->x
		xcall.d		__divsf3			;// %r10 := cv->x / cv->z
		ld.w		%r13, %r6			;// %r13 := cv->z				*delay*
		ld.w		[%r4]+, %r10			;// cv->x store
		;//
		ld.w		%r12, [%r4]			;// %r12 := cv->y
		xcall.d		__divsf3			;// %r10 := cv->y / cv->z
		ld.w		%r13, %r6			;// %r13 := cv->z				*delay*
		ld.w		[%r4], %r10			;// cv->y store
		;//
		sub		%r5, 1				;// %r5  := nv--
		jrne.d		inhomogenize_LOOP
		add		%r4, 16				;// %r4  := cv++				*delay*
		ret
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Back-face test on a polygon of nv vertices.
 *
 * Takes the edge from the last vertex to vertex 0 and the edge from vertex 0
 * to vertex 1, and computes the 2D cross product of the two in x/y.
 * Returns 1 (cull the polygon) when that value is <= 0, otherwise 0.
 */
static int cull_back(const CLIPVERTEX* cv, int nv) {
	const CLIPVERTEX* first  = &cv[0];
	const CLIPVERTEX* second = &cv[1];
	const CLIPVERTEX* last   = &cv[nv - 1];

	float xN0 = first->x  - last->x;
	float yN0 = first->y  - last->y;
	float x01 = second->x - first->x;
	float y01 = second->y - first->y;
	float cross = (xN0 * y01) - (x01 * yN0);

	if(cross <= 0.0f) {
		return 1;
	}
	return 0;
}
#else /*NOASM*/
/*
 * Assembly implementation of cull_back(), used when NOASM is not defined.
 * Arguments arrive in %r12 (cv) and %r13 (nv); the result is returned in %r10.
 * It forms x01, y01, xN0 and yN0 with __subsf3, multiplies (x01 * yN0) and
 * (y01 * xN0) with __mulsf3 and compares the two products with __fcmps:
 * the result is 0 when (x01 * yN0) < (y01 * xN0) and 1 otherwise.
 * sizeof(CLIPVERTEX) is hard-coded as 20.
 * NOTE: the inline comment on the second __mulsf3 delay slot says %r12, but
 * the instruction loads xN0 into %r13.
 */
asm("
		.code
		.align		1
cull_back:
		;// %r12 := cv
		;// %r13 := nv
		ld.w		%r4, %r12			;// %r4  := &cv[  0]
		ld.w		%r5, 20				;// %r5  := sizeof(CLIPVERTEX)
		mlt.h		%r5, %r13			;// %alr := sizeof(CLIPVERTEX) * nv
		ld.w		%r5, %alr			;// %r5  := sizeof(CLIPVERTEX) * nv
		;// %r4  := &cv[  0]
		;// %r5  := sizeof(CLIPVERTEX) * nv
		ld.w		%r6, [%r4]+			;// %r6  :=  cv[  0].x
		ld.w		%r7, [%r4]			;// %r7  :=  cv[  0].y
		add		%r4, 16				;// %r4  := &cv[  1]
		;// %r4  := &cv[  1]
		;// %r5  := sizeof(CLIPVERTEX) * nv
		;// %r6  :=  cv[  0].x
		;// %r7  :=  cv[  0].y
		ld.w		%r12, [%r4]+			;// %r12 :=  cv[  1].x
		xcall.d		__subsf3			;// %r10 :=  cv[  1].x - cv[  0].x = x01 --+
		ld.w		%r13, %r6			;// %r13 :=              cv[  0].x         |	*delay*
		;// %r4  := &cv[  1].y				                                           |
		;// %r5  := sizeof(CLIPVERTEX) * nv		                                           |
		;// %r6  :=  cv[  0].x				                                           |
		;// %r7  :=  cv[  0].y				                                           |
		;// %r10 := x01					                                           |
		ld.w		%r12, [%r4]			;// %r12 :=  cv[  1].y                     |
		sub		%r4, 44				;// %r4  := &cv[ -1]                       |
		add		%r4, %r5			;// %r4  := &cv[n-1]                       |
		ld.w		%r13, %r7			;// %r13 :=              cv[  0].y         |
		xcall.d		__subsf3			;// %r10 :=  cv[  1].y - cv[  0].y = y01 --|-+
		ld.w		%r5, %r10			;// %r5  :=                          x01 <-+ |	*delay*
		;// %r4  := &cv[n-1]				                                             |
		;// %r5  := x01					                                             |
		;// %r6  :=  cv[  0].x				                                             |
		;// %r7  :=  cv[  0].y				                                             |
		;// %r10 := y01					                                             |
		ld.w		%r12, %r6			;// %r12 :=  cv[  0].x                       |
		ld.w		%r13, [%r4]+			;// %r13 :=              cv[n-1].x           |
		xcall.d		__subsf3			;// %r10 :=  cv[  0].x - cv[n-1].x = xN0 --+ |
		ld.w		%r6, %r10			;// %r6  :=                          y01 <-|-+	*delay*
		;// %r4  := &cv[n-1].y				                                           |
		;// %r5  := x01					                                           |
		;// %r6  := y01					                                           |
		;// %r7  :=  cv[  0].y				                                           |
		;// %r10 := xN0					                                           |
		ld.w		%r12, %r7			;// %r12 :=  cv[  0].y                     |
		ld.w		%r13, [%r4]			;// %r13 :=              cv[n-1].y         |
		xcall.d		__subsf3			;// %r10 :=  cv[  0].y - cv[n-1].y = yN0 --|-+
		ld.w		%r7, %r10			;// %r7  :=                          xN0 <-+ |	*delay*
		;// %r5  := x01					                                             |
		;// %r6  := y01					                                             |
		;// %r7  := xN0					                                             |
		;// %r10 := yN0					                                             |
		ld.w		%r12, %r5			;// %r12 :=  x01                             |
		xcall.d		__mulsf3			;// %r10 := (x01 * yN0) -------------------+ |
		ld.w		%r13, %r10			;// %r13 :=        yN0  <------------------|-+	*delay*
		ld.w		%r4, %r10			;// %r4  := (x01 * yN0) <------------------+
		;// %r4  := (x01 * yN0)				                                           |
		;// %r6  := y01					                                           |
		;// %r7  := xN0					                                           |
		ld.w		%r12, %r6			;// %r12 :=  y01                           |
		xcall.d		__mulsf3			;// %r10 := (y01 * xN0) -------------------|-+
		ld.w		%r13, %r7			;// %r12 :=        xN0                     | |	*delay*
		;// %r4  := (x01 * yN0)				                                           | |
		;// %r10 := (y01 * xN0)				                                           | |
		ld.w		%r12, %r4			;// %r12 := (x01 * yN0) <------------------+ |
		xcall.d		__fcmps				;// %psr := (x01 * yN0) - (y01 * xN0)        |
		ld.w		%r13, %r10			;// %r13 :=               (y01 * xN0) <------+	*delay*
		;// %psr := (x01 * yN0) - (y01 * xN0)
		jrlt.d		3
		 ld.w		%r10, 0				;// if((x01 * yN0) <  (y01 * xN0)) return 0	*delay*
		 ld.w		%r10, 1				;// if((x01 * yN0) >= (y01 * xN0)) return 1
		ret
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Convert nv clipped vertices (cv) into drawing vertices (rv).
 *
 *   x, y : copied (converted to the RENDVERTEX field type).
 *   z    : mapped with r->front / r->back and scaled by 0xffff:
 *          ((z - front) * back) / ((back - front) * z) * 0xffff
 *   s, t : scaled by 0x100 (8.8 fixed point).
 *
 * The loop tests its counter after the body, so nv must be at least 1.
 */
static void init_vertices(RENDF* r, RENDVERTEX* rv, const CLIPVERTEX* cv, int nv) {
	int remaining = nv;

	for(;;) {
		rv->x = cv->x;
		rv->y = cv->y;
		rv->z = (((cv->z - r->front) * r->back) / ((r->back - r->front) * cv->z)) * 0xffff;
		rv->s = cv->s * 0x100;
		rv->t = cv->t * 0x100;

		rv++;
		cv++;
		if(--remaining == 0) break;
	}
}
#else /*NOASM*/
/*
 * Assembly implementation of init_vertices(), used when NOASM is not defined.
 * Arguments arrive in %r12..%r15 (r, rv, cv, nv).  r->front and r->back are
 * read from offsets 56 and 60 of *r.  0x43800000 is 256.0f and 0x477fff00 is
 * 65535.0f.
 *
 * The z formula is regrouped as shown below so that the factor
 * (r->back * 65535.0f) / (r->back - r->front) is computed once before the
 * loop; per vertex only (cv->z - r->front) / cv->z and one multiplication
 * remain.  The floating-point operations are therefore grouped differently
 * from the C version.
 * All five results are converted with __fixunssfsi and stored as halfwords.
 */
//         (cv->z - r->front) * r->back              cv->z - r->front   r->back * 65535.0f
// rv->z = ---------------------------- * 65535.0f = ---------------- * ------------------
//         (r->back - r->front) * cv->z              cv->z              r->back - r->front
asm("
		.code
		.align		1
init_vertices:
		pushn		%r3
		;//
		xld.w		%r0, __mulsf3			;// %r0  := __mulsf3
		xld.w		%r1, __fixunssfsi		;// %r1  := __fixunssfsi
		xld.w		%r2, 0x43800000			;// %r2  := 256.0f
		;//
		add		%r12, 56			;// %r12 := &r->front
		ld.w		%r3, [%r12]+			;// %r3  :=  r->front
		ld.w		%r4, [%r12]			;// %r4  :=  r->back
		ld.w		%r5, %r13			;// %r5  := rv
		ld.w		%r6, %r14			;// %r6  := cv
		ld.w		%r7, %r15			;// %r7  := nv
		;//
		xld.w		%r13, 0x477fff00		;// %r13 :=             65535.0f
		call.d		%r0				;// %r10 :=  (r->back * 65535.0f)
		ld.w		%r12, %r4			;// %r12 :=   r->back											*delay*
		;//
		ld.w		%r12, %r4			;// %r12 :=                          r->back
		ld.w		%r4, %r10			;// %r4  :=  (r->back * 65535.0f)
		xcall.d		__subsf3			;// %r10 :=                         (r->back - r->front)
		ld.w		%r13, %r3			;// %r13 :=                                    r->front							*delay*
		;//
		ld.w		%r12, %r4			;// %r12 :=  (r->back * 65535.0f)
		xcall.d		__divsf3			;// %r10 := ((r->back * 65535.0f) / (r->back - r->front))
		ld.w		%r13, %r10			;// %r13 :=                         (r->back - r->front)						*delay*
		ld.w		%r4, %r10			;// %r4  := ((r->back * 65535.0f) / (r->back - r->front))
		;//
init_vertices_LOOP:
		ld.w		%r12, [%r6]+			;// %r12 :=      cv->x
		call		%r1				;// %r10 := (int)cv->x
		ld.h		[%r5]+, %r10			;// rv->x = (int)cv->x
		;//
		ld.w		%r12, [%r6]+			;// %r12 :=      cv->y
		call		%r1				;// %r10 := (int)cv->y
		ld.h		[%r5]+, %r10			;// rv->y = (int)cv->y
		;//
		ld.w		%r12, [%r6]			;// %r12 :=         cv->z
		xcall.d		__subsf3			;// %r10 :=        (cv->z - r->front) -----------+
		ld.w		%r13, %r3			;// %r13 :=                 r->front             |							*delay*
		;//						                                                 |
		ld.w		%r13, [%r6]+			;// %r13 :=                             cv->z    |
		xcall.d		__divsf3			;// %r10 :=       ((cv->z - r->front) / cv->z) --|-+
		ld.w		%r12, %r10			;// %r12 :=        (cv->z - r->front) <----------+ |							*delay*
		;//						                                                   |
		ld.w		%r12, %r10			;// %r12 :=       ((cv->z - r->front) / cv->z) <---+
		call.d		%r0				;// %r10 :=      (((cv->z - r->front) / cv->z) * ((r->back * 65535.0f) / (r->back - r->front))) --+
		ld.w		%r13, %r4			;// %r13 :=                                      ((r->back * 65535.0f) / (r->back - r->front))    |	*delay*
		;//						                                                                                                  |
		call.d		%r1				;// %r10 := (int)(((cv->z - r->front) / cv->z) * ((r->back * 65535.0f) / (r->back - r->front)))   |
		ld.w		%r12, %r10			;// %r12 :=      (((cv->z - r->front) / cv->z) * ((r->back * 65535.0f) / (r->back - r->front))) <-+	*delay*
		ld.h		[%r5]+, %r10			;// rv->z = (int)(((cv->z - r->front) / cv->z) * ((r->back * 65535.0f) / (r->back - r->front)))
		;//
		ld.w		%r12, [%r6]+			;// %r12 :=       cv->s
		call.d		%r0				;// %r10 :=      (cv->s * 256.0f) --+
		ld.w		%r13, %r2			;// %r13 :=               256.0f    |									*delay*
		;//						                                    |
		call.d		%r1				;// %r10 := (int)(cv->s * 256.0f)   |
		ld.w		%r12, %r10			;// %r12 :=      (cv->s * 256.0f) <-+									*delay*
		ld.h		[%r5]+, %r10			;// rv->s = (int)(cv->s * 256.0f)
		;//
		ld.w		%r12, [%r6]+			;// %r12 :=       cv->t
		call.d		%r0				;// %r10 :=      (cv->t * 256.0f) --+
		ld.w		%r13, %r2			;// %r13 :=               256.0f    |									*delay*
		;//						                                    |
		call.d		%r1				;// %r10 := (int)(cv->t * 256.0f)   |
		ld.w		%r12, %r10			;// %r12 :=      (cv->t * 256.0f) <-+									*delay*
		ld.h		[%r5]+, %r10			;// rv->t = (int)(cv->t * 256.0f)
		;//
		sub		%r7, 1				;// %r7  := nv--
		jrne		init_vertices_LOOP
		;//
		popn		%r3
		ret
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Fast path of the Z clipping.
 *
 * Scans the nsrc vertices in cv0.  If every z lies within
 * [r->front, r->back] the polygon is left untouched and nsrc is returned.
 * As soon as one vertex lies outside, the work is delegated to
 * clip_z_sub() and its result is returned.
 * The loop tests its counter after the body, so nsrc must be at least 1.
 */
static int clip_z(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc) {
	const CLIPVERTEX* probe = cv0;
	int remaining = nsrc;

	for(;;) {
		if(probe->z < r->front || probe->z > r->back) {
			return clip_z_sub(r, cv0, cv1, nsrc);
		}
		probe++;
		if(--remaining == 0) break;
	}
	return nsrc;
}
#else /*NOASM*/
/*
 * Assembly implementation of clip_z(), used when NOASM is not defined.
 * The four arguments (%r12..%r15 = r, cv0, cv1, nsrc) are spilled to the stack
 * so they can be reloaded for the tail jump to clip_z_sub.  The loop compares
 * each vertex's z (offset +8, stride 20) against the two words at offsets 56
 * and 60 of *r using __fcmps; if all vertices pass, nsrc is returned in %r10.
 * NOTE: several inline comments below name r->top / r->bottom (and v->x);
 * the values actually loaded are the two words starting at &r->front, which
 * the C version names r->front and r->back, and the vertex field is z.
 */
asm("
		.code
		.align		1
clip_z:
		xsub		%sp, %sp, 16
		xld.w		[%sp+0], %r12			;// [%sp+0]  := r
		xld.w		[%sp+4], %r13			;// [%sp+4]  := cv0
		xld.w		[%sp+8], %r14			;// [%sp+8]  := cv1
		xld.w		[%sp+12], %r15			;// [%sp+12] := nsrc
		;//
		xadd		%r4, %r13, 8			;// %r4  := &v->z = &cv0->z
		ld.w		%r5, %r15			;// %r5  := i = nsrc
		add		%r12, 56			;// %r12 := &r->front
		ld.w		%r6, [%r12]+			;// %r6  :=  r->front
		ld.w		%r7, [%r12]			;// %r7  :=  r->bottom
clip_z_LOOP:
		ld.w		%r12, [%r4]			;// %r12 := v->z
		xcall.d		__fcmps				;//      if(v->z < r->top) go clip
		ld.w		%r13, %r6			;// %r13 :=        r->top			*delay*
		jrlt		clip_z_CLIP
		;//
		ld.w		%r12, [%r4]			;// %r12 := v->z
		xcall.d		__fcmps				;//      if(v->x > r->bottom) go clip
		ld.w		%r13, %r7			;// %r13 :=        r->bottom			*delay*
		jrgt		clip_z_CLIP
		;//
		sub		%r5, 1				;// %r5  := i--
		jrne.d		clip_z_LOOP
		add		%r4, 20				;// %r4  := v++					*delay*
		;//
		xld.w		%r10, [%sp+12]			;// return nsrc
		xadd		%sp, %sp, 16
		ret
		;//
clip_z_CLIP:
		xld.w		%r12, [%sp+0]			;// %r12 := r
		xld.w		%r13, [%sp+4]			;// %r13 := cv0
		xld.w		%r14, [%sp+8]			;// %r14 := cv1
		xld.w		%r15, [%sp+12]			;// %r15 := nsrc
		xjp.d		clip_z_sub
		xadd		%sp, %sp, 16			;//						*delay*
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Fast path of the X/Y clipping.
 *
 * Scans the nsrc vertices in cv0.  If every vertex lies inside the rectangle
 * [r->left, r->right] x [r->top, r->bottom] the polygon is left untouched and
 * nsrc is returned.  As soon as one vertex lies outside, the work is
 * delegated to clip_xy_sub() and its result is returned.
 * The loop tests its counter after the body, so nsrc must be at least 1.
 */
static int clip_xy(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc) {
	const CLIPVERTEX* probe = cv0;
	int remaining = nsrc;

	for(;;) {
		if(probe->x < r->left || probe->x > r->right ||
		   probe->y < r->top  || probe->y > r->bottom) {
			return clip_xy_sub(r, cv0, cv1, nsrc);
		}
		probe++;
		if(--remaining == 0) break;
	}
	return nsrc;
}
#else /*NOASM*/
/*
 * Assembly implementation of clip_xy(), used when NOASM is not defined.
 * The four arguments (%r12..%r15 = r, cv0, cv1, nsrc) are spilled to the stack
 * so they can be reloaded for the tail jump to clip_xy_sub.  Per vertex, x is
 * compared against the words at offsets 64 and 68 of *r (left, right) and y
 * against those at 72 and 76 (top, bottom), all with __fcmps, i.e. the limits
 * are read as floats.  %r6 walks across the four limits and is rewound by 12
 * in the delay slot of the last comparison; the vertex pointer advances by
 * 4 + 16 = 20 bytes per iteration.  If all vertices pass, nsrc is returned in
 * %r10.
 */
asm("
		.code
		.align		1
clip_xy:
		xsub		%sp, %sp, 16
		xld.w		[%sp+0], %r12			;// [%sp+0]  := r
		xld.w		[%sp+4], %r13			;// [%sp+4]  := cv0
		xld.w		[%sp+8], %r14			;// [%sp+8]  := cv1
		xld.w		[%sp+12], %r15			;// [%sp+12] := nsrc
		;//
		ld.w		%r4, %r13			;// %r4  := v = cv0
		ld.w		%r5, %r15			;// %r5  := i = nsrc
		xadd		%r6, %r12, 64			;// %r6  := &r->left
clip_xy_LOOP:
		ld.w		%r12, [%r4]			;// %r12 := v->x
		ld.w		%r13, [%r6]+			;// %r13 :=        r->left
		xcall		__fcmps				;//      if(v->x < r->left) go clip
		jrlt		clip_xy_CLIP
		;//
		ld.w		%r12, [%r4]+			;// %r12 := v->x
		ld.w		%r13, [%r6]+			;// %r13 :=        r->right
		xcall		__fcmps				;//      if(v->x > r->right) go clip
		jrgt		clip_xy_CLIP
		;//
		ld.w		%r12, [%r4]			;// %r12 := v->y
		ld.w		%r13, [%r6]+			;// %r13 :=        r->top
		xcall		__fcmps				;//      if(v->y < r->top) go clip
		jrlt		clip_xy_CLIP
		;//
		ld.w		%r12, [%r4]			;// %r12 := v->y
		ld.w		%r13, [%r6]			;// %r13 :=        r->bottom
		xcall.d		__fcmps				;//      if(v->y > r->bottom) go clip
		sub		%r6, 12				;// %r6  := &r->left				*delay*
		jrgt		clip_xy_CLIP
		;//
		sub		%r5, 1				;// %r5  := i--
		jrne.d		clip_xy_LOOP
		add		%r4, 16				;// %r4  := v++					*delay*
		;//
		xld.w		%r10, [%sp+12]			;// return nsrc
		xadd		%sp, %sp, 16
		ret
		;//
clip_xy_CLIP:
		xld.w		%r12, [%sp+0]			;// %r12 := r
		xld.w		%r13, [%sp+4]			;// %r13 := cv0
		xld.w		%r14, [%sp+8]			;// %r14 := cv1
		xld.w		%r15, [%sp+12]			;// %r15 := nsrc
		xjp.d		clip_xy_sub
		xadd		%sp, %sp, 16			;//						*delay*
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/*
 * Clip a polygon against the front and back Z limits.
 *
 *   r    : supplies the limits r->front and r->back.
 *   cv0  : input polygon (nsrc vertices); also receives the final result.
 *   cv1  : scratch buffer that receives the result of the first pass.
 *   nsrc : number of input vertices.
 *
 * Pass 1 copies cv0 -> cv1 keeping the part with z >= r->front, pass 2 copies
 * cv1 -> cv0 keeping the part with z <= r->back.  Returns the resulting vertex
 * count, or 0 as soon as a pass leaves fewer than 3 vertices.  Each pass stops
 * emitting once N_VERTICES_MAX vertices have been written.
 */
static int clip_z_sub(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc) {
/*
 * One clipping pass from SRC to DST; a vertex is "inside" when (z OP LIM).
 * For every edge v0 -> v1 (v1 wraps around to the first vertex):
 *   - v0 inside : v0 is copied; if v1 is outside, the crossing point follows.
 *   - v0 outside: only the crossing point is emitted, and only if v1 is inside.
 * The crossing point has z = LIM; x, y, s and t are interpolated linearly with
 * the weight n001 = (LIM - v0->z) / (v1->z - v0->z).
 * The macro uses the locals declared below and returns 0 from the enclosing
 * function when fewer than 3 vertices were produced.
 */
#define CLIP(SRC, DST, OP, LIM)					\
	src = SRC;						\
	dst = DST;						\
	ndst = 0;						\
	for(i = 0, v0 = src; i < nsrc; i++, v0++) {		\
		v1 = (i < nsrc - 1) ? (v0 + 1) : src;		\
		if(v0->z OP LIM) {				\
			*dst = *v0;				\
			dst++;					\
			ndst++;					\
			if(ndst == N_VERTICES_MAX) break;	\
			if(v1->z OP LIM) {			\
				continue;			\
			} else {				\
				/* go clip */			\
			}					\
		} else {					\
			if(v1->z OP LIM) {			\
				/* go clip */			\
			} else {				\
				continue;			\
			}					\
		}						\
		/* do clip */					\
		dst->z = LIM;					\
		n0 = LIM - v0->z;				\
		n1 = v1->z - LIM;				\
		n01 = n0 + n1;					\
		n001 = n0 / n01;				\
		dst->x = v0->x + ((v1->x - v0->x) * n001);	\
		dst->y = v0->y + ((v1->y - v0->y) * n001);	\
		dst->s = v0->s + ((v1->s - v0->s) * n001);	\
		dst->t = v0->t + ((v1->t - v0->t) * n001);	\
		dst++;						\
		ndst++;						\
		if(ndst == N_VERTICES_MAX) break;		\
	}							\
	if(ndst < 3) return 0;					\

	CLIPVERTEX* src;
	CLIPVERTEX* dst;
	CLIPVERTEX* v0;
	CLIPVERTEX* v1;
	int i;
	int ndst;
	float n0;
	float n1;
	float n01;
	float n001;

	/* Pass 1: front limit, cv0 -> cv1; its output count becomes the input count of pass 2. */
	CLIP(cv0, cv1, >=, r->front); nsrc = ndst;
	/* Pass 2: back limit, cv1 -> cv0. */
	CLIP(cv1, cv0, <=, r->back ); return ndst;

#undef CLIP
}
#else /*NOASM*/
asm("
		.code
		.align		1
clip_z_sub:
		pushn		%r3
		xsub		%sp, %sp, 12
		;//
		add		%r12, 56				;// %r12    := &r->front
		ld.w		%r0, [%r12]+				;// %r0     := front
		ld.w		%r1, [%r12]				;// %r1     := back
		xld.w		[%sp+0], %r1				;// [%sp+0] := back
		xld.w		[%sp+4], %r13				;// [%sp+4] := cv0
		xld.w		[%sp+8], %r14				;// [%sp+8] := cv1
		;// %r0   := front
		;//-----------------------------------------------------;
		ld.w		%r1, %r13				;// %r1   := v0 = cv0
		ld.w		%r2, %r14				;// %r2   := dst = cv1
		ld.w		%r3, %r15				;// %r3   := i = nsrc
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_z_sub_FRONT_LOOP:
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+4]				;//   %r5 := v1 = cv0
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+8]				;// %r12  := v0->z
		xcall.d		__fcmps					;//       if(v0->z >= front)
		ld.w		%r13, %r0				;// %r13  :=          front					*delay*
		jrlt		clip_z_sub_FRONT_ELSE
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_z_sub_FRONT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+8]				;// %r12  := v1->z
		xcall.d		__fcmps					;//       if(v1->z >= front) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          front					*delay*
		jrge		clip_z_sub_FRONT_CONTINUE
		jp		clip_z_sub_FRONT_CLIP
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_z_sub_FRONT_ELSE:
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+8]				;// %r12  := v1->z
		xcall.d		__fcmps					;//       if(v1->z >= front) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          front					*delay*
		jrlt.d		clip_z_sub_FRONT_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_z_sub_FRONT_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+8]				;// %r6   := v0->z
		xld.w		%r7, [%r5+8]				;// %r7   := v1->z
		xcall		clip_z_sub_DIV				;// %r6   := n001
		;// %r0   := front
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		xcall		clip_xy_z_sub_CLIP			;// dst->x = (v0->x + ((v1->x - v0->x) * n001))
		xcall		clip_xy_z_sub_CLIP			;// dst->y = (v0->y + ((v1->y - v0->y) * n001))
		ld.w		[%r2]+, %r0				;// dst->z = front
		add		%r1, 4					;// %r1   := &v0->s
		xcall.d		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * n001))
		add		%r5, 4					;// %r5   := &v1->s						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * n001))
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_z_sub_FRONT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_z_sub_FRONT_CONTINUE:
		;// %r0   := front
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_z_sub_FRONT_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
		xjrgt		clip_z_sub_RET
clip_z_sub_FRONT_BREAK:							;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
		xld.w		%r0, [%sp+0]				;// %r0   := back
		xld.w		%r1, [%sp+8]				;// %r1   := v0 = cv1
		xld.w		%r2, [%sp+4]				;// %r2   := dst = cv0
		ld.w		%r3, 8					;// %r3   := i = nsrc = ndst = 8 - (8-ndst)
		sub		%r3, %r4
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_z_sub_BACK_LOOP:
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+8]				;//   %r5 := v1 = cv1
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+8]				;// %r12  := v0->z
		xcall.d		__fcmps					;//       if(v0->z <= back)
		ld.w		%r13, %r0				;// %r13  :=          back					*delay*
		jrgt		clip_z_sub_BACK_ELSE
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_z_sub_BACK_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+8]				;// %r12  := v1->z
		xcall.d		__fcmps					;//       if(v1->z <= back) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          back					*delay*
		jrle		clip_z_sub_BACK_CONTINUE
		jp		clip_z_sub_BACK_CLIP
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_z_sub_BACK_ELSE:
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+8]				;// %r12  := v1->z
		xcall.d		__fcmps					;//       if(v1->z <= back) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          back					*delay*
		jrgt.d		clip_z_sub_BACK_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_z_sub_BACK_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+8]				;// %r6   := v0->z
		xld.w		%r7, [%r5+8]				;// %r7   := v1->z
		xcall		clip_z_sub_DIV				;// %r6   := n001
		;// %r0   := back
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		xcall		clip_xy_z_sub_CLIP			;// dst->x = (v0->x + ((v1->x - v0->x) * n001))
		xcall		clip_xy_z_sub_CLIP			;// dst->y = (v0->y + ((v1->y - v0->y) * n001))
		ld.w		[%r2]+, %r0				;// dst->z = back
		add		%r1, 4					;// %r1   := &v0->s
		xcall.d		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * n001))
		add		%r5, 4					;// %r5   := &v1->s						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * n001))
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_z_sub_BACK_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_z_sub_BACK_CONTINUE:
		;// %r0   := back
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_z_sub_BACK_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
;//sv		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
;//sv		xjrgt		clip_z_sub_RET
clip_z_sub_BACK_BREAK:							;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
clip_z_sub_RET:
		;// %r4   := (8-ndst)
		xadd		%sp, %sp, 12
		popn		%r3
		;//
		ld.w		%r10, 8					;// %r10  := ndst = 8 - (8-ndst)
		ret.d
		sub		%r10, %r4				;//								*delay*
;//
;// [in]
;//		%r0		limit
;//		%r1		v0
;//		%r5		v1
;//		%r6		v0->i
;//		%r7		v1->i
;// [out]
;//		%r6		n001
;// [mod]
;//		%r7,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%alr,%ahr,%psr
;//
clip_z_sub_DIV:
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := v0->i
		;// %r7  := v1->i
		ld.w		%r12, %r7			;// %r12 :=        v1->i
		xcall.d		__subsf3			;// %r10 := n01 = (v1->i - v0->i) -+
		ld.w		%r13, %r6			;// %r13 :=                v0->i   |					*delay*
		ld.w		%r7, %r10			;// %r7  := n01 <------------------+
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := v0->i
		;// %r7  := n01
		ld.w		%r12, %r0			;// %r12 :=       limit
		xcall.d		__subsf3			;// %r10 := n0 = (limit - v0->i) -+
		ld.w		%r13, %r6			;// %r13 :=               v0->i   |					*delay*
		;// %r0  := limit				;//                               |
		;// %r1  := v0					;//                               |
		;// %r5  := v1					;//                               |
		;// %r7  := n01					;//                               |
		;// %r10 := n0					;//                               |
		ld.w		%r12, %r10			;// %r12 :=         n0 <----------+
		xcall.d		__divsf3			;// %r10 := n001 = (n0 / n01) -+
		ld.w		%r13, %r7			;// %r13 :=              n01   |					*delay*
		ret.d						;//                            |
		ld.w		%r6, %r10			;// %r6  := n001 <-------------+					*delay*
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := n001
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

/* Subroutines shared by the assembly versions of clip_xy_sub and clip_z_sub.
 * They are placed between those two routines so that, wherever possible,
 * both can call them without an ext prefix.
 *
 * clip_xy_z_sub_COPY
 *   Copies one vertex (five 32-bit words: x, y, z, s, t) from [%r1] to [%r2]
 *   using post-increment addressing, so %r1 and %r2 both end up pointing at
 *   the next vertex.  The delay slot of the return decrements %r4, the
 *   "(8-ndst)" free-slot counter; callers branch on the flags left by that
 *   subtraction (jreq) to detect a full output buffer.  %r11-%r15 are used
 *   as scratch.
 *   NOTE(review): the trailing comment on that delay-slot instruction names
 *   %r3, but the instruction itself is "sub %r4, 1".
 *
 * clip_xy_z_sub_CLIP
 *   Interpolates a single float component:
 *       dst->i = v0->i + ((v1->i - v0->i) * n001)
 *   with %r1 = &v0->i, %r5 = &v1->i, %r2 = &dst->i and %r6 = n001.  All three
 *   pointers are advanced by one word, so consecutive calls walk through
 *   adjacent components.  The arithmetic is delegated to __subsf3, __mulsf3
 *   and __addsf3 (operands in %r12/%r13, result in %r10, per the register
 *   comments inside the routine).
 */
#ifndef NOASM
asm("
		.code
		.align		1
;//
;// [in]
;//		%r1		v0
;//		%r2		dst
;//		%r4		(8-ndst)
;// [out]
;//		%r1		v0+1
;//		%r2		dst+1
;//		%r4		(8-ndst)-1
;//		%psr		(8-ndst)-1 == 0
;// [mod]
;//		%r11,%r12,%r13,%r14,%r15
;//
clip_xy_z_sub_COPY:
		;// %r1  := v0
		;// %r2  := dst
		;// %r4  := (8-ndst)
		ld.w		%r11, [%r1]+			;// %r11  := v0->x
		ld.w		%r12, [%r1]+			;// %r12  := v0->y
		ld.w		%r13, [%r1]+			;// %r13  := v0->z
		ld.w		%r14, [%r1]+			;// %r14  := v0->s
		ld.w		%r15, [%r1]+			;// %r15  := v0->t, %r1  := v0+1
		;// %r1  := v0+1
		;// %r2  := dst
		;// %r4  := (8-ndst)
		ld.w		[%r2]+, %r11			;// dst->x = v0->x
		ld.w		[%r2]+, %r12			;// dst->y = v0->y
		ld.w		[%r2]+, %r13			;// dst->z = v0->z
		ld.w		[%r2]+, %r14			;// dst->s = v0->s
		ld.w		[%r2]+, %r15			;// dst->t = v0->t, %r2  := dst+1
		;// %r1  := v0+1
		;// %r2  := dst+1
		;// %r4  := (8-ndst)
		ret.d
		sub		%r4, 1				;// %r3   := (8-ndst)--							*delay*
		;// %r1  := v0+1
		;// %r2  := dst+1
		;// %r4  := (8-ndst)-1
		;// %psr := (8-ndst)-1 == 0
;//
;// [in]
;//		%r1		&v0->i
;//		%r2		&dst->i
;//		%r5		&v1->i
;//		%r6		n001
;// [out]
;//		%r1		&v0->(i+1)
;//		%r2		&dst->(i+1)
;//		%r5		&v1->(i+1)
;// [mod]
;//		%r9,%r10,%r11,%r12,%r13,%r14,%r15,%alr,%ahr,%psr
;//
clip_xy_z_sub_CLIP:
		;// %r1  := &v0->i
		;// %r2  := &dst->i
		;// %r5  := &v1->i
		;// %r6  := n001
		ld.w		%r12, [%r5]+			;// %r12  :=            v1->i                  , %r5  := &v1->(i+1)
		ld.w		%r13, [%r1]			;// %r13  :=                    v0->i
		xcall		__subsf3			;// %r10  :=           (v1->i - v0->i)
		;// %r1  := &v0->i
		;// %r2  := &dst->i
		;// %r5  := &v1->(i+1)
		;// %r6  := n001
		;// %r10 := (v1->i - v0->i)
		ld.w		%r12, %r10			;// %r12  :=           (v1->i - v0->i)
		xcall.d		__mulsf3			;// %r10  :=          ((v1->i - v0->i) * n001)
		ld.w		%r13, %r6			;// %r13  :=                             n001				*delay*
		;// %r1  := &v0->i
		;// %r2  := &dst->i
		;// %r5  := &v1->(i+1)
		;// %r6  := n001
		;// %r10 := ((v1->i - v0->i) * n001)
		ld.w		%r12, [%r1]+			;// %r12  :=  v0->i
		xcall.d		__addsf3			;// %r10  := (v0->i + ((v1->i - v0->i) * n001)), %r1  := &v0->(i+1)
		ld.w		%r13, %r10			;// %r13  :=          ((v1->i - v0->i) * n001)				*delay*
		;// %r1  := &v0->(i+1)
		;// %r2  := &dst->i
		;// %r5  := &v1->(i+1)
		;// %r6  := n001
		;// %r10 := (v0->i + ((v1->i - v0->i) * n001))
		ld.w		[%r2]+, %r10			;// dst->i = (v0->i + ((v1->i - v0->i) * n001)), %r2  := &dst->(i+1)
		;// %r1  := &v0->(i+1)
		;// %r2  := &dst->(i+1)
		;// %r5  := &v1->(i+1)
		;// %r6  := n001
		ret
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

#ifdef NOASM
/* Clips the closed polygon src[0..nsrc-1] against a single screen boundary
 * and stores the resulting outline in dst[].  Returns the number of vertices
 * written, which never exceeds N_VERTICES_MAX (the pass stops as soon as the
 * output is full).
 *
 *   clip_y  - zero to test the x coordinate, non-zero to test y.
 *   keep_ge - non-zero keeps vertices whose coordinate is >= lim,
 *             zero keeps those whose coordinate is <= lim.
 *   lim     - position of the boundary.
 *
 * For an edge that crosses the boundary, the tested coordinate of the new
 * vertex is set to lim exactly and the other screen coordinate is
 * interpolated linearly.  z, s and t use a second ratio in which each
 * distance to the boundary is weighted by the z of its own end point.
 */
static int clip_xy_sub_edge(CLIPVERTEX* src, CLIPVERTEX* dst, int nsrc, int clip_y, int keep_ge, float lim) {
	CLIPVERTEX* cur = src;
	CLIPVERTEX* nxt;
	int count = 0;
	int k;
	int cur_in;
	int nxt_in;
	float c0;
	float c1;
	float d0;
	float d1;
	float sum;
	float ratio;

	for(k = 0; k < nsrc; k++, cur++) {
		/* The edge that leaves the last vertex closes the polygon. */
		nxt = (k < nsrc - 1) ? (cur + 1) : src;

		c0 = clip_y ? cur->y : cur->x;
		cur_in = keep_ge ? (c0 >= lim) : (c0 <= lim);
		if(cur_in) {
			/* Vertices on the kept side are passed through unchanged. */
			*dst++ = *cur;
			if(++count == N_VERTICES_MAX) break;
		}

		c1 = clip_y ? nxt->y : nxt->x;
		nxt_in = keep_ge ? (c1 >= lim) : (c1 <= lim);
		if(cur_in == nxt_in) continue;	/* edge does not cross the boundary */

		/* Plain linear ratio for the other screen coordinate. */
		d0 = lim - c0;
		d1 = c1 - lim;
		sum = d0 + d1;
		ratio = d0 / sum;
		if(clip_y) {
			dst->y = lim;
			dst->x = cur->x + ((nxt->x - cur->x) * ratio);
		} else {
			dst->x = lim;
			dst->y = cur->y + ((nxt->y - cur->y) * ratio);
		}

		/* z-weighted ratio for depth and texture coordinates. */
		d0 *= cur->z;
		d1 *= nxt->z;
		sum = d0 + d1;
		ratio = d0 / sum;
		dst->z = cur->z + ((nxt->z - cur->z) * ratio);
		dst->s = cur->s + ((nxt->s - cur->s) * ratio);
		dst->t = cur->t + ((nxt->t - cur->t) * ratio);
		dst++;
		if(++count == N_VERTICES_MAX) break;
	}

	return count;
}

/* Clips a polygon against the four screen boundaries held in r, in the
 * order left, right, top, bottom.  The vertices start in cv0 and each pass
 * writes into the other buffer, so after four passes the result is back in
 * cv0.  Returns the final vertex count, or 0 as soon as any pass leaves
 * fewer than three vertices.
 */
static int clip_xy_sub(RENDF* r, CLIPVERTEX* cv0, CLIPVERTEX* cv1, int nsrc) {
	int n;

	n = clip_xy_sub_edge(cv0, cv1, nsrc, 0, 1, r->left);
	if(n < 3) return 0;

	n = clip_xy_sub_edge(cv1, cv0, n, 0, 0, r->right);
	if(n < 3) return 0;

	n = clip_xy_sub_edge(cv0, cv1, n, 1, 1, r->top);
	if(n < 3) return 0;

	n = clip_xy_sub_edge(cv1, cv0, n, 1, 0, r->bottom);
	if(n < 3) return 0;

	return n;
}
#else /*NOASM*/
/* Assembly implementation of clip_xy_sub(r, cv0, cv1, nsrc).
 *
 * Register use at entry, per the in-line comments: %r12 = r, %r13 = cv0,
 * %r14 = cv1, %r15 = nsrc; the vertex count is returned in %r10.
 * The four limits are read as consecutive words starting at r+64
 * (left, right, top, bottom).  %r0-%r3 are saved with pushn, and 20 bytes of
 * stack hold: [sp+0] right, [sp+4] top, [sp+8] bottom, [sp+12] cv0,
 * [sp+16] cv1.
 *
 * Four passes run in sequence, alternating between the two buffers:
 *   LEFT    cv0 -> cv1   keep x >= left
 *   RIGHT   cv1 -> cv0   keep x <= right
 *   TOP     cv0 -> cv1   keep y >= top
 *   BOTTOM  cv1 -> cv0   keep y <= bottom
 * Inside a pass: %r0 = limit, %r1 = v0, %r2 = dst, %r3 = vertices left to
 * visit, %r4 = 8 - ndst (free output slots), %r5 = v1 (the next vertex, or
 * the first source vertex when %r3 == 1).  A pass stops early once %r4
 * reaches zero.
 *
 * clip_xy_sub_DIV yields two interpolation factors for an edge that crosses
 * the limit, with n0 = limit - v0->i and n1 = v1->i - limit:
 *   n001 = n0 / (n0 + n1)                            other screen coordinate
 *   m001 = (n0 * v0->z) / ((n0 * v0->z) + (n1 * v1->z))    z, s and t
 *
 * NOTE(review): after LEFT, RIGHT and TOP a result with fewer than three
 * vertices jumps straight to clip_xy_sub_RET, which returns 8 - %r4 (the
 * small count itself); the same test after BOTTOM is commented out.  The C
 * fallback returns 0 in all of these cases, so callers presumably treat any
 * value below 3 as "nothing to draw" - confirm against the caller.
 */
asm("
		.code
		.align		1
clip_xy_sub:
		pushn		%r3
		xsub		%sp, %sp, 20
		;//
		xadd		%r12, %r12, 64				;// %r12     := &r->left
		ld.w		%r0, [%r12]+				;// %r0      := left
		ld.w		%r1, [%r12]+				;// %r1      := right
		ld.w		%r2, [%r12]+				;// %r2      := top
		ld.w		%r3, [%r12]				;// %r3      := bottom
		xld.w		[%sp+0], %r1				;// [%sp+0]  := right
		xld.w		[%sp+4], %r2				;// [%sp+4]  := top
		xld.w		[%sp+8], %r3				;// [%sp+8]  := bottom
		xld.w		[%sp+12], %r13				;// [%sp+12] := cv0
		xld.w		[%sp+16], %r14				;// [%sp+16] := cv1
		;// %r0   := left
		;//-----------------------------------------------------;
		ld.w		%r1, %r13				;// %r1   := v0 = cv0
		ld.w		%r2, %r14				;// %r2   := dst = cv1
		ld.w		%r3, %r15				;// %r3   := i = nsrc
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_xy_sub_LEFT_LOOP:
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+12]				;//   %r5 := v1 = cv0
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+0]				;// %r12  := v0->x
		xcall.d		__fcmps					;//       if(v0->x >= left)
		ld.w		%r13, %r0				;// %r13  :=          left					*delay*
		jrlt		clip_xy_sub_LEFT_ELSE
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_xy_sub_LEFT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+0]				;// %r12  := v1->x
		xcall.d		__fcmps					;//       if(v1->x >= left) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          left					*delay*
		jrge		clip_xy_sub_LEFT_CONTINUE
		jp		clip_xy_sub_LEFT_CLIP
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_xy_sub_LEFT_ELSE:
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+0]				;// %r12  := v1->x
		xcall.d		__fcmps					;//       if(v1->x >= left) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          left					*delay*
		jrlt.d		clip_xy_sub_LEFT_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_xy_sub_LEFT_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+0]				;// %r6   := v0->x
		xld.w		%r7, [%r5+0]				;// %r7   := v1->x
		xcall		clip_xy_sub_DIV				;// %r6   := n001, %r7  := m001
		;// %r0   := left
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		;// %r7   := m001
		ld.w		[%r2]+, %r0				;// dst->x = left
		add		%r1, 4					;// %r1   := &v0->y
		xcall.d		clip_xy_z_sub_CLIP			;// dst->y = (v0->y + ((v1->y - v0->y) * n001))
		add		%r5, 4					;// %r5   := &v1->y						*delay*
		xcall.d		clip_xy_z_sub_CLIP			;// dst->z = (v0->z + ((v1->z - v0->z) * m001))
		ld.w		%r6, %r7				;// %r6   := m001						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * m001))
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * m001))
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_xy_sub_LEFT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_xy_sub_LEFT_CONTINUE:
		;// %r0   := left
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_xy_sub_LEFT_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
		xjrgt		clip_xy_sub_RET
clip_xy_sub_LEFT_BREAK:							;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
		xld.w		%r0, [%sp+0]				;// %r0   := right
		xld.w		%r1, [%sp+16]				;// %r1   := v0 = cv1
		xld.w		%r2, [%sp+12]				;// %r2   := dst = cv0
		ld.w		%r3, 8					;// %r3   := i = nsrc = ndst = 8 - (8-ndst)
		sub		%r3, %r4
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_xy_sub_RIGHT_LOOP:
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+16]				;//   %r5 := v1 = cv1
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+0]				;// %r12  := v0->x
		xcall.d		__fcmps					;//       if(v0->x <= right)
		ld.w		%r13, %r0				;// %r13  :=          right					*delay*
		jrgt		clip_xy_sub_RIGHT_ELSE
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_xy_sub_RIGHT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+0]				;// %r12  := v1->x
		xcall.d		__fcmps					;//       if(v1->x <= right) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          right					*delay*
		jrle		clip_xy_sub_RIGHT_CONTINUE
		jp		clip_xy_sub_RIGHT_CLIP
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_xy_sub_RIGHT_ELSE:
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+0]				;// %r12  := v1->x
		xcall.d		__fcmps					;//       if(v1->x <= right) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          right					*delay*
		jrgt.d		clip_xy_sub_RIGHT_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_xy_sub_RIGHT_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+0]				;// %r6   := v0->x
		xld.w		%r7, [%r5+0]				;// %r7   := v1->x
		xcall		clip_xy_sub_DIV				;// %r6   := n001, %r7  := m001
		;// %r0   := right
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		;// %r7   := m001
		ld.w		[%r2]+, %r0				;// dst->x = right
		add		%r1, 4					;// %r1   := &v0->y
		xcall.d		clip_xy_z_sub_CLIP			;// dst->y = (v0->y + ((v1->y - v0->y) * n001))
		add		%r5, 4					;// %r5   := &v1->y						*delay*
		xcall.d		clip_xy_z_sub_CLIP			;// dst->z = (v0->z + ((v1->z - v0->z) * m001))
		ld.w		%r6, %r7				;// %r6   := m001						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * m001))
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * m001))
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_xy_sub_RIGHT_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_xy_sub_RIGHT_CONTINUE:
		;// %r0   := right
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_xy_sub_RIGHT_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
		xjrgt		clip_xy_sub_RET
clip_xy_sub_RIGHT_BREAK:						;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
		xld.w		%r0, [%sp+4]				;// %r0   := top
		xld.w		%r1, [%sp+12]				;// %r1   := v0 = cv0
		xld.w		%r2, [%sp+16]				;// %r2   := dst = cv1
		ld.w		%r3, 8					;// %r3   := i = nsrc = ndst = 8 - (8-ndst)
		sub		%r3, %r4
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_xy_sub_TOP_LOOP:
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+12]				;//   %r5 := v1 = cv0
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+4]				;// %r12  := v0->y
		xcall.d		__fcmps					;//       if(v0->y >= top)
		ld.w		%r13, %r0				;// %r13  :=          top					*delay*
		jrlt		clip_xy_sub_TOP_ELSE
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_xy_sub_TOP_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+4]				;// %r12  := v1->y
		xcall.d		__fcmps					;//       if(v1->y >= top) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          top					*delay*
		jrge		clip_xy_sub_TOP_CONTINUE
		jp		clip_xy_sub_TOP_CLIP
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_xy_sub_TOP_ELSE:
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+4]				;// %r12  := v1->y
		xcall.d		__fcmps					;//       if(v1->y >= top) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          top					*delay*
		jrlt.d		clip_xy_sub_TOP_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_xy_sub_TOP_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+4]				;// %r6   := v0->y
		xld.w		%r7, [%r5+4]				;// %r7   := v1->y
		xcall		clip_xy_sub_DIV				;// %r6   := n001, %r7  := m001
		;// %r0   := top
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		;// %r7   := m001
		xcall		clip_xy_z_sub_CLIP			;// dst->x = (v0->x + ((v1->x - v0->x) * n001))
		ld.w		[%r2]+, %r0				;// dst->y = top
		add		%r1, 4					;// %r1   := &v0->z
		add		%r5, 4					;// %r5   := &v1->z
		xcall.d		clip_xy_z_sub_CLIP			;// dst->z = (v0->z + ((v1->z - v0->z) * m001))
		ld.w		%r6, %r7				;// %r6   := m001						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * m001))
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * m001))
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_xy_sub_TOP_BREAK			;// if(!(8-ndst)) { break }
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_xy_sub_TOP_CONTINUE:
		;// %r0   := top
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_xy_sub_TOP_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
		xjrgt		clip_xy_sub_RET
clip_xy_sub_TOP_BREAK:							;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
		xld.w		%r0, [%sp+8]				;// %r0   := bottom
		xld.w		%r1, [%sp+16]				;// %r1   := v0 = cv1
		xld.w		%r2, [%sp+12]				;// %r2   := dst = cv0
		ld.w		%r3, 8					;// %r3   := i = nsrc = ndst = 8 - (8-ndst)
		sub		%r3, %r4
		ld.w		%r4, 8					;// %r4   := (8-ndst) = 8
clip_xy_sub_BOTTOM_LOOP:
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		ld.w		%r5, %r1				;// %r5   := v1 = (v0 + 1)
;//		add		%r5, 20					;// ------------+
		cmp		%r3, 1					;// if(i == 1)  |
		jrne.d		3					;//             |
		 add		%r5, 20					;// <-----------+						*delay*
		 xld.w		%r5, [%sp+16]				;//   %r5 := v1 = cv1
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r1+4]				;// %r12  := v0->y
		xcall.d		__fcmps					;//       if(v0->y <= bottom)
		ld.w		%r13, %r0				;// %r13  :=          bottom					*delay*
		jrgt		clip_xy_sub_BOTTOM_ELSE
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xcall		clip_xy_z_sub_COPY			;// *dst++ = *v0++, (8-ndst)--
		jreq		clip_xy_sub_BOTTOM_BREAK		;// if(!(8-ndst)) { break }
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
		;// %r5   := v1
		xld.w		%r12, [%r5+4]				;// %r12  := v1->y
		xcall.d		__fcmps					;//       if(v1->y <= bottom) { continue } else { go clip }
		ld.w		%r13, %r0				;// %r13  :=          bottom					*delay*
		jrle		clip_xy_sub_BOTTOM_CONTINUE
		jp		clip_xy_sub_BOTTOM_CLIP
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst+1
		;// %r3   := i
		;// %r4   := (8-ndst)-1
clip_xy_sub_BOTTOM_ELSE:
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
		xld.w		%r12, [%r5+4]				;// %r12  := v1->y
		xcall.d		__fcmps					;//       if(v1->y <= bottom) { go clip } else { continue }
		ld.w		%r13, %r0				;// %r13  :=          bottom					*delay*
		jrgt.d		clip_xy_sub_BOTTOM_CONTINUE
		add		%r1, 20					;// %r1   := v0++						*delay*
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst
		;// %r3   := i
		;// %r4   := (8-ndst)
		;// %r5   := v1
clip_xy_sub_BOTTOM_CLIP:
		sub		%r1, 20					;// %r1   := v0
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		xld.w		%r6, [%r1+4]				;// %r6   := v0->y
		xld.w		%r7, [%r5+4]				;// %r7   := v1->y
		xcall		clip_xy_sub_DIV				;// %r6   := n001, %r7  := m001
		;// %r0   := bottom
		;// %r1   := v0
		;// %r2   := dst or dst+1
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		;// %r6   := n001
		;// %r7   := m001
		xcall		clip_xy_z_sub_CLIP			;// dst->x = (v0->x + ((v1->x - v0->x) * n001))
		ld.w		[%r2]+, %r0				;// dst->y = bottom
		add		%r1, 4					;// %r1   := &v0->z
		add		%r5, 4					;// %r5   := &v1->z
		xcall.d		clip_xy_z_sub_CLIP			;// dst->z = (v0->z + ((v1->z - v0->z) * m001))
		ld.w		%r6, %r7				;// %r6   := m001						*delay*
		xcall		clip_xy_z_sub_CLIP			;// dst->s = (v0->s + ((v1->s - v0->s) * m001))
		xcall		clip_xy_z_sub_CLIP			;// dst->t = (v0->t + ((v1->t - v0->t) * m001))
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst) or (8-ndst)-1
		;// %r5   := v1
		sub		%r4, 1					;// %r4   := (8-ndst)--
		jreq		clip_xy_sub_BOTTOM_BREAK		;// if(!(8-ndst)) { break }
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
clip_xy_sub_BOTTOM_CONTINUE:
		;// %r0   := bottom
		;// %r1   := v0+1
		;// %r2   := dst+1 or dst+2
		;// %r3   := i
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
		sub		%r3, 1					;// %r3   := i--
		jrne		clip_xy_sub_BOTTOM_LOOP
		;// %r4   := (8-ndst)-1 or (8-ndst)-2
;//sv		cmp		%r4, 5					;// if(ndst < 3) { return } /* if((8-ndst) > 5) { return } */
;//sv		xjrgt		clip_xy_sub_RET
clip_xy_sub_BOTTOM_BREAK:						;// (break̏(8-ndst)=0Ȃ̂ŁȀ͕svłB)
		;//-----------------------------------------------------;
clip_xy_sub_RET:
		;// %r4   := (8-ndst)
		xadd		%sp, %sp, 20
		popn		%r3
		;//
		ld.w		%r10, 8					;// %r10  := ndst = 8 - (8-ndst)
		ret.d
		sub		%r10, %r4				;//								*delay*
;//
;// [in]
;//		%r0		limit
;//		%r1		v0
;//		%r5		v1
;//		%r6		v0->i
;//		%r7		v1->i
;// [out]
;//		%r6		n001
;//		%r7		m001
;// [mod]
;//		%r9,%r10,%r11,%r12,%r13,%r14,%r15,%alr,%ahr,%psr
;//
clip_xy_sub_DIV:
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := v0->i
		;// %r7  := v1->i
		ld.w		%r12, %r0			;// %r12 :=       limit
		xcall.d		__subsf3			;// %r10 := n0 = (limit - v0->i) -+
		ld.w		%r13, %r6			;// %r13 :=               v0->i   |					*delay*
		ld.w		%r6, %r10			;// %r6  := n0 <------------------+
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := n0
		;// %r7  := v1->i
		ld.w		%r12, %r7			;// %r12 :=       v1->i
		xcall.d		__subsf3			;// %r10 := n1 = (v1->i - limit) -+
		ld.w		%r13, %r0			;// %r13 :=               limit   |					*delay*
		ld.w		%r7, %r10			;// %r7  := n1 <------------------+
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := n0
		;// %r7  := n1
		ld.w		%r12, %r6			;// %r12 :=        n0
		xcall.d		__addsf3			;// %r10 := n01 = (n0 + n1) ---+
		ld.w		%r13, %r10			;// %r13 :=             n1     |					*delay*
		;// %r0  := limit				;//                            |
		;// %r1  := v0					;//                            |
		;// %r5  := v1					;//                            |
		;// %r6  := n0					;//                            |
		;// %r7  := n1					;//                            |
		;// %r10 := n01					;//                            |
		ld.w		%r12, %r6			;// %r12 :=         n0         |
		xcall.d		__divsf3			;// %r10 := n001 = (n0 / n01) -|-+
		ld.w		%r13, %r10			;// %r13 :=              n01 <-+ |					*delay*
		;// %r0  := limit				;//                              |
		;// %r1  := v0					;//                              |
		;// %r5  := v1					;//                              |
		;// %r6  := n0					;//                              |
		;// %r7  := n1					;//                              |
		;// %r10 := n001				;//                              |
		;//---------------------------------------------;//                              |
		ld.w		%r12, %r6			;// %r12 :=       n0             |
		xld.w		%r13, [%r1+8]			;// %r13 :=            v0->z     |
		xcall.d		__mulsf3			;// %r10 := m0 = (n0 * v0->z) ---|-+
		ld.w		%r6, %r10			;// %r6  := n001 <---------------+ |					*delay*
		;// %r0  := limit				;//                                |
		;// %r1  := v0					;//                                |
		;// %r5  := v1					;//                                |
		;// %r6  := n001				;//                                |
		;// %r7  := n1					;//                                |
		;// %r10 := m0					;//                                |
		ld.w		%r12, %r7			;// %r12 :=       n1               |
		xld.w		%r13, [%r5+8]			;// %r13 :=            v1->z       |
		xcall.d		__mulsf3			;// %r10 := m1 = (n1 * v1->z) -----|-+
		ld.w		%r7, %r10			;// %r7  := m0 <-------------------+ |					*delay*
		;// %r0  := limit				;//                                  |
		;// %r1  := v0					;//                                  |
		;// %r5  := v1					;//                                  |
		;// %r6  := n001				;//                                  |
		;// %r7  := m0					;//                                  |
		;// %r10 := m1					;//                                  |
		ld.w		%r12, %r7			;// %r12 :=        m0                |
		xcall.d		__addsf3			;// %r10 := m01 = (m0 + m1) ---------|-+
		ld.w		%r13, %r10			;// %r13 :=             m1 <---------+ |				*delay*
		;// %r0  := limit				;//                                    |
		;// %r1  := v0					;//                                    |
		;// %r5  := v1					;//                                    |
		;// %r6  := n001				;//                                    |
		;// %r7  := m0					;//                                    |
		;// %r10 := m01					;//                                    |
		ld.w		%r12, %r7			;// %r12 :=         m0                 |
		xcall.d		__divsf3			;// %r10 := m001 = (m0 / m01) ---------|-+
		ld.w		%r13, %r10			;// %r13 :=              m01 <---------+ |				*delay*
		ret.d						;//                                      |
		ld.w		%r7, %r10			;// %r7  := m001 <-----------------------+				*delay*
		;// %r0  := limit
		;// %r1  := v0
		;// %r5  := v1
		;// %r6  := n001
		;// %r7  := m001
");
#endif /*NOASM*/

/*---------------------------------------------------------------------------*/

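//{{2008/08/09: Detection of clipping bugs caused by floating-point rounding error. Can be removed once verified by hand.
//
// * Sat Aug 09 22:50:21 JST 2008 Naoyuki Sawa
// - This note explains a concern about a possible clipping bug.
// - Floating-point arithmetic involves rounding error, so a clipped result might end up outside the clipping range.
//   The test code below shows the kind of floating-point error that could affect a clipped result.
//
//	/* Tested with "Borland C++ 5.5.1 for Win32". */
//	int main() {
//		volatile float a;
//		volatile float b;
//		volatile float c;
//		volatile float d;
//
//		a = 0.1f;
//		b = 0.2f;
//		c = a - b;			/*  0.1 - 0.2 -> -0.1 */
//		d = c + b;			/* -0.1 + 0.2 ->  0.1 */
//		if(d == a) printf("d = a\n");	/* this was the result */
//		if(d <  a) printf("d < a\n");
//		if(d >  a) printf("d > a\n");
//
//		a = 0.1f;
//		b = 0.3f;
//		c = a - b;			/*  0.1 - 0.3 -> -0.2 */
//		d = c + b;			/* -0.2 + 0.3 ->  0.1 */
//		if(d == a) printf("d = a\n");
//		if(d <  a) printf("d < a\n");	/* this was the result */
//		if(d >  a) printf("d > a\n");
//
//		a = 0.1f;
//		b = 0.6f;
//		c = a - b;			/*  0.1 - 0.6 -> -0.5 */
//		d = c + b;			/* -0.5 + 0.6 ->  0.1 */
//		if(d == a) printf("d = a\n");
//		if(d <  a) printf("d < a\n");
//		if(d >  a) printf("d > a\n");	/* this was the result */
//	}
//
// - This test code computes "d = (a - b) + b" and compares d with a, which should be equal mathematically.
//   That they are not always equal because of rounding error cannot be helped, but the direction of the error (smaller or larger) cannot be predicted either.
// - Running the same test on P/ECE also gave unpredictable deviations. The results do not necessarily match those of Borland C++.
//   This is not a P/ECE-specific problem; the same uncertainty presumably exists when using floating point on Windows.
// - Here is a guess at a case in which floating-point error could affect the clipped result.
//   For screen clipping, for example, processing is done in the order x clipping, then y clipping.
//   In the x clipping stage, a vertex beyond the x boundary is assigned the x boundary value, and its y coordinate is interpolated.
//   At this point every x coordinate is within the x boundaries. (It is an assignment, so no error is possible.)
//   In the y clipping stage, a vertex beyond the y boundary is assigned the y boundary value, and its x coordinate is interpolated.
//   The concern is that the error of this interpolation of the x coordinate could push it beyond the x boundary again.
// - No coordinate values that actually trigger this problem have been found.
//   A precise analysis of the error bounds might well show that no such coordinate values exist.
//   However, if such coordinate values did exist, it would be a serious problem.
// - If a texture coordinate or z value goes out of range, the drawing is merely corrupted, which is not a big problem.
//   But if an x or y coordinate goes out of range, memory outside vbuff[] is destroyed, which may cause a bug whose cause is hard to identify.
// - Therefore a check was added so that, at the very least, memory outside vbuff[] is never destroyed.
//   The check costs about 1% in performance, which is an acceptable trade-off for safety.
//   Once the concern has been cleared by manual verification, the check may be removed.
// - No detection is done for texture coordinates or z values, nor for x,y values exceeding a clipping range (set inside the screen).
//   That would require floating-point comparisons, and the performance loss would be too large.
//   In those cases the drawing is merely corrupted, which is considered acceptable.
//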
#ifdef NOASM
/*
 * Reference (NOASM) version of the clip-result sanity check.
 *
 * Examines nv consecutive vertices starting at rv and calls DIE() for the
 * first one whose x lies outside 0..w or whose y lies outside 0..h.
 * Casting to unsigned folds the "negative" and "too large" tests into a
 * single comparison.
 *
 * nv must be at least 1 (do/while form, matching the assembly version).
 */
static void detect_error(const RENDVERTEX* rv, int nv, int w, int h) {
	do {
		if(((unsigned)rv->x > (unsigned)w) ||
		   ((unsigned)rv->y > (unsigned)h)) {
			DIE();
		}
		rv++;	/* step to the next vertex, as the assembly version does */
	} while(--nv);
}
#else /*NOASM*/
/*
 * detect_error -- assembly version of the clip-result sanity check.
 *
 * Register usage, as annotated in the code below:
 *   %r12 = rv (vertex pointer), %r13 = nv (remaining count),
 *   %r14 = w, %r15 = h.
 * Each iteration loads rv->x and rv->y as unsigned halfwords, branches to
 * detect_error_DIE when either exceeds its limit (unsigned compare), then
 * decrements nv and loops while it is non-zero.  nv must be >= 1.
 *
 * NOTE(review): the pointer advances by the post-increment of the first load
 * plus the "add %r12, 8" in the delay slot; that total is presumably
 * sizeof(RENDVERTEX) -- confirm against the structure definition.
 */
asm("
		.code
		.align		1
detect_error:
		ld.uh		%r4, [%r12]+			;// %r4  := rv->x
		ld.uh		%r5, [%r12]			;// %r5  := rv->y
		cmp		%r4, %r14			;// if((unsigned)rv->x > (unsigned)w) DIE()
		jrugt		detect_error_DIE
		cmp		%r5, %r15			;// if((unsigned)rv->y > (unsigned)h) DIE()
		jrugt		detect_error_DIE
		sub		%r13, 1				;// %r13 := nv--
		jrne.d		detect_error
		add		%r12, 8				;// %r12 := rv++							*delay*
		ret
");
//static void detect_error_DIE() __attribute__((unused));
//static void detect_error_DIE() { DIE(); }
//* Sat Aug 09 19:36:27 JST 2008 Naoyuki Sawa
//- I used to think that giving a function an attribute required a separate declaration and definition, as above,
//  but writing the attribute in the position shown below apparently allows definition and attribute in one statement.
/* Branch target of the assembly above; never returns because DIE() is invoked. */
static void __attribute__((noreturn,unused)) detect_error_DIE() { DIE(); }
#endif /*NOASM*/
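//}}2008/08/09: Detection of clipping bugs caused by floating-point rounding error. May be removed once correct operation has been confirmed by hand.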

/*---------------------------------------------------------------------------*/

/* * Sat Apr 11 05:29:02 JST 2009 Naoyuki Sawa
 * - Part of the work formerly done inside rendf_mesh() was split out into select_draw_scanline().
 *   For the details of this change, see the comments in the clipren3.c module.
 */
static DSLPROC
select_draw_scanline(RENDF* r, DSLPARAM* param)
{
	/*
	 * Fills the surface/texture fields of *param from r and returns the
	 * scanline routine that matches the current configuration.
	 *
	 * Table layout: entries 0-4 are the variants without a z-buffer,
	 * entries 5-9 the same variants with a z-buffer.  Within each group:
	 * +0 flat colour, +1 / +2 texture with header.bpp of 1 / 2,
	 * and a further +2 when the texture has a mask.
	 */
	static const DSLPROC routines[] = {
		/* without z-buffer */
		draw_scanline_woz_color,
		draw_scanline_woz_1bit,
		draw_scanline_woz_2bit,
		draw_scanline_woz_1bitmask,
		draw_scanline_woz_2bitmask,
		/* with z-buffer */
		draw_scanline_wz_color,
		draw_scanline_wz_1bit,
		draw_scanline_wz_2bit,
		draw_scanline_wz_1bitmask,
		draw_scanline_wz_2bitmask,
	};
	TEXTURE* tex = r->texture;
	SURFACE* dst = r->surface;
	int slot;

	/* Destination surface: always present. */
	param->surface_w = dst->w;
	param->surface_h = dst->h;
	param->surface_vbuff = dst->vbuff;

	/* Z-buffer is optional; it selects the second half of the table. */
	if(dst->zbuff) {
		param->surface_zbuff = dst->zbuff;
		slot = 5;
	} else {
		slot = 0;
	}

	/* No texture: flat-colour routine of the selected half. */
	if(!texture_present_guard(tex)) {
		return routines[slot];
	}

	param->texture_w = tex->header.w;
	param->texture_h = tex->header.h;
	param->texture_buf = tex->buf;
	slot += tex->header.bpp;	/* 1 or 2 */

	if(tex->header.mask) {
		param->texture_mask = tex->mask;
		slot += 2;
	}

	return routines[slot];
}

/*****************************************************************************
 *	
 *****************************************************************************/

#ifdef NOASM
/*
 * Reference (NOASM) wireframe renderer.
 *
 * Transforms every vertex of `wire` with r->matrix into _xformed_vectors,
 * then for each segment: clips both end points against the r->front and
 * r->back z planes, divides x and y by z, and draws the result through
 * render_line() in `color`.  Segments lying entirely beyond one z plane
 * are skipped.  wire->ns must be at least 1 (do/while loop).
 */
void rendf_wire(RENDF* r, const WIRE* wire, int color) {
	/* The actual drawing is delegated to render_line(), which needs a RENDER.
	 * Only the leading members that render_line() reads are provided, through
	 * a stand-in with the same layout as the head of RENDER; a complete RENDER
	 * is close to 1KB and could overflow the stack.
	 */
	struct {
		SURFACE* surface;	/* + 0,4 */
		short left;		/* + 4,2 */
		short top;		/* + 6,2 */
		short right;		/* + 8,2 */
		short bottom;		/* +10,2 */
	} fake_render;
	const vec3f* src_vertices = (const vec3f*)(wire + 1);	/* vertex array follows the WIRE header */
	int vertex_count = wire->nv;
	int remaining = wire->ns;
	const WSEG* seg;
	vec3f a;
	vec3f b;

	fake_render.surface = r->surface;
	fake_render.left = r->left;
	fake_render.top = r->top;
	fake_render.right = r->right;
	fake_render.bottom = r->bottom;

	/* xform_vectors() hands back the address of the segment list. */
	seg = xform_vectors(&r->matrix, src_vertices, _xformed_vectors, vertex_count);

/* Clip end point P against the plane z == PLANE, where "z INSIDE PLANE" means
 * visible.  If Q is outside as well the whole segment is dropped (the
 * `continue` jumps to the loop condition); otherwise P is moved along P->Q
 * onto the plane.
 */
#define Z_CLIP(P, Q, INSIDE, PLANE)					\
		if(!(P.z INSIDE PLANE)) {				\
			float dx, dy, dz, dl;				\
			if(!(Q.z INSIDE PLANE)) {			\
				continue;				\
			}						\
			dx = Q.x - P.x;					\
			dy = Q.y - P.y;					\
			dz = Q.z - P.z;					\
			dl = PLANE - P.z;				\
			P.x = P.x + ((dx * dl) / dz);			\
			P.y = P.y + ((dy * dl) / dz);			\
			P.z = PLANE;					\
		}

	do {
		a = _xformed_vectors[seg->iv0];
		b = _xformed_vectors[seg->iv1];
		seg++;

		Z_CLIP(a, b, >=, r->front);
		Z_CLIP(a, b, <=, r->back );
		Z_CLIP(b, a, >=, r->front);
		Z_CLIP(b, a, <=, r->back );

		/* Perspective divide. */
		a.x = a.x / a.z;
		a.y = a.y / a.z;
		b.x = b.x / b.z;
		b.y = b.y / b.z;

		render_line((RENDER*)&fake_render, a.x, a.y, b.x, b.y, color);

	} while(--remaining);

#undef Z_CLIP
}
#else /*NOASM*/
/*
 * rendf_wire -- assembly version of the wireframe renderer; the NOASM C
 * version above is the readable reference for the algorithm.
 *
 * Entry (per the annotations below): %r12 = r, %r13 = wire, %r14 = color.
 * pushn/popn %r3 bracket the body, presumably preserving %r0-%r3, which hold
 * the loop state (%r0 = n_segment, %r1 = segment) -- confirm against the ABI.
 *
 * 28-byte stack frame as seen from the main body:
 *   [%sp+0]      5th argument of render_line() (v1.y)
 *   [%sp+4]      6th argument of render_line() (color)
 *   [%sp+8..18]  stand-in for the head of RENDER: surface, left, top, right, bottom
 *   [%sp+20]     front
 *   [%sp+24]     back
 * Inside rendf_wire_CMPZ the return address sits at [%sp+0], so every offset
 * listed above is 4 larger there.
 *
 * Sub-labels:
 *   rendf_wire_CMPZ           clips vertex A (%r2-%r4) against front and back
 *                             using vertex B (%r5-%r7); called twice with the
 *                             two vertices swapped in between.
 *   rendf_wire_CMPZ_CONTINUE  both vertices are beyond the same plane: drops
 *                             the return address and jumps to rendf_wire_CONTINUE.
 *   rendf_wire_CLIP           moves A onto z == LIM (%r12) by interpolating
 *                             x and y along A->B.
 *
 * NOTE(review): left/right/top/bottom and the projected coordinates are
 * converted with __fixunssfsi (float -> unsigned), whereas the C version
 * relies on the implicit float -> signed conversion; confirm that negative
 * values behave as intended.
 */
asm("
		.code
		.align		1
		.global		rendf_wire
rendf_wire:
		pushn		%r3
		xsub		%sp, %sp, 28
		;// %r12 := r
		;// %r13 := wire
		;// %r14 := color
		xld.w		[%sp+4], %r14			;// [%sp+4]  := color (render_line()ւ̑6)
		;//
		ld.h		%r15, [%r13]+			;// %r15     := n_vectors = wire->nv
		ld.h		%r0, [%r13]+			;// %r0      := n_segment = wire->ns, %r13 := vectors
		xld.w		%r14, _def_vbuff		;// %r14     := _xformed_vectors    , %r10 := segment
		xcall.d		xform_vectors			;// %r10     := segment
		ld.w		%r2, %r12			;// %r2      := r					*delay*
		ld.w		%r1, %r10			;// %r1      := segment
		;// %r0      := n_segment
		;// %r1      := segment
		;// %r2      := r
		;// [%sp+4]  := color
		add		%r2, 52				;// %r2      :=     &r->surface
		ld.w		%r12, [%r2]+			;// %r12     :=      r->surface
		xld.w		[%sp+8], %r12			;// [%sp+8]  := render->surface
		;//
		ld.w		%r12, [%r2]+			;// %r12     :=      r->front
		xld.w		[%sp+20], %r12			;// [%sp+20] :=         front
		;//
		ld.w		%r12, [%r2]+			;// %r12     :=      r->back
		xld.w		[%sp+24], %r12			;// [%sp+24] :=         back
		;// %r0      := n_segment
		;// %r1      := segment
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+20] := front
		;// [%sp+24] := back
		ld.w		%r12, [%r2]+			;// %r12     :=      r->left
		xcall		__fixunssfsi			;// %r10     :=    (int)left
		xld.h		[%sp+12], %r10			;// [%sp+12] := render->left
		;//
		ld.w		%r12, [%r2]+			;// %r12     :=      r->right
		xcall		__fixunssfsi			;// %r10     :=    (int)right
		xld.h		[%sp+16], %r10			;// [%sp+16] := render->right
		;//
		ld.w		%r12, [%r2]+			;// %r12     :=      r->top
		xcall		__fixunssfsi			;// %r10     :=    (int)top
		xld.h		[%sp+14], %r10			;// [%sp+14] := render->top
		;//
		ld.w		%r12, [%r2]			;// %r12     :=      r->bottom
		xcall		__fixunssfsi			;// %r10     :=    (int)bottom
		xld.h		[%sp+18], %r10			;// [%sp+18] := render->bottom
		;// %r0      := n_segment
		;// %r1      := segment
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+12] := render->left
		;// [%sp+14] := render->top
		;// [%sp+16] := render->right
		;// [%sp+18] := render->bottom
		;// [%sp+20] := front
		;// [%sp+24] := back
		;//---------------------------------------------;
rendf_wire_LOOP:
		ld.w		%r12, 12			;// %r12 := sizeof(vec3f)
		xld.w		%r13, _def_vbuff		;// %r13 := _xformed_vectors
		;//
		ld.h		%r14, [%r1]+			;// %r14 := iv0 = segment->iv0
		mlt.h		%r14, %r12			;// %alr := iv0 * sizeof(vec3f)
		ld.w		%r14, %alr			;// %r14 := iv0 * sizeof(vec3f)
		add		%r14, %r13			;// %r14 := v0 = &_xformed_vectors[iv0]
		ld.w		%r2, [%r14]+			;// %r2  := v0.x
		ld.w		%r3, [%r14]+			;// %r3  := v0.y
		ld.w		%r4, [%r14]			;// %r4  := v0.z
		;//
		ld.h		%r14, [%r1]+			;// %r14 := iv1 = segment->iv1
		mlt.h		%r14, %r12			;// %alr := iv1 * sizeof(vec3f)
		ld.w		%r14, %alr			;// %r14 := iv1 * sizeof(vec3f)
		add		%r14, %r13			;// %r14 := v1 = &_xformed_vectors[iv1]
		ld.w		%r5, [%r14]+			;// %r5  := v1.x
		ld.w		%r6, [%r14]+			;// %r6  := v1.y
		ld.w		%r7, [%r14]			;// %r7  := v1.z
		;// %r0      := n_segment
		;// %r1      := segment
		;// %r2      := v0.x
		;// %r3      := v0.y
		;// %r4      := v0.z
		;// %r5      := v1.x
		;// %r6      := v1.y
		;// %r7      := v1.z
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+12] := render->left
		;// [%sp+14] := render->top
		;// [%sp+16] := render->right
		;// [%sp+18] := render->bottom
		;// [%sp+20] := front
		;// [%sp+24] := back
		;//---------------------------------------------;
		call		rendf_wire_CMPZ
		ld.w		%r12, %r2			;// %r12 := v0.x
		ld.w		%r13, %r3			;// %r13 := v0.y
		ld.w		%r14, %r4			;// %r14 := v0.z
		ld.w		%r2, %r5			;// %r2  := v1.x
		ld.w		%r3, %r6			;// %r3  := v1.y
		ld.w		%r4, %r7			;// %r4  := v1.z
		ld.w		%r5, %r12			;// %r5  := v0.x
		ld.w		%r6, %r13			;// %r6  := v0.y
		call.d		rendf_wire_CMPZ
		ld.w		%r7, %r14			;// %r7  := v0.z					*delay*
		;// %r0      := n_segment
		;// %r1      := segment
		;// %r2      := v1.x
		;// %r3      := v1.y
		;// %r4      := v1.z
		;// %r5      := v0.x
		;// %r6      := v0.y
		;// %r7      := v0.z
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+12] := render->left
		;// [%sp+14] := render->top
		;// [%sp+16] := render->right
		;// [%sp+18] := render->bottom
		;// [%sp+20] := front
		;// [%sp+24] := back
		;//---------------------------------------------;
		ld.w		%r12, %r5			;// %r12    :=      v0.x
		xcall.d		__divsf3			;// %r10    :=      v0.x /= v0.z -+
		ld.w		%r13, %r7			;// %r13    :=              v0.z  |			*delay*
		xcall.d		__fixunssfsi			;// %r10    := (int)v0.x --+      |
		ld.w		%r12, %r10			;// %r12    :=      v0.x <-|------+			*delay*
		ld.w		%r5, %r10			;// %r5     := (int)v0.x <-+
		;//
		ld.w		%r12, %r6			;// %r12    :=      v0.y
		xcall.d		__divsf3			;// %r10    :=      v0.y /= v0.z -+
		ld.w		%r13, %r7			;// %r13    :=              v0.z  |			*delay*
		xcall.d		__fixunssfsi			;// %r10    := (int)v0.y --+      |
		ld.w		%r12, %r10			;// %r12    :=      v0.y <-|------+			*delay*
		ld.w		%r6, %r10			;// %r6     := (int)v0.y <-+
		;//
		ld.w		%r12, %r2			;// %r12    :=      v1.x
		xcall.d		__divsf3			;// %r10    :=      v1.x /= v1.z -+
		ld.w		%r13, %r4			;// %r13    :=              v1.z  |			*delay*
		xcall.d		__fixunssfsi			;// %r10    := (int)v1.x --+      |
		ld.w		%r12, %r10			;// %r12    :=      v1.x <-|------+			*delay*
		ld.w		%r2, %r10			;// %r2     := (int)v1.x <-+
		;//
		ld.w		%r12, %r3			;// %r12    :=      v1.y
		xcall.d		__divsf3			;// %r10    :=      v1.y /= v1.z -+
		ld.w		%r13, %r4			;// %r13    :=              v1.z  |			*delay*
		xcall.d		__fixunssfsi			;// %r10    := (int)v1.y --+      |
		ld.w		%r12, %r10			;// %r12    :=      v1.y <-|------+			*delay*
		xld.w		[%sp+0], %r10			;// [%sp+0] := (int)v1.y <-+ (render_line()ւ̑5)
		;// %r0      := n_segment
		;// %r1      := segment
		;// %r2      := v1.x
		;// %r5      := v0.x
		;// %r6      := v0.y
		;// [%sp+0]  := v1.y
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+12] := render->left
		;// [%sp+14] := render->top
		;// [%sp+16] := render->right
		;// [%sp+18] := render->bottom
		;// [%sp+20] := front
		;// [%sp+24] := back
		xadd		%r12, %sp, 8			;// %r12    := render
		ld.w		%r13, %r5			;// %r13    := v0.x
		ld.w		%r14, %r6			;// %r14    := v0.y
		xcall.d		render_line			;// render_line(render, v0.x, v0.y, v1.x, v1.y, color)
		ld.w		%r15, %r2			;// %r15    := v1.x					*delay*
		;// %r0      := n_segment
		;// %r1      := segment
		;// [%sp+4]  := color
		;// [%sp+8]  := render->surface
		;// [%sp+12] := render->left
		;// [%sp+14] := render->top
		;// [%sp+16] := render->right
		;// [%sp+18] := render->bottom
		;// [%sp+20] := front
		;// [%sp+24] := back
		;//---------------------------------------------;
rendf_wire_CONTINUE:
		sub		%r0, 1				;// %r0  := n_segment--
		jrne		rendf_wire_LOOP
		;//
		xadd		%sp, %sp, 28
		popn		%r3
		ret
		;//-------------------------------------------------------------------------------------------------------------;
rendf_wire_CMPZ:
		;// %r0      := n_segment
		;// %r1      := segment
		;// %r2      := vA.x
		;// %r3      := vA.y
		;// %r4      := vA.z
		;// %r5      := vB.x
		;// %r6      := vB.y
		;// %r7      := vB.z
		;// [%sp+0]  := retp
		;// [%sp+8]  := color
		;// [%sp+12]  := render->surface
		;// [%sp+16] := render->left
		;// [%sp+18] := render->top
		;// [%sp+20] := render->right
		;// [%sp+22] := render->bottom
		;// [%sp+24] := front
		;// [%sp+28] := back
		;//---------------------------------------------;
		xld.w		%r13, [%sp+24]			;// %r13 :=         front
		xcall.d		__fcmps				;// %psr := (vA.z - front)
		ld.w		%r12, %r4			;// %r12 :=  vA.z					*delay*
		jrge		rendf_wire_CMPZ_ELSE1		;// if(!(vA.z >= front)) {
		;//
		xld.w		%r13, [%sp+24]			;//   %r13 :=         front
		xcall.d		__fcmps				;//   %psr := (vB.z - front)
		ld.w		%r12, %r7			;//   %r12 :=  vB.z					*delay*
		jrlt		rendf_wire_CMPZ_CONTINUE	;//   if(!(vB.z >= front)) { continue }
		;//
		xld.w		%r12, [%sp+24]			;//   %r12 :=         front
		call		rendf_wire_CLIP			;//   %r2  := vA.x, %r3  := vA.y
		xld.w		%r4, [%sp+24]			;//   %r4  := vA.z  = front
rendf_wire_CMPZ_ELSE1:						;// }
		;//---------------------------------------------;
		xld.w		%r13, [%sp+28]			;// %r13 :=         back
		xcall.d		__fcmps				;// %psr := (vA.z - back)
		ld.w		%r12, %r4			;// %r12 :=  vA.z					*delay*
		jrle		rendf_wire_CMPZ_ELSE2		;// if(!(vA.z <= back)) {
		;//
		xld.w		%r13, [%sp+28]			;//   %r13 :=         back
		xcall.d		__fcmps				;//   %psr := (vB.z - back)
		ld.w		%r12, %r7			;//   %r12 :=  vB.z					*delay*
		jrgt		rendf_wire_CMPZ_CONTINUE	;//   if(!(vB.z <= back)) { continue }
		;//
		xld.w		%r12, [%sp+28]			;//   %r12 :=         back
		call		rendf_wire_CLIP			;//   %r2  := vA.x, %r3  := vA.y
		xld.w		%r4, [%sp+28]			;//   %r4  := vA.z  = back
rendf_wire_CMPZ_ELSE2:						;// }
		;//---------------------------------------------;
		ret
rendf_wire_CMPZ_CONTINUE:
		jp.d		rendf_wire_CONTINUE		;// retp̂ĂāArendf_wire_CONTINUE֖߂܂B
		add		%sp, 1				;//							*delay*
		;//-------------------------------------------------------------------------------------------------------------;
rendf_wire_CLIP:
		;// %r2  := vA.x
		;// %r3  := vA.y
		;// %r4  := vA.z
		;// %r5  := vB.x
		;// %r6  := vB.y
		;// %r7  := vB.z
		;// %r12 := LIM
		xcall.d		__subsf3			;// %r10 := zAL = (LIM - vA.z) --+
		ld.w		%r13, %r4			;// %r13 :=              vA.z    |			*delay*
		;//						;//                              |
		ld.w		%r12, %r7			;// %r12 :=        vB.z          |
		ld.w		%r13, %r4			;// %r13 :=               vA.z   |
		xcall.d		__subsf3			;// %r10 := zAB = (vB.z - vA.z) -|-+
		ld.w		%r4, %r10			;// %r4  := zAL <----------------+ |			*delay*
		;//						;//                                |
		ld.w		%r12, %r4			;// %r12 :=  zAL                   |
		xcall.d		__divsf3			;// %r10 := (zAL / zAB) --+        |
		ld.w		%r13, %r10			;// %r13 :=        zAB  <-|--------+			*delay*
		ld.w		%r4, %r10			;// %r4  := (zAL / zAB) <-+
		;// %r2  := vA.x
		;// %r3  := vA.y
		;// %r4  := (zAL / zAB)
		;// %r5  := vB.x
		;// %r6  := vB.y
		;// %r7  := vB.z
		;//---------------------------------------------;
		ld.w		%r12, %r5			;// %r12 :=        vB.x
		xcall.d		__subsf3			;// %r10 := xAB = (vB.x - vA.x) -+
		ld.w		%r13, %r2			;// %r13 :=               vA.x   |			*delay*
		;//						;//                              |
		ld.w		%r12, %r10			;// %r12 :=  xAB <---------------+
		xcall.d		__mulsf3			;// %r10 := (xAB * (zAL / zAB)) ----------+
		ld.w		%r13, %r4			;// %r13 :=        (zAL / zAB)            |		*delay*
		;//						;//                                       |
		ld.w		%r12, %r2			;// %r12 := vA.x                          |		*delay*
		xcall.d		__addsf3			;// %r10 := vA.x += (xAB * (zAL / zAB)) --|-+
		ld.w		%r13, %r10			;// %r13 :=         (xAB * (zAL / zAB)) <-+ |		*delay*
		ld.w		%r2, %r10			;// %r2  := vA.x += (xAB * (zAL / zAB)) <---+
		;// %r2  := vA.x += (xAB * (zAL / zAB))
		;// %r3  := vA.y
		;// %r4  := (zAL / zAB)
		;// %r5  := vB.x
		;// %r6  := vB.y
		;// %r7  := vB.z
		;//---------------------------------------------;
		ld.w		%r12, %r6			;// %r12 :=        vB.y
		xcall.d		__subsf3			;// %r10 := yAB = (vB.y - vA.y) -+
		ld.w		%r13, %r3			;// %r13 :=               vA.y   |			*delay*
		;//						;//                              |
		ld.w		%r12, %r10			;// %r12 :=  yAB <---------------+
		xcall.d		__mulsf3			;// %r10 := (yAB * (zAL / zAB)) ----------+
		ld.w		%r13, %r4			;// %r13 :=        (zAL / zAB)            |		*delay*
		;//						;//                                       |
		ld.w		%r12, %r3			;// %r12 := vA.y                          |		*delay*
		xcall.d		__addsf3			;// %r10 := vA.y += (yAB * (zAL / zAB)) --|-+
		ld.w		%r13, %r10			;// %r13 :=         (yAB * (zAL / zAB)) <-+ |
		ret.d						;//                                         |
		ld.w		%r3, %r10			;// %r3  := vA.y += (yAB * (zAL / zAB)) <---+		*delay*
		;// %r2  := vA.x += (xAB * (zAL / zAB))
		;// %r3  := vA.y += (yAB * (zAL / zAB))
		;// %r4  := (zAL / zAB)
		;// %r5  := vB.x
		;// %r6  := vB.y
		;// %r7  := vB.z
");
#endif /*NOASM*/

/*****************************************************************************
 *	
 *****************************************************************************/

#ifdef NOASM
/*
 * Draws a single 3D line from *v0 to *v1 in `color`.
 *
 * Builds a temporary two-vertex, one-segment wire on the stack, laid out
 * the way rendf_wire() reads it (WIRE header, then the vertices, then the
 * segments), and lets rendf_wire() do the transform, clipping and drawing.
 */
void
rendf_line(RENDF* r, const vec3f* v0, const vec3f* v1, int color)
{
	struct {
		WIRE w;		/* header: vertex and segment counts */
		vec3f v[2];	/* the two end points */
		WSEG s[1];	/* the one segment joining them */
	} one_segment;

	/* End points, copied so the caller's vectors are left untouched. */
	one_segment.v[0] = *v0;
	one_segment.v[1] = *v1;

	/* Single segment: vertex 0 -> vertex 1. */
	one_segment.s[0].iv0 = 0;
	one_segment.s[0].iv1 = 1;

	one_segment.w.nv = 2;
	one_segment.w.ns = 1;

	rendf_wire(r, &one_segment.w, color);
}
#else /*NOASM*/
/*
 * rendf_line -- assembly version; the NOASM C version above is the
 * readable reference.
 *
 * Entry (as used below): %r12 = r (passed through untouched), %r13 = v0,
 * %r14 = v1, %r15 = color.
 *
 * Builds a 32-byte temporary wire on the stack:
 *   [%sp+0]       nv = 2 and ns = 1, stored as the single word 0x10002
 *   [%sp+4..12]   v[0] = *v0
 *   [%sp+16..24]  v[1] = *v1
 *   [%sp+28]      s[0]: iv0 = 0 and iv1 = 1, obtained as 0x10002 - 2
 * then calls rendf_wire(r, &wire, color) and releases the frame.
 */
asm("
		.code
		.align		1
		.global		rendf_line
rendf_line:
		xsub		%sp, %sp, 32
		;//
		xld.w		%r4, 0x10002			;// %r4[15:0] := 2, %r4[31:16] := 1 -------+
		xld.w		[%sp+0], %r4			;// wire.w.nv  = 2, wire.w.ns   = 1        |
		;//						;//                                        |
		ld.w		%r5, [%r13]+			;// %r5  := v0->x                          |
		ld.w		%r6, [%r13]+			;// %r6  := v0->y                          |
		ld.w		%r7, [%r13]			;// %r7  := v0->z                          |
		xld.w		[%sp+4], %r5			;// wire.v[0].x = v0->x                    |
		xld.w		[%sp+8], %r6			;// wire.v[0].y = v0->y                    |
		xld.w		[%sp+12], %r7			;// wire.v[0].z = v0->z                    |
		;//						;//                                        |
		ld.w		%r5, [%r14]+			;// %r5  := v1->x                          |
		ld.w		%r6, [%r14]+			;// %r6  := v1->y                          |
		ld.w		%r7, [%r14]			;// %r7  := v1->z                          |
		xld.w		[%sp+16], %r5			;// wire.v[1].x = v1->x                    |
		xld.w		[%sp+20], %r6			;// wire.v[1].y = v1->y                    |
		xld.w		[%sp+24], %r7			;// wire.v[1].z = v1->z                    |
		;//						;//                                        |
		sub		%r4, 2				;// %r4[15:0]    := 0, %r4[31:16]   := 1 <-+
		xld.w		[%sp+28], %r4			;// wire.s[0].iv0 = 0, wire.s[0].iv1 = 1
		;//
		ld.w		%r13, %sp			;// %r13 :=       &wire.w
		xcall.d		rendf_wire			;// rendf_wire(r, &wire.w, color)
		ld.w		%r14, %r15			;// %r14 :=                color			*delay*
		;//
		xadd		%sp, %sp, 32
		ret
");
#endif /*NOASM*/

/*****************************************************************************
 *	View-frustum culling
 *****************************************************************************/

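/* * Sun Nov 09 12:24:31 JST 2008 Naoyuki Sawa
 * - TODO: culling with a bounding box also looks easy to implement.
 * - The procedure would be roughly as follows:
 *	1. Extract the (model-view transform) and transform the 8 corner vertices of the bounding box.
 *	2. For each of the 6 clipping planes, perform the following test:
 *	2-1. If all 8 corner vertices lie outside the clipping plane, the bounding box is completely outside the view frustum.
 *	3. If condition 2-1 holds for none of the 6 clipping planes, at least part of the bounding box is inside the view frustum.
 * - Bounding-box culling would also take the scaling of the transform matrix into account, which should make it easier to use.
 *   However, bounding-sphere culling is faster, and unless the model has many polygons the benefit of culling may be small.
 * - I intend to try it later and, while measuring performance, implement bounding-box culling as well.
 */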

/*---------------------------------------------------------------------------*/

/*
 * View-frustum culling test for a bounding sphere.
 *
 * Returns 0 when the sphere of radius `bounding_sphere` is entirely beyond
 * one of the six clip planes (front, back and the four sides), 1 otherwise.
 */
int rendf_vfc_sphere(RENDF* r, float bounding_sphere) {
	/* The sphere centre is the translation column of r->matrix, transformed by
	 * r->vfc_matrix.  (Presumably r->matrix is the full viewport x projection x
	 * model-view product and vfc_matrix undoes the viewport x projection part,
	 * leaving the centre in eye space -- confirm against rendf_init().)
	 */
	vec3f center = { r->matrix.a03, r->matrix.a13, r->matrix.a23 };
	/* 2D working vectors; the first component of each is fixed here, the
	 * second is filled in per axis below.
	 */
	vec2f edge = { r->front };
	vec2f pos;
	float reach;

	vec3f_xform(&r->vfc_matrix, &center);
	{
		vec2f depth_first = { center.z };
		pos = depth_first;
	}

	/* Front / back planes: plain depth comparison, widened by the radius. */
	if(center.z < (r->front - bounding_sphere)) {
		return 0;
	}
	if(center.z > (r->back  + bounding_sphere)) {
		return 0;
	}

	/* Side planes.  This is a variant of a point-to-line distance through the
	 * origin: the cross product is kept signed (no absolute value) so that the
	 * sign tells which side of the plane the centre is on, and it is compared
	 * against |edge| * radius instead of dividing by |edge|.
	 */

	/* Horizontal pair (uses half-width and centre x). */
	edge.y = r->vfc_width_2;
	pos.y = center.x;
	reach = vec2f_mag(&edge) * bounding_sphere;
	if(vec2f_cross(&edge, &pos) > reach) {
		return 0;
	}
	edge.y = -edge.y;	/* mirror to the opposite plane */
	if(vec2f_cross(&edge, &pos) < -reach) {
		return 0;
	}

	/* Vertical pair (uses half-height and centre y). */
	edge.y = r->vfc_height_2;
	pos.y = center.y;
	reach = vec2f_mag(&edge) * bounding_sphere;
	if(vec2f_cross(&edge, &pos) > reach) {
		return 0;
	}
	edge.y = -edge.y;	/* mirror to the opposite plane */
	if(vec2f_cross(&edge, &pos) < -reach) {
		return 0;
	}

	return 1;
}

/*---------------------------------------------------------------------------*/

/*
 * Returns the radius of the smallest origin-centred sphere containing all
 * nv vectors at v, i.e. the largest vec3f_mag() among them.
 *
 * Returns 0.0f when nv <= 0, so an empty vertex list is handled without
 * reading v at all.
 */
static float vec3f_bounding_sphere(const vec3f* v, int nv) {
	float r_max = 0.0f;
	for(; nv > 0; nv--, v++) {
		float r = vec3f_mag(v);
		if(r > r_max) {
			r_max = r;
		}
	}
	return r_max;
}

/* Bounding-sphere radius (about the model origin) of a MESH; the vertex
 * array is read from directly behind the MESH header.
 */
float meshf_bounding_sphere(const MESH* mesh) {
	const vec3f* vertices = (const vec3f*)(mesh + 1);
	int count = mesh->nv;

	return vec3f_bounding_sphere(vertices, count);
}

/* Bounding-sphere radius (about the model origin) of a WIRE; the vertex
 * array is read from directly behind the WIRE header.
 */
float wiref_bounding_sphere(const WIRE* wire) {
	const vec3f* vertices = (const vec3f*)(wire + 1);
	int count = wire->nv;

	return vec3f_bounding_sphere(vertices, count);
}

/*---------------------------------------------------------------------------*/
