/* gte.c */
/*
     1999 by BERO

     modified by LDChen

     document from:

     http://psx.rules.org
     doomed@c64.org

     not tested on big-endian hosts (MSB_FIRST builds)
*/

#include "fpse.h"

/* 3x3 matrix of 16-bit elements stored in ten INT16 slots.  Elements are
   addressed through the _11.._33 index macros; slot 9 is only used (as _33)
   when MSB_FIRST is defined. */
typedef INT16 MATRIX[10];

/* Vector of three 32-bit components (used for the tr, bk and fc fields
   of REGS). */
typedef struct {
    INT32 x,y,z;
} LVECTOR;

/* A signed 32-bit word that can also be viewed as two signed 16-bit
   halves.  The member order is swapped under MSB_FIRST so that "Low"
   always names the least-significant half of LongW. */
typedef union {
    INT32   LongW;
    struct {
#ifndef MSB_FIRST
        INT16   Low,High;
#else
        INT16   High,Low;
#endif
    } ShortW;
} INT16W32;

/* Unsigned counterpart of INT16W32: one 32-bit word overlaid with two
   unsigned 16-bit halves, "Low" being the least-significant half. */
typedef union {
    UINT32  LongW;
    struct {
#ifndef MSB_FIRST
        UINT16  Low,High;
#else
        UINT16  High,Low;
#endif
    } ShortW;
} UINT16W32;

/* Packed register layouts.  Each type has two declarations with the member
   order reversed inside every 32-bit word: one for builds without
   MSB_FIRST and one for builds with it. */
#ifndef MSB_FIRST

/* Three 16-bit vector components plus one padding slot (two 32-bit words). */
typedef struct {
    INT16 x,y,z,pad;
} SVECTOR;

/* 16-bit x/y pair packed into one 32-bit word. */
typedef struct {
    INT16   x,y;
} VECTOR2D;

/* Colour bytes r,g,b plus a "code" byte, packed into one 32-bit word. */
typedef struct {
    UINT8 r,g,b,code;
} RGBcd;

#else

/* Same as above with the halves of each 32-bit word swapped. */
typedef struct {
    INT16 y,x,pad,z;
} SVECTOR;

typedef struct {
    INT16   y,x;
} VECTOR2D;

typedef struct {
    UINT8 code,b,g,r;
} RGBcd;

#endif /* MSB_FIRST */

/* In-memory image of the coprocessor registers that every operation in
   this file works on through a `REGS *r` pointer.  The data registers come
   first, followed by the control registers.
   NOTE(review): cop2()/cop2read()/cop2write() receive a UINT32 pointer and
   hand it to functions declared with REGS*, so the field order here
   presumably has to match the caller's register array -- confirm before
   reordering or inserting fields. */
typedef struct {
/* DATA_REGS */
    SVECTOR     v0;     /* input vectors */
    SVECTOR     v1;
    SVECTOR     v2;
    RGBcd       rgb;    /* input colour and code byte */
    UINT16W32   otz;
    INT16W32    ir0;    /* 16-bit accumulators; see the IRn / IRn_32 macros */
    INT16W32    ir1;
    INT16W32    ir2;
    INT16W32    ir3;
    VECTOR2D    sxy0;   /* screen x/y; RTPS shifts sxy1->sxy0, sxy2->sxy1 */
    VECTOR2D    sxy1;
    VECTOR2D    sxy2;
    VECTOR2D    sxyp;
    UINT16W32   sz0;    /* screen z; RTPS shifts these the same way */
    UINT16W32   sz1;
    UINT16W32   sz2;
    UINT16W32   sz3;
    RGBcd       rgb0;   /* colour results; MAC2RGB shifts rgb1->rgb0, rgb2->rgb1 */
    RGBcd       rgb1;
    RGBcd       rgb2;
    INT32       res1;
    INT32       mac0;   /* 32-bit accumulators */
    INT32       mac1;
    INT32       mac2;
    INT32       mac3;
    INT32       irgb;   /* see IRGB_write */
    INT32       orgb;   /* see ORGB_read */
    INT32       lzcs;   /* input of LZCR_read */
    INT32       lzcr;   /* result of LZCR_read */

/* CTRL_REGS */
    MATRIX      rot;    /* rotation matrix (R11..R33) */
    LVECTOR     tr;     /* translation vector (TRX,TRY,TRZ) */
    MATRIX      light;  /* light matrix (L11..L33) */
    LVECTOR     bk;     /* background colour (RBK,GBK,BBK) */
    MATRIX      color;  /* light colour matrix (LR1..LB3) */
    LVECTOR     fc;     /* far colour (RFC,GFC,BFC) */
    INT32       ofx,ofy;    /* screen offset; OFX/OFY use the upper 16 bits */
    UINT16W32   h;
    INT16W32    dqa;
    INT16W32    dqb;
    INT16W32    zsf3;   /* scale factors used by AVSZ3 / AVSZ4 */
    INT16W32    zsf4;
    INT32       flag;   /* limiter/overflow flag bits, see the Lm_* macros */
} REGS;

/* Accessors for the data registers.  Every macro expands to an expression
   on a variable named `r` (a REGS*) that must be in scope at the point of
   use.  Names without a cast are lvalues; names with a leading cast
   (R, G, B, IRn, SZn) are read-only widened values. */
#define	VX0	r->v0.x
#define	VY0	r->v0.y
#define	VZ0	r->v0.z
#define	VX1	r->v1.x
#define	VY1	r->v1.y
#define	VZ1	r->v1.z
#define	VX2	r->v2.x
#define	VY2	r->v2.y
#define	VZ2	r->v2.z
#define	RGB	r->rgb
#define	R	(UINT32)(r->rgb.r)
#define	G	(UINT32)(r->rgb.g)
#define	B	(UINT32)(r->rgb.b)
#define	OTZ	r->otz.LongW
/* IRn reads the low 16 bits sign-extended to INT32; IRn_32 is the whole
   32-bit word and is what the code assigns to. */
#define	IR0     (INT32)(r->ir0.ShortW.Low)
#define	IR1     (INT32)(r->ir1.ShortW.Low)
#define	IR2     (INT32)(r->ir2.ShortW.Low)
#define	IR3     (INT32)(r->ir3.ShortW.Low)
#define	IR0_32  (r->ir0.LongW)
#define	IR1_32  (r->ir1.LongW)
#define	IR2_32  (r->ir2.LongW)
#define	IR3_32  (r->ir3.LongW)
#define	SX0	r->sxy0.x
#define	SY0	r->sxy0.y
#define	SX1	r->sxy1.x
#define	SY1	r->sxy1.y
#define	SX2	r->sxy2.x
#define	SY2	r->sxy2.y
#define	SXP	r->sxyp.x
#define	SYP	r->sxyp.y
#define	SXY0	r->sxy0
#define	SXY1	r->sxy1
#define	SXY2	r->sxy2
/* SZn reads the low 16 bits zero-extended to UINT32; SZn_32 is the whole
   32-bit word (lvalue). */
#define	SZ0	(UINT32)(r->sz0.ShortW.Low)
#define	SZ1	(UINT32)(r->sz1.ShortW.Low)
#define	SZ2	(UINT32)(r->sz2.ShortW.Low)
#define	SZ3	(UINT32)(r->sz3.ShortW.Low)
#define	SZ0_32  r->sz0.LongW
#define	SZ1_32  r->sz1.LongW
#define	SZ2_32  r->sz2.LongW
#define	SZ3_32  r->sz3.LongW

#define	RGB0	r->rgb0
#define	RGB1	r->rgb1
#define	RGB2	r->rgb2
#define	MAC0	r->mac0
#define	MAC1	r->mac1
#define	MAC2	r->mac2
#define	MAC3	r->mac3
#define	IRGB	r->irgb
#define	ORGB	r->orgb
#define	LZCS	r->lzcs
#define	LZCR	r->lzcr

/* Index of matrix element (row,column) inside a MATRIX array.  Without
   MSB_FIRST the elements are stored row by row in slots 0..8.  With
   MSB_FIRST the two 16-bit slots of every 32-bit pair are swapped
   (1,0,3,2,...), which puts _33 in slot 9. */
#ifndef MSB_FIRST
#define   _11  0
#define   _12  1
#define   _13  2
#define   _21  3
#define   _22  4
#define   _23  5
#define   _31  6
#define   _32  7
#define   _33  8
#else
#define   _11  1
#define   _12  0
#define   _13  3
#define   _21  2
#define   _22  5
#define   _23  4
#define   _31  7
#define   _32  6
#define   _33  9
#endif

/* Accessors for the control registers.  As with the data-register macros,
   a `REGS *r` must be in scope wherever these are used. */
/* rotation matrix elements */
#define	R11	r->rot[_11]
#define	R12	r->rot[_12]
#define	R13	r->rot[_13]
#define	R21	r->rot[_21]
#define	R22	r->rot[_22]
#define	R23	r->rot[_23]
#define	R31	r->rot[_31]
#define	R32	r->rot[_32]
#define	R33	r->rot[_33]
/* translation vector */
#define	TRX	r->tr.x
#define	TRY	r->tr.y
#define	TRZ	r->tr.z
/* light matrix elements */
#define	L11	r->light[_11]
#define	L12	r->light[_12]
#define	L13	r->light[_13]
#define	L21	r->light[_21]
#define	L22	r->light[_22]
#define	L23	r->light[_23]
#define	L31	r->light[_31]
#define	L32	r->light[_32]
#define	L33	r->light[_33]
/* background colour */
#define	RBK	r->bk.x
#define	GBK	r->bk.y
#define	BBK	r->bk.z
/* light colour matrix elements (rows R, G, B of r->color) */
#define	LR1	r->color[_11]
#define	LR2	r->color[_12]
#define	LR3	r->color[_13]
#define	LG1	r->color[_21]
#define	LG2	r->color[_22]
#define	LG3	r->color[_23]
#define	LB1	r->color[_31]
#define	LB2	r->color[_32]
#define	LB3	r->color[_33]
/* far colour */
#define	RFC	r->fc.x
#define	GFC	r->fc.y
#define	BFC	r->fc.z
/* OFX/OFY yield only the upper 16 bits of the stored offsets */
#define	OFX	(r->ofx>>16)
#define	OFY	(r->ofy>>16)
#define	H	(UINT32)(r->h.ShortW.Low)
/* DQA is read as a sign-extended 16-bit value, DQB as the full 32-bit word */
#define	DQA	(INT32)(r->dqa.ShortW.Low)
#define	DQB	(INT32)(r->dqb.LongW)
#define	ZSF3	(INT32)(r->zsf3.ShortW.Low)
#define	ZSF4	(INT32)(r->zsf4.ShortW.Low)
#define	FLAG	r->flag

/* byte-wise access to the input colour code and to the colour results */
#define	CODE	r->rgb.code
#define	R0	r->rgb0.r
#define	G0	r->rgb0.g
#define	B0	r->rgb0.b
#define	R1	r->rgb1.r
#define	G1	r->rgb1.g
#define	B1	r->rgb1.b
#define	R2	r->rgb2.r
#define	G2	r->rgb2.g
#define	B2	r->rgb2.b
#define	CD2	r->rgb2.code

/* FIX: integer shift right by 12 (drops 12 fraction bits).
   CFIX: divide by 256.0 in floating point. */
#define	FIX(a)	((a)>>12)
#define CFIX(a) ((a)/256.0)

/* LOWLM: clamp at zero from below; evaluates its argument twice. */
#define LOWLM(a)    (((a) < 0) ? 0 : (a))

/* LIMIT: clamp the lvalue `a` into [min,max] in place.  This expands to a
   bare if/else statement, so it must not be used as the unbraced body of
   another if. */
#define	LIMIT(a,min,max)	if (a<min) a=min; else if (a>max) a=max
/* (a<min)?min:(a>max)?max:a */

/* DEFFUNC: declares a file-local operation taking the register block as
   `r`, which is the name all register macros expand to. */
#define	DEFFUNC(func)	static void func(REGS *r)

/*
 * Saturation primitives.  Both set bit `bit` of FLAG when the value had to
 * be clamped and evaluate to the clamped value.
 *
 *   LIM0(a,max,bit)    clamp `a` into [0,max]; `max` must be of the form
 *                      2^k-1 (the in-range value is masked with it).
 *   LIM(a,min,max,bit) clamp `a` into [min,max].
 *
 * The GCC variants use statement expressions and evaluate `a` once.  The
 * portable variants are plain expressions and evaluate `a` more than once,
 * so `a` must be free of side effects.  `a` is always interpreted as a
 * signed 32-bit value, even when the argument expression is unsigned.
 */
#ifdef __GNUC__
/* GCC specific */
#define LIM0(a,max,bit) 		\
	({INT32 t=(INT32)(a);		\
	if ((UINT32)(t)>max) {		\
		FLAG |= (1<<bit);	\
		t = ~(t >> 0x1F);	\
	} t &= max; t; })

#define	LIM(a,min,max,bit) \
	({int t=a; \
	if (t<min) {t=min;FLAG|=1<<bit; } \
	else if (t>max) {t=max;FLAG|=1<<bit; } \
	else {} t; })
/* endif gcc specific */
#else
/* Portable fallback.  ~(t >> 31) is 0 for negative t and all ones for
   positive t, so masking with max yields 0 or max respectively. */
#define LIM0(a,max,bit) \
	( ((UINT32)(INT32)(a) > (UINT32)(max)) ? \
	  (FLAG |= (1<<bit), (INT32)(~((INT32)(a) >> 0x1F) & (max))) : \
	  (INT32)(a) )

#define	LIM(a,min,max,bit) \
	( ((INT32)(a)<min) ? (FLAG|=(1<<bit),min) : \
	( ((INT32)(a)>max) ? (FLAG|=(1<<bit),max) : (INT32)(a) ) )
#endif

/* Named limiters.  Each one clamps its argument and, on saturation, sets
   the FLAG bit given as the last number.  The A1, A2, A3 and F checks are
   not implemented because they would need 64-bit arithmetic. */
/* A1,A2,A3,F needs 64bit calculate, so ignore */
/* signed 16-bit range, FLAG bits 24/23/22 */
#define	Lm_B1(a)	LIM(a,-32768,32767,24)
#define	Lm_B2(a)	LIM(a,-32768,32767,23)
#define	Lm_B3(a)	LIM(a,-32768,32767,22)

/* same FLAG bits, but clamped to 0..0x7FFF */
#define	Lm1_B1(a)	LIM0(a,0x7FFF,24)
#define	Lm1_B2(a)	LIM0(a,0x7FFF,23)
#define	Lm1_B3(a)	LIM0(a,0x7FFF,22)

/* 0..0xFF, FLAG bits 21/20/19 (used for the colour bytes) */
#define	Lm_C1(a)	LIM0(a,0xFF,21)
#define	Lm_C2(a)	LIM0(a,0xFF,20)
#define	Lm_C3(a)	LIM0(a,0xFF,19)
/* 0..0xFFFF, FLAG bit 18 (used for OTZ) */
#define	Lm_D(a)		LIM0(a,0xFFFF,18)
/* -1024..1023, FLAG bits 14/13 (used for screen x/y) */
#define	Lm_G1(a)	LIM(a,-1024,1023,14)
#define	Lm_G2(a)	LIM(a,-1024,1023,13)
/* 0..0xFFF, FLAG bit 12 (used for IR0 in DEPTH) */
#define	Lm_H(a)		LIM0(a,0xFFF,12)

/* The macros below expand to several statements (or a braced block) and
   are not wrapped in do/while(0): use them only as complete statements,
   never as the unbraced body of an if/else/loop. */

/* Shift the colour results (rgb1->rgb0, rgb2->rgb1) and store MAC1..3,
   clamped to 0..0xFF, plus the current CODE byte into rgb2. */
#define	MAC2RGB()	\
	RGB0 = RGB1; RGB1 = RGB2; \
	R2 = Lm_C1(MAC1); \
	G2 = Lm_C2(MAC2); \
	B2 = Lm_C3(MAC3); \
	CD2 = CODE

/* Same as MAC2RGB, but MAC1..3 are shifted right by `nshift` first. */
#define	MAC2RGBx(nshift)            \
	RGB0 = RGB1; RGB1 = RGB2;   \
	R2 = Lm_C1(MAC1 >> nshift); \
	G2 = Lm_C2(MAC2 >> nshift); \
	B2 = Lm_C3(MAC3 >> nshift); \
	CD2 = CODE

/* Copy MAC1..3 to IR1..3, clamped to the signed 16-bit range. */
#define	MAC2IR()	\
	IR1_32 = Lm_B1(MAC1); \
	IR2_32 = Lm_B2(MAC2); \
	IR3_32 = Lm_B3(MAC3)

/* Copy MAC1..3 to IR1..3, clamped to 0..0x7FFF. */
#define	MAC2IR1()	\
	IR1_32 = Lm1_B1(MAC1); \
	IR2_32 = Lm1_B2(MAC2); \
	IR3_32 = Lm1_B3(MAC3)

/* Clamp `a` to 0..0xFFFF (FLAG bit 18 on saturation) and store the result
   in both `z` and `zz`.  If the clamped value is below `h2`, `z` is raised
   to `h2` and FLAG bit 17 is set (bit 18 as well when the value is below 1);
   `zz` keeps the clamped value.  Declares a local named `t`, so none of
   the arguments may reference a variable called `t`. */
#define MAC2SZ(a,z,h2,zz)				\
	{	INT32 t=a;				\
		if ((UINT32)(t) > 0xFFFF) {		\
			FLAG |= (1<<18);		\
			t = ~(t >> 0x1F);		\
		}					\
		t &= 0xFFFF; z = zz = t;		\
		if (t < h2) {				\
			if (t<1) FLAG |= (1<<18);	\
			z = h2; FLAG |= (1<<17);	\
		}					\
	}

/* Integer <-> double conversion.  F2I truncates toward zero (C cast). */
#define I2F(a)      (double)(a)
#define F2I(a)      (INT32)(a)

/* Register values converted to double: rotation matrix */
#define	F_R11	I2F(R11)
#define	F_R12	I2F(R12)
#define	F_R13	I2F(R13)
#define	F_R21	I2F(R21)
#define	F_R22	I2F(R22)
#define	F_R23	I2F(R23)
#define	F_R31	I2F(R31)
#define	F_R32	I2F(R32)
#define	F_R33	I2F(R33)

/* light matrix */
#define	F_L11	I2F(L11)
#define	F_L12	I2F(L12)
#define	F_L13	I2F(L13)
#define	F_L21	I2F(L21)
#define	F_L22	I2F(L22)
#define	F_L23	I2F(L23)
#define	F_L31	I2F(L31)
#define	F_L32	I2F(L32)
#define	F_L33	I2F(L33)

/* light colour matrix */
#define	F_LR1	I2F(LR1)
#define	F_LR2	I2F(LR2)
#define	F_LR3	I2F(LR3)
#define	F_LG1	I2F(LG1)
#define	F_LG2	I2F(LG2)
#define	F_LG3	I2F(LG3)
#define	F_LB1	I2F(LB1)
#define	F_LB2	I2F(LB2)
#define	F_LB3	I2F(LB3)

/* input vectors */
#define	F_VX0	I2F(VX0)
#define	F_VY0	I2F(VY0)
#define	F_VZ0	I2F(VZ0)
#define	F_VX1	I2F(VX1)
#define	F_VY1	I2F(VY1)
#define	F_VZ1	I2F(VZ1)
#define	F_VX2	I2F(VX2)
#define	F_VY2	I2F(VY2)
#define	F_VZ2	I2F(VZ2)

/* F_DQA is DQA shifted left by 16 before conversion.
   NOTE(review): DQA is a signed value, so the shift is applied to a
   possibly negative int -- confirm the intended scaling. */
#define F_DQA       I2F(DQA << 16)
#define F_DQB       I2F(DQB)
#define F_H         I2F(H)
/* F_FIX divides by 4096.0 in floating point; F2I_FIX additionally
   truncates the quotient to INT32. */
#define F_FIX(a)    (double)((a) / 4096.0)
#define F2I_FIX(a)  (INT32)(F_FIX(a))


/* Depth-cue step shared by RTPS and RTPT.  Requires a local double named
   f_rz holding the projection factor; overwrites it with DQB + DQA*f_rz,
   stores that in MAC0 and stores it divided by 4096 and clamped to
   0..0xFFF in IR0. */
#define DEPTH	f_rz = F_DQB + F_DQA*f_rz;	\
		MAC0 = F2I(f_rz);		\
		IR0_32 = Lm_H(F2I_FIX(f_rz))

/* RTPS: transform V0 by the rotation matrix plus translation, project it
   to screen coordinates and compute the depth-cue value. */
DEFFUNC(RTPS)
{
	const int half_h = H >> 1;	/* lower bound applied to the divisor */
	int depth;
	double f_rz;			/* name is required by DEPTH */

	FLAG = 0;

	/* advance the screen-z and screen-xy history by one entry */
	SZ0_32 = SZ1;
	SZ1_32 = SZ2;
	SZ2_32 = SZ3;
	SXY0 = SXY1;
	SXY1 = SXY2;

	/* rot * V0 / 4096 + tr */
	MAC1 = F2I_FIX(F_R11*F_VX0 + F_R12*F_VY0 + F_R13*F_VZ0)+TRX;
	MAC2 = F2I_FIX(F_R21*F_VX0 + F_R22*F_VY0 + F_R23*F_VZ0)+TRY;
	MAC3 = F2I_FIX(F_R31*F_VX0 + F_R32*F_VY0 + F_R33*F_VZ0)+TRZ;
	MAC2IR();

	/* clamped z goes to SZ3; the divisor is never below H/2 */
	MAC2SZ(MAC3,depth,half_h,SZ3_32);
	f_rz = F_H/I2F(depth);

	/* perspective divide and screen offset */
	SX2 = Lm_G1( F2I(I2F(MAC1)*f_rz)+OFX );
	SY2 = Lm_G2( F2I(I2F(MAC2)*f_rz)+OFY );

	DEPTH;
}

/* RTPT: the RTPS transform/projection applied to V0, V1 and V2 in turn.
   Results go to SXY0/SZ1, SXY1/SZ2 and SXY2/SZ3; only the last vertex is
   written to MAC1..3 and IR1..3, and the depth cue uses its factor. */
DEFFUNC(RTPT)
{
	const int half_h = H >> 1;	/* lower bound applied to the divisor */
	int depth;
	double f_rz;			/* name is required by DEPTH */
	double px,py,pz;

	FLAG = 0;
	SZ0_32 = SZ3;

	/* vertex 0 -> SXY0, SZ1 */
	px = F_FIX(F_R11*F_VX0 + F_R12*F_VY0 + F_R13*F_VZ0)+I2F(TRX);
	py = F_FIX(F_R21*F_VX0 + F_R22*F_VY0 + F_R23*F_VZ0)+I2F(TRY);
	pz = F_FIX(F_R31*F_VX0 + F_R32*F_VY0 + F_R33*F_VZ0)+I2F(TRZ);
	MAC2SZ(F2I(pz),depth,half_h,SZ1_32);
	f_rz = F_H/I2F(depth);
	SX0 = Lm_G1( F2I(px*f_rz)+OFX );
	SY0 = Lm_G2( F2I(py*f_rz)+OFY );

	/* vertex 1 -> SXY1, SZ2 */
	px = F_FIX(F_R11*F_VX1 + F_R12*F_VY1 + F_R13*F_VZ1)+I2F(TRX);
	py = F_FIX(F_R21*F_VX1 + F_R22*F_VY1 + F_R23*F_VZ1)+I2F(TRY);
	pz = F_FIX(F_R31*F_VX1 + F_R32*F_VY1 + F_R33*F_VZ1)+I2F(TRZ);
	MAC2SZ(F2I(pz),depth,half_h,SZ2_32);
	f_rz = F_H/I2F(depth);
	SX1 = Lm_G1( F2I(px*f_rz)+OFX );
	SY1 = Lm_G2( F2I(py*f_rz)+OFY );

	/* vertex 2 -> SXY2, SZ3, and also MAC1..3 / IR1..3 */
	px = F_FIX(F_R11*F_VX2 + F_R12*F_VY2 + F_R13*F_VZ2)+I2F(TRX);
	MAC1 = F2I(px);
	py = F_FIX(F_R21*F_VX2 + F_R22*F_VY2 + F_R23*F_VZ2)+I2F(TRY);
	MAC2 = F2I(py);
	pz = F_FIX(F_R31*F_VX2 + F_R32*F_VY2 + F_R33*F_VZ2)+I2F(TRZ);
	MAC3 = F2I(pz);
	MAC2SZ(MAC3,depth,half_h,SZ3_32);
	f_rz = F_H/I2F(depth);
	MAC2IR();
	SX2 = Lm_G1( F2I(px*f_rz)+OFX );
	SY2 = Lm_G2( F2I(py*f_rz)+OFY );

	DEPTH;
}

/* MVMVA functions */
/* Helper macros for the mvmva*() routines below.  They rely on the
   parameter names used there: `m` (matrix), `v` (input vector) and
   `cv` (vector added to the product). */
#define M(a)    (double)(m[a])
#define Vx      (double)(v->x)
#define Vy      (double)(v->y)
#define Vz      (double)(v->z)
#define Cx      (INT32)(cv->x)
#define Cy      (INT32)(cv->y)
#define Cz      (INT32)(cv->z)

/* MAC1..3 = (m * v) / 4096, truncated to integer.  Clears FLAG. */
static void mvmva(REGS *r,SVECTOR *v,MATRIX m)
{
     const double vx = Vx;
     const double vy = Vy;
     const double vz = Vz;

     FLAG = 0;
     MAC1 = F2I_FIX(M(_11)*vx + M(_12)*vy + M(_13)*vz);
     MAC2 = F2I_FIX(M(_21)*vx + M(_22)*vy + M(_23)*vz);
     MAC3 = F2I_FIX(M(_31)*vx + M(_32)*vy + M(_33)*vz);
}

/* MAC1..3 = m * v without the divide by 4096.  Clears FLAG. */
static void mvmva0(REGS *r,SVECTOR *v,MATRIX m)
{
     const double vx = Vx;
     const double vy = Vy;
     const double vz = Vz;

     FLAG = 0;
     MAC1 = F2I(M(_11)*vx + M(_12)*vy + M(_13)*vz);
     MAC2 = F2I(M(_21)*vx + M(_22)*vy + M(_23)*vz);
     MAC3 = F2I(M(_31)*vx + M(_32)*vy + M(_33)*vz);
}

/* MAC1..3 = (m * v) / 4096 + cv.  Clears FLAG. */
static void mvmva_cv(REGS *r,SVECTOR *v,MATRIX m,LVECTOR *cv)
{
     const double vx = Vx;
     const double vy = Vy;
     const double vz = Vz;

     FLAG = 0;
     MAC1 = F2I_FIX(M(_11)*vx + M(_12)*vy + M(_13)*vz) + Cx;
     MAC2 = F2I_FIX(M(_21)*vx + M(_22)*vy + M(_23)*vz) + Cy;
     MAC3 = F2I_FIX(M(_31)*vx + M(_32)*vy + M(_33)*vz) + Cz;
}

/* MAC1..3 = (m * [IR1,IR2,IR3]) / 4096 + cv.  Clears FLAG. */
static void mvmva_cv_ir(REGS *r,MATRIX m,LVECTOR *cv)
{
     const double in1 = IR1;
     const double in2 = IR2;
     const double in3 = IR3;

     FLAG = 0;
     MAC1 = F2I_FIX(M(_11)*in1 + M(_12)*in2 + M(_13)*in3)+Cx;
     MAC2 = F2I_FIX(M(_21)*in1 + M(_22)*in2 + M(_23)*in3)+Cy;
     MAC3 = F2I_FIX(M(_31)*in1 + M(_32)*in2 + M(_33)*in3)+Cz;
}

/* MAC1..3 = (m * [IR1,IR2,IR3]) / 4096.  Clears FLAG. */
static void mvmva_ir(REGS *r,MATRIX m)
{
     const double in1 = IR1;
     const double in2 = IR2;
     const double in3 = IR3;

     FLAG = 0;
     MAC1 = F2I_FIX(M(_11)*in1 + M(_12)*in2 + M(_13)*in3);
     MAC2 = F2I_FIX(M(_21)*in1 + M(_22)*in2 + M(_23)*in3);
     MAC3 = F2I_FIX(M(_31)*in1 + M(_32)*in2 + M(_33)*in3);
}

/* MAC1..3 = m * [IR1,IR2,IR3] without the divide by 4096.  Clears FLAG. */
static void mvmva_ir0(REGS *r,MATRIX m)
{
     const double in1 = IR1;
     const double in2 = IR2;
     const double in3 = IR3;

     FLAG = 0;
     MAC1 = F2I(M(_11)*in1 + M(_12)*in2 + M(_13)*in3);
     MAC2 = F2I(M(_21)*in1 + M(_22)*in2 + M(_23)*in3);
     MAC3 = F2I(M(_31)*in1 + M(_32)*in2 + M(_33)*in3);
}

/* Rotation-matrix multiplies.  Name scheme: rt<input>[<added vector>],
   where the input is v0/v1/v2 or ir (IR1..3) and the added vector is tr
   or bk.  The "0" variants skip the divide by 4096.  Every variant copies
   MAC1..3 to IR1..3 with signed 16-bit clamping. */
DEFFUNC(rtv0)
{
	mvmva(r, &r->v0, r->rot);
	MAC2IR();
}

DEFFUNC(rtv0_0)
{
	mvmva0(r, &r->v0, r->rot);
	MAC2IR();
}

DEFFUNC(rtv1)
{
	mvmva(r, &r->v1, r->rot);
	MAC2IR();
}

DEFFUNC(rtv2)
{
	mvmva(r, &r->v2, r->rot);
	MAC2IR();
}

DEFFUNC(rtir)
{
	mvmva_ir(r, r->rot);
	MAC2IR();
}

DEFFUNC(rtir0)
{
	mvmva_ir0(r, r->rot);
	MAC2IR();
}

/* rotation plus translation vector */
DEFFUNC(rtv0tr)
{
	mvmva_cv(r, &r->v0, r->rot, &r->tr);
	MAC2IR();
}

DEFFUNC(rtv1tr)
{
	mvmva_cv(r, &r->v1, r->rot, &r->tr);
	MAC2IR();
}

DEFFUNC(rtv2tr)
{
	mvmva_cv(r, &r->v2, r->rot, &r->tr);
	MAC2IR();
}

DEFFUNC(rtirtr)
{
	mvmva_cv_ir(r, r->rot, &r->tr);
	MAC2IR();
}

/* rotation plus background colour vector */
DEFFUNC(rtv0bk)
{
	mvmva_cv(r, &r->v0, r->rot, &r->bk);
	MAC2IR();
}

DEFFUNC(rtv1bk)
{
	mvmva_cv(r, &r->v1, r->rot, &r->bk);
	MAC2IR();
}

DEFFUNC(rtv2bk)
{
	mvmva_cv(r, &r->v2, r->rot, &r->bk);
	MAC2IR();
}

DEFFUNC(rtirbk)
{
	mvmva_cv_ir(r, r->rot, &r->bk);
	MAC2IR();
}

/* Light-matrix multiplies, named like the rt* family.  `ll` is the V0
   multiply whose result is clamped to 0..0x7FFF instead of the signed
   16-bit range. */
DEFFUNC(ll)
{
	mvmva(r, &r->v0, r->light);
	MAC2IR1();
}

DEFFUNC(llv0)
{
	mvmva(r, &r->v0, r->light);
	MAC2IR();
}

DEFFUNC(llv1)
{
	mvmva(r, &r->v1, r->light);
	MAC2IR();
}

DEFFUNC(llv2)
{
	mvmva(r, &r->v2, r->light);
	MAC2IR();
}

DEFFUNC(llir)
{
	mvmva_ir(r, r->light);
	MAC2IR();
}

/* light matrix plus translation vector */
DEFFUNC(llv0tr)
{
	mvmva_cv(r, &r->v0, r->light, &r->tr);
	MAC2IR();
}

DEFFUNC(llv1tr)
{
	mvmva_cv(r, &r->v1, r->light, &r->tr);
	MAC2IR();
}

DEFFUNC(llv2tr)
{
	mvmva_cv(r, &r->v2, r->light, &r->tr);
	MAC2IR();
}

DEFFUNC(llirtr)
{
	mvmva_cv_ir(r, r->light, &r->tr);
	MAC2IR();
}

/* light matrix plus background colour vector */
DEFFUNC(llv0bk)
{
	mvmva_cv(r, &r->v0, r->light, &r->bk);
	MAC2IR();
}

DEFFUNC(llv1bk)
{
	mvmva_cv(r, &r->v1, r->light, &r->bk);
	MAC2IR();
}

DEFFUNC(llv2bk)
{
	mvmva_cv(r, &r->v2, r->light, &r->bk);
	MAC2IR();
}

DEFFUNC(llirbk)
{
	mvmva_cv_ir(r, r->light, &r->bk);
	MAC2IR();
}

DEFFUNC(lc) {}

/* Colour-matrix multiplies, named like the rt* family.  Every variant
   copies MAC1..3 to IR1..3 with signed 16-bit clamping. */
DEFFUNC(lcv0)
{
	mvmva(r, &r->v0, r->color);
	MAC2IR();
}

DEFFUNC(lcv1)
{
	mvmva(r, &r->v1, r->color);
	MAC2IR();
}

DEFFUNC(lcv2)
{
	mvmva(r, &r->v2, r->color);
	MAC2IR();
}

DEFFUNC(lcir)
{
	mvmva_ir(r, r->color);
	MAC2IR();
}

/* colour matrix plus translation vector */
DEFFUNC(lcv0tr)
{
	mvmva_cv(r, &r->v0, r->color, &r->tr);
	MAC2IR();
}

DEFFUNC(lcv1tr)
{
	mvmva_cv(r, &r->v1, r->color, &r->tr);
	MAC2IR();
}

DEFFUNC(lcv2tr)
{
	mvmva_cv(r, &r->v2, r->color, &r->tr);
	MAC2IR();
}

DEFFUNC(lcirtr)
{
	mvmva_cv_ir(r, r->color, &r->tr);
	MAC2IR();
}

/* colour matrix plus background colour vector */
DEFFUNC(lcv0bk)
{
	mvmva_cv(r, &r->v0, r->color, &r->bk);
	MAC2IR();
}

DEFFUNC(lcv1bk)
{
	mvmva_cv(r, &r->v1, r->color, &r->bk);
	MAC2IR();
}

DEFFUNC(lcv2bk)
{
	mvmva_cv(r, &r->v2, r->color, &r->bk);
	MAC2IR();
}

DEFFUNC(lcirbk)
{
	mvmva_cv_ir(r, r->color, &r->bk);
	MAC2IR();
}


DEFFUNC(DCPL)
{
	int tmp;

	tmp = (R*IR1)>>8;
	MAC1 = tmp + FIX( IR0*Lm_B1(RFC-tmp) );
	tmp = (G*IR2)>>8;
	MAC2 = tmp + FIX( IR0*Lm_B2(GFC-tmp) );
	tmp = (B*IR3)>>8;
	MAC3 = tmp + FIX( IR0*Lm_B3(BFC-tmp) );
	FLAG=0;
	MAC2IR();
	MAC2RGBx(4);
}

/* DCPS: interpolate the input colour (scaled by 16) toward the far colour
   by IR0; the result goes to MAC1..3, IR1..3 and, shifted right by 4, to
   the colour results.
   FLAG is cleared before the first limiter runs so that saturation
   detected by Lm_B1..3 during the interpolation is not thrown away. */
DEFFUNC(DCPS)
{
	int tmp;

	FLAG = 0;
	tmp = R << 4;
	MAC1 = tmp + FIX(IR0*Lm_B1(RFC-tmp));
	tmp = G << 4;
	MAC2 = tmp + FIX(IR0*Lm_B2(GFC-tmp));
	tmp = B << 4;
	MAC3 = tmp + FIX(IR0*Lm_B3(BFC-tmp));
	MAC2IR();
	MAC2RGBx(4);
}

/* INTPL: interpolate IR1..3 toward the far colour by IR0; the result goes
   to MAC1..3, IR1..3 and the colour results.
   FLAG is cleared before the first limiter runs so that saturation
   detected by Lm_B1..3 during the interpolation is not thrown away. */
DEFFUNC(INTPL)
{
     FLAG = 0;
     MAC1 = IR1 + FIX(IR0*Lm_B1(RFC-IR1));
     MAC2 = IR2 + FIX(IR0*Lm_B2(GFC-IR2));
     MAC3 = IR3 + FIX(IR0*Lm_B3(BFC-IR3));
     MAC2IR();
     MAC2RGB();
}

/* SQR */
DEFFUNC(sqr0)
{
     MAC1 = IR1*IR1;
     MAC2 = IR2*IR2;
     MAC3 = IR3*IR3;
     FLAG = 0;
     MAC2IR1();
}

DEFFUNC(sqr12)
{
     MAC1 = FIX(IR1*IR1);
     MAC2 = FIX(IR2*IR2);
     MAC3 = FIX(IR3*IR3);
     FLAG = 0;
     MAC2IR1();
}

/* _NCS(n): lighting of input vector Vn.  Each light-matrix row is dotted
   with Vn; only rows with a positive product contribute, weighted by the
   matching column of the light colour matrix.  The sums are divided by
   4096, added to the background colour, floored at zero and stored in
   MAC1..3; MAC2RGBx(15) then pushes MAC>>15 into the colour results.
   Expands to a braced block; `n` must be a literal 0, 1 or 2 because it
   is token-pasted onto the vector macro names. */
#define	_NCS(n) {\
	double f,f1,f2,f3; 					\
	f1 = f2 = f3 = 0; 					\
	f = F_L11*F_VX##n + F_L12*F_VY##n + F_L13*F_VZ##n; 	\
	if (f > 0) {						\
		f1 += F_LR1*f;					\
		f2 += F_LG1*f;					\
		f3 += F_LB1*f;					\
	}							\
	f = F_L21*F_VX##n + F_L22*F_VY##n + F_L23*F_VZ##n; 	\
	if (f > 0) {						\
		f1 += F_LR2*f;					\
		f2 += F_LG2*f;					\
		f3 += F_LB2*f;					\
	}							\
	f = F_L31*F_VX##n + F_L32*F_VY##n + F_L33*F_VZ##n; 	\
	if (f > 0) {						\
		f1 += F_LR3*f;					\
		f2 += F_LG3*f;					\
		f3 += F_LB3*f;					\
	}							\
	MAC1 = F2I(LOWLM(I2F(RBK) + F_FIX(f1)));		\
	MAC2 = F2I(LOWLM(I2F(GBK) + F_FIX(f2)));		\
	MAC3 = F2I(LOWLM(I2F(BBK) + F_FIX(f3)));		\
	MAC2RGBx(15); }

/* _NCDS(n): lighting of input vector Vn with colour and depth cue.  The
   light accumulation matches _NCS except that each row product is divided
   by 4096 before the sign test.  Each channel is then
   max(0, BK + sum/4096) * input colour / 256, followed by
   MAC = tmp + IR0 * max(0, FC - tmp), and MAC2RGBx(4) pushes MAC>>4 into
   the colour results.
   NOTE(review): unlike DCPS/DCPL, the IR0 product here is not passed
   through FIX() -- confirm that this is intended.
   Expands to a braced block; `n` must be a literal 0, 1 or 2. */
#define	_NCDS(n) {							\
	int tmp;							\
	double f,f1,f2,f3; 						\
	f1 = f2 = f3 = 0; 						\
	f = F_FIX(F_L11*F_VX##n + F_L12*F_VY##n + F_L13*F_VZ##n); 	\
	if (f > 0) {							\
		f1 += F_LR1*f;						\
		f2 += F_LG1*f;						\
		f3 += F_LB1*f;						\
	}								\
	f = F_FIX(F_L21*F_VX##n + F_L22*F_VY##n + F_L23*F_VZ##n); 	\
	if (f > 0) {							\
		f1 += F_LR2*f;						\
		f2 += F_LG2*f;						\
		f3 += F_LB2*f;						\
	}								\
	f = F_FIX(F_L31*F_VX##n + F_L32*F_VY##n + F_L33*F_VZ##n); 	\
	if (f > 0) {							\
		f1 += F_LR3*f;						\
		f2 += F_LG3*f;						\
		f3 += F_LB3*f;						\
	}								\
	tmp = F2I(CFIX(LOWLM(I2F(RBK) + F_FIX(f1))*I2F(R)));		\
	MAC1 = tmp + (IR0*LOWLM(RFC-tmp));				\
	tmp = F2I(CFIX(LOWLM(I2F(GBK) + F_FIX(f2))*I2F(G)));		\
	MAC2 = tmp + (IR0*LOWLM(GFC-tmp));				\
	tmp = F2I(CFIX(LOWLM(I2F(BBK) + F_FIX(f3))*I2F(B)));		\
	MAC3 = tmp + (IR0*LOWLM(BFC-tmp));				\
	MAC2RGBx(4); }

/* _NCCS(n): integer-only lighting of input vector Vn with colour.
   t1..t3 = light matrix * Vn >> 12, clamped to 0..0x7FFF;
   tt1..tt3 = background colour + (light colour matrix * t) >> 12, clamped
   to 0..0x7FFF; MAC1..3 = input colour * tt >> 12; MAC2RGBx(4) then
   pushes MAC>>4 into the colour results.
   Expands to a braced block; `n` must be a literal 0, 1 or 2. */
#define	_NCCS(n) { \
	int t1,t2,t3,tt1,tt2,tt3; \
	t1 = Lm1_B1(FIX(L11*VX##n + L12*VY##n + L13*VZ##n)); \
	t2 = Lm1_B2(FIX(L21*VX##n + L22*VY##n + L23*VZ##n)); \
	t3 = Lm1_B3(FIX(L31*VX##n + L32*VY##n + L33*VZ##n)); \
	tt1 = Lm1_B1(RBK + FIX(LR1*t1 +LR2*t2 + LR3*t3)); \
	tt2 = Lm1_B2(GBK + FIX(LG1*t1 +LG2*t2 + LG3*t3)); \
	tt3 = Lm1_B3(BBK + FIX(LB1*t1 +LB2*t2 + LB3*t3)); \
	MAC1 = FIX(R*tt1); \
	MAC2 = FIX(G*tt2); \
	MAC3 = FIX(B*tt3); \
	MAC2RGBx(4); }

/* NCS: clear FLAG, light V0 via _NCS, then copy MAC1..3 to IR1..3 clamped
   to 0..0x7FFF. */
DEFFUNC(NCS)
{
	FLAG = 0;
	_NCS(0);
	MAC2IR1();
}

/* NCT: _NCS applied to V0, V1 and V2 in turn (three colour results are
   pushed).  IR1..3 are updated once at the end, from the MAC values left
   by the V2 pass. */
DEFFUNC(NCT)
{
	/* NCS for V0,V1,V2 */
	FLAG = 0;
	_NCS(0);
	_NCS(1);
	_NCS(2);
	MAC2IR1();
}

/* NCDS: clear FLAG, light V0 via _NCDS, then copy MAC1..3 to IR1..3
   clamped to 0..0x7FFF. */
DEFFUNC(NCDS)
{
	FLAG = 0;
	_NCDS(0);
	MAC2IR1();
}

/* NCDT: _NCDS applied to V0, V1 and V2 in turn.  IR1..3 are updated once
   at the end, from the MAC values left by the V2 pass. */
DEFFUNC(NCDT)
{
	/* NCDS for V0,V1,V2 */
	FLAG = 0;
	_NCDS(0);
	_NCDS(1);
	_NCDS(2);
	MAC2IR1();
}

/* DPCT: run the depth-cue interpolation three times.  Each pass reads the
   oldest colour result (rgb0, scaled by 16), interpolates it toward the
   far colour by IR0, updates IR1..3 and pushes the new colour, which
   shifts the colour results along by one entry. */
DEFFUNC(DPCT)
{
    int pass;

    FLAG = 0;
    for (pass = 0; pass < 3; pass++) {
	int base;

	base = R0 << 4;
	MAC1 = base + FIX(IR0*Lm_B1(RFC-base));

	base = G0 << 4;
	MAC2 = base + FIX(IR0*Lm_B2(GFC-base));

	base = B0 << 4;
	MAC3 = base + FIX(IR0*Lm_B3(BFC-base));

	MAC2IR();
	MAC2RGBx(4);
    }
}

/* NCCS: clear FLAG, light V0 via _NCCS, then copy MAC1..3 to IR1..3
   clamped to 0..0x7FFF. */
DEFFUNC(NCCS)
{
	FLAG = 0;
	_NCCS(0);
	MAC2IR1();
}

/* NCCT: _NCCS applied to V0, V1 and V2 in turn.  IR1..3 are updated once
   at the end, from the MAC values left by the V2 pass. */
DEFFUNC(NCCT)
{
	/* NCCS for V0,V1,V2 */
	FLAG = 0;
	_NCCS(0);
	_NCCS(1);
	_NCCS(2);
	MAC2IR1();
}

DEFFUNC(CDP)
{
	int t1,t2,t3;
	t1 = RBK + FIX(LR1*IR1 +LR2*IR2 + LR3*IR3);
	t2 = GBK + FIX(LG1*IR1 +LG2*IR2 + LG3*IR3);
	t3 = BBK + FIX(LB1*IR1 +LB2*IR2 + LB3*IR3);
	MAC1 = R*t1 + FIX(IR0*Lm_B1(RFC-R*t1));
	MAC2 = G*t2 + FIX(IR0*Lm_B2(GFC-G*t2));
	MAC3 = B*t3 + FIX(IR0*Lm_B3(BFC-B*t3));
	MAC2IR1();
	MAC2RGB();
}

DEFFUNC(CC)
{
	int t1,t2,t3;
	t1 = RBK + FIX(LR1*IR1 +LR2*IR2 + LR3*IR3);
	t2 = GBK + FIX(LG1*IR1 +LG2*IR2 + LG3*IR3);
	t3 = BBK + FIX(LB1*IR1 +LB2*IR2 + LB3*IR3);
	MAC1 = R*t1;
	MAC2 = G*t2;
	MAC3 = B*t3;
	MAC2IR1();
	MAC2RGB();
}

/* NCLIP: MAC0 = cross product of the screen triangle
   (SX0,SY0)-(SX1,SY1)-(SX2,SY2), i.e.
   SX0*SY1+SX1*SY2+SX2*SY0-SX0*SY2-SX1*SY0-SX2*SY1. */
DEFFUNC(NCLIP)
{
	const int ax = SX0, ay = SY0;
	const int bx = SX1, by = SY1;
	const int cx = SX2, cy = SY2;

	FLAG = 0;
	MAC0 = ax*(by-cy) + bx*(cy-ay) + cx*(ay-by);
}

DEFFUNC(AVSZ3)
{
	FLAG = 0;
	MAC0 = FIX((SZ1 + SZ2 + SZ3)*ZSF3);
	OTZ = Lm_D(MAC0);
}

DEFFUNC(AVSZ4)
{
	FLAG = 0;
	MAC0 = FIX((SZ0 + SZ1 + SZ2 + SZ3)*ZSF4);
	OTZ = Lm_D(MAC0);
}


/* Diagonal of the rotation matrix, used as a vector by op0/op12.
   These used to read a whole 32-bit word through a cast INT16 pointer,
   which pulled the neighbouring matrix slot (R12, R23, or the padding
   after R33) into the upper half of the value, broke the aliasing rules
   and was misaligned for the MSB_FIRST index layout.  Each one now yields
   just the 16-bit element, sign-extended to INT32. */
#define	D1	((INT32)(R11))
#define	D2	((INT32)(R22))
#define	D3	((INT32)(R33))

/* OP */
DEFFUNC(op0)
{
	MAC1 = D2*IR3 - D3*IR2;
	MAC2 = D3*IR1 - D1*IR3;
	MAC3 = D1*IR2 - D2*IR1;
	MAC2IR();
}

DEFFUNC(op12)
{
	MAC1 = FIX(D2*IR3 - D3*IR2);
	MAC2 = FIX(D3*IR1 - D1*IR3);
	MAC3 = FIX(D1*IR2 - D2*IR1);
	MAC2IR();
}

/* GPF */
DEFFUNC(gpf0)
{
	FLAG = 0;
	MAC1 = IR0*IR1;
	MAC2 = IR0*IR2;
	MAC3 = IR0*IR3;
	MAC2IR();
	MAC2RGB();
}

DEFFUNC(gpf12)
{
	FLAG = 0;

	MAC1 = FIX(IR0*IR1);
	MAC2 = FIX(IR0*IR2);
	MAC3 = FIX(IR0*IR3);
	MAC2IR();
	MAC2RGB();
}

DEFFUNC(gpl0)
{
	FLAG = 0;

	MAC1 += IR0*IR1;
	MAC2 += IR0*IR2;
	MAC3 += IR0*IR3;
	MAC2IR();
	MAC2RGB();
}

DEFFUNC(gpl12)
{
	FLAG = 0;

	MAC1 += FIX(IR0*IR1);
	MAC2 += FIX(IR0*IR2);
	MAC3 += FIX(IR0*IR3);
	MAC2IR();
	MAC2RGB();
}


/* One dispatch-table entry: an operation code and the routine that
   implements it. */
typedef struct {
	INT32	opcode;
	void (*func)(REGS *r);
} COP2TBL;

/* Dispatch table searched linearly by cop2funcptr().  The code passed in
   must match an entry exactly.  The table ends with an entry whose opcode
   is 0, so 0 can never be a valid code.  The commented-out generic entries
   (MVMVA, SQR, OP, GPF, GPL) are covered by the specialised variants in
   the second half of the table. */
COP2TBL cop2tbl[] = {
	{0x0180001,RTPS},
	{0x0280030,RTPT},
/*	{0x0400012,MVMVA}, */
	{0x0680029,DCPL},
	{0x0780010,DCPS},
	{0x0980011,INTPL},
/*	{0x0a00428,SQR}, */
	{0x0c8041e,NCS},
	{0x0d80420,NCT},
	{0x0e80413,NCDS},
	{0x0f80416,NCDT},
	{0x0f8002a,DPCT},
	{0x108041b,NCCS},
	{0x118043f,NCCT},
	{0x1280414,CDP},
	{0x138041c,CC},
	{0x1400006,NCLIP},
	{0x158002d,AVSZ3},
	{0x168002e,AVSZ4},
/*	{0x170000c,OP},
	{0x190003d,GPF},
	{0x1a0003d,GPL}, */

	/* matrix * vector variants: rotation matrix */
	{0x0486012,rtv0},
	{0x0406012,rtv0_0},
	{0x048e012,rtv1},
	{0x0496012,rtv2},
	{0x049e012,rtir},
	{0x041e012,rtir0},
	{0x0480012,rtv0tr},
	{0x0488012,rtv1tr},
	{0x0490012,rtv2tr},
	{0x0498012,rtirtr},
	{0x0482012,rtv0bk},
	{0x048a012,rtv1bk},
	{0x0492012,rtv2bk},
	{0x049a012,rtirbk},
	/* light matrix */
	{0x04a6412,ll},
	{0x04a6012,llv0},
	{0x04ae012,llv1},
	{0x04b6012,llv2},
	{0x04be012,llir},
	{0x04a0012,llv0tr},
	{0x04a8012,llv1tr},
	{0x04b0012,llv2tr},
	{0x04b8012,llirtr},
	{0x04a2012,llv0bk},
	{0x04aa012,llv1bk},
	{0x04b2012,llv2bk},
	{0x04ba012,llirbk},
	/* light colour matrix */
	{0x04da412,lc},
	{0x04c6012,lcv0},
	{0x04ce012,lcv1},
	{0x04d6012,lcv2},
	{0x04de012,lcir},
	{0x04c0012,lcv0tr},
	{0x04c8012,lcv1tr},
	{0x04d0012,lcv2tr},
	{0x04d8012,lcirtr},
	{0x04c2012,lcv0bk},
	{0x04ca012,lcv1bk},
	{0x04d2012,lcv2bk},
	{0x04da012,lcirbk},
	/* square, outer product and general-purpose interpolation; each in a
	   ">>12" variant and an unshifted variant */
	{0x0a80428,sqr12},
	{0x0a00428,sqr0},
	{0x178000c,op12},
	{0x170000c,op0},
	{0x198003d,gpf12},
	{0x190003d,gpf0},
	{0x1a8003e,gpl12},
	{0x1a0003e,gpl0},

	{0,NULL}
};

cop2func cop2funcptr(int code)
{
	COP2TBL *tblp;
	for(tblp=cop2tbl;tblp->opcode;tblp++) {
		if (tblp->opcode==code) {
// printf("Cop2 = %08x\n",code);
			return (cop2func)tblp->func;
		}
	}
	printf("unsupport cop2:%x\n",code);

	return NULL;
}


/* Execute the operation identified by `code` on the register block `r`.
   Returns 0 when a handler was found and run, -1 when the code is not
   supported. */
int cop2(int code,UINT32 *r)
{
	cop2func handler = cop2funcptr(code);

	if (!handler)
		return -1;

	handler(r);
	return 0;
}

DEFFUNC(LZCR_read)
{
	int tmp = LZCS,cnt;
	if (tmp&0x80000000) tmp=~tmp;
	for(cnt=0;cnt<32 && (tmp&0x80000000)==0;cnt++,tmp<<=1);
	LZCR = cnt;
}

DEFFUNC(ORGB_read)
{
	ORGB = ((IR1 & 0xF80) >> 7) |
               ((IR2 & 0xF80) >> 2) |
               ((IR2 & 0xF80) << 3);
}

/* Unpack IRGB into IR1..3: each 5-bit field (bits 0..4, 5..9, 10..14) is
   shifted left by 4 and stored in the matching IR register.
   NOTE(review): ORGB_read extracts bits 7..11 of each IR value, whereas
   this stores the field in bits 4..8 -- confirm which scaling is
   intended. */
DEFFUNC(IRGB_write)
{
	const UINT32 packed = IRGB;
	const UINT32 field1 = packed & 31;
	const UINT32 field2 = (packed >> 5) & 31;
	const UINT32 field3 = (packed >> 10) & 31;

	IR1_32 = field1 << 4;
	IR2_32 = field2 << 4;
	IR3_32 = field3 << 4;
}

DEFFUNC(FLAG_read)
{
	if (FLAG&0x7f87e000) FLAG|=(1<<31); else FLAG&=(1<<31);
}

/* Return the routine that must run before register `regno` is read, or
   NULL when the register needs no refresh.  Register numbers 32 and above
   are used for the control registers (FLAG is 31+32). */
cop2func cop2readfuncptr(int regno)
{
	if (regno == 29)
		return (cop2func)ORGB_read;
	if (regno == 31)
		return (cop2func)LZCR_read;
	if (regno == 31+32)
		return (cop2func)FLAG_read;

	return NULL;
}

/* Return the routine that must run after register `regno` has been
   written, or NULL when the register has no write side effect. */
cop2func cop2writefuncptr(int regno)
{
	if (regno == 28)
		return (cop2func)IRGB_write;

	return NULL;
}

/* Run the read hook of register `regno`, if it has one, on the register
   block `r`. */
void cop2read(int regno,UINT32 *r)
{
	cop2func hook = cop2readfuncptr(regno);

	if (!hook)
		return;
	hook(r);
}

/* Run the write hook of register `regno`, if it has one, on the register
   block `r`. */
void cop2write(int regno,UINT32 *r)
{
	cop2func hook = cop2writefuncptr(regno);

	if (!hook)
		return;
	hook(r);
}
