/******************************************************************************

    M2 Triangle Engine

******************************************************************************/

#include "M2x.h"
#include "core.h"
#include "cpuexec.h"
#include "memory.h"
#include "log.h"
#include "video.h"
#include "konamim2.h"
#include "m2defs.h"

#include "trengine.h"
#include <math.h>


/**************************************
 *
 *  Defines
 *
 *************************************/

#define TE_LOGGING	1

#define GC			regs.GeneralControlRegs
#define SE			regs.SetupEngineRegs
#define ES			regs.EdgeSpanWalkerRegs
#define TM			regs.TextureMapperRegs
#define DB			regs.DestBlendRegs


#ifndef MIN
#define MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define MAX(a, b)	(((a) > (b)) ? (a) : (b))
#endif

/**************************************
 *
 *  Prototypes
 *
 *************************************/

/* Register write entry point (implementation is not in this part of the file). */
void RegisterWrite(UINT32 offset, UINT32 data);

/*
 * Edge walker, called by ProcessTriangle().
 *
 * Declared static so that this forward declaration has the same (internal)
 * linkage as the definition further down the file. A non-static declaration
 * followed by a static definition is only accepted as a compiler extension.
 */
static void ProcessEdges(VTX *v1, VTX *v2, VTX *v3,
						 float dy_long, float dy_0, float dy_1,
						 float xystep_long, float xystep_0, float xystep_1,
						 float xstep_long, float xstep_0, float xstep_1,
						 slope *sr, slope *sg, slope *sb, slope *sa,
						 slope *suw, slope *svw,
						 slope *sw,
						 int flags, enum _scan_ scan);

/* Span processor, called by ProcessEdges() once per scanline. */
void ProcessSpan(int y, int xs, int xe,
				 float r, float g, float b, float a,
				 float rddx, float gddx, float bddx, float addx,
				 float uw, float vw, float w,
				 float uwddx, float vwddx, float wddx,
				 enum _scan_ scan, bool omit_right,
				 int flags);

/**************************************
 *
 *  Statics
 *
 *************************************/

/*
 * Per-pixel working state shared between the texture and destination
 * blender stages below. These have external linkage despite the section
 * heading.
 */

/* Selected source colour/alpha and DSB as written by SelSrcPixel() */
UINT8 rs, gs, bs, as, dsbsrc;	/* Source */
/* Raw source surface pixel as read (or zeroed) by SelSrcPixel() */
UINT8 rsrc, gsrc, bsrc, asrc;	/* Source surface */
/* Texture stage output written by GetTextureBlendPixel(); ssbti is filled by GetTexel() */
UINT8 rti, gti, bti, ati, ssbti;
/* Texture-side blender input chosen by SelTexPixel() */
UINT8 rt, gt, bt, at;			/* Texture */
/* ad is written by SelectAlphaDSB(); rd/gd/bd are not written in the code visible here */
UINT8 rd, gd, bd, ad;			/* Destination */
/* Blender results; bblend is written by RGBPath(), rblend is read by SelectAlphaDSB() */
UINT8 rblend, gblend, bblend;

/* Not referenced in the code visible here */
int dsbsel;
int dsbconst;

int dsbs;	/* Copy of dsbsrc made at the end of SelSrcPixel() */
int dsb;	/* Written by SelectAlphaDSB() */
int ssb;	/* Low bit of the texture SSB, written by SelTexPixel() */

/* Discard/clip flags; not referenced in the code visible here */
int dis, ssbdis, adis, rgbdis, fbclipdis, winclipdis, zclipdis;

/* Not referenced in the code visible here (presumably dither matrix coordinates - TODO confirm) */
int dith_x, dith_y;


/*
 * Append the 32-bit value of a register write to tmp\te_log.bin.
 *
 * The first call deletes any existing log and opens a fresh one; the handle
 * is kept in a function-local static for all later calls.
 * Only 'data' is written - 'addr' and 'flag' are currently ignored.
 */
void LogRegWrite(UINT32 addr, UINT32 data, int flag)
{
	static FileHandle log_file = 0;

	/* Lazily create the log on first use */
	if (!log_file)
	{
		Core.FileMan->Delete("tmp\\", "te_log.bin");
		Core.FileMan->Open("tmp\\", "te_log.bin",
						   FILE_READ | FILE_WRITE | FILE_OPEN_ALWAYS,
						   log_file);
	}

	/* Raw 4-byte dump of the written value */
	Core.FileMan->Write(log_file, &data, 4);
}

/**************************************
 *
 *  Structures
 *
 *************************************/

/*
 * Triangle Engine register file.
 *
 * One union per unit; each union overlays named register fields with a flat
 * regs[] array of the same size so a register can be reached either by name
 * or by index. The GC/SE/ES/TM/DB macros at the top of the file are
 * shorthands for the five members.
 */
static struct
{
	/* Registers */

	/* General control (GC): 11 words */
	union
	{
		struct
		{
			UINT32 TEMasterMode;
			UINT32 Reserved;
			UINT32 TEICntlData;
			UINT32 TEICntl;
			UINT32 TEDCntlData;
			UINT32 TEDCntl;
			UINT32 IWP;
			UINT32 IRP;
			UINT32 IntEn;
			UINT32 IntStat;
			UINT32 VertexControl;
		};
		UINT32 regs[11];
	} GeneralControlRegs;

	/*
	 * Setup engine (SE): three 16-word vertex slots (a, b, c), 16 reserved
	 * words and VertexState = 65 words.
	 * NOTE(review): the third slot names its colour words rd/gd/bd rather
	 * than c_r/c_g/c_b like the other two slots - confirm this is intended.
	 */
	union
	{
		struct
		{
			UINT32 a_x, a_y;
			UINT32 a_r, a_g, a_b, a_a;
			UINT32 a_W;
			UINT32 a_uW, a_vW;
			UINT32 iv1, iv2, iv3, iv4, iv5, iv6, iv7;

			UINT32 b_x, b_y;
			UINT32 b_r, b_g, b_b, b_a;
			UINT32 b_W;
			UINT32 b_uW, b_vW;
			UINT32 iv8, iv9, iv10, iv11, iv12, iv13, iv14;

			UINT32 c_x, c_y;
			UINT32 rd, gd, bd, c_a;
			UINT32 c_W;
			UINT32 c_uW, c_vW;
			UINT32 iv15, iv16, iv17, iv18, iv19, iv20, iv21;

			UINT32 Reserved[16];
			UINT32 VertexState;
		};
		UINT32 regs[65];
	} SetupEngineRegs;

	/* Edge/span walker (ES): 3 words */
	union
	{
		struct
		{
			UINT32 ESCntl;
			UINT32 ESCapAddr;
			UINT32 ESCapData;
		};
		UINT32 regs[3];
	} EdgeSpanWalkerRegs;

	/*
	 * Texture mapper (TM): 21 words. Several words are reachable under more
	 * than one name through the nested anonymous unions.
	 */
	union
	{
		struct
		{
			UINT32 TxtCntl;
			UINT32 TxtLdCntl;
			UINT32 TxtAddrCntl;
			UINT32 TxtPIPCntl;
			UINT32 TxtTABCntl;
			union
			{
				UINT32 TxtLODBase0;
				UINT32 TxtLdDstBase;
				UINT32 TxtMMDestBase;
			};
			UINT32 TxtLODBase1;
			UINT32 TxtLODBase2;
			UINT32 TxtLODBase3;
			union
			{
				UINT32 TxtLdSrcBase;
				UINT32 TxtMMSrcBase;
			};
			union
			{
				UINT32 TxtByteCnt;
				UINT32 TxtRowCnt;
				UINT32 TxtTexCnt;
			};
			UINT32 UVmax;
			UINT32 UVmask;
			UINT32 TxtSrcType01;
			UINT32 TxtSrcType23;
			UINT32 TxtExpType;
			union
			{
				UINT32 TxtSRCConst0;
				UINT32 TxtCLRConst;
				UINT32 TxtCATConst0;
			};
			union
			{
				UINT32 TxtSRCConst1;
				UINT32 TxtPIPConst1;
			};
			union
			{
				UINT32 TxtSRCConst2;
				UINT32 TxtCATIConst0;
			};
			union
			{
				UINT32 TxtSRCConst3;
				UINT32 TxtCATIConst1;
			};
			UINT32 TxtSrcExp;
		};
		UINT32 regs[21];
	} TextureMapperRegs;

	/* Destination blender (DB): 34 words */
	union
	{
		struct
		{
			UINT32 Snoop;
			UINT32 SuperGenControl;
			UINT32 UserGenControl;
			UINT32 DiscardControl;
			UINT32 Status;
			UINT32 IntCntl;
			UINT32 FBClip;
			UINT32 XWinClip;
			UINT32 YWinClip;
			UINT32 DestCntl;
			UINT32 DestBaseAddr;
			UINT32 DestXStride;
			UINT32 SrcCntl;
			UINT32 SrcBaseAddr;
			UINT32 SrcXStride;
			UINT32 SrcOffset;
			UINT32 ZCntl;
			UINT32 ZBaseAddr;
			UINT32 ZOffset;
			UINT32 ZClip;
			UINT32 SSBDSBCntl;
			UINT32 ConstIn;
			UINT32 TxtMultCntl;
			UINT32 TxtCoefConst0;
			UINT32 TxtCoefConst1;
			UINT32 SrcMultCntl;
			UINT32 SrcCoefConst0;
			UINT32 SrcCoefConst1;
			UINT32 ALUCntl;
			UINT32 SrcAlphaCntl;
			UINT32 DestAlphaCntl;
			UINT32 DestAlphaConst;
			UINT32 DitherMatA;
			UINT32 DitherMatB;
		};
		UINT32 regs[34];
	} DestBlendRegs;
} regs;

/* Pixel Index Pen RAM: 32-bit entries; GetTexel() indexes it with an 8-bit index */
static UINT32	*PIPRAM;

/* Texture RAM: texel storage read by GetTexel() */
static UINT8	*TRAM;

/* Instruction sequencer state (not referenced in the code visible here) */
static bool		TEActive;
static bool		TEPaused;
static bool		TEStopped;
static UINT32	IRP_next;

/* Vertex setup: v_a, v_b and v_c are the three vertices consumed by ProcessTriangle() */
static UINT32	needed_verts;
static VTX		*v_o, *v_a;
static VTX		*v_m, *v_b;
static VTX		*v_n, *v_c;


/* Fixed-width (3 character) names for four access types; for debug output */
static const char *access_types[] =
{
	"RW ", "BAD", "RS ", "C  "
};

/* Short unit names, in the same order as the members of 'regs' */
static const char *units[] =
{
	"GCVTX",
	"SETUP",
	"EDGSP",
	"TEXTR",
	"DESTB",
};

/* Names of the general control registers, in GeneralControlRegs field order */
static const char *gen_ctl_regs[] =
{
	"TEMasterMode",
	"Reserved",
	"TEICntlData",
	"TEICntl",
	"TEDCntlData",
	"TEDCntl",
	"IWP",
	"IRP",
	"IntEn",
	"IntStat",
	"Vertex Control",
};

/*
 * Suffixes of the first nine per-vertex setup registers (x, y, r, g, b, a,
 * W, uW, vW). Not used by GetReg(), which returns "???" for the setup unit.
 */
static const char *setup_regs[] =
{
	"_x",
	"_y",
	"_r",
	"_g",
	"_b",
	"_a",
	"_W",
	"_uW",
	"_vW",
};

/*
 * Names of the destination blender registers, indexed by register number.
 *
 * The order must match the field order of DestBlendRegs (34 entries) because
 * GetReg() indexes this table directly with the register number.
 */
static const char *desbtlend_regs[] =
{
	"Snoop",
	"SuperGenControl",
	"UserGenControl",
	"Discard Control",
	"Status",
	"Int Cntl",
	"FBClip",
	"XWinClip",
	"YWinClip",
	"DestCntl",
	"DestBaseAddr",
	"DestXStride",
	"SrcCntl",
	"SrcBaseAddr",
	"SrcXStride",
	"SrcOffset",
	"ZCntl",
	"ZBaseAddr",
	"ZOffset",
	"ZClip",
	"SSBDSBCntl",
	"ConstIn",
	"TxtMultCntl",
	"TxtCoefConst0",
	"TxtCoefConst1",
	"SrcMultCntl",
	"SrcCoefConst0",
	"SrcCoefConst1",
	"ALUCntl",
	"SrcAlphaCntl",
	"DestAlphaCntl",
	"DestAlphaConst",
	"DitherMatA",
	"DitherMatB",
};

/**************************************
 *
 *  Private Functions
 *
 *************************************/

// DEBUG
/*
 * Debug helper that dumps texture data or the PIP RAM to a file under tmp\.
 *
 *   src   - data to dump when type == 0; otherwise only its address is used,
 *           to build the file name
 *   bytes - number of bytes of 'src' to write (type == 0 only)
 *   type  - 0: write 'bytes' bytes of 'src' to tmp\<TxtLdSrcBase in hex>
 *           other: write 256 PIP RAM entries to tmp\<address of src in hex>.pip
 *
 * Failure to open the output file is silently ignored.
 */
void SaveTextureToFile(UINT8 *src, UINT32 bytes, UINT32 type)
{
	char buf[256];

#pragma message("File routines don't work when there's multiple files!!")

	if (type == 0)
	{
		sprintf(buf, "tmp\\%x", (unsigned int)TM.TxtLdSrcBase);
		FILE *tex = fopen(buf, "wb");

		if (tex)
		{
			fwrite(src, 1, bytes, tex);
			fclose(tex);
		}
	}
	else
	{
		/*
		 * %x expects an unsigned int; passing the pointer itself is a
		 * format/argument mismatch (and pushes 8 bytes on 64-bit hosts).
		 * Convert explicitly, keeping the low 32 bits of the address so
		 * the file name is unchanged on 32-bit builds.
		 */
		sprintf(buf, "tmp\\%x.pip", (unsigned int)(size_t)src);
		FILE *pip = fopen(buf, "wb");

		if (pip)
		{
			fwrite(PIPRAM, 1, 256*4, pip);
			fclose(pip);
		}
	}
}


/*
 * Return a printable name for register 'reg' of unit 'unit'.
 *
 * Unit 0 is looked up in gen_ctl_regs and unit 4 in desbtlend_regs; unit 1
 * always yields "???". Any other unit, or an index outside the table,
 * yields "????????".
 */
const char *GetReg(int unit, int reg)
{
	switch (unit)
	{
		case 0:
			if (reg < sizeof(gen_ctl_regs) / sizeof(const char *))
				return gen_ctl_regs[reg];
			break;

		case 1:
			return "???";

		case 4:
			if (reg < sizeof(desbtlend_regs) / sizeof(const char *))
				return desbtlend_regs[reg];
			break;
	}

	/* Unknown unit or register index out of range */
	return "????????";
}

/*
 * Extract a bitfield from big-endian memory.
 *
 *   mem    - base of the memory to read from
 *   b_addr - bit address of the first (most significant) bit of the field
 *   bits   - field width in bits; 0 returns 0, widths above 32 are treated
 *            as 32
 *
 * The field may straddle two consecutive 32-bit words. Byte addresses are
 * wrapped with RAM_SIZE_MASK. Returns the field right-justified.
 */
UINT32 ReadBits(const UINT8 *mem, UINT32 b_addr, UINT32 bits)
{
	UINT32 bit_off = b_addr & 31;
	UINT32 by_addr = (b_addr >> 3) & ~3;
	UINT32 src_val;
	UINT32 mask;

	if (bits == 0)
		return 0;
	if (bits > 32)
		bits = 32;

	/* 1 << 32 is undefined, so the full-width mask is special-cased */
	mask = (bits == 32) ? 0xffffffff : (((UINT32)1 << bits) - 1);

	src_val = READ32_BE(mem, by_addr & RAM_SIZE_MASK);

	if ((bit_off + bits) > 32)
	{
		/* The second word comes from the same memory as the first */
		UINT32 src_val2 = READ32_BE(mem, (by_addr + 4) & RAM_SIZE_MASK);

		/* Number of bits taken from the second word (1..31) */
		UINT32 r_bits = (bit_off + bits) - 32;

		/* Left part moves up to make room; excess high bits are masked below */
		src_val <<= r_bits;

		// Position the right hand side
		src_val |= src_val2 >> (32 - r_bits);
	}
	else
	{
		src_val >>= (32 - (bits + bit_off));
	}

	return src_val & mask;
}

/**************************************
 *
 *  Rendering Functions
 *
 *************************************/

/*
 * Compute the gradients of one interpolated quantity across the triangle.
 *
 *   s          - receives the results: s->dDx is the per-pixel change along
 *                x, s->slope the change applied when stepping along the
 *                long edge
 *   q1, q2, q3 - value of the quantity at vertices 1, 2 and 3
 *   sd         - edge deltas, long edge x step and reciprocal area shared
 *                by all quantities of the triangle
 */
static void CalculateSlopeData( slope *s,
								float q1, float q2, float q3,
								struct slope_calc_data *sd )
{
	/* Determinants of the quantity against the y and x edge deltas */
	float det_x = q1*sd->y23 + q2*sd->y31 + q3*sd->y12;
	float det_y = q1*sd->x23 + q2*sd->x31 + q3*sd->x12;

	/* Combined change for one long-edge step */
	float det_edge = sd->xstep_long * det_x - det_y;

	s->slope = det_edge * sd->iAria;
	s->dDx = det_x * sd->iAria;
}

/*
 * Set up the triangle held in v_a, v_b and v_c and hand it to ProcessEdges().
 *
 *   flags - VTX_FLAG_* bits: SHAD enables colour interpolation, TEXT enables
 *           texture coordinate interpolation, PRSP enables 1/w interpolation
 *
 * The vertex data is modified in place (coordinates truncated, colours
 * scaled by 255 when shading). Degenerate triangles are dropped silently.
 * The vertex sort result is stored in SE.VertexState.
 */
void ProcessTriangle(int flags)
{
	enum _scan_ scan;
	VTX *v1, *v2, *v3;

	/* Truncate x and y to integers */
	v_a->x = TRUNC(v_a->x); v_a->y = TRUNC(v_a->y);
	v_b->x = TRUNC(v_b->x); v_b->y = TRUNC(v_b->y);
	v_c->x = TRUNC(v_c->x); v_c->y = TRUNC(v_c->y);

	/* Scale and truncate colors */
	if (flags & VTX_FLAG_SHAD)
	{
		v_a->r = TRUNC(v_a->r * 255.0);
		v_a->g = TRUNC(v_a->g * 255.0);
		v_a->b = TRUNC(v_a->b * 255.0);
		v_a->a = TRUNC(v_a->a * 255.0);

		v_b->r = TRUNC(v_b->r * 255.0);
		v_b->g = TRUNC(v_b->g * 255.0);
		v_b->b = TRUNC(v_b->b * 255.0);
		v_b->a = TRUNC(v_b->a * 255.0);

		v_c->r = TRUNC(v_c->r * 255.0);
		v_c->g = TRUNC(v_c->g * 255.0);
		v_c->b = TRUNC(v_c->b * 255.0);
		v_c->a = TRUNC(v_c->a * 255.0);
	}

	/* Sort the vertices into top, middle and bottom */
	int a = 0;

	if ((v_a->y - v_b->y) < 0.0) a |= 4;
	if ((v_b->y - v_c->y) < 0.0) a |= 2;
	if ((v_c->y - v_a->y) < 0.0) a |= 1;

	switch (a)
	{
		case 1: v1 = v_c; v2 = v_b; v3 = v_a; break;
		case 2: v1 = v_b; v2 = v_a; v3 = v_c; break;
		case 3: v1 = v_b; v2 = v_c; v3 = v_a; break;
		case 4: v1 = v_a; v2 = v_c; v3 = v_b; break;
		case 5: v1 = v_c; v2 = v_a; v3 = v_b; break;
		case 6: v1 = v_a; v2 = v_b; v3 = v_c; break;
		case 0: /* Degenerate */
		case 7:
		default: return;
	}

	/* Store the sort result */
	SE.VertexState &= ~VERTEX_STATE_TSORT_MASK;
	SE.VertexState |= a << VERTEX_STATE_TSORT_SHIFT;

	/* Determine the W Range */
	if (flags & VTX_FLAG_PRSP)
	{
		#define EXP_ZERO	127
		#define WEXP0		(EXP_ZERO - 3)
		#define WEXP1		(EXP_ZERO - 6)
		#define WEXP2		(EXP_ZERO - 9)

		#define FP_EXP(x)	(((*(UINT32*)&(x)) >> 23) & 0xff)

		UINT32 W1Exp = FP_EXP(v1->rw);
		UINT32 W2Exp = FP_EXP(v2->rw);
		UINT32 W3Exp = FP_EXP(v3->rw);
		UINT32 WRange;

		if (W1Exp >= WEXP0 || W2Exp >= WEXP0 || W3Exp >= WEXP0)
			WRange = 0;
		else if (W1Exp >= WEXP1 || W2Exp >= WEXP1 || W3Exp >= WEXP1)
			WRange = 1;
		else if (W1Exp >= WEXP2 || W2Exp >= WEXP2 || W3Exp >= WEXP2)
			WRange = 2;
		else
			WRange = 3;

		/* The range is computed but not consumed anywhere yet */
		(void)WRange;
	}

	/* Calculate the deltas */
	float y12, y23, y31;
	float x12, x23, x31;

	y12 = v1->y - v2->y;
	y23 = v2->y - v3->y;
	y31 = v3->y - v1->y;

	x12 = v1->x - v2->x;
	x23 = v2->x - v3->x;
	x31 = v3->x - v1->x;

	/* Calculate the triangle area */
	float Aria, iAria;
	Aria = v1->x*y23 + v2->x*y31 + v3->x*y12;

	/* Reject degenerates */
	if (Aria == 0)
		return;

	iAria = 1 / Aria;

	/* The sign of the area selects the scan direction */
	if (Aria > 0)
		scan = LR;
	else
		scan = RL;

	/* Calculate vertex slopes */
	float xslope_0, xslope_1, xslope_long;
	float xstep_0, xstep_1, xstep_long;

	/*
	 * fabs() rather than abs(): without the C++ overloads abs() is the
	 * integer function and would convert its argument to int.
	 */
	float abs_y12 = (float)fabs(y12);
	float abs_y23 = (float)fabs(y23);
	float abs_y31 = (float)fabs(y31);

	/* Avoid division by zero */
	xslope_0 = (abs_y12 > 0.0f) ? (-x12 / abs_y12) : 0.0f;
	xslope_1 = (abs_y23 > 0.0f) ? (-x23 / abs_y23) : 0.0f;
	xslope_long = (abs_y31 > 0.0f) ? (x31 / abs_y31) : 0.0f;

	/* Integer x step per scanline; rounding direction depends on the scan */
	if (scan == LR)
	{
		xstep_long = floor(xslope_long);
		xstep_0 = ceil(xslope_0);
		xstep_1 = ceil(xslope_1);
	}
	else
	{
		xstep_long = ceil(xslope_long);
		xstep_0 = floor(xslope_0);
		xstep_1 = floor(xslope_1);
	}

	float xystep_0, xystep_1, xystep_long;
	float dy_0, dy_1, dy_long;

	/* Per-scanline error terms that go with the integer steps */
	xystep_0 = x12 - xstep_0 * y12;
	xystep_1 = x23 - xstep_1 * y23;
	xystep_long = x31 - xstep_long * y31;

	dy_0 = -y12;
	dy_1 = -y23;
	dy_long = -y31;

	/*
	 * Calculate color and texture gradients.
	 *
	 * Zero-initialised because ProcessEdges() reads the dDx member of every
	 * slope for each span, even for quantities whose flag is not set and
	 * which are therefore never filled in below.
	 */
	slope r_data = { 0 };
	slope g_data = { 0 };
	slope b_data = { 0 };
	slope a_data = { 0 };
	slope w_data = { 0 };
	slope uw_data = { 0 };
	slope vw_data = { 0 };

	struct slope_calc_data sd;

	sd.y23 = y23;
	sd.y31 = y31;
	sd.y12 = y12;
	sd.x23 = x23;
	sd.x31 = x31;
	sd.x12 = x12;
	sd.xstep_long = xstep_long;
	sd.iAria = iAria;

	if (flags & VTX_FLAG_SHAD)
	{
		CalculateSlopeData(&r_data, v1->r, v2->r, v3->r, &sd);
		CalculateSlopeData(&g_data, v1->g, v2->g, v3->g, &sd);
		CalculateSlopeData(&b_data, v1->b, v2->b, v3->b, &sd);
		CalculateSlopeData(&a_data, v1->a, v2->a, v3->a, &sd);
	}

	if (flags & VTX_FLAG_TEXT)
	{
		CalculateSlopeData(&uw_data, v1->uw, v2->uw, v3->uw, &sd);
		CalculateSlopeData(&vw_data, v1->vw, v2->vw, v3->vw, &sd);
	}

	if (flags & VTX_FLAG_PRSP)
	{
		CalculateSlopeData(&w_data, v1->rw, v2->rw, v3->rw, &sd);
	}

	/* TODO: Write to the edge walker registers */
	ProcessEdges(v1, v2, v3,
				 dy_long, dy_0, dy_1,
				 xystep_long, xystep_0, xystep_1,
				 xstep_long, xstep_0, xstep_1,
				 &r_data, &g_data, &b_data, &a_data,
				 &uw_data, &vw_data,
				 &w_data,
				 flags, scan);
}


/*
 * Walk the edges of a sorted triangle and emit one span per scanline.
 *
 *   v1, v2, v3         - vertices as sorted by ProcessTriangle()
 *   dy_*, xystep_*,
 *   xstep_*            - per-edge stepping terms; the _long set belongs to
 *                        the v1-v3 edge, _0 to v1-v2 and _1 to v2-v3
 *   sr..sw             - gradients of the interpolated quantities
 *   flags              - VTX_FLAG_* bits selecting what gets interpolated
 *   scan               - scan direction (LR or RL)
 *
 * Each iteration sends the current span to ProcessSpan(), then steps both
 * edges and the interpolated values to the next scanline. The walk switches
 * to the second short edge when the next scanline equals v2->y and stops
 * when it equals v3->y.
 */
static void ProcessEdges(VTX *v1, VTX *v2, VTX *v3,
						 float dy_long, float dy_0, float dy_1,
						 float xystep_long, float xystep_0, float xystep_1,
						 float xstep_long, float xstep_0, float xstep_1,
						 slope *sr, slope *sg, slope *sb, slope *sa,
						 slope *suw, slope *svw,
						 slope *sw,
						 int flags, enum _scan_ scan)
{
/* Sign tests on an error term; the sense is mirrored for RL scans */
#define INS(v)	((scan == LR) ? ((v) <= 0.0) : ((v) >= 0.0))
#define EIN(v)	((scan == LR) ? ((v) < 0.0) : ((v) > 0.0))
#define OUTS(v)	(!EIN(v))
#define EOUT(v)	(!INS(v))

	/* Two lowest bits of ESCntl */
	int dsp_off = ES.ESCntl & (1 << (31 - 31));
	int du_scan = ES.ESCntl & (1 << (31 - 30));

	#pragma message(__FUNCTION__ ": Need to implement DUSCAN")
	/* Not implemented yet, so the register bit is ignored */
	du_scan = 0;

	/* Running error terms for the long edge and the current short edge */
	float cf_long = 0.0f;
	float cf_short = 0.0f;

	/* Stepping terms of the short edge currently being walked */
	float xystep;
	float xstep;
	float dy;

	/* Interpolated values at the start of the current span */
	float r, g, b, a;
	float uw, vw;
	float w;

	/* Current scanline and span end points */
	int y;
	int xs;
	int xe;

	bool omit_right = true;

	/* Determine starting values */
	y = (int)v1->y;

	/* Flat top triangles are a special case */
	if (v1->y == v2->y)
	{
		/* There is no v1-v2 edge to walk; start on the v2-v3 edge */
		xystep = xystep_1;
		xstep = xstep_1;
		dy = dy_1;

		if (scan == LR)
		{
			/* Start from whichever of v1/v2 has the smaller x */
			if (v1->x < v2->x)
			{
				r = v1->r;
				g = v1->g;
				b = v1->b;
				a = v1->a;
				uw = v1->uw;
				vw = v1->vw;
				w = v1->rw;

				xs = (int)v1->x;
				xe = (int)v2->x;
			}
			else
			{
				r = v2->r;
				g = v2->g;
				b = v2->b;
				a = v2->a;
				uw = v2->uw;
				vw = v2->vw;
				w = v2->rw;

				xs = (int)v2->x;
				xe = (int)v1->x;
			}
		}
		else
		{
			/* Start from whichever of v1/v2 has the larger x */
			if (v1->x < v2->x)
			{
				r = v2->r;
				g = v2->g;
				b = v2->b;
				a = v2->a;
				uw = v2->uw;
				vw = v2->vw;
				w = v2->rw;

				xs = (int)v2->x;
				xe = (int)v1->x;
			}
			else
			{
				r = v1->r;
				g = v1->g;
				b = v1->b;
				a = v1->a;
				uw = v1->uw;
				vw = v1->vw;
				w = v1->rw;

				xs = (int)v1->x;
				xe = (int)v2->x;
			}
		}
	}
	else
	{
		/* General case: both edges start at v1 with a zero-width span */
		xystep = xystep_0;
		xstep = xstep_0;
		dy = dy_0;
		r = v1->r;
		g = v1->g;
		b = v1->b;
		a = v1->a;
		uw = v1->uw;
		vw = v1->vw;
		w = v1->rw;

		xs = (int)v1->x;
		xe = xs;
	}

	while (1)
	{
		/* Send the span for texturing */
		ProcessSpan( y, xs, xe,
					 r, g, b, a,
					 sr->dDx, sg->dDx, sb->dDx, sa->dDx,
					 uw, vw, w,
					 suw->dDx, svw->dDx, sw->dDx,
					 scan, omit_right,
					 flags );

		omit_right = false;

		/* Advance both error terms by one scanline */
		cf_short += xystep;
		cf_long += xystep_long;
		/* Set when the long edge takes an extra one pixel correction */
		bool step_back = false;

		if (scan == LR)
		{
			xs += (int)xstep_long;
			xe += (int)xstep;

			if (EOUT(cf_long))
			{
				cf_long += dy_long;
				step_back = true;
				++xs;
			}

			if (dsp_off ? EOUT(cf_short) : OUTS(cf_short))
			{
				cf_short -= dy;
				--xe;
			}
		}
		else
		{
			xe += (int)xstep;
			xs += (int)xstep_long;

			if (EOUT(cf_short))
			{
				cf_short += dy;
				++xe;
			}

			if (dsp_off ? EOUT(cf_long) : OUTS(cf_long))
			{
				cf_long -= dy_long;
				step_back = true;
				--xs;
			}
		}

		/* Update the color values */
		if (flags & VTX_FLAG_SHAD)
		{
			/* The extra pixel of a correction step adds (LR) or removes (RL) one dDx */
			if (scan == LR)
			{
				r += step_back ? (sr->slope + sr->dDx) : sr->slope;
				g += step_back ? (sg->slope + sg->dDx) : sg->slope;
				b += step_back ? (sb->slope + sb->dDx) : sb->slope;
				a += step_back ? (sa->slope + sa->dDx) : sa->slope;
			}
			else
			{
				r += step_back ? (sr->slope - sr->dDx) : sr->slope;
				g += step_back ? (sg->slope - sg->dDx) : sg->slope;
				b += step_back ? (sb->slope - sb->dDx) : sb->slope;
				a += step_back ? (sa->slope - sa->dDx) : sa->slope;
			}

			#pragma message ("Colors should be clamped to 0x7ffff")
			FCLAMP(r);
			FCLAMP(g);
			FCLAMP(b);
			FCLAMP(a);
		}

		/* Update the texture coordinates the same way */
		if (flags & VTX_FLAG_TEXT)
		{
			if (scan == LR)
			{
				uw += step_back ? (suw->slope + suw->dDx) : suw->slope;
				vw += step_back ? (svw->slope + svw->dDx) : svw->slope;
			}
			else
			{
				uw += step_back ? (suw->slope - suw->dDx) : suw->slope;
				vw += step_back ? (svw->slope - svw->dDx) : svw->slope;
			}
		}

		/* Update w the same way */
		if (flags & VTX_FLAG_PRSP)
		{
			if (scan == LR)
				w += step_back ? (sw->slope + sw->dDx) : sw->slope;
			else
				w += step_back ? (sw->slope - sw->dDx) : sw->slope;
		}

		/* Update Y */
		int next_y = y + (du_scan ? -1 : 1);

		/* Reached the middle vertex: restart the short edge on v2-v3 */
		if (next_y == v2->y)
		{
			cf_short = 0;
			xystep = xystep_1;
			xstep = xstep_1;
			dy = dy_1;
			xe = (int)v2->x;

			if (scan == LR)
				omit_right = true;
		}
		/* The scanline of the last vertex itself is not drawn */
		if (next_y == v3->y)
			break;

		y = next_y;
	}
}

/*
 * Fetch one texel from TRAM and decode it.
 *
 *   uf, vf     - texture coordinates; converted to integers, masked with
 *                UVmask and clamped to UVmax
 *   w          - unused
 *   r, g, b, a - receive the decoded colour and alpha. Components whose
 *                source is disabled or not implemented are left as passed in
 *   ssb        - receives the decoded SSB bit
 *
 * Literal textures take their components straight from the texel.
 * Indexed textures look the colour index up in PIPRAM and then choose each
 * output from a constant, TRAM or the PIP entry according to TxtPIPCntl.
 * LOD selection and mipmapping are not implemented; level 0 is always used.
 */
static void GetTexel(float uf, float vf, float w, UINT8 &r, UINT8 &g,UINT8 &b, UINT8 &a, UINT8 &ssb)
{
	UINT32 base_addr;
	UINT32 offset;
	UINT32 u;
	UINT32 v;
	UINT32 u_mask = (TM.UVmask >> 16);
	UINT32 v_mask = TM.UVmask & 0x3ff;
	UINT32 u_max = (TM.UVmax >> 16) & 0x3ff;
	UINT32 v_max = TM.UVmax & 0x3ff;

	/* TODO: Increment for mipmapping (see p156) */

	/* Round and mask the co-ordinates */
	u = (int)uf & u_mask;
	v = (int)vf & v_mask;

#pragma message ("Currently masking UVmax with UVmask")
	u_max &= u_mask;
	v_max &= v_mask;

	/* Clamp (u and v are unsigned, so only the upper bound can be exceeded) */
	u = (u > u_max) ? u_max : u;
	v = (v > v_max) ? v_max : v;

	/* TODO: LOD Calculation required */
	base_addr = TM.TxtLODBase0;

	/* Calculate address offset. TODO: LOD */
	offset = (v * (u_max + 1) + u);

	/* TODO: Tdepth */

	if (TM.TxtExpType & TEXTYPE_LITERAL_MASK)
	{
		/* Texel address */
		UINT32 texel = READ32_BE(TRAM, base_addr + offset);
		UINT32 TxtType = TM.TxtExpType;

		if (TxtType & TEXTYPE_COLORINDEX_ON_MASK)
		{
			/* Three colour components of c_depth bits each, lowest first */
			UINT32 c_depth = (TxtType & TEXTYPE_CDEPTH_MASK) >> TEXTYPE_CDEPTH_SHIFT;
			UINT32 mask = (1 << c_depth) - 1;
			r = texel & mask;
			texel >>= c_depth;
			g = texel & mask;
			texel >>= c_depth;
			b = texel & mask;
			texel >>= c_depth;
		}
		if (TxtType & TEXTYPE_ALPHAON_MASK)
		{
			UINT32 a_depth = (TxtType & TEXTYPE_ADEPTH_MASK) >> TEXTYPE_ADEPTH_SHIFT;
			UINT32 mask = (1 << a_depth) - 1;

			/* Keep the low a_depth bits (mask with the bit mask, not the depth) */
			a = texel & mask;

			/* Expand to 8 bits */
			if (a_depth == 4)
				a = (a << 4) | a;
			else
				a = (a << 1) | (a >> 6);
		}
		if (TxtType & TEXTYPE_SSBON_MASK)
		{
			/* NOTE(review): texel is not shifted past the alpha bits here - confirm */
			ssb = texel & 1;
		}
	}
	else
	{
		/* Indexed texture format */
		UINT32 TxtType = TM.TxtExpType;
		int c_on = TxtType & TEXTYPE_COLORINDEX_ON_MASK;
		int a_on = TxtType & TEXTYPE_ALPHAON_MASK;
		int ssb_on = TxtType & TEXTYPE_SSBON_MASK;
		UINT32 bits = 0;
		int ssbtex = 0;		/* SSB from the texel itself; not decoded yet */

		UINT32 c_val = 0;	/* PIP entry of the texel's colour index */

		/* Total texel width in bits */
		if (c_on)
			bits += (TxtType & TEXTYPE_CDEPTH_MASK) >> TEXTYPE_CDEPTH_SHIFT;
		if (a_on)
			bits += (TxtType & TEXTYPE_ADEPTH_MASK) >> TEXTYPE_ADEPTH_SHIFT;
		if (ssb_on)
			bits += 1;

		/* Texel address */
		UINT32 t_addr = (base_addr + offset);
		UINT32 b_addr = t_addr * bits;
		UINT32 t_mask = (1 << bits) - 1;

		/* NOTE(review): assumes the texel does not straddle a 32-bit word */
		UINT32 texel = READ32_BE(TRAM, (b_addr >> 3) & ~3);
		texel = texel >> (32 - (bits + (b_addr & 31)));
		texel &= t_mask;

		if (c_on)
		{
			UINT32 c_depth = (TxtType & TEXTYPE_CDEPTH_MASK) >> TEXTYPE_CDEPTH_SHIFT;
			UINT32 pipidx = texel & ((1 << c_depth) - 1);

			c_val = PIPRAM[((TM.TxtPIPCntl & TXTPIPCNTL_IDXOFFSET_MASK) + pipidx) & 0xff];
		}

		if (a_on)
		{
			LOGERROR("Alpha channel in this texture\n");
		}

		if (ssb_on)
		{
			LOGERROR("SSB in this texture\n");
		}

		/* SSB */
		switch ((TM.TxtPIPCntl & TXTPIPCNTL_SSBSEL_MASK) >> TXTPIPCNTL_SSBSEL_SHIFT)
		{
			case PIPSEL_CONSTANT:
			{
				/* The PIP SSB picks the constant (this branch is never literal) */
				ssb = (c_val >> 31) & 1;

				UINT32 cnst = ssb ? TM.TxtSRCConst1 : TM.TxtSRCConst0;
				ssb = (cnst >> 31) & 1;
				break;
			}
			case PIPSEL_TRAM:
			{
				ssb = ssbtex;
				break;
			}
			case PIPSEL_PIP:
			{
				ssb = (c_val >> 31) & 1;
				break;
			}
		}

		/* Colour */
		switch ((TM.TxtPIPCntl & TXTPIPCNTL_COLORSEL_MASK) >> TXTPIPCNTL_COLORSEL_SHIFT)
		{
			case PIPSEL_CONSTANT:
			{
				UINT32 cnst = ssb ? TM.TxtSRCConst1 : TM.TxtSRCConst0;
				r = (cnst >> 16) & 0xff;
				g = (cnst >>  8) & 0xff;
				b = (cnst >>  0) & 0xff;
				break;
			}
			case PIPSEL_TRAM:
			{
				LOGERROR("PIPSEL_TRAM NOT IMPLEMENTED");
				break;
			}
			case PIPSEL_PIP:
			{
				r = (c_val >> 16) & 0xff;
				g = (c_val >>  8) & 0xff;
				b = (c_val >>  0) & 0xff;
				break;
			}
		}

		/* Alpha (7 bits at this point) */
		switch ((TM.TxtPIPCntl & TXTPIPCNTL_ALPHASEL_MASK) >> TXTPIPCNTL_ALPHASEL_SHIFT)
		{
			case PIPSEL_CONSTANT:
			{
				UINT32 cnst = ssb ? TM.TxtSRCConst1 : TM.TxtSRCConst0;
				a = (cnst >> 24) & 0x7f;
				break;
			}
			case PIPSEL_TRAM:
			{
				LOGERROR("ALPHA SOURCE FROM TRAM!\n");
				break;
			}
			case PIPSEL_PIP:
			{
				a = (c_val >> 24) & 0x7f;
				break;
			}
		}

		/* Expand alpha to 8-bits */
		a = (a << 1) | (a >> 6);
	}
}

/*
 * Byte address of source surface pixel (x, y).
 *
 * The X/Y offsets from DB.SrcOffset are added to the coordinates first.
 * The pixel size is 4 bytes when SrcCntl selects 32bpp, otherwise 2 bytes.
 */
static inline UINT32 GetSrcAddr(int x, int y)
{
	int px = x + ((DB.SrcOffset & SRCOFFS_XOFFS_MASK) >> SRCOFFS_XOFFS_SHIFT);
	int py = y + ((DB.SrcOffset & SRCOFFS_YOFFS_MASK) >> SRCOFFS_YOFFS_SHIFT);
	int bytes_per_pixel = (DB.SrcCntl & SRCCNTL_32BPP_MASK) ? 4 : 2;

	return DB.SrcBaseAddr + (py * DB.SrcXStride + px) * bytes_per_pixel;
}

/*
 * Interpolate between a and b with an 8-bit factor t.
 *
 * t == 0 returns a and t == 255 returns b exactly; in between each term is
 * weighted and divided by 256 separately before the two are added.
 */
static inline UINT8 Lerp(UINT8 a, UINT8 b, UINT8 t)
{
	switch (t)
	{
		case 0:		return a;
		case 255:	return b;
		default:	return (((255 - t) * a) >> 8) + ((t * b) >> 8);
	}
}

/*
 * Multiply two 8-bit values, treating 255 as exactly one.
 *
 * If either operand is 255 the other is returned unchanged; otherwise the
 * product is divided by 256.
 */
static inline UINT8 Multiply(UINT8 a, UINT8 b)
{
	return (b == 255) ? a
		 : (a == 255) ? b
		 : (UINT8)((a * b) >> 8);
}

/*
 * Select one input of the texture blend stage.
 *
 *   sel                    - TXTTABCNTL_LERP_* selector
 *   ri, gi, bi, ai         - iterated colour and alpha
 *   rtex, gtex, btex, atex - texel colour and alpha
 *   ar, ag, ab             - receive the selected red/green/blue values
 *
 * Alpha selections replicate the alpha into all three outputs. The constant
 * selections use TxtSRCConst3 when the global ssbti is set, TxtSRCConst2
 * otherwise. Unknown selectors produce 0.
 */
static void SelectLERP( int sel,
						UINT8 ri, UINT8 gi, UINT8 bi, UINT8 ai,
						UINT8 rtex, UINT8 gtex, UINT8 btex, UINT8 atex,
						UINT8 &ar, UINT8 &ag, UINT8 &ab )
{
	switch (sel)
	{
		case TXTTABCNTL_LERP_AITER:
		{
			ar = ai;
			ag = ai;
			ab = ai;
			break;
		}
		case TXTTABCNTL_LERP_CITER:
		{
			ar = ri;
			ag = gi;
			ab = bi;
			break;
		}
		case TXTTABCNTL_LERP_AT:
		{
			ar = atex;
			ag = atex;
			ab = atex;
			break;
		}
		case TXTTABCNTL_LERP_CT:
		{
			ar = rtex;
			ag = gtex;
			ab = btex;
			break;
		}
		case TXTTABCNTL_LERP_ACONST:
		{
			UINT32 cnst = ssbti ? TM.TxtSRCConst3 : TM.TxtSRCConst2;
			UINT8 aval = (cnst >> 24) & 0x7f;

			/* TODO: Is this correct? */
			aval = (aval << 1) | (aval >> 6);

			ar = aval;
			ag = aval;
			ab = aval;
			break;
		}
		case TXTTABCNTL_LERP_CCONST:
		{
			UINT32 cnst = ssbti ? TM.TxtSRCConst3 : TM.TxtSRCConst2;

			/*
			 * Same layout as every other colour constant in this file:
			 * red in bits 23-16, green in 15-8, blue in 7-0 (bits 31-24
			 * hold SSB and alpha).
			 */
			ar = (cnst >> 16) & 0xff;
			ag = (cnst >>  8) & 0xff;
			ab = (cnst >>  0) & 0xff;
			break;
		}
		default:
		{
			ar = 0;
			ag = 0;
			ab = 0;
			break;
		}
	}
}

/*
 * Bilinearly weight four samples.
 *
 *   v            - samples; v[0] is weighted by (1-fracu)(1-fracv), v[1] by
 *                  (1-fracu)fracv, v[2] by fracu(1-fracv), v[3] by fracu*fracv
 *   fracu, fracv - fractional position between the samples
 *
 * Returns the weighted sum truncated to 8 bits.
 */
static UINT8 Bilinear(UINT8 v[4], float fracu, float fracv)
{
	float sum =
		v[0] * ((1 - fracu) * (1 - fracv)) +
		v[1] * ((1 - fracu) * fracv) +
		v[2] * (fracu * (1 - fracv)) +
		v[3] * (fracu * fracv);

	UINT8 result = (UINT8)sum;
	return result;
}

static void GetTextureBlendPixel(float u, float v, float rw, int flags,
								 UINT8 ri, UINT8 gi, UINT8 bi, UINT8 ai)
{
	UINT8 rtex = 0, gtex = 0, btex = 0, atex = 0;
	UINT8 rbl, gbl, bbl, abl;

	if ((flags & VTX_FLAG_TEXT) && (TM.TxtAddrCntl & TXTADDRCNTL_LOOKUPEN_MASK))
	{
		if (flags & VTX_FLAG_PRSP)
		{
			UINT32 unew = (UINT32)(u * (1 << 13));
			UINT32 vnew = (UINT32)(v * (1 << 13));
			UINT32 rnew = (UINT32)(rw * (1 << 13));

			u = (float)unew / (float)rnew;
			v = (float)vnew / (float)rnew;

			//u = (u + 0.000001f) / rw;
			//v = (v + 0.000001f) / rw;
		}

		switch ((TM.TxtAddrCntl & TXTADDRCNTL_R12FLTSEL_MASK) >> TXTADDRCNTL_R12FLTSEL_SHIFT)
		{
			case TXTADDRCNTL_FILTER_POINT:
			{
				GetTexel(u, v, rw, rtex, gtex, btex, atex, ssbti);
				break;
			}
			case TXTADDRCNTL_FILTER_BILINEAR:
#if 0
			{
				UINT8 r[4], g[4], b[4], a[4];
				GetTexel(u + 1, v + 1, rw, r[3], g[3], b[3], a[3], ssbti);
				GetTexel(u + 1, v + 0, rw, r[2], g[2], b[2], a[2], ssbti);
				GetTexel(u + 0, v + 1, rw, r[1], g[1], b[1], a[1], ssbti);
				GetTexel(u + 0, v + 0, rw, r[0], g[0], b[0], a[0], ssbti);

				float intpart;
				float fracu = modf(u, &intpart);
				float fracv = modf(v, &intpart);

				UINT32 fru = (UINT32)(fracu * (1 << 4));
				UINT32 frv = (UINT32)(fracv * (1 << 4));

				fracu = (float)fru / (1 << 4);
				fracv = (float)frv / (1 << 4);

				rtex = Bilinear(r, fracu, fracv);
				gtex = Bilinear(g, fracu, fracv);
				btex = Bilinear(b, fracu, fracv);
				atex = Bilinear(a, fracu, fracv);
				break;
			}
#endif
			default:
			{
//				LOGERROR("Unsupported filtering mode!\n");
				GetTexel(u, v, rw, rtex, gtex, btex, atex, ssbti);
				break;
			}
		}
	}

	/* Now blend */
	{
		UINT8 ar, ab, ag, aa;
		UINT8 br, bb, bg, ba;
		UINT8 tr, tb, tg;

		SelectLERP( (TM.TxtTABCntl & TXTTABCNTL_CASEL_MASK) >> TXTTABCNTL_CASEL_SHIFT,
					ri, gi, bi, ai,
					rtex, gtex, btex, atex,
					ar, ag, ab );

		SelectLERP( (TM.TxtTABCntl & TXTTABCNTL_CBSEL_MASK) >> TXTTABCNTL_CBSEL_SHIFT,
					ri, gi, bi, ai,
					rtex, gtex, btex, atex,
					br, bg, bb );

		SelectLERP( (TM.TxtTABCntl & TXTTABCNTL_CTSEL_MASK) >> TXTTABCNTL_CTSEL_SHIFT,
					ri, gi, bi, ai,
					rtex, gtex, btex, atex,
					tr, tg, tb );
		
		if (((TM.TxtTABCntl & TXTTABCNTL_BLENDOP_MASK) >> TXTTABCNTL_BLENDOP_SHIFT) == TXTTABCNTL_BLENDOP_LERP)
		{
			rbl = Lerp(ar, br, tr);
			gbl = Lerp(ag, bg, tg);
			bbl = Lerp(ab, bb, tb);
		}
		else
		{
			/* Alpha is multiply only */
			SelectLERP( (TM.TxtTABCntl & TXTTABCNTL_AASEL_MASK) >> TXTTABCNTL_AASEL_SHIFT,
						ai, ai, ai, ai,
						atex, atex, atex, atex,
						aa, aa, aa );

			SelectLERP( (TM.TxtTABCntl & TXTTABCNTL_ABSEL_MASK) >> TXTTABCNTL_ABSEL_SHIFT,
						ai, ai, ai, ai,
						atex, atex, atex, atex,
						ba, ba, ba );

			rbl = Multiply(ar, br);
			gbl = Multiply(ag, bg);
			bbl = Multiply(ab, bb);
			abl = Multiply(aa, ba);
		}
	}

	/* Now select the output */
	switch ((TM.TxtTABCntl & TXTTABCNTL_COSEL_MASK) >> TXTTABCNTL_COSEL_SHIFT)
	{
		case TXTTABCNTL_COSEL_CITER:
		{
			rti = ri;
			gti = gi;
			bti = bi;
			break;
		}
		case TXTTABCNTL_COSEL_CT:
		{
			rti = rtex;
			gti = gtex;
			bti = btex;
			break;
		}
		case TXTTABCNTL_COSEL_BLENDOUTPUT:
		{
			rti = rbl;
			gti = gbl;
			bti = bbl;
			break;
		}
		case TXTTABCNTL_COSEL_RESERVED:
		{
			rti = gti = bti = 0;
			break;
		}
	}

	switch ((TM.TxtTABCntl & TXTTABCNTL_AOSEL_MASK) >> TXTTABCNTL_AOSEL_SHIFT)
	{
		case TXTTABCNTL_AOSEL_AITER:
		{
			ati = ai;
			break;
		}
		case TXTTABCNTL_AOSEL_AT:
		{
			ati = atex;
			break;
		}
		case TXTTABCNTL_AOSEL_BLENDOUTPUT:
		{
			ati = abl;
			break;
		}
		case TXTTABCNTL_AOSEL_RESERVED:
		{
			ati = 0;
			break;
		}
	}
}

/*
 * Choose the texture-side inputs of the destination blender.
 *
 *   r, g, b, a - colour and alpha coming out of the texture stage
 *   ssb_in     - SSB coming out of the texture stage
 *
 * Writes the globals at (texture alpha), ssb and rt/gt/bt.
 */
static void SelTexPixel(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT8 ssb_in)
{
	int cntl;

	/*
	 * SrcAlphaCntl holds three 2-bit fields, one per range of the incoming
	 * texture alpha: zero, fractional and one. All three tests look at the
	 * same alpha; the original compared the middle test against the
	 * unrelated source surface alpha (asrc).
	 * NOTE(review): confirm against the hardware documentation.
	 */
	if (a == 0)
		cntl = (DB.SrcAlphaCntl >> 4) & 3;
	else if (a == 255)
		cntl = DB.SrcAlphaCntl & 3;
	else
		cntl = (DB.SrcAlphaCntl >> 2) & 3;

	/* Any other field value leaves 'at' unchanged */
	switch (cntl)
	{
		case TALPHA_ZERO: at = 0;	 break;
		case TALPHA_ATI:  at = a;	break;
		case TALPHA_ONE:  at = 255;  break;
	}

	/* Why bother with this? */
	ssb = ssb_in & 1;

	switch ((DB.TxtMultCntl & TXTMULTCNTL_TXTINSEL_MASK) >> TXTMULTCNTL_TXTINSEL_SHIFT)
	{
		case TXTR_CTI_SELECT:
		{
			/* Texture stage colour */
			rt = r;
			gt = g;
			bt = b;
			break;
		}
		case TXTR_CONST_SELECT:
		{
			/* Constant colour register */
			UINT32 cnst = DB.ConstIn;
			rt = (cnst >> 16) & 0xff;
			gt = (cnst >> 8) & 0xff;
			bt = (cnst >> 0) & 0xff;
			break;
		}
		case TXTR_COMP_SELECT:
		{
			/* Complement of the source surface colour */
			rt = ~rsrc;
			gt = ~gsrc;
			bt = ~bsrc;
			break;
		}
		case TXTR_ALPHATXT_SELECT:
		{
			/* Texture alpha replicated into all three channels */
			rt = at;
			gt = at;
			bt = at;
			break;
		}
	}

	/* Optional right justification */
	if (DB.TxtMultCntl & TXTMULTCNTL_TXTRJUST_MASK)
	{
		rt >>= 3;
		gt >>= 3;
		bt >>= 3;
	}
}


void SelSrcPixel(int x, int y, UINT8 red, UINT8 green, UINT8 blue)
{
	if ((DB.UserGenControl & USERGENCTL_SRCINEN_MASK)
		&& (DB.UserGenControl & USERGENCTL_BLENDEN_MASK)
		&& !(GC.TEMasterMode & TEMASTER_MODE_DBLEND_MASK))
	{
		if (DB.SrcCntl & SRCCNTL_32BPP_MASK)
		{
			UINT32 srcval = READ32_BE(RAM, GetSrcAddr(x, y) & RAM_SIZE_MASK);

			asrc = ((srcval >> 24) & 0x7f) << 1;
			rsrc = (srcval >> 16) & 0xff;
			gsrc = (srcval >>  8) & 0xff;
			bsrc = (srcval >>  0) & 0xff;

			if (DB.SrcCntl & SRCCNTL_MSBREP_MASK)
				asrc |= (asrc >> 6) & 1;

			dsbsrc = (srcval >> 31) & 1;
		}
		else
		{
			UINT16 srcval = READ16_BE(RAM, GetSrcAddr(x, y) & RAM_SIZE_MASK);

			rsrc = (srcval >> 10) & 0x1f;
			gsrc = (srcval >> 5) & 0x1f;
			bsrc = srcval & 0x1f;
			asrc = 0;

			dsbsrc = srcval & 0x8000;
			rsrc <<= 3;
			gsrc <<= 3;
			bsrc <<= 3;

			if (DB.SrcCntl & SRCCNTL_MSBREP_MASK)
			{
				rsrc |= (rsrc >> 5);
				gsrc |= (gsrc >> 5);
				bsrc |= (bsrc >> 5);
			}
		}
	}
	else
	{
		/* Source input disabled */
		dsbsrc = 0;
		asrc = 0;
		rsrc = 0;
		gsrc = 0;
		bsrc = 0;
	}

	switch ((DB.SrcMultCntl & SRCMULTCNTL_SRCINSEL_MASK) >> SRCMULTCNTL_SRCINSEL_SHIFT)
	{
		case SRC_SRC_SELECT:
		{
			rs = rsrc;
			gs = gsrc;
			bs = bsrc;
			break;
		}
		case SRC_CONST_SELECT:
		{
			UINT32 cnst = DB.ConstIn;
			rs = (cnst >> 16) & 0xff;
			gs = (cnst >> 8) & 0xff;
			bs = (cnst >> 0) & 0xff;
			break;
		}
		case SRC_COMP_SELECT:
		{
			rs = ~red;
			gs = ~green;
			bs = ~blue;
			break;
		}
		case SRC_ALPHASRC_SELECT:
		{
			rs = asrc;
			gs = asrc;
			bs = asrc;
			break;
		}
	}

	if (DB.SrcMultCntl & SRCMULTCNTL_SRCRJUST_MASK)
	{
		rs >>= 3;
		gs >>= 3;
		bs >>= 3;
	}

	as = asrc;
	dsbs = dsbsrc;
}

/*
 * Destination blender ALU for one colour channel.
 *
 *   a, b - the two operands
 *
 * The 4-bit ALUSEL field of DB.ALUCntl selects the operation:
 *   bit 3 clear - arithmetic: a + b (bit 2 clear), b - a (bits 2 and 1 set)
 *                 or a - b (bit 2 set, bit 1 clear)
 *   bit 3 set   - boolean: ALUSEL is used as a truth table that is applied
 *                 to each of the low 8 bit pairs, indexed by (a_bit*2 + b_bit)
 *
 * The result is limited to 9 bits and scaled according to FINALDIVIDE.
 * Arithmetic results are clamped to 0..255 unless ALUSEL bit 0 is set and
 * bit 1 is clear. Returns the low 8 bits.
 */
UINT8 ALUCalc(UINT16 a, UINT16 b)
{
	const int op = (DB.ALUCntl & ALUCNTL_ALUSEL_MASK) >> ALUCNTL_ALUSEL_SHIFT;
	const bool arithmetic = ((op & 8) == 0);
	int raw;
	int scaled;
	int overflow = 0;
	int underflow = 0;

	// p271

	if (arithmetic)
	{
		/* ALU */
		if (op & 4)
		{
			raw = (op & 2) ? (b - a) : (a - b);
			if (raw < 0)
				underflow = 1;
		}
		else
		{
			raw = a + b;
		}
	}
	else
	{
		/* Boolean: look each bit pair up in the truth table held in 'op' */
		UINT16 out = 0;
		int bit;

		for (bit = 0; bit < 8; ++bit)
		{
			int entry = ((a >> bit) & 1) * 2 + ((b >> bit) & 1);

			if ((op >> entry) & 1)
				out |= (UINT16)(1 << bit);
		}

		raw = (int)out;
	}

	raw &= 0x1ff;

	/* Final multiply/divide by a power of two */
	switch ((DB.ALUCntl & ALUCNTL_FINALDIVIDE_MASK) >> ALUCNTL_FINALDIVIDE_SHIFT)
	{
		case 1:  scaled = raw << 1;	break;
		case 2:  scaled = raw << 2;	break;
		case 3:  scaled = raw << 3;	break;
		case 7:  scaled = raw >> 1;	break;
		case 6:  scaled = raw >> 2;	break;
		case 5:  scaled = raw >> 3;	break;
		default: scaled = raw;		break;
	}

	if (scaled > 255)
		overflow = 1;

	/* Greater/equal/less flags; computed but not consumed yet */
	int alugel;
#pragma message("Fix ALUGEL")
	if (underflow == 1)
		alugel = 1;
	else if (scaled == 0)
		alugel = 2;
	else
		alugel = 4;

	/* Clamp? Only arithmetic results, and not when bit 0 is set without bit 1 */
	if (arithmetic && (!(op & 1) || (op & 2)))
	{
		if (overflow)
			scaled = 255;
		if (underflow)
			scaled = 0;
	}

	return scaled & 0xff;
}

/*
 * Add a dither offset to an 8-bit value and clamp the result to 0..255.
 *
 *   in      - input value
 *   dithval - offset; when bit 3 is set the offset is (low 3 bits) - 8,
 *             otherwise dithval is added as it is
 */
inline UINT8 Dither(UINT8 in, UINT8 dithval)
{
	INT32 offset = (dithval & 8) ? (INT32)(-8 + (dithval & 7)) : (INT32)dithval;
	INT32 sum = (INT32)in + offset;

	if (sum > 255)
		return (UINT8)255;
	if (sum < 0)
		return (UINT8)0;

	return (UINT8)sum;
}

/*
 * Coefficient for the source multiplier of one colour channel.
 *
 *   cti       - texture stage colour of this channel
 *   dm2const0 - constant used when the selecting bit is 0
 *   dm2const1 - constant used when the selecting bit is 1
 *
 * SRCCONSTCNTL picks the selecting bit (0: ssb, 1: dsbs) and SRCCOEFSEL the
 * coefficient source. The result is complemented when SRCCOEFCMP is set.
 */
UINT8 SrcCoef(UINT8 cti, UINT8 dm2const0, UINT8 dm2const1)
{
	/*
	 * Both start from 0 so that a field value not handled below cannot
	 * lead to an uninitialised read.
	 * NOTE(review): confirm the intended behaviour for such values.
	 */
	int sel = 0;
	UINT8 cnst, coef = 0;

	switch ((DB.SrcMultCntl & SRCMULTCNTL_SRCCONSTCNTL_MASK) >> SRCMULTCNTL_SRCCONSTCNTL_SHIFT)
	{
		case 0: sel = ssb;	break;
		case 1: sel = dsbs;	break;
		default:			break;
	}

	cnst = sel ? dm2const1 : dm2const0;

	switch ((DB.SrcMultCntl & SRCMULTCNTL_SRCCOEFSEL_MASK) >> SRCMULTCNTL_SRCCOEFSEL_SHIFT)
	{
		case SRCCOEF_ATI_SELECT:	coef = at;		break;
		case SRCCOEF_ASRC_SELECT:	coef = as;		break;
		case SRCCOEF_CONST_SELECT:	coef = cnst;	break;
		case SRCCOEF_CTI_SELECT:	coef = cti;		break;
		default:									break;
	}

	if (DB.SrcMultCntl & SRCMULTCNTL_SRCCOEFCMP_MASK)
		return ~coef;
	else
		return coef;
}

/*
 * Coefficient for the texture multiplier of one colour channel.
 *
 *   cs        - source colour of this channel
 *   dm1const0 - constant used when the selecting bit is 0
 *   dm1const1 - constant used when the selecting bit is 1
 *
 * TXTCONSTCNTL picks the selecting bit (0: ssb, 1: dsbs) and TXTCOEFSEL the
 * coefficient source. The result is complemented when TXTCOEFCMP is set.
 */
UINT8 TxtCoef(UINT8 cs, UINT8 dm1const0, UINT8 dm1const1)
{
	/*
	 * Both start from 0 so that a field value not handled below cannot
	 * lead to an uninitialised read.
	 * NOTE(review): confirm the intended behaviour for such values.
	 */
	int sel = 0;
	UINT8 cnst, coef = 0;

	int cntl = ((DB.TxtMultCntl & TXTMULTCNTL_TXTCONSTCNTL_MASK) >> TXTMULTCNTL_TXTCONSTCNTL_SHIFT);

	if (cntl == 0)
		sel = ssb;
	else if (cntl == 1)
		sel = dsbs;

	cnst = sel ? dm1const1 : dm1const0;

	switch ((DB.TxtMultCntl & TXTMULTCNTL_TXTCOEFSEL_MASK) >> TXTMULTCNTL_TXTCOEFSEL_SHIFT)
	{
		case TXCOEF_ATI_SELECT:  coef = at;		break;
		case TXCOEF_ASRC_SELECT: coef = as;		break;
		case TXCOEF_CONST_SELECT:coef = cnst;	break;
		case TXCOEF_CSRC_SELECT: coef = cs;		break;
		default:								break;
	}

	if (DB.TxtMultCntl & TXTMULTCNTL_TXTCOEFCMP_MASK)
		return ~coef;
	else
		return coef;
}

/*
 *  Choose the destination alpha (ad) and destination select bit (dsb)
 *  for the current pixel.
 *
 *  When blending is enabled in DB.UserGenControl and not disabled in
 *  GC.TEMasterMode, both values are picked according to DB.DestAlphaCntl,
 *  DB.DestAlphaConst and DB.SSBDSBCntl. Otherwise ad takes the texture
 *  alpha (at) and dsb takes ssb.
 *
 *  Reads the globals at, as, rblend, ssb and dsbs; writes ad and dsb.
 */
void SelectAlphaDSB()
{
	if ((DB.UserGenControl & USERGENCTL_BLENDEN_MASK) && !(GC.TEMasterMode & TEMASTER_MODE_DBLEND_MASK))
	{
		/* Default keeps 'sel' defined if the control field is neither 0 nor 1 */
		int sel = 0;
		UINT8 aconst;

		switch ((DB.DestAlphaCntl & DSTACNTL_ADESTCONSTCNTL_MASK) >> DSTACNTL_ADESTCONSTCNTL_SHIFT)
		{
			case 0: sel = ssb;		break;
			case 1: sel = dsbs;		break;
			default:				break;
		}

		/* Pick one of the two alpha constants */
		if (sel)
			aconst = (DB.DestAlphaConst >> DESTALPHACONST_CONST1_SHIFT) & DESTALPHACONST_CONST1_MASK;
		else
			aconst = (DB.DestAlphaConst >> DESTALPHACONST_CONST0_SHIFT) & DESTALPHACONST_CONST0_MASK;

		switch ((DB.DestAlphaCntl & DSTACNTL_ADESTSEL_MASK) >> DSTACNTL_ADESTSEL_SHIFT)
		{
			case 0: ad = at;		break;
			case 1: ad = aconst;	break;
			case 2: ad = as;		break;
			case 3: ad = rblend;	break;
			default:				break;
		}

		switch ((DB.SSBDSBCntl & SSBDSBCNTL_DSBSEL_MASK) >> SSBDSBCNTL_DSBSEL_SHIFT)
		{
			case 0: dsb = ssb;		break;
			case 1: dsb = (DB.SSBDSBCntl & SSBDSBCNTL_DSBCONST_MASK) ? 1 : 0;	break;
			case 2: dsb = dsbs;		break;
			default:				break;	/* any other selector leaves dsb unchanged */
		}
	}
	else
	{
		ad = at;
		dsb = ssb;
	}
}



/*
 *  Blend one colour channel.
 *
 *  ct, cti    - texture colour and the value offered to SrcCoef
 *  cs, csrc   - source colour and the value offered to TxtCoef
 *  dm10, dm11 - the two texture multiplier constants
 *  dm20, dm21 - the two source multiplier constants
 *
 *  Each colour is scaled by its coefficient and the two products are
 *  combined by ALUCalc. A coefficient or colour of 255 is treated as
 *  exactly 1.0, so the other operand passes through unscaled; otherwise
 *  the product is (coef * colour) >> 8.
 */
UINT8 ColorBlend(UINT8 ct, UINT8 cti, UINT8 cs, UINT8 csrc,
				 UINT8 dm10, UINT8 dm11,
				 UINT8 dm20, UINT8 dm21)
{
	const UINT8 tex_coef = TxtCoef(csrc, dm10, dm11);
	const UINT8 src_coef = SrcCoef(cti, dm20, dm21);
	UINT16 tex_term;
	UINT16 src_term;

	/* Texture term */
	if (tex_coef == 255)
		tex_term = ct;
	else if (ct == 255)
		tex_term = tex_coef;
	else
		tex_term = (tex_coef * ct) >> 8;

	/* Source term */
	if (src_coef == 255)
		src_term = cs;
	else if (cs == 255)
		src_term = src_coef;
	else
		src_term = (src_coef * cs) >> 8;

	return ALUCalc(tex_term, src_term);
}

void RGBPath()
{
	UINT8 dm10, dm11, dm20, dm21;
	UINT32 txtcnst0 = DB.TxtCoefConst0;
	UINT32 txtcnst1 = DB.TxtCoefConst1;
	UINT32 srccnst0 = DB.SrcCoefConst0;
	UINT32 srccnst1 = DB.SrcCoefConst1;

	if ((DB.UserGenControl & USERGENCTL_BLENDEN_MASK) && !(GC.TEMasterMode & TEMASTER_MODE_DBLEND_MASK))
	{
		/* Blue */
		dm10 = txtcnst0 & 0xff; txtcnst0 >>= 8;
		dm11 = txtcnst1 & 0xff; txtcnst1 >>= 8;
		dm20 = srccnst0 & 0xff; srccnst0 >>= 8;
		dm21 = srccnst1 & 0xff; srccnst1 >>= 8;
		bblend = ColorBlend(bt, bti, bs, bsrc, dm10, dm11, dm20, dm21);

		// TODO: ALURGEL

		/* Green */
		dm10 = txtcnst0 & 0xff; txtcnst0 >>= 8;
		dm11 = txtcnst1 & 0xff; txtcnst1 >>= 8;
		dm20 = srccnst0 & 0xff; srccnst0 >>= 8;
		dm21 = srccnst1 & 0xff; srccnst1 >>= 8;
		gblend = ColorBlend(gt, gti, gs, gsrc, dm10, dm11, dm20, dm21);

		/* Red */
		dm10 = txtcnst0 & 0xff; txtcnst0 >>= 8;
		dm11 = txtcnst1 & 0xff; txtcnst1 >>= 8;
		dm20 = srccnst0 & 0xff; srccnst0 >>= 8;
		dm21 = srccnst1 & 0xff; srccnst1 >>= 8;
		rblend = ColorBlend(rt, rti, rs, rsrc, dm10, dm11, dm20, dm21);
	}
	else
	{
		rblend = rti;
		gblend = gti;
		bblend = bti;
	}

	/* Dithering */
	if (DB.UserGenControl & USERGENCTL_DITHEREN_MASK)
	{
		UINT32 dith_mtx = dith_y & 2 ? DB.DitherMatB : DB.DitherMatA;
		int idx = 7 ^ (((dith_y & 1) << 2) | dith_x);
		UINT8 val = (dith_mtx >> (idx * 4)) & 0xf;

		rd = Dither(rblend, val);
		gd = Dither(gblend, val);
		bd = Dither(bblend, val);
	}
	else
	{
		rd = rblend;
		gd = gblend;
		bd = bblend;
	}

}

/*
 *  Decide whether the current pixel is discarded.
 *
 *  s       - select bit tested by the SSB discard
 *  r, g, b - colour tested by the RGB discard (all three must be zero)
 *
 *  Updates ssbdis, adis and rgbdis from the enables in DB.DiscardControl,
 *  then sets dis if any of those, or the previously computed fbclipdis,
 *  winclipdis or zclipdis, is set. The alpha discard tests the global
 *  texture alpha 'at'.
 */
void Discard(UINT8 s, UINT8 r, UINT8 g, UINT8 b)
{
	// PAGE 317
	ssbdis = ((DB.DiscardControl & DISCARDCTL_SSBDISEN_MASK) && (s == 0)) ? 1 : 0;
	adis = ((DB.DiscardControl & DISCARDCTL_ADISEN_MASK) && (at == 0)) ? 1 : 0;
	rgbdis = ((DB.DiscardControl & DISCARDCTL_RGBDISEN_MASK) && (r == 0) && (g == 0) && (b == 0)) ? 1 : 0;

	/* Any single reason is enough to drop the pixel */
	dis = (fbclipdis || winclipdis || zclipdis || ssbdis || adis || rgbdis) ? 1 : 0;
}

void ProcessSpan(int y, int xs, int xe,
				 float r, float g, float b, float a,
				 float rddx, float gddx, float bddx, float addx,
				 float uw, float vw, float rw,
				 float uwddx, float vwddx, float wddx,
				 enum _scan_ scan, bool omit_right,
				 int flags)
{
	int xinc = (scan == LR) ? 1 : -1;
	xe = (scan == LR) ? xe + 1 : xe - 1;

	if (omit_right)
	{
		if (scan == LR)
			xe -= 1;
		else
			xs -= 1;
	}

	dith_y = y % 4;

	while (xs != xe)
	{
		if ((y == 0) && (xs == 95))
		{
			// TEST
			dith_x = xs % 4;
			
		}

		dith_x = xs % 4;
		{
			int zbufout;
			int zpixout;
			int zgel;
			int zclip = 0;

			float u = uw;
			float v = vw;

			if (!(flags & VTX_FLAG_SHAD))
				r = g = b = a = 0.0f;

			/* Texture Pixel */
			GetTextureBlendPixel(u, v, rw, flags, (UINT8)(r), (UINT8)(g), (UINT8)(b), (UINT8)(a));

			/* Source input */
			SelSrcPixel(xs, y, rti, gti, bti);

			/* Texture input */
			SelTexPixel(rti, gti, bti, ati, ssbti);

			/* Z-buffer control */
			UINT32 xoff = xs + Z_XOFFSET;
			UINT32 yoff = y + Z_YOFFSET;

			if ((xoff < 0) || (yoff < 0) || (xoff >= Z_XCLIP) || (yoff >= Z_YCLIP))
				zclip = 1;
			else
				zclip = 0;

			if (zclip && DB.DiscardControl & DISCARDCTL_ZCLIPDISEN_SHIFT)
				zclipdis = 1;
			else
				zclipdis = 0;

			UINT16 z_o = READ16_BE(RAM, (DB.ZBaseAddr & RAM_SIZE_MASK) + (yoff * Z_XCLIP + xoff) * 2);
			float z_old = (float)z_o / 65536.0f;
			float zdiff = rw - z_old;

			if (!(GC.TEMasterMode & TEMASTER_MODE_DZBUF_MASK)
				&& (DB.UserGenControl & USERGENCTL_ZBUFEN_MASK) && !zclip)
			{
				if (zdiff > 0)
				{
					zpixout = (DB.ZCntl >> (0)) & 1;
					zbufout = (DB.ZCntl >> (1)) & 1;
					zgel |= 4;
				}
				else if (zdiff == 0)
				{
					zpixout = (DB.ZCntl >> (2)) & 1;
					zbufout = (DB.ZCntl >> (3)) & 1;
					zgel |= 2;
				}
				else
				{
					zpixout = (DB.ZCntl >> (4)) & 1;
					zbufout = (DB.ZCntl >> (5)) & 1;
					zgel |= 1;
				}
			}
			else
			{
				zpixout = 1;
				zbufout = 0;
			}

			/* Frame buffer clip */
			if ((xs >= X_FBCLIP) || (y >= Y_FBCLIP))
				fbclipdis = 1;
			else
				fbclipdis = 0;

			/* Window clip */
			int inside = (xs >= X_WINCLIPMIN) && (xs < X_WINCLIPMAX) && (y >= Y_WINCLIPMIN) && (y < Y_WINCLIPMAX);

			if (((DB.UserGenControl & USERGENCTL_WCLIPINEN_MASK) && inside) || ((DB.UserGenControl & USERGENCTL_WCLIPOUTEN_MASK) && !inside))
				winclipdis = 1;
			else
				winclipdis = 0;

			/* Calculate the final color */
			RGBPath();

			/* Alpha and DSB selection */
			SelectAlphaDSB();

			/* Discard pixels? */
			Discard(ssbti, rti, gti, bti);

			/* Z-buffer update */
			if (!dis)
			{
				if (zbufout && (DB.UserGenControl & USERGENCTL_ZOUTEN_MASK))
					WRITE16_BE(RAM, (DB.ZBaseAddr & RAM_SIZE_MASK) + (yoff * Z_XCLIP + xoff) * 2, (UINT16)(rw * 65536.0f));
			}

			/* Color write-out */
			if (!dis && (DB.SuperGenControl & SUPERGENCTL_DESTOUTEN_MASK))
			{
				if (zpixout)
				{
					UINT32 dest = DB.DestBaseAddr & RAM_SIZE_MASK;
					UINT32 stride = DB.DestXStride;

					if (DB.DestCntl & DESTCNTL_32BPP_MASK)
					{
						UINT32 outval = 0;
						UINT32 color = READ32_BE(RAM, dest + (y * stride + xs) * 4);

						int mask = (DB.UserGenControl & USERGENCTL_DESTOUT_MASK);

						int vb = color & 0xff;
						int vg = (color >> 8) & 0xff;
						int vr = (color >> 16) & 0xff;
						int vdsb_a = (color >> 24) & 0xff;

						outval |= ((mask & 8) ? (dsb << 7) | (ad >> 1): vdsb_a) << 24;
						outval |= ((mask & 4) ? rd : vr) << 16;
						outval |= ((mask & 2) ? gd : vg) << 8;
						outval |= ((mask & 1) ? bd : vb);

						WRITE32_BE(RAM, dest + (y * stride + xs) * 4, outval);
					}
					else
					{
						UINT16 outval = 0;
						UINT16 color = READ16_BE(RAM, dest + (y * stride + xs) * 2);

						int mask = (DB.UserGenControl & USERGENCTL_DESTOUT_MASK);

						int vb = color & 0x1f;
						int vg = (color >> 5) & 0x1f;
						int vr = (color >> 10) & 0x1f;
						int vdsb_a = (color >> 15) & 0x1;

						outval |= ((mask & 8) ? dsb : vdsb_a) << 15;
						outval |= ((mask & 4) ? rd >> 3 : vr) << 10;
						outval |= ((mask & 2) ? gd >> 3 : vg) << 5;
						outval |= ((mask & 1) ? bd >> 3 : vb);

						WRITE16_BE(RAM, dest + (y * stride + xs) * 2, outval);
					}
				}
			}
		}

		/* Update interpolated paramters */
		if (scan == LR)
		{
			xs++;
			r += rddx;
			g += gddx;
			b += bddx;
			a += addx;
			uw += uwddx;
			vw += vwddx;
			rw += wddx;
		}
		else
		{
			xs--;
			r -= rddx;
			g -= gddx;
			b -= bddx;
			a -= addx;
			uw -= uwddx;
			vw -= vwddx;
			rw -= wddx;
		}

		FCLAMP(r);
		FCLAMP(g);
		FCLAMP(b);
		FCLAMP(a);
	}

	// Status update
/*
	fbClipDis;
	winClipDis;
	zClipDis;
	ALURGEL
	ALUGGEL
	ALUBGEL
	ZGEL
	anyrendstat
	*/
}

/*
 *  Perform the load selected by the LDMODE field of TM.TxtLdCntl.
 *
 *  TEXLOAD  - compressed textures are decoded from RAM into TRAM;
 *             uncompressed loads are not implemented.
 *  MMDMA    - raw copy of TxtByteCnt bytes from RAM into TRAM or PIPRAM,
 *             depending on bits 2/3 of TM.TxtCntl.
 *  PIPLOAD  - copies TxtByteCnt / 4 big-endian words from RAM into PIPRAM.
 *
 *  Compressed data is a sequence of 8-bit control bytes. The top two bits
 *  of each pick one of four source pixel types / constants; the low bits
 *  give a count. Each control byte is followed either by a string of
 *  'count' literal texels or by a single texel (or a constant) that is
 *  repeated 'count' times.
 *
 *  NOTE(review): the bit-packing below assumes a destination texel never
 *  straddles a 32-bit word and that dst_bits is less than 32 - confirm.
 */
void TextureLoad()
{
	switch ((TM.TxtLdCntl & TXTLDCNTL_LDMODE_MASK) >> TXTLDCNTL_LDMODE_SHIFT)
	{
		case TXTLDCNTL_LDMODE_TEXLOAD:
		{
			if (TM.TxtLdCntl & TXTLDCNTL_COMPRESSED_MASK)
			{
				/* Source and destination addresses in bits */
				UINT32 src_addr = (TM.TxtLdSrcBase << 3) + (TM.TxtLdCntl & TXTLDCNTL_SRCBITOFFS_MASK);
				UINT32 dst_addr = (TM.TxtLdDstBase << 3);
				INT32 texels = TM.TxtByteCnt;

				UINT32 dst_type = TM.TxtExpType;
				UINT32 dst_cdepth = (dst_type & 0xf);
				UINT32 dst_adepth = (dst_type >> 4) & 0xf;
				UINT32 dst_bits = 0;

				/* Color present */
				if (dst_type & 0x400)
				{
					/* Literal or indexed */
					if (dst_type & 0x1000)
						dst_bits += dst_cdepth * 3;
					else
						dst_bits += dst_cdepth;
				}

				/* SSB */
				if (dst_type & 0x200)
					dst_bits += 1;
				/* Alpha (flag lives in the type word, not in the running bit count) */
				if (dst_type & 0x800)
					dst_bits += dst_adepth;

				while (texels > 0)
				{
					UINT32 src_val;
					UINT8 cntl = ReadBits(RAM, src_addr, 8);
					UINT32 type = (cntl >> 6) & 3;
					UINT32 cnst;

					/* Select constant register */
					if (type == 0)
						cnst = TM.TxtSRCConst0;
					else if (type == 1)
						cnst = TM.TxtSRCConst1;
					else if (type == 2)
						cnst = TM.TxtSRCConst2;
					else
						cnst = TM.TxtSRCConst3;

					/* Select pixel type register: two 16-bit halves per register */
					UINT32 src_type = (type & 2) ? TM.TxtSrcType23 : TM.TxtSrcType01;
					src_type = (src_type >> ((type & 1) << 4)) & 0x1fff;

					/* Transparent types use a 6-bit count, the others 5 bits */
					UINT32 cnt = (cntl & (src_type & 0x100 ? 0x3f : 0x1f)) + 1;

					UINT32 src_cdepth = (src_type & 0xf);
					UINT32 src_adepth = (src_type >> 4) & 0xf;
					UINT32 src_bits = 0;

					if (src_type & 0x1000)
						src_cdepth *= 3;

					/* Transparent */
					if (src_type & 0x100)
					{
						src_val = cnst;
					}
					else
					{
						src_bits += src_cdepth;

						if (src_type & 0x200)
							src_bits += 1;
						if (src_type & 0x800)
							src_bits += src_adepth;
					}

					// Advance past the control byte
					src_addr += 8;

					/* String */
					if (!(src_type & 0x100) && (cntl & 0x20))
					{
						for (UINT32 i = 0; i < cnt; ++i)
						{
							UINT32 dst_val = 0;
							UINT32 src_ssb = 0;
							UINT32 src_color  = 0;
							UINT32 src_alpha = 0;
							UINT32 texel = ReadBits(RAM, src_addr, src_bits);

							/* Colour is present */
							if (src_type & 0x400)
							{
								src_color = texel & ((1 << src_cdepth) - 1);
								texel >>= src_cdepth;

								/* Indexed format - add an offset */
								if (!(src_type & 0x1000))
									src_color += cnst & 0xff;
							}

							/* Alpha */
							if (src_type & 0x800)
							{
								/* Constant */
								if (src_type & 0x100)
									src_alpha = (cnst >> 24) & ((1 << src_adepth) - 1);
								else
								{
									src_alpha = texel & ((1 << src_adepth) - 1);
									texel >>= src_adepth;
								}
							}

							/* SSB */
							if (src_type & 0x200)
							{
								/* Constant */
								if (src_type & 0x100)
									src_ssb = (cnst >> 31) & 1;
								else
									src_ssb = texel & 1;
							}

							// Now create the destination value.
							dst_val = src_color
									| (src_alpha << src_cdepth)
									| (src_ssb << (src_cdepth + src_adepth));

							/* Read-modify-write the 32-bit word that holds this texel */
							UINT32 tram_off = ((dst_addr >> 3) & ~3) & RAM_SIZE_MASK;
							UINT32 oldval = READ32_BE(TRAM, tram_off);

							/* Clear out the old value */
							UINT32 shift = (32 - (dst_bits + (dst_addr & 31)));
							oldval &= ~(((1 << dst_bits) - 1) << shift);
							oldval |= dst_val << shift;

							WRITE32_BE(TRAM, tram_off, oldval);

							src_addr += src_bits;
							dst_addr += dst_bits;
						}
					}
					else
					{
						UINT32 dst_val;

						if (!(src_type & 0x100))
						{
							// Read one texel
							src_val = ReadBits(RAM, src_addr, src_bits);

							/* Indexed format - add an offset */
							if (!(src_type & 0x1000))
								src_val += cnst & 0xff;
						}

						// HACK
						dst_val = src_val;

						for (UINT32 i = 0; i < cnt; ++i)
						{
							/*
							 * Write back to the same word-aligned address that
							 * was read; the bit shift is relative to that word.
							 */
							UINT32 tram_off = ((dst_addr >> 3) & ~3) & RAM_SIZE_MASK;
							UINT32 oldval = READ32_BE(TRAM, tram_off);

							/* Clear out the old value */
							UINT32 shift = (32 - (dst_bits + (dst_addr & 31)));
							oldval &= ~(((1 << dst_bits) - 1) << shift);
							oldval |= dst_val << shift;

							WRITE32_BE(TRAM, tram_off, oldval);

							dst_addr += dst_bits;
						}

						// Skip the single source texel (src_bits is 0 for constants)
						src_addr += src_bits;
					}

					texels -= cnt;
				}
			}
			else
			{
				/* Uncompressed texture loads are not implemented */
			}

			break;
		}

		case TXTLDCNTL_LDMODE_MMDMA:
		{
			if (TM.TxtCntl & (1 << 2))
			{
				assert(TM.TxtByteCnt <= 0x4000);

#pragma message("Use TxtLdDstBase for MMDMA.")
				memcpy(TRAM, &RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt);

				// DEBUG
				SaveTextureToFile(&RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt, 0);
			}
			else if (TM.TxtCntl & (1 << 3))
			{
//				LOGERROR("PIP\n");
				assert(TM.TxtByteCnt <= 0x400);
				memcpy(PIPRAM, &RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt);

				// DEBUG
				SaveTextureToFile(&RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt, 1);
			}
			else
				LOGERROR("UNKNOWN TXT\n");

			break;
		}
		case TXTLDCNTL_LDMODE_PIPLOAD:
		{
//			LOGERROR("PIP Load\n");
//			LOGERROR("Src:   %.8x\n", TM.TxtLdSrcBase);
//			LOGERROR("Bytes: %x\n", TM.TxtByteCnt);

			assert(TM.TxtByteCnt <= 0x400);
			// TODO: Why is +1 needed?
			//memcpy(PIPRAM, &RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt);

			/* Copy word by word so each entry is converted from big-endian */
			for (UINT32 i = 0; i < TM.TxtByteCnt / 4; ++i)
			{
				UINT32 val = READ32_BE(RAM, (TM.TxtLdSrcBase & RAM_SIZE_MASK)  + i * 4);
				PIPRAM[i] = val;
			}

			SaveTextureToFile(&RAM[TM.TxtLdSrcBase & RAM_SIZE_MASK], TM.TxtByteCnt, 1);
			break;
		}
		default:
			LOGERROR("Bad Texture Load Type!\n");
			break;
	}
}

/*
 *  Process an incoming vertex.
 *
 *  The vertex count kept in SE.VertexState tracks how many of the three
 *  vertex slots (v_o, v_m, v_n) are filled. VTX_FLAG_NEW restarts the
 *  count. Once three vertices are held, every further vertex replaces
 *  v_n after v_n has moved to v_m; v_m first moves to v_o unless
 *  VTX_FLAG_RM is set, in which case v_o is kept. A triangle is drawn
 *  whenever three vertices are available.
 */
void ProcessVertex(VTX &vtx, int flags)
{
	UINT32 count = (SE.VertexState & VERTEX_STATE_VCNT_MASK) >> VERTEX_STATE_VCNT_SHIFT;

	if (flags & VTX_FLAG_NEW)
	{
		/* Start a new primitive */
		count = 0;
		SE.VertexState &= ~VERTEX_STATE_VCNT_MASK;
	}
	else if (count == 3)
	{
		/* Slots are full: shift the existing vertices along */
		if (!(flags & VTX_FLAG_RM))
			*v_o = *v_m;

		*v_m = *v_n;
		*v_n = vtx;
	}

	if (count < 3)
	{
		/* Still filling the slots */
		switch (count)
		{
			case 0: *v_o = vtx;	break;
			case 1: *v_m = vtx;	break;
			case 2: *v_n = vtx;	break;
		}

		++count;
		SE.VertexState &= ~VERTEX_STATE_VCNT_MASK;
		SE.VertexState |= count << VERTEX_STATE_VCNT_SHIFT;
	}

	if (count != 3)
		return;

	/* Work on copies so the stored vertices are left intact */
	*v_a = *v_o;
	*v_b = *v_m;
	*v_c = *v_n;

	/* Draw the triangle */
	ProcessTriangle(flags);
}


/*
 *  Register access types, taken from bits 11-12 of a register offset
 *  and interpreted by WriteReg.
 */
enum
{
	RW      = 0,	/* plain write: register = data */
	INVALID = 1,	/* not a valid access type */
	RS      = 2,	/* set bits: register |= data */
	C       = 3		/* clear bits: register &= ~data */
};

/*
 *  Apply a write to a register according to the access type.
 *
 *  reg    - register to modify
 *  data   - value written
 *  access - RW stores data, RS ORs data in, C clears the bits set in data.
 *           Any other access type trips an assertion.
 */
static void WriteReg(UINT32 *reg, UINT32 data, UINT32 access)
{
	if (access == RW)
		*reg = data;
	else if (access == RS)
		*reg |= data;
	else if (access == C)
		*reg &= ~data;
	else
		assert(0);
}


/* Read handler for texture RAM: returns the big-endian word at word index 'offset'. */
READ32_HANDLER( tram_r )
{
	const UINT32 word = READ32_BE(TRAM, offset * 4);

	return word;
}

/*
 *  Write handler for texture RAM. Bits of the existing word that are
 *  outside 'mask' are merged into the written data before it is stored.
 */
WRITE32_HANDLER( tram_w )
{
	const UINT32 preserved = READ32_BE(TRAM, offset * 4) & ~mask;

	data |= preserved;
	WRITE32_BE(TRAM, offset * 4, data);
}

/* Fetch the word at the instruction read pointer, log it, and advance the pointer by one word. */
static inline UINT32 ReadIRPWithUpdate()
{
	const UINT32 word = READ32_BE(RAM, GC.IRP & IP_MASK);

	LogRegWrite(GC.IRP, word, 0); // DEBUG!
	GC.IRP += 4;

	return word;
}

/* Fetch and log the word at the instruction read pointer without advancing it. */
static inline UINT32 ReadIRP()
{
	const UINT32 word = READ32_BE(RAM, GC.IRP & IP_MASK);

	LogRegWrite(GC.IRP, word, 0); // DEBUG!

	return word;
}


/*
 *  Store the 32-bit value d into f by writing through a UINT32 pointer
 *  to f, i.e. the bit pattern is copied rather than converted.
 *  NOTE(review): this is pointer-cast type punning and may fall foul of
 *  strict-aliasing rules - consider memcpy if the optimiser misbehaves.
 */
#define WRITE_INT_FLOAT(f, d)	( *(UINT32*)&(f) = (d) )


/* Raise the INT_TE_IMINSTR interrupt through SetBDAInterrupt. */
void eor()
{
	SetBDAInterrupt(INT_TE_IMINSTR);
}


void ExecuteInstructions(bool single_step)
{
	TEActive = true;

	while ((GC.IRP != GC.IWP) && TEActive == true)
	{
		UINT32 val = ReadIRPWithUpdate();

		switch ((val >> 28) & 0xf)
		{
			case 1:
			{
				UINT32 reg = val & 0xffff;
				INT32 cnt = (val >> 16) & 0xff;

				for (cnt; cnt >= 0; --cnt)
				{
					IRP_next = GC.IRP + 4;
					UINT32 data = ReadIRP();
					RegisterWrite(reg, data);

					GC.IRP = IRP_next;
					reg += 4;

					if (TEActive == false)
						break;
				}
				break;
			}
			case 2:
			{
				VTX vtx;
				UINT32 flags;
				bool new_tri = false;

				/* Vertex count */
				INT32 cnt = (val & 0xffff);

				/* Flags are shared */
				flags = val & 0x1f0000;

				for (cnt; cnt >= 0; --cnt)
				{
					WRITE_INT_FLOAT(vtx.x, ReadIRPWithUpdate());
					WRITE_INT_FLOAT(vtx.y, ReadIRPWithUpdate());

					if (flags & VTX_FLAG_SHAD)
					{
						WRITE_INT_FLOAT(vtx.r, ReadIRPWithUpdate());
						WRITE_INT_FLOAT(vtx.g, ReadIRPWithUpdate());
						WRITE_INT_FLOAT(vtx.b, ReadIRPWithUpdate());
						WRITE_INT_FLOAT(vtx.a, ReadIRPWithUpdate());
					}
					if (flags & VTX_FLAG_PRSP)
					{
						WRITE_INT_FLOAT(vtx.rw, ReadIRPWithUpdate());
					}
					if (flags & VTX_FLAG_TEXT)
					{
						WRITE_INT_FLOAT(vtx.uw, ReadIRPWithUpdate());
						WRITE_INT_FLOAT(vtx.vw, ReadIRPWithUpdate());
					}

					/* Send for procesing */
					ProcessVertex(vtx, flags);
					flags &= ~VTX_FLAG_NEW;
				}
				
				break;
			}
			case 3: LOGERROR("WARNING!!!! 3\n"); break;
			case 4: LOGERROR("WARNING!!!! 4\n"); break;
			default: LOGERROR("BAD!!!");
		}

		if (single_step)
			break;
	}

	if (GC.TEICntl & TEICNTL_STPL_MASK)
	{
		GC.TEICntl &= ~TEICNTL_STPL_MASK;
		if (GC.IRP == GC.IWP)
		{
			TEPaused = false;
			TEStopped = true;
		}
	}

	if (GC.TEICntl & TEICNTL_INT_MASK)
	{
		static int test = 0;
		GC.TEICntl &= ~TEICNTL_INT_MASK;
		GC.IntStat |= (1 << (31 - 22));
//		GC.IntStat |= (1 << (31 - 16));
	//	if (test)
//		Core.CreateTimer(TIME_IN_USEC(100), TIMER_ONESHOT, true, eor);
		SetBDAInterrupt(INT_TE_IMINSTR);
		TEPaused = false;
		TEStopped = true;
	}
}


/*
 *  Write to the TE master mode register.
 *
 *  val    - value written
 *  access - access type passed on to WriteReg
 */
void WriteTEMasterMode(UINT32 val, UINT32 access)
{
	/* No side effects are emulated beyond updating the register */
	WriteReg(&GC.TEMasterMode, val, access);
}

/*
 *  Write to the immediate control register and act on the written bits.
 *
 *  val    - value written; the STRT / RSTRT bits are tested on this
 *           value, not on the resulting register contents
 *  access - access type passed on to WriteReg
 *
 *  STRT loads the read pointer from GC.TEICntlData and starts execution.
 *  RSTRT resumes execution only when the engine is neither active nor
 *  paused. Any other write is just logged.
 */
void WriteTEICntl(UINT32 val, UINT32 access)
{
	WriteReg(&GC.TEICntl, val, access);

	if (val & TEICNTL_STRT_MASK)
	{
		/* Fresh start from the address held in the data register */
		TEStopped = false;
		TEPaused = false;
		GC.IRP = GC.TEICntlData;
		ExecuteInstructions(false);
		return;
	}

	if (val & TEICNTL_RSTRT_MASK)
	{
		// CHECK ME
		if ((TEActive == false) && (TEPaused == false))
		{
			TEPaused = false;
			ExecuteInstructions(false);
		}
		return;
	}

	LOGERROR("TEICntl: %x\n", GC.TEICntlData);
}
/*
 *  Write to the deferred control register and act on the written bits.
 *
 *  val    - value written; each bit below is tested on this value
 *  access - access type passed on to WriteReg
 *
 *  bit 0 - perform a texture load
 *  bit 1 - jump: next read pointer = GC.TEDCntlData
 *  bit 2 - jump: next read pointer = GC.TEDCntlData + 4 (logged as JMPR)
 *  bit 3 - interrupt (only logged)
 *  bit 4 - pause the engine
 *  bit 5 - sync (not emulated)
 */
void WriteTEDCntl(UINT32 val, UINT32 access)
{
	WriteReg(&GC.TEDCntl, val, access);

	if (val & 0x01)
		TextureLoad();

	if (val & 0x02)
		IRP_next = GC.TEDCntlData;

	if (val & 0x04)
	{
		IRP_next = 4 + GC.TEDCntlData;
		LOGERROR("***** JMPR\n");
	}

	if (val & 0x08)
		LOGERROR("***** INT\n");

	if (val & 0x10) // PSE
	{
		/* Clearing TEActive makes the instruction loop stop */
		TEPaused = true;
		TEActive = false;
	}

	/* 0x20 (SYNC) needs no action here */
}

/*
 *  Write to the interrupt status register.
 *
 *  val    - value written
 *  access - access type passed on to WriteReg
 *
 *  Once no enabled interrupt remains pending the BDA interrupt line
 *  is dropped.
 */
void WriteIntStat(UINT32 val, UINT32 access)
{
	WriteReg(&GC.IntStat, val, access);

	// TODO: Dubious
	const bool none_pending = ((GC.IntStat & GC.IntEn) == 0);

	if (none_pending)
		ClearBDAInterrupt(INT_TE_IMINSTR);
}

/*
 *  Dispatch a write to a triangle engine register.
 *
 *  offset - byte offset: bits 13-15 select the unit, bits 11-12 the
 *           access type and bits 2-10 the register index
 *  data   - value written
 *
 *  Units: 0 general control, 1 setup engine, 2 edge/span walker,
 *  3 texture mapper (PIP RAM followed by registers), 4 destination
 *  blender. Writes outside the known register ranges are ignored.
 */
void RegisterWrite(UINT32 offset, UINT32 data)
{
	const UINT32 access = (offset >> 11) & 3;
	const UINT32 unit = (offset >> 13) & 0x7;
	const UINT32 r = (offset & 0x7ff) >> 2;

	if (unit > 4)
		LOGERROR("TE: Invalid write access\n");

	if (unit == 0)
	{
		/* A few control registers have side effects of their own */
		if (r == 0)
			WriteTEMasterMode(data, access);
		else if (r == 3)
			WriteTEICntl(data, access);
		else if (r == 5)
			WriteTEDCntl(data, access);
		else if (r == 9)
			WriteIntStat(data, access);
		else if (r <= 0x28/4)
			WriteReg(&GC.regs[r], data, access);
	}
	else if (unit == 1)
	{
		if (r <= 0x100/4)
			WriteReg(&SE.regs[r], data, access);
	}
	else if (unit == 2)
	{
		if (r <= 0x8/4)
			WriteReg(&ES.regs[r], data, access);
	}
	else if (unit == 3)
	{
		/* PIP RAM is written directly; the access type is not applied */
		if (r < 0x400/4)
			PIPRAM[r] = data;
		else if (r <= 0x450/4)
			WriteReg(&TM.regs[r - 0x400 / 4], data, access);
	}
	else if (unit == 4)
	{
		if (r <= 0x84/4)
			WriteReg(&DB.regs[r], data, access);
	}
}


/**************************************
 *
 *  Public Functions
 *
 *************************************/

/*
 *  Read handler for the triangle engine register space.
 *
 *  The word offset is converted to a byte offset and decoded into a
 *  unit (bits 13-15) and register index (bits 2-10). Reads outside the
 *  known register ranges return 0.
 */
READ32_HANDLER( te_r )
{
	offset <<= 2;

	const UINT32 unit = (offset >> 13) & 0x7;
	const UINT32 r = (offset & 0x7ff) >> 2;
	UINT32 result = 0;

	if (unit > 4)
		LOGERROR("TE: Invalid read access\n");

	if (unit == 0)
	{
		if (r <= 0x28/4)
			result = GC.regs[r];
	}
	else if (unit == 1)
	{
		if (r <= 0x100/4)
			result = SE.regs[r];
	}
	else if (unit == 2)
	{
		if (r <= 0x8/4)
			result = ES.regs[r];
	}
	else if (unit == 3)
	{
		/* PIP RAM comes first, texture mapper registers follow it */
		if (r < 0x400/4)
			result = PIPRAM[r];
		else if (r <= 0x450/4)
			result = TM.regs[r - 0x400/4];
	}
	else if (unit == 4)
	{
		if (r <= 0x84/4)
			result = DB.regs[r];
	}

	return result;
}

/* Write handler for the triangle engine register space: converts the word offset to bytes and dispatches. */
WRITE32_HANDLER( te_w )
{
	RegisterWrite(offset << 2, data);
}

void InitTriangleEngine()
{
	PIPRAM = new UINT32[256];
	TRAM = new UINT8[16384];

	v_o = new VTX;
	v_m = new VTX;
	v_n = new VTX;

	v_a = new VTX;
	v_b = new VTX;
	v_c = new VTX;
}

// TODO: REMOVE ME?
/* Return the destination base address register (DB.DestBaseAddr), unmasked. */
UINT32 GetTEDest()
{
	return DB.DestBaseAddr;
}

/*
 *  Return 4 when the destination is configured as 32bpp, otherwise 2.
 *  NOTE(review): this looks like bytes per pixel - confirm with callers.
 */
UINT32 GetTEFormat()
{
	if (DB.DestCntl & DESTCNTL_32BPP_MASK)
		return 4;

	return 2;
}

/* Return the upper 16 bits of the frame buffer clip register (DB.FBClip). */
UINT32 GetTEWidth()
{
	const UINT32 upper_half = DB.FBClip >> 16;

	return upper_half & 0xffff;
}

/* Return the lower 16 bits of the frame buffer clip register (DB.FBClip). */
UINT32 GetTEHeight()
{
	const UINT32 lower_half = DB.FBClip & 0xffff;

	return lower_half;
}

/* Return the destination X stride register (DB.DestXStride); ProcessSpan uses it as a stride in pixels. */
UINT32 GetTEStride()
{
	return DB.DestXStride;
}
