// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#include "gp_internal.h"
#include "main.h"
#include "stringCommons.h"
#include <png.h>

void GP::load_bp() {
	bpload(GP_QUEUE_GET_DWORD);
}

// Performs a single BP register write.
// d is split into a register number and a data field through BPLOAD.
// If a write mask other than 0xffffff is pending, only the masked bits of the
// register are replaced and the mask is reset to 0xffffff afterwards.
// The per-register handler from m_arr_bp runs BEFORE m.bp_reg is updated, so
// handlers can still compare the new value against the previous one.
void GP::bpload(DWORD d) {
	BPLOAD b;
	b.dword = d;
	GPDEGUB("GP BP Load 0x%02X, 0x%06X\n", b.reg, b.data);

	const bool mask_pending = (m.bp_mask != 0xffffff);
	if(mask_pending) {
		const DWORD preserved = m.bp_reg[b.reg] & ~m.bp_mask;
		const DWORD incoming = b.data & m.bp_mask;
		b.data = preserved | incoming;
		m.bp_mask = 0xffffff;	//the mask applies to one write only
		GPDEGUB("Old reg: 0x%06X  Masked write: 0x%06X\n", m.bp_reg[b.reg], b.data);
	}

	//dispatch first, store second
	(this->*m_arr_bp[b.reg])(b.data);
	m.bp_reg[b.reg] = b.data;
	stat.obp++;
}

// Change-detection helpers for BP handlers. Each one compares the handler's
// local `data` argument against the value currently stored in m.bp_reg[reg].
// bpload() stores the new value only after the handler has returned, so inside
// a handler m.bp_reg[reg] still holds the previous contents of the register.

// True if the whole register value differs from the stored one.
#define BP_CHANGED(reg) (data != m.bp_reg[reg])
// True if the single bit `bit` (as extracted by getbitr) differs.
#define BP_BITR_CHANGED(reg, bit) (getbitr(data, bit) != getbitr(m.bp_reg[reg], bit))
// True if the bit field start..end (as extracted by getbitsr) differs.
#define BP_BITSR_CHANGED(reg, start, end) \
	(getbitsr(data, start, end) != getbitsr(m.bp_reg[reg], start, end))

// No-op BP handler: receives the register value and does nothing with it.
void GP::bp_direct(DWORD /*data*/) {
	//nothing to do
}

// Applies a Z-mode register write (compared against BP register 0x40) to the
// Direct3D depth state. Only the parts of the register that actually changed
// are forwarded to the device:
//   bit 0     -> D3DRS_ZENABLE
//   bit 4     -> D3DRS_ZWRITEENABLE
//   bits 3..1 -> D3DRS_ZFUNC (translated through gx::d3d_comp)
// In wireframe mode depth testing is forced off and nothing else is done.
void GP::bp_pe_zmode(DWORD data) {
	if(g::gp_wireframe) {
		GPHR(setRS(D3DRS_ZENABLE, D3DZB_FALSE));
		return;
	}
	if(!BP_CHANGED(0x40))
		return;

	if(BP_BITR_CHANGED(0x40, 0)) {
		GPDEGUB("GP Ztest %s\n", abled(getbitr(data, 0)));
		if(getbitr(data, 0)) {
			GPHR(m.pd3dDevice->SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE));
		} else {
			GPHR(m.pd3dDevice->SetRenderState(D3DRS_ZENABLE, D3DZB_FALSE));
		}
	}
	if(BP_BITR_CHANGED(0x40, 4)) {
		GPDEGUB("GP Zwrite %s\n", abled(getbitr(data, 4)));
		if(getbitr(data, 4)) {
			GPHR(m.pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE));
		} else {
			GPHR(m.pd3dDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE));
		}
	}
	if(BP_BITSR_CHANGED(0x40, 3, 1)) {
		GPDEGUB("GP Zfunc: %s\n", gx::comp[getbitsr(data, 3, 1)]);
		GPHR(setRS(D3DRS_ZFUNC, gx::d3d_comp[getbitsr(data, 3, 1)]));
	}
	//Disabled consistency check against the hardware register mirror:
	//if(data != hw->hrh(0x1000))
	//throw hardware_fatal_exception("GP Z mode discrepancy!");
}

// Applies a "Cmode0" BP write: colour/alpha write masks and the blend or
// logic-op configuration.
//
// Decoded fields (bit positions as given to the DNAS/DNAMS macros):
//   logicop 15..12, subtract_mode 11, sfactor 10..8, dfactor 7..5,
//   alpha_mask 4, color_mask 3, dither_enable 2 (ignored), blendmode 1..0.
//
// Throws hardware_fatal_exception if subtract_mode is set while blendmode is
// not 1, or if the logic op is not one of the known gx::LO_* values.
//
// Blend mode handling when not in wireframe:
//   0 (none)  -> alpha blending disabled
//   1 (blend) -> ADD with the translated factors, or REVSUBTRACT ONE/ONE
//                when subtract_mode is set
//   other     -> logic op, approximated with D3D blend states where possible
void GP::bp_pe_cmode0(DWORD data) {
	BEGIN_FIELDS(data, "BP Cmode0");
	const char *str_blendmode[] = { "none", "blend", "logic", TROGDOR };
	DNAMS(logicop, 15, 12, gx::logicop);
	DNAS(subtract_mode, 11);
	DNAMS(sfactor, 10, 8, gx::src_blendfactor);
	DNAMS(dfactor, 7, 5, gx::dst_blendfactor);
	DNAS(alpha_mask, 4);
	DNAS(color_mask, 3);
	DNAS(dither_enable, 2);
	DNAMS(blendmode, 1, 0, str_blendmode);
	END_FIELDS;

	//dither_enable is ignored
	if(subtract_mode && blendmode != 1)
		throw hardware_fatal_exception("GP Invalid Blend mode");

	GPHR(setRS(D3DRS_COLORWRITEENABLE, (alpha_mask ? D3DCOLORWRITEENABLE_ALPHA : 0) |
		(color_mask ?
		D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_RED : 0)));

	//vs.blendmode = BM_NORMAL;
	vs_control.lo_noop = false;

	if(g::gp_wireframe) {
		GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
	} else if(blendmode == 0) {  //no blending
		//Without this, switching from "blend"/"logic" back to "none" would leave
		//the previously programmed alpha blending active on the device.
		GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
	} else if(blendmode == 1) {  //standard blend
		GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
		if(subtract_mode) {
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_REVSUBTRACT));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_ONE));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ONE));
		} else {
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_ADD));
			GPHR(setRS(D3DRS_SRCBLEND, gx::src_d3d_blendfactor[sfactor]));
			GPHR(setRS(D3DRS_DESTBLEND, gx::dst_d3d_blendfactor[dfactor]));
		}
	} else {	//logic blend
		switch(logicop) {
		case gx::LO_XOR:	//color problems
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_ADD));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_INVDESTCOLOR));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_INVSRCCOLOR));
			break;
		case gx::LO_COPY:	//works fine
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			break;
		case gx::LO_SET:	//doesn't work. use g::gp_white instead, but with a local variable to avoid contaminating the user's settings
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			//vs.blendmode = BM_SET;
			/*GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_ADD));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_ONE));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ZERO));*/
			break;
		case gx::LO_CLEAR:	//won't work. create m.force_black or something.
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			//vs.blendmode = BM_CLEAR;
			break;
		case gx::LO_NOOP:	//could work, but this approach is faster
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			vs_control.lo_noop = true;
			/*GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_ADD));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_ZERO));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ONE));*/
			break;
		case gx::LO_INVCOPY:  //doesn't work
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			/*GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_ADD));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_INVSRCCOLOR));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ZERO));*/
			break;
		case gx::LO_AND:	//seems to work
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_MIN));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_ONE));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ONE));
			break;
		case gx::LO_OR: //works, though colors are slightly off
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, true));
			GPHR(setRS(D3DRS_BLENDOP, D3DBLENDOP_MAX));
			GPHR(setRS(D3DRS_SRCBLEND, D3DBLEND_ONE));
			GPHR(setRS(D3DRS_DESTBLEND, D3DBLEND_ONE));
			break;
		case gx::LO_INV:  //won't work
		case gx::LO_NOR:
		case gx::LO_EQUIV:
		case gx::LO_NAND:
		case gx::LO_REVAND:
		case gx::LO_INVAND:
		case gx::LO_REVOR:
		case gx::LO_INVOR:
			GPHR(setRS(D3DRS_ALPHABLENDENABLE, false));
			break;
		default:
			throw hardware_fatal_exception("GP unknown LogicOp!");
		}
	}

	//if(data != hw->hrh(0x1002))
	//throw hardware_fatal_exception("GP Cmode0 discrepancy!");
}

// Handles a PE "done" write. Only the value 0x000002 is emulated; any other
// value throws hardware_fatal_exception.
// Sets bit 0x8 in hardware register 0x100A and raises INTEX_PEFINISH if bit
// 0x2 was set in that register before the update.
void GP::bp_pe_done(DWORD data) {
	if(data != 0x000002)
		throw hardware_fatal_exception("GP PE_DONE unemulated data!");

	//GXSetDrawDone();
	GPDEGUB("PE_DONE\n");
	const WORD pe_status = hw->hrh(0x100A);
	hw->hwh(0x100A, pe_status | 0x8);  //uncertain
	if(pe_status & 0x2) {
		hw->interrupt.raise(INTEX_PEFINISH, "PE Finish");
	}

	//not all programs do this!
	if(g::timing_mode == g::TM_EXACT_FAST) {
		//Currently nothing happens here; the disabled experiment was:
		//m.do_visi_at_nop = true;
		/*if(VII_LOG) {
		DEGUB("PE_DONE VISI\n");
		}
		hw->vi_simple_interrupt();
		hw->vi_si_done = true;*/
	}
}
// Stores the low 16 bits of data as the PE token in hardware register 0x100E.
// No interrupt is raised here.
void GP::bp_pe_token(DWORD data) {
	const WORD token = (WORD)data;
	GPDEGUB("GP Token 0x%04X\n", token);
	//hw->hwh(0x000E, token); //is this proper?
	hw->hwh(0x100E, token);
}
// Stores the low 16 bits of data as the PE token in hardware register 0x100E
// and raises INTEX_PETOKEN when bit 0x1 of register 0x100A is set.
void GP::bp_pe_token_int(DWORD data) {
	const WORD token = (WORD)data;
	//hw->hwh(0x000E, token); //is this proper?
	hw->hwh(0x100E, token);

	const bool token_irq_enabled = (hw->hrh(0x100A) & 0x1) != 0;
	if(token_irq_enabled) {
		GPDEGUB("Raising PETOKEN...\n");
		hw->interrupt.raise(INTEX_PETOKEN, "PE Token");
	}
}
// Logs the two fields of the EFB top-left register. The value is not acted on
// here; bp_pe_copy_execute() later reads it back from m.bp_reg[0x49].
void GP::bp_efb_topleft(DWORD data) {
	const DWORD first = getbitsw(data, 22, 31);
	const DWORD second = getbitsw(data, 12, 21);
	GPDEGUB("EFB topleft: %i,%i\n", first, second);
}
// Logs the two fields of the EFB width/height register. The value is not acted
// on here; bp_pe_copy_execute() later reads it back from m.bp_reg[0x4A].
void GP::bp_efb_bottomright(DWORD data) {
	const DWORD first = getbitsw(data, 22, 31);
	const DWORD second = getbitsw(data, 12, 21);
	GPDEGUB("EFB width & height: %i,%i\n", first, second);
}
// Executes an EFB copy.
//
// The destination address is m.bp_reg[0x4B] << 5. Depending on target_type
// (bit 14) the copy goes either
//   - to a texture: the back buffer region described by m.bp_reg[0x49]
//     (source x/y) and m.bp_reg[0x4A] (width/height) is StretchRect'ed into a
//     render-target texture stored in m_ctx_map under the destination address.
//     The texture is created on first use and re-created when its size or
//     format no longer matches. The destination address is also written into
//     emulated memory at that address (mem.pww), which serves as a marker.
//   - to the XFB: the frame is presented and the vsync bookkeeping runs,
//     unless the XFB address is unchanged since the last vsync and differs
//     from hw->vi.xfb[0], in which case the copy is ignored.
// If the clear bit (11) is set, clear() runs afterwards in both cases.
//
// All other decoded fields are currently ignored.
// Throws hardware_fatal_exception for textures larger than 1024 in either
// dimension and for "Tex cache error no.6"; D3D failures are reported through
// the GPHR/HWHR macros.
void GP::bp_pe_copy_execute(DWORD data) {
	BEGIN_FIELDS(data, "GP EFB Copy");
	const char *str_target_type[] = { "texture", "XFB" };
	DNAS(bit16, 16);  //dunno exactly what these mean
	DNAS(bit15, 15);
	DNASS(target_type, 14, str_target_type);
	DNAM(f2fm, 13, 12);
	DNAS(clear, 11);
	DNAS(unknown_scale, 10);
	DNAS(generate_mipmap, 9);
	DNAM(gamma, 8, 7);
	DNAM(target_format, 6, 3);
	DNAS(clamp1, 1);
	DNAS(clamp0, 0);
	END_FIELDS;

	DWORD target_address = m.bp_reg[0x4B] << 5;

	//everything ignored
	//if(!(clamp0 && clamp1 && //!generate_mipmap &&
	//!unknown_scale && !f2fm))
	//throw hardware_fatal_exception("GP Copy mode unemulated!");

	//MYASSERT(m.in_scene);
	if(!target_type) {  //copy to texture
		stat.texcopies++;
		UINT width = getbitsw(m.bp_reg[0x4A], 22, 31),
			height = getbitsw(m.bp_reg[0x4A], 12, 21);
		DWORD src_x = getbitsw(m.bp_reg[0x49], 22, 31),
			src_y = getbitsw(m.bp_reg[0x49], 12, 21);
		GPDEGUB("x = %u | y = %u | width = %u | height = %u\n", src_x, src_y, width, height);
		//The requested target_format is not honoured; the back buffer format is used.
		D3DFORMAT d3dformat = MY_BACKBUFFERFORMAT;
		/*txtf[target_format].format;
		//there's insufficient caps for flipper-style conversion
		//GetRenderTargetData might be useful, but slow
		if(target_format >= 16)
		throw hardware_fatal_exception("GP Texture format out of range!");
		if(txf[target_format].pixelsize == INVALID_PS)
		throw hardware_fatal_exception("GP Texture illegal format!");*/
		if(width > 1024 || height > 1024)
			throw hardware_fatal_exception("GP Texture illegal size!");
		//if((m.bp_reg[0x43] & 7) == 3) {	//z texture copy, and we don't do that krap
		//GPDEGUB("Z texture copy ignored!\n");
		//throw hardware_fatal_exception("GP Z texture copy unemulated!");
		//throw hardware_fatal_exception("GP Unemulated Texture Copy Format!");
		{  //too long, should be functionalized
			GPHR(endScene());

			//cWidth/cHeight/cFormat start as the requested values and are adjusted
			//in place by D3DXCheckTextureRequirements.
#define DTTT_ARGS(macro) macro(UINT, Width, width) macro(UINT, Height, height)\
	macro(D3DFORMAT, Format, d3dformat)
#define DTTT_DECLARE(type, name, defval) type c##name = defval;
			DTTT_ARGS(DTTT_DECLARE);

			HRESULT res = D3DXCheckTextureRequirements(m.pd3dDevice, &cWidth, &cHeight,
				NULL, D3DUSAGE_RENDERTARGET, &cFormat, D3DPOOL_DEFAULT);
			if(res != D3DERR_NOTAVAILABLE && res != D3D_OK) {
				HWHR(res);
			}

			CTX_HASHMAP::iterator itr = m_ctx_map.find(target_address);
			if(itr == m_ctx_map.end()) {
				//No cached texture, yet memory already carries the marker value.
				if(mem.prw(target_address) == target_address)
					throw hardware_fatal_exception("GP Tex cache error no.6");
				CTEXTURE texture;
				texture.nchanges = 0;
				GPDEGUB("width = %u | height = %u\n", width, height);
				GPHR(checkTextureRequirements(width, height, d3dformat, D3DUSAGE_RENDERTARGET));
				GPHR(m.pd3dDevice->CreateTexture(cWidth, cHeight, 1, D3DUSAGE_RENDERTARGET,
					cFormat, D3DPOOL_DEFAULT, &texture.p, NULL));
				itr = m_ctx_map.insert(CTX_PAIR(target_address, texture)).first;
				GPDEGUB("Texture 0x%08X copy-created\n", target_address);
			} else /*if(GPHR_CONDITION)*/ {
				GPDEGUB("Texture 0x%08X copy-changed\n", target_address);
				D3DSURFACE_DESC desc;
				HWHR(itr->second.p->GetLevelDesc(0, &desc));

				//Re-use the cached texture only if width, height and format all match.
				bool match = true;
#define DTTT_TEST(type, name, defval) if(c##name != desc.name) match = false;
				DTTT_ARGS(DTTT_TEST);
#undef DTTT_ARGS
#undef DTTT_DECLARE
#undef DTTT_TEST

				if(!match) {
					itr->second.p->Release();
					GPDEGUB("width = %u | height = %u\n", width, height);
					GPHR(checkTextureRequirements(width, height, d3dformat, D3DUSAGE_RENDERTARGET));
					GPHR(m.pd3dDevice->CreateTexture(cWidth, cHeight, 1, D3DUSAGE_RENDERTARGET,
						cFormat, D3DPOOL_DEFAULT, &itr->second.p, NULL));
				}
				itr->second.nchanges++;
			}
			//Write the marker checked by the "error no.6" test above.
			mem.pww(target_address, target_address);
			/*if(GPHR_CONDITION)*/ {
				D3DVIEWPORT9 vp;
				HWHR(m.pd3dDevice->GetViewport(&vp));
				src_x += vp.X;
				src_y += vp.Y;
				width = MIN(width, vp.Width); //HACK
				height = MIN(height, vp.Height);
				RECT src_rect = { src_x, src_y, src_x + width, src_y + height };
				LPDIRECT3DSURFACE9 pTexSurface, pBackBuffer;
				HWHR(itr->second.p->GetSurfaceLevel(0, &pTexSurface));
				HWHR(m.pd3dDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &pBackBuffer));
				HWHR(m.pd3dDevice->StretchRect(pBackBuffer, &src_rect, pTexSurface, NULL,
					D3DTEXF_NONE));
				if(g::gp_dtex || g::gp_dtexraw) {
					//Optional debugging dump of the copied region (PNG and/or raw).
					LPDIRECT3DSURFACE9 pDump=NULL;
					D3DLOCKED_RECT lockedrect;
					HWHR(m.pd3dDevice->CreateOffscreenPlainSurface(WIDTH, HEIGHT,
						MY_BACKBUFFERFORMAT, D3DPOOL_SYSTEMMEM, &pDump, NULL));
					HWHR(m.pd3dDevice->GetRenderTargetData(pBackBuffer, pDump));
					HWHR(pDump->LockRect(&lockedrect, &src_rect, D3DLOCK_READONLY));

					string basename;
					HWGLE(prepareTexDumpBaseName(basename, target_address, itr->second.nchanges));

					if(g::gp_dtex) {
						HWGLE(writePNGEx2(basename, PNG_COLOR_TYPE_RGB_ALPHA, width, height,
							lockedrect.pBits, lockedrect.Pitch, PNG_TRANSFORM_BGR));
					}
					if(g::gp_dtexraw) {
						HWGLE(dumpToFile(lockedrect.pBits, lockedrect.Pitch * height,
							CONCAT(basename, ".raw")));
					}
					//Every LockRect must be paired with UnlockRect before the surface
					//is released.
					HWHR(pDump->UnlockRect());
					SAFE_RELEASE(pDump);
				}
				SAFE_RELEASE(pTexSurface);
				SAFE_RELEASE(pBackBuffer);
			}
		}
	} else {  //copy to xfb
		if(!hw->vi.xfb_changed_since_last_vsync && (target_address) != hw->vi.xfb[0]) {
			GPDEGUB("XFB address unchanged && 0x%08X != 0x%08X. Ignoring copy.\n",
				target_address, hw->vi.xfb[0]);
		} else {
			stat.frames++;
			activate_gp();
			GPHR(endScene());
			/*{	//EFB load hack
			LPDIRECT3DSURFACE9 pd3dsBackbuffer;
			GPHR(m.pd3dDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO,
			&pd3dsBackbuffer));
			D3DLOCKED_RECT locked_rect;
			GPHR(pd3dsBackbuffer->LockRect(&locked_rect, NULL, D3DLOCK_DISCARD));
			DEGUB("Gonna copy EFB. %i*%i=%i swapped words.\n",
			WIDTH, HEIGHT, WIDTH*HEIGHT);
			DWORD* efb = (DWORD*)mem.getp_physical(0x08000000, 2*M);
			//memcpy(locked_rect.pBits, mem.getp_physical(0x08000000, 2*M),
			//locked_rect.Pitch*HEIGHT);
			#define EFB_PITCH (WIDTH+384)
			for(int y=0; y<HEIGHT; y++) {
			for(int x=0; x<WIDTH; x++) {
			((DWORD*)locked_rect.pBits)[x+y*WIDTH] = swapw(efb[x+y*EFB_PITCH]);
			}
			}
			GPHR(pd3dsBackbuffer->UnlockRect());
			pd3dsBackbuffer->Release();
			}*/

			RECT r = { 0, 0, 640, 480 };  //Hack?
			GPHR(m.pd3dDevice->Present(NULL, &r, NULL, NULL));
			if(g::timing_mode == g::TM_EXACT_FAST) {
				if(VISI_LOG) {
					DEGUB("EFB Copy VISI\n");
				}
				//DEGUB("VISI\n");
				hw->vi_simple_interrupt();
			}
			//DEGUB("PVS\n");
			hw->postVsync(VST_EFB_COPY);  //this Sleeps on frame limit. problem is, it's the emu thread that's supposed to sleep, not the GP thread... though it should work just the same... but it won't save any cpu cycles...
		}
	}
	if(clear) {
		GPHR(this->clear());
	}
}
// Clears the colour target and the Z buffer of the whole surface.
// Colour: low 16 bits of BP 0x4F form the upper half, low 16 bits of BP 0x50
// the lower half. Depth: low 24 bits of BP 0x51 divided by ZMAX.
// The current viewport is saved, replaced via setClearState() for the clear,
// then restored, followed by setScissorTest().
// Returns S_OK, or whatever THR propagates from a failing call.
HRESULT GP::clear() {
	const DWORD color_hi = getbitsr(m.bp_reg[0x4F], 15, 0);
	const DWORD color_lo = getbitsr(m.bp_reg[0x50], 15, 0);
	DWORD color = (color_hi << 16) | color_lo;

	DWORD zbase = getbitsr(m.bp_reg[0x51], 23, 0);
	float zvalue = (float)zbase / ZMAX;
	GPDEGUB("Clear to %08X, %.6g\n", color, zvalue);

	//make sure we clear the entire surface, not just the viewport.
	D3DVIEWPORT9 saved_viewport;
	THR(m.pd3dDevice->GetViewport(&saved_viewport));
	THR(setClearState());

	const DWORD clear_flags = D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER;  //(BP_ZENABLE ? D3DCLEAR_ZBUFFER : 0)
	THR(m.pd3dDevice->Clear(0, NULL, clear_flags, color, zvalue, 0));

	THR(m.pd3dDevice->SetViewport(&saved_viewport));
	THR(setScissorTest());
	return S_OK;
}
// Prepares the device for a full-surface clear: sets a 640x480 viewport at
// the origin with depth range 0..1 and disables the scissor test.
// Returns S_OK, or whatever THR propagates from a failing call.
HRESULT GP::setClearState() {
	D3DVIEWPORT9 full_surface;
	full_surface.X = 0;
	full_surface.Y = 0;
	full_surface.Width = 640;
	full_surface.Height = 480;
	full_surface.MinZ = 0;
	full_surface.MaxZ = 1;
	THR(m.pd3dDevice->SetViewport(&full_surface));
	THR(setRS(D3DRS_SCISSORTESTENABLE, false));
	return S_OK;
}

// Applies a texture "Mode0" BP write to the sampler selected by
// GP_BP_TX_DECLARE_INDEX.
//
// Decoded fields: clamp 21, aniso 20..19, lodbias_raw 18..9, edgelod 8,
// minfilter 7, mipfilter 6..5, magfilter 4, wrapt 3..2, wraps 1..0.
// clamp, aniso, lodbias_raw and edgelod are currently ignored.
//
// The S wrap mode drives D3DSAMP_ADDRESSU and the T wrap mode drives
// D3DSAMP_ADDRESSV.
//
// Throws hardware_fatal_exception for mipfilter 3 and for wrap mode 3 in
// either direction.
void GP::bp_tx_setmode0(DWORD data) {
	const char *str_mipfilter[] = { "none", "point", "linear", "unused/reserved" };
	const char *str_filter[] = { "point", "linear" };
	const char *str_wrap[] = { "clamp", "repeat", "mirror", "unused/reserved" };
	GP_BP_TX_DECLARE_INDEX;
	std::string name = STRING_PLUS_DIGIT("Tex", index) + " Mode0";
	BEGIN_FIELDS(data, name.c_str());
	DNAS(clamp, 21);
	DNAM(aniso, 20, 19);
	DNAM(lodbias_raw, 18, 9);
	DNAS(edgelod, 8);
	DNASS(minfilter, 7, str_filter);
	DNAMS(mipfilter, 6, 5, str_mipfilter);
	DNASS(magfilter, 4, str_filter);
	DNAMS(wrapt, 3, 2, str_wrap);
	DNAMS(wraps, 1, 0, str_wrap);
	END_FIELDS;

	/*GPDEGUB("Tex0 Mode0 | lod/bias clamp %d | aniso %i(2^%d) | lodbias (0x%03X) | edgelod %d | minfilter %s(%d) | magfilter %s(%d) | wraps %s(%d) | wrapt %s(%d)\n",
	//(data >> 19) & 3, (data & 0x20)>0, (data & 0x10)>0, (data >> 2) & 3,
	//data & 3);
	clamp, 1 << aniso, aniso, lodbias_raw, edgelod, minstring[minfilter],
	minfilter, magstring[magfilter], magfilter, wrapstring[wrapt], wrapt,
	wrapstring[wraps], wraps);*/
	if(clamp || aniso || lodbias_raw) { // || edgelod //we ignore edgelod for now.
		//throw hardware_fatal_exception("GP Unemulated Texture Mode0!");
	}
	switch(mipfilter) {
	case 0: //none
		GPHR(setSS(index, D3DSAMP_MIPFILTER, D3DTEXF_NONE));
		break;
	case 1: //point
		GPHR(setSS(index, D3DSAMP_MIPFILTER, D3DTEXF_POINT));
		break;
	case 2: //linear
		GPHR(setSS(index, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR));
		break;
	default:
		throw hardware_fatal_exception("GP Unemulated Texture MipFilter!");
	}
	GPHR(setSS(index, D3DSAMP_MINFILTER, minfilter ? D3DTEXF_LINEAR : D3DTEXF_POINT));
	GPHR(setSS(index, D3DSAMP_MAGFILTER, magfilter ? D3DTEXF_LINEAR : D3DTEXF_POINT));

	D3DTEXTUREADDRESS tamode[] = { D3DTADDRESS_CLAMP, D3DTADDRESS_WRAP, D3DTADDRESS_MIRROR };
	if(wrapt == 3 || wraps == 3)
		throw hardware_fatal_exception("GP Invalid Texture Wrap mode!");
	//S is the horizontal (U) direction, T the vertical (V) one. The two were
	//previously crossed, which only showed when wraps != wrapt.
	GPHR(setSS(index, D3DSAMP_ADDRESSU, tamode[wraps]));
	GPHR(setSS(index, D3DSAMP_ADDRESSV, tamode[wrapt]));
}
// Decodes and logs a texture "Mode1" write (min/max LOD in 1/16 units).
// The values are not applied to the device.
void GP::bp_tx_setmode1(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	const float lod_min = getbitsr(data, 7, 0) / 16.0f;
	const float lod_max = getbitsr(data, 15, 8) / 16.0f;
	GPDEGUB("Tex%i Mode1(ignored): maxlod = %4.4f | minlod = %4.4f\n",
		index, lod_max, lod_min);
}

// Records texture dimensions and format for the texture unit selected by
// GP_BP_TX_DECLARE_INDEX and marks the unit as changed.
// Width (bits 9..0) and height (bits 19..10) are stored minus one in the
// register; format is bits 23..20.
void GP::bp_tx_setimage0(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	tx[index].format = getbitsr(data, 23, 20);
	tx[index].height = getbitsr(data, 19, 10) + 1;
	tx[index].width = getbitsr(data, 9, 0) + 1;
	tx[index].changed = true;
	GPDEGUB("Tex%i Image0: width %d | height %d | format %x\n", index,
		tx[index].width, tx[index].height, tx[index].format);
}

// Lookup tables indexed by the 3-bit cache size code found in the texture
// Image1/Image2 registers (fields cache_width / cache_height).
// Only codes 3, 4 and 5 are accepted by bp_tx_setimage1/2 for cached textures.

// Printable name of each size code, used for logging only.
static const char *s_str_cache[] = { "", TROGDOR, TROGDOR, "32KB", "128KB",
"512KB", TROGDOR, TROGDOR
};
// Required TMEM offset alignment for each size code. Unsupported codes hold
// MAX_DWORD.
static const DWORD s_alignment[] = {
	MAX_DWORD, MAX_DWORD, MAX_DWORD, 32*K, 128*K, 512*K, MAX_DWORD, MAX_DWORD
};
// Value stored into even_size/odd_size for each size code (1, 4, 16 for
// 32KB, 128KB, 512KB; 0 for unsupported codes).
static const DWORD s_gtcs[] = { 0, 0, 0, 1, 4, 16, 0, 0 };

// Records the "even" TMEM configuration of a texture unit: preloaded flag
// (bit 21), cache height/width codes (bits 20..18 / 17..15) and TMEM offset
// (bits 14..0, shifted left by 5). Marks the unit as changed.
// For non-preloaded (cached) textures the two size codes must be equal, in the
// range 3..5, and the even offset must be aligned to the cache size;
// otherwise hardware_fatal_exception is thrown.
void GP::bp_tx_setimage1(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	tx[index].preloaded = getbitr(data, 21);
	DWORD cache_height = getbitsr(data, 20, 18);
	DWORD cache_width = getbitsr(data, 17, 15);
	tx[index].even_offset = getbitsr(data, 14, 0) << 5;

	const char *str_preloaded[] = { "cached", "preloaded" };
	GPDEGUB("Tex%i Image1(even): %s(%i). cache_height %s(%i) | cache_width %s(%i) | tmem_offset 0x%05X\n",
		index, str_preloaded[tx[index].preloaded], tx[index].preloaded,
		s_str_cache[cache_height], cache_height, s_str_cache[cache_width], cache_width,
		tx[index].even_offset);

	if(!tx[index].preloaded) {
		const bool size_ok = cache_height == cache_width &&
			cache_width >= 3 && cache_width <= 5;
		//the alignment is only looked up once the size code is known to be valid
		if(!size_ok || tx[index].even_offset % s_alignment[cache_width] != 0)
			throw hardware_fatal_exception("GP Texture Cache error");
	}
	tx[index].even_size = s_gtcs[cache_width];
	tx[index].changed = true;
}
// Records the "odd" TMEM configuration of a texture unit: cache height/width
// codes (bits 20..18 / 17..15) and TMEM offset (bits 14..0, shifted left by
// 5). Marks the unit as changed.
// For non-preloaded (cached) textures the two size codes must be equal, in the
// range 3..5, and the ODD offset must be aligned to the cache size; otherwise
// hardware_fatal_exception is thrown. The preloaded flag consulted here is the
// one last stored by bp_tx_setimage1().
void GP::bp_tx_setimage2(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	DWORD cache_height = getbitsr(data, 20, 18);
	DWORD cache_width = getbitsr(data, 17, 15);
	tx[index].odd_offset = getbitsr(data, 14, 0) << 5;

	GPDEGUB("Tex%i Image2(odd): cache_height %s(%i) | cache_width %s(%i) | tmem_offset 0x%05X\n",
		index, s_str_cache[cache_height], cache_height, s_str_cache[cache_width], cache_width,
		tx[index].odd_offset);
	//Validate the offset this register just set. The check used to test
	//even_offset, a copy-paste slip from bp_tx_setimage1().
	if(!tx[index].preloaded && !(cache_height == cache_width &&
		cache_width >= 3 && cache_width <= 5 &&
		tx[index].odd_offset % s_alignment[cache_width] == 0))
		throw hardware_fatal_exception("GP Texture Cache error");
	tx[index].odd_size = s_gtcs[cache_width];
	tx[index].changed = true;
}
// Records the source address of a texture unit (register value shifted left
// by 5) and marks the unit as changed.
void GP::bp_tx_setimage3(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	tx[index].changed = true;
	tx[index].address = data << 5;
	GPDEGUB("Tex%i Image3: address 0x%08X\n", index, tx[index].address);
}
// Records the TLUT (palette) format and TMEM offset of a texture unit and
// marks the unit as changed.
// Format is bits 11..10; the offset is bits 9..0 shifted left by 9, plus
// 0x80000. Format 3 is illegal and throws hardware_fatal_exception (after the
// fields have been stored and logged).
void GP::bp_tx_settlut(DWORD data) {
	GP_BP_TX_DECLARE_INDEX;
	const char *str_format[4] = { "IA8", "RGB565", "RGB5A3", TROGDOR };

	tx[index].tlutformat = getbitsr(data, 11, 10);
	tx[index].tlutoffset = (getbitsr(data, 9, 0) << 9) + 0x80000;
	GPDEGUB("Tex%i TLUT: format %s(%d) | offset 0x%05X\n", index,
		str_format[tx[index].tlutformat], tx[index].tlutformat, tx[index].tlutoffset);

	const bool illegal_format = (tx[index].tlutformat == 3);
	if(illegal_format)
		throw hardware_fatal_exception("GP TLUT illegal format");
	tx[index].changed = true;
}

// Validates and logs a texture cache invalidate request. The invalidation
// itself is NOT emulated (see the disabled code at the end).
// Offset is bits 9..0 shifted left by 11; size is the 2-bit code in bits
// 12..11. Only size codes 1 and 2 are accepted, and the offset must be aligned
// to the matching size; violations throw hardware_fatal_exception.
void GP::bp_tx_invalidate(DWORD data) {
	const char *str_size[] = { TROGDOR, "32K", "512K", "128K" };	//uncertain
	DWORD alignment[] = { MAX_DWORD, 32*K, 512*K, 128*K };

	DWORD size = getbitsr(data, 12, 11);	//uncertain
	DWORD offset = getbitsr(data, 9, 0) << 11;
	GPDEGUB("Texture cache invalidate: offset 0x%05X | size %s(%i)  (IGNORED!)\n",
		offset, str_size[size], size);

	if(size != 1 && size != 2)
		throw hardware_fatal_exception("GP TX Invalidate unknown size");
	if(offset % alignment[size] != 0)
		throw hardware_fatal_exception("GP Texture Invalidate alignment error");

	/*DWORD nlines[] = { 0, 1, 16, 4 };
	DWORD baseline = offset >> 15;
	for(DWORD i=baseline; i<baseline + nlines[size]; i++) {
	//we assume that no textures will be loaded from mmaddress 0
	if(tcache.lines[i] != 0) {
	hash_map<DWORD, LPDIRECT3DTEXTURE9>::iterator itr =
	m_ctx_map.find(tcache.lines[i]);
	if(itr != m_ctx_map.end()) {
	itr->second->Release();
	m_ctx_map.erase(itr);
	GPDEGUB("Texture 0x%08X DELETED!!\n", tcache.lines[i]);
	}
	tcache.lines[i] = 0;
	}
	}*/
}

void GP::handle_tev_reg_load(int i) {
	if(ps_control.reg_set[i]) {
		ps_control.reg_set[i] = false;
		if(g::gp_pixelshader) {
			D3DCOLORVALUE cv = dword2cv(ps_control.reg[i].dword);
			GPHR(m.pd3dDevice->SetPixelShaderConstantF(PSC_CREG(i), (float*)&cv, 1));
		} else if(i == 1) {
			//GPHR(setTSS(0, D3DTSS_CONSTANT, ps_key.reg[i].dword));
			GPHR(setRS(D3DRS_TEXTUREFACTOR, ps_control.reg[i].dword));
		}
	}
}
// Stores the alpha (bits 19..12) and red (bits 7..0) components of a TEV
// register and flags it for upload. The register index is derived from the BP
// register number: (reg - 0xE0) / 2.
void GP::bp_tev_register_l(DWORD data) {
	BYTE tev_reg = (GP_QUEUE_GET_BP_REG - 0xE0) / 2;
	ps_control.reg[tev_reg].r = (BYTE)getbitsr(data, 7, 0);
	ps_control.reg[tev_reg].a = (BYTE)getbitsr(data, 19, 12);
	ps_control.reg_set[tev_reg] = true;
	GPDEGUB("TEV register %i AR: %02X %02X\n", tev_reg,
		ps_control.reg[tev_reg].a, ps_control.reg[tev_reg].r);
}
// Stores the green (bits 19..12) and blue (bits 7..0) components of a TEV
// register and flags it for upload. The register index is derived from the BP
// register number: (reg - 0xE1) / 2.
void GP::bp_tev_register_h(DWORD data) {
	BYTE tev_reg = (GP_QUEUE_GET_BP_REG - 0xE1) / 2;
	ps_control.reg[tev_reg].b = (BYTE)getbitsr(data, 7, 0);
	ps_control.reg[tev_reg].g = (BYTE)getbitsr(data, 19, 12);
	ps_control.reg_set[tev_reg] = true;
	GPDEGUB("TEV register %i GB: %02X %02X\n", tev_reg,
		ps_control.reg[tev_reg].g, ps_control.reg[tev_reg].b);
}

// Handles a TEV KSEL register write (BP registers starting at 0xF6).
// Nothing happens unless the value differs from the stored register.
//
// Each KSEL register carries the konst selectors of two consecutive TEV
// stages plus half of a swap table entry:
//   bits  3..0  swap table half (stored in .gr for even registers, .ab for odd)
//   bits  8..4  konst colour selector, stage 2*index
//   bits 13..9  konst alpha selector,  stage 2*index
//   bits 18..14 konst colour selector, stage 2*index+1
//   bits 23..19 konst alpha selector,  stage 2*index+1
// Two consecutive registers share one swap table entry (index / 2).
void GP::bp_tev_ksel(DWORD data) {
	BYTE regnum = GP_QUEUE_GET_BP_REG;
	BYTE index = regnum - 0xF6;
	if(data != m.bp_reg[regnum]) {
		ps_control.set = true;
		for(int i=0; i<2; i++) {
			PS_KEY::STAGE &stage = ps_key.stage[index*2+i];
			//The second stage's selectors sit 10 bits above the first stage's.
			//Both stages used to read the first stage's bits.
			const int shift = i*10;
			stage.kSelColor = (BYTE)(TevKSel)getbitsr(data, 8+shift, 4+shift);
			stage.kSelAlpha = (BYTE)(TevKSel)getbitsr(data, 13+shift, 9+shift);
		}
		PS_KEY::SWAP_ENTRY &swap = ps_key.swap[index / 2];
		BYTE temp = (BYTE)data & 0xF;
		if(index & 1)
			swap.ab = temp;
		else
			swap.gr = temp;
	}
}

// Handles a TEV order register write (BP registers starting at 0x28).
// Each register describes two consecutive TEV stages; the second stage's
// fields are located 12 bits above the first stage's:
//   bits 2..0 texMap, 5..3 texCoord, 6 texEnabled, 9..7 rasCID.
// Nothing happens unless the value differs from the stored register.
void GP::bp_tev_order(DWORD data) {
	BYTE regnum = GP_QUEUE_GET_BP_REG;
	BYTE index = regnum - 0x28;
	if(data == m.bp_reg[regnum])
		return;

	ps_control.set = true;
	for(int half=0; half<2; half++) {
		const int shift = half*12;
		PS_KEY::STAGE &stage = ps_key.stage[index*2+half];
		stage.o.texMap = (BYTE)getbitsr(data, 2+shift, 0+shift);
		stage.o.texCoord = (BYTE)getbitsr(data, 5+shift, 3+shift);
		stage.o.texEnabled = getbitr(data, 6+shift);
		stage.o.rasCID = (BYTE)getbitsr(data, 9+shift, 7+shift);
	}
}

// Handles a TEV colour combiner register write for the stage selected by
// GP_BP_TEV_DECLARE_INDEX. Nothing happens unless the value differs from the
// stored register; otherwise the pixel shader key is updated and flagged.
// Layout: d 3..0, c 7..4, b 11..8, a 15..12, bias 17..16, sub 18, clamp 19,
// scale 21..20, dest 23..22.
void GP::bp_tev_color(DWORD data) {
	GP_BP_TEV_DECLARE_INDEX;
	if(data == m.bp_reg[regnum])
		return;

	ps_control.set = true;
	PS_KEY::STAGE &stage = ps_key.stage[index];
	//inputs
	stage.c.a = (TevColorSel)getbitsr(data, 15, 12);
	stage.c.b = (TevColorSel)getbitsr(data, 11, 8);
	stage.c.c = (TevColorSel)getbitsr(data, 7, 4);
	stage.c.d = (TevColorSel)getbitsr(data, 3, 0);
	//operation
	stage.c.bias = (BYTE)getbitsr(data, 17, 16);
	stage.c.sub = getbitr(data, 18);
	stage.c.clamp = getbitr(data, 19);
	stage.c.scale = (BYTE)getbitsr(data, 21, 20);
	stage.c.dest = (BYTE)getbitsr(data, 23, 22);
	GPDEGUB("TEV Stage %i Color changed.\n", index);
}
// Handles a TEV alpha combiner register write for the stage selected by
// GP_BP_TEV_DECLARE_INDEX. Nothing happens unless the value differs from the
// stored register; otherwise the pixel shader key is updated and flagged.
// Layout: swapSelRas 1..0, swapSelTex 3..2, d 6..4, c 9..7, b 12..10,
// a 15..13, bias 17..16, sub 18, clamp 19, scale 21..20, dest 23..22.
void GP::bp_tev_alpha(DWORD data) {
	GP_BP_TEV_DECLARE_INDEX;
	if(data == m.bp_reg[regnum])
		return;

	ps_control.set = true;
	PS_KEY::STAGE &stage = ps_key.stage[index];
	//swap table selectors
	stage.a.swapSelRas = (BYTE)getbitsr(data, 1, 0);
	stage.a.swapSelTex = (BYTE)getbitsr(data, 3, 2);
	//inputs
	stage.a.a = (TevAlphaSel)getbitsr(data, 15, 13);
	stage.a.b = (TevAlphaSel)getbitsr(data, 12, 10);
	stage.a.c = (TevAlphaSel)getbitsr(data, 9, 7);
	stage.a.d = (TevAlphaSel)getbitsr(data, 6, 4);
	//operation
	stage.a.bias = (BYTE)getbitsr(data, 17, 16);
	stage.a.sub = getbitr(data, 18);
	stage.a.clamp = getbitr(data, 19);
	stage.a.scale = (BYTE)getbitsr(data, 21, 20);
	stage.a.dest = (BYTE)getbitsr(data, 23, 22);
	GPDEGUB("TEV Stage %i Alpha changed.\n", index);
}

// First half of a TLUT load: records the source address (bits 20..0 shifted
// left by 5). Once both halves have been received (ISLOADED), the actual copy
// is done by load_tlut().
void GP::bp_tlut_load0(DWORD data) {
	//bits 23..21 are padding; the check for them is disabled:
	//DWORD pad = getbitsr(data, 23, 21);
	//if(pad)
	//throw hardware_fatal_exception("BP TLUT Load0 pad != 0");
	tlut.address = getbitsr(data, 20, 0) << 5;
	GPDEGUB("BP TLUT Load0: address = 0x%08X\n", tlut.address);

	SETLOAD(tlut, LOAD0);
	if(ISLOADED(tlut)) {
		load_tlut();
	}
}
// Second half of a TLUT load: records the count (bits 20..10) and the TMEM
// destination offset (bits 9..0 shifted left by 9, plus 0x80000). Once both
// halves have been received (ISLOADED), the actual copy is done by load_tlut().
// Throws hardware_fatal_exception if the padding bits 23..21 are non-zero, or
// if the count is not a power of two or exceeds 1024.
void GP::bp_tlut_load1(DWORD data) {
	const DWORD padding = getbitsr(data, 23, 21);
	tlut.count = getbitsr(data, 20, 10);
	tlut.offset = (getbitsr(data, 9, 0) << 9) + 0x80000;
	GPDEGUB("BP TLUT Load1: count = %i, offset = 0x%05X\n", tlut.count, tlut.offset);

	if(padding != 0)
		throw hardware_fatal_exception("BP TLUT Load1 pad != 0");
	const bool sane_count = is_power_of_2(tlut.count) && !(tlut.count > 1024);
	if(!sane_count)
		throw hardware_fatal_exception("BP TLUT Load1 strange count");

	SETLOAD(tlut, LOAD1);
	if(ISLOADED(tlut)) {
		load_tlut();
	}
}
// Copies a TLUT from emulated physical memory into TMEM.
// Requires that both TLUT load registers have been written (asserted via
// ISLOADED); the load state is cleared before the copy.
// tlut.count * 32 bytes are copied from tlut.address to m.tmem + tlut.offset.
void GP::load_tlut() {
	MYASSERT(ISLOADED(tlut));
	CLEARLOAD(tlut);
	size_t size = tlut.count * 32;
	//size_t does not match %i; pass an int. bp_tlut_load1() limits count to
	//1024, so the value fits easily.
	GPDEGUB("Loading TLUT: %i bytes from 0x%08X to 0x%05X\n", (int)size,
		tlut.address, tlut.offset);
	memcpy(m.tmem + tlut.offset, mem.getp_physical(tlut.address, size), size);
}

// Applies the BP "General Mode" register: counts of texcoords, colours and TEV
// stages go into the pixel shader key (flagging it when something changed);
// the cull mode and depth bias go to the device.
// Throws hardware_fatal_exception if more than 8 TEV stages or any bumpmaps
// are requested, or if more than 2 colours are requested.
void GP::bp_gen_mode(DWORD data) {
	BEGIN_FIELDS(data, "BP General Mode");
	const char *cull_string[] = { "none", "ccw", "cw", "all" };
	DNAM(ntex, 3, 0); //texcoords
	DNAM(ncol, 8, 4); //colors
	DNAS(ms_en, 9); //I have no idea :}
	DNAM(ntev, 13, 10); //TEV stages, -1
	DNAMS(cull, 15, 14, cull_string);
	DNAM(nbmp, 18, 16);	//bumpmaps
	DNAM(zfreeze, 23, 19);  //Z bias? dunno if it's a bool or a value or an enum.
	END_FIELDS;

	//ms_en ignored
	if(ntev > 7 || nbmp)
		throw hardware_fatal_exception("GP Unemulated BP GenMode!");
	if(ncol > 2)
		throw hardware_fatal_exception("GP Invalid BP GenMode!");

	//generated by vertex transformer: texture coordinates and colors
	if(ps_key.ntex != ntex) {
		ps_control.set = true;
		ps_key.ntex = (BYTE)ntex;
	}
	if(ps_key.ncol != ncol) {
		ps_control.set = true;
		ps_key.ncol = (BYTE)ncol;
	}

	//the register holds the stage count minus one
	if((BYTE)ntev + 1 != ps_key.ntev) {
		ps_control.set = true;
		ps_key.ntev = (BYTE)ntev + 1;
	}

	//D3DCULL cull_mode[] = { D3DCULL_CCW, D3DCULL_NONE, D3DCULL_FORCE_DWORD, D3DCULL_CW };
	//unsigned int cull[] = { 0, GL_BACK, GL_FRONT, GL_FRONT_AND_BACK, };
	D3DCULL cull_mode[] = { D3DCULL_NONE, D3DCULL_CCW, D3DCULL_CW };
	if(cull != 3) {
		vs_control.cull_all = false;
		m.cullmode = cull_mode[cull];
		//wireframe never culls, but the requested mode is still remembered
		GPHR(setRS(D3DRS_CULLMODE, g::gp_wireframe ? D3DCULL_NONE : m.cullmode));
	} else {
		//"cull all" has no D3D equivalent; it is flagged in vs_control instead
		vs_control.cull_all = !g::gp_wireframe;
	}
	GPHR(setRS(D3DRS_DEPTHBIAS, zfreeze ? 16 : 0));
}

// Stores the alpha test configuration (two reference values, two comparison
// functions and the combining op) in the pixel shader key and flags the key
// as changed. No fixed-function alpha test state is set; the old code for
// that is kept below, disabled.
void GP::bp_tev_alphafunc(DWORD data) {
	BEGIN_FIELDS(data, "BP TEV Alphafunc");
	DNAMS(op, 23, 22, gx::alphaop);
	DNAMS(comp1, 21, 19, gx::comp);
	DNAMS(comp0, 18, 16, gx::comp);
	DNAM(a1, 15, 8);
	DNAM(a0, 7, 0);
	END_FIELDS;

	ps_control.set = true;
	ps_key.alphafunc.op = op;
	ps_key.alphafunc.comp0 = comp0;
	ps_key.alphafunc.comp1 = comp1;
	ps_key.alphafunc.a0 = a0;
	ps_key.alphafunc.a1 = a1;

	/*if(!((op == gx::AO_AND && comp1 == gx::C_ALWAYS) ||
	((op == gx::AO_AND || op == gx::AO_OR) && comp1 == comp0 && a0 == a1)))
	throw hardware_fatal_exception("GP Unemulated Alphafunc!");

	if(comp0 == gx::C_ALWAYS || g::gp_wireframe) {
	GPHR(setRS(D3DRS_ALPHATESTENABLE, FALSE));
	} else {
	GPHR(setRS(D3DRS_ALPHATESTENABLE, TRUE));
	GPHR(setRS(D3DRS_ALPHAFUNC, gx::d3d_comp[comp0]));
	GPHR(setRS(D3DRS_ALPHAREF, a0));
	}*/
}

// Sets the scissor rectangle's top-left corner. Both coordinates are stored
// in the register with a bias of 342 (x in bits 23..12, y in bits 11..0).
// Negative results are tolerated; values above 2047 throw
// hardware_fatal_exception.
void GP::bp_su_scis0(DWORD data) {
	const int left = getbitsr(data, 23, 12) - 342;
	const int top = getbitsr(data, 11, 0) - 342;
	GPDEGUB("Scissor top-left: x %i | y %i\n", left, top);

	//cubed-gcworld seems to use (-342, -342), so negatives are not rejected
	const bool in_range = /*left >= 0 && top >= 0 &&*/ left <= 2047 && top <= 2047;
	if(!in_range)
		throw hardware_fatal_exception("GP Invalid Scissor rect!");

	m.scissor.top = top;
	m.scissor.left = left;
	m.scissor.changed = true;
}
// Sets the scissor rectangle's bottom-right corner. Both coordinates are
// stored in the register with a bias of 341 (x in bits 23..12, y in bits
// 11..0). Results outside 0..2047 throw hardware_fatal_exception.
void GP::bp_su_scis1(DWORD data) {
	const int right = getbitsr(data, 23, 12) - 341;
	const int bottom = getbitsr(data, 11, 0) - 341;
	GPDEGUB("Scissor bottom-right: x %i | y %i\n", right, bottom);

	const bool in_range = right >= 0 && bottom >= 0 && right <= 2047 && bottom <= 2047;
	if(!in_range)
		throw hardware_fatal_exception("GP Invalid Scissor rect!");

	m.scissor.bottom = bottom;
	m.scissor.right = right;
	m.scissor.changed = true;
}
// Sets the scissor offset. Each coordinate is stored halved and with a bias
// of 342 (x in bits 19..10, y in bits 9..0).
// Accepted range is -342..382 for x and -342..494 for y; anything else throws
// hardware_fatal_exception.
void GP::bp_scissor_offset(DWORD data) {
	const int offset_x = (getbitsr(data, 19, 10) << 1) - 342;
	const int offset_y = (getbitsr(data, 9, 0) << 1) - 342;
	GPDEGUB("Scissor offset: x %i | y %i\n", offset_x, offset_y);

	const bool x_ok = offset_x >= -342 && offset_x <= 382;
	const bool y_ok = offset_y >= -342 && offset_y <= 494;
	if(!x_ok || !y_ok)
		throw hardware_fatal_exception("GP Invalid Scissor offset!");

	m.scissor.yoffset = offset_y;
	m.scissor.xoffset = offset_x;
	m.scissor.changed = true;
}

// Applies the line/point size register. Only the point size is used: it is
// divided by 6 and passed to D3DRS_POINTSIZE as the raw bits of the float.
// ptoff, ltoff and lsize are ignored; a set lineaspect bit is unemulated and
// throws hardware_fatal_exception.
void GP::bp_su_lpsize(DWORD data) {
	BEGIN_FIELDS(data, "BP SU_LPSIZE");
	DNAS(lineaspect, 22);
	DNAM(ptoff, 21, 19);
	DNAM(ltoff, 18, 16);
	DNAM(psize, 15, 8);
	DNAM(lsize, 7, 0);
	END_FIELDS;

	//most other fields are ignored: /* || ptoff || ltoff || lsize != 6*/
	if(lineaspect) {
		//throw hardware_fatal_exception("GP Unemulated Line/Pixel state!");
		throw hardware_fatal_exception("GP Line Aspect unemulated!");
	}

	float point_size = psize / 6.0f;
	GPHR(setRS(D3DRS_POINTSIZE, MAKE(DWORD, point_size)));
}

// Stores a write mask in m.bp_mask. bpload() applies it to the next BP write
// and then resets it to 0xffffff.
void GP::bp_mask(DWORD data) {
	m.bp_mask = data;
	GPDEGUB("BP Mask: 0x%06X\n", data);
}
