// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#include "gp_internal.h"
#include "main.h"

//Uploads the world-view matrix for slot `index` to the vertex shader constants, and
//also uploads the world-view-projection matrix derived from it.
//  index: matrix slot; VSC_WVMATRIX(index)/VSC_PMATRIX(index) select the start registers.
//  data:  12 floats, read below as 3 groups of 4 (data[0..11]).
void GP::setWVMatrix(DWORD index, const float *data) {
	D3DXMATRIX matWorldView;
	D3DXMatrixIdentity(&matWorldView);
	//Each group of 4 floats in data becomes one column (_1n, _2n, _3n, _4n) of
	//matWorldView, i.e. data is loaded transposed. Only columns 1..3 are written;
	//the 4th column keeps its identity values.
	//NOTE: THREE and DO_ROW are never #undef'd, so they remain defined after this function.
#define THREE(macro) macro(1) macro(2) macro(3)
#define DO_ROW(num) matWorldView._1##num = data[0 + (num-1)*4];\
	matWorldView._2##num = data[1 + (num-1)*4];\
	matWorldView._3##num = data[2 + (num-1)*4];\
	matWorldView._4##num = data[3 + (num-1)*4];
	THREE(DO_ROW);
	//memcpy(&matWorldView, data, 4*12);
	if(g::verbose)
		dump_matrix(&matWorldView);

	//The raw (untransposed) data is uploaded as 3 float4 constants; the generated
	//vertex shader declares wv_mtx as float4x3 on these registers.
	//if(vs.lighting) { //set worldview matrix for lighting calculations
	GPHR(m.pd3dDevice->SetVertexShaderConstantF(VSC_WVMATRIX(index),
		data, 3));//(float*)&matWorldView, 4));
	//GPDEGUB("WVMatrix set.\n");
	//}
	//set worldview/projection matrix
	D3DXMATRIX matWVP;
	D3DXMatrixMultiply(&matWVP, &matWorldView, &vs_control.matProj);
	//Original author's note: "I don't know what this is for, but they do it in the d3d sample..."
	//NOTE(review): presumably needed because HLSL uniform matrices default to
	//column-major packing while D3DXMATRIX is stored row-major - confirm.
	D3DXMatrixTranspose(&matWVP, &matWVP);
	GPHR(m.pd3dDevice->SetVertexShaderConstantF(VSC_PMATRIX(index), (float*)&matWVP, 4));
	VGPDEGUB("WV/PMatrix %i set.\n", index);
}
//Uploads texture matrix `index` to the vertex shader constants.
//  data:  2 rows of 4 floats, or 3 rows of 4 floats when `three` is true.
//  three: selects whether the third row (data[8..11]) is logged and uploaded.
void GP::setTMatrix(DWORD index, const float *data, bool three) {
	const int rowCount = three ? 3 : 2;
	const float *row0 = data;
	const float *row1 = data + 4;
	VGPDEGUB("%.6g %.6g %.6g %.6g\n%.6g %.6g %.6g %.6g\n",
		row0[0], row0[1], row0[2], row0[3],
		row1[0], row1[1], row1[2], row1[3]);
	if(three) {
		const float *row2 = data + 8;
		VGPDEGUB("%.6g %.6g %.6g %.6g\n", row2[0], row2[1], row2[2], row2[3]);
	}
	GPHR(m.pd3dDevice->SetVertexShaderConstantF(VSC_TMATRIX(index), data, rowCount));
	VGPDEGUB("Texture matrix %i set.\n", index);
}
//Uploads normal matrix `index` to the vertex shader constants.
//  data: 9 floats (3 rows of 3). Each row is widened to a float4 with a trailing 0
//        so that the matrix occupies 3 whole constant registers.
void GP::setNMatrix(DWORD index, const float *data) {
	float mx[12];
	for(int row=0; row<3; row++) {
		for(int col=0; col<3; col++)
			mx[row*4 + col] = data[row*3 + col];
		mx[row*4 + 3] = 0;
	}
	VGPDEGUB("%.6g %.6g %.6g %.6g\n%.6g %.6g %.6g %.6g\n%.6g %.6g %.6g %.6g\n",
		mx[0], mx[1], mx[2], mx[3],
		mx[4], mx[5], mx[6], mx[7],
		mx[8], mx[9], mx[10], mx[11]);
	GPHR(m.pd3dDevice->SetVertexShaderConstantF(VSC_NMATRIX(index), mx, 3));
	VGPDEGUB("Normal matrix %i set.\n", index);
}

//Hash functor entry point: yields hash_value(key).
template<class T> size_t GP::my_hash_compare<T>::operator()(const T& key) const {
	return hash_value(key);
}
//Hashes the raw object representation of key: all sizeof(key) bytes are passed to
//hash3() with a third argument of 0.
//NOTE(review): because every byte of T is hashed, this presumably relies on T having
//no uninitialized padding - confirm against the key types' initialization.
template<class T> size_t GP::my_hash_compare<T>::hash_value(const T& key) const {
	//Previous hand-rolled multiply-by-primes hash, kept for reference (disabled):
	/*const BYTE prime[] = { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
	61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
	151, 157, 163, 167, 173,179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233,
	239, 241, 251 };
	const size_t len = sizeof(key)/sizeof(size_t);
	const size_t remainder = sizeof(key)%sizeof(size_t);
	size_t hash=0;
	BYTE count=0;
	for(size_t i=0; i<len; i++) {
	hash += ((size_t*)&key)[i];
	hash *= prime[count++];
	}
	for(size_t j=0; j<remainder; j++) {
	hash += ((BYTE*)&key)[len*sizeof(size_t) + j];
	hash *= prime[count++];
	}
	MYASSERT(count <= sizeof(prime));
	return hash;*/
	return hash3((BYTE*)&key, sizeof(key), 0);
}
//Ordering predicate over the raw object representation of two keys.
//The keys are compared one size_t word at a time, then byte by byte for whatever is
//left over when sizeof(T) is not a multiple of sizeof(size_t).
//Returns true when, at the first differing unit, key1 holds the larger value;
//returns false when key1 holds the smaller value or the keys are bitwise equal.
template<class T>
bool GP::my_hash_compare<T>::operator()(const T& key1, const T& key2) const {
	const size_t nWords = sizeof(T)/sizeof(size_t);
	const size_t nTailBytes = sizeof(T)%sizeof(size_t);

	//whole machine words first
	const size_t *words1 = (const size_t*)&key1;
	const size_t *words2 = (const size_t*)&key2;
	for(size_t w=0; w<nWords; w++) {
		if(words1[w] != words2[w])
			return words1[w] > words2[w];
	}

	//then the trailing bytes that did not fill a word
	const BYTE *tail1 = (const BYTE*)&key1 + nWords*sizeof(size_t);
	const BYTE *tail2 = (const BYTE*)&key2 + nWords*sizeof(size_t);
	for(size_t b=0; b<nTailBytes; b++) {
		if(tail1[b] != tail2[b])
			return tail1[b] > tail2[b];
	}
	return false;
}

//Selects (and, on a cache miss, generates and compiles) the vertex shader matching the
//current vs_key, then binds its vertex declaration and shader on the device.
//  FVF:  D3D flexible vertex format of the incoming vertices; stored in vs_key.FVF.
//  midx: matrix-index bits; stored in vs_key.midx.
//Does nothing beyond the projection refresh when no part of the key has changed
//(vs_control.set stays false).
void GP::setVertexShader(DWORD FVF, WORD midx) {
	//A changed projection invalidates every uploaded WVP matrix, so re-upload them all.
	if(vs_control.set_proj) {
		//overkill in many circumstances, but simple
		for(int i=0; i<NMATRICES; i++) {
			setWVMatrix(i, WV_MATRIX(i));
		}
		vs_control.set_proj = false;
	}

	//Fold the per-call inputs into the key; any difference marks the shader as dirty.
	if(vs_key.FVF != FVF) {
		//set vertex declaration
		vs_key.FVF = FVF;
		vs_control.set = true;
	}
	if(vs_key.midx != midx) {
		vs_key.midx = midx;
		vs_control.set = true;
	}
	if(vs_key.force_white != g::gp_white) {
		vs_key.force_white = g::gp_white;
		vs_control.set = true;
	}
	if(!vs_control.set)
		return;
	vs_control.set = false;

	VGPDEGUB("Searching for key...\n");
	VSHashMap::iterator itr = vs_map.find(vs_key);
	VGPDEGUB("Done. ");
	if(itr != vs_map.end()) {
		stat.vs_cachehit++;
		vs_current = itr;
		VGPDEGUB("Hit!\n");
	} else {  //no match in cache, build new
		VGPDEGUB("Miss.\n");
		stat.vertexshaders++;
		VS_DATA data;
		//create vertex declaration
		D3DVERTEXELEMENT9 ve[MAX_FVF_DECL_SIZE];
		GPHR(D3DXDeclaratorFromFVF(FVF, ve));
		GPHR(m.pd3dDevice->CreateVertexDeclaration(ve, &data.pVertexDeclaration));
		//compile shader
		//NOTE(review): if generateVertexShader throws, or GPHR/HWHR2GLE leave this
		//function on failure (macros not visible here), the declaration created above
		//and pCode below are presumably leaked - confirm.
		ostringstream sstr;
		generateVertexShader(sstr, vs_key, data.FVFout);
		string hlShader = sstr.str();
		if(g::verbose && g::gp_log) {
			DEGUB("Compiling vertex shader:\n");
			fprintShader(df, hlShader.c_str());
		}
		LPD3DXBUFFER pCode=NULL;
		HWHR2GLE(UE_SHADER_COMPILATION_FAILED, compileShader(hlShader,
			D3DXGetVertexShaderProfile(m.pd3dDevice), &pCode));
		GPHR(m.pd3dDevice->CreateVertexShader((DWORD*)pCode->GetBufferPointer(),
			&data.pVertexShader));
		SAFE_RELEASE(pCode);
		//insert shader into hashmap
		typedef pair<VS_KEY, VS_DATA> VSPair;
		typedef pair<VSHashMap::iterator, bool> VSInsertResult;
		VGPDEGUB("Inserting key...\n");
		VSInsertResult res = vs_map.insert(VSPair(vs_key, data));
		VGPDEGUB("Done.\n");
		//the find() above missed, so the insertion is expected to take place
		MYASSERT(res.second);
		vs_current = res.first;
	}
	//set shader
	GPHR(m.pd3dDevice->SetVertexDeclaration(vs_current->second.pVertexDeclaration));
	GPHR(m.pd3dDevice->SetVertexShader(vs_current->second.pVertexShader));
}
//Writes the HLSL source of the vertex shader described by vs_key to sstr.
//  sstr:   receives the shader text.
//  vs_key: shader description (input FVF, host attribute counts, output color/texcoord
//          counts, matrix indices, channel and texgen settings).
//  FVFout: set to the FVF describing the shader's output (XYZRHW plus the colors and
//          texture coordinates that are emitted).
//Throws hardware_fatal_exception when the key describes something that cannot be
//generated (missing vertex attributes, unemulated texgen modes, bad texgen ordering).
//`level` is read by the SWRITE macro (defined elsewhere), presumably as the current
//indentation depth of the emitted text.
void GP::generateVertexShader(ostream& sstr, const VS_KEY &vs_key, DWORD &FVFout) {
	int level = 0;

	if(vs_key.ntex == 0 && vs_key.ncol == 0)
		throw hardware_fatal_exception("GP Invalid BP GenMode!");

	//--- input structure: one member per attribute the host supplies ---
	SWRITE "struct VS_INPUT {\n"; level++;
	SWRITE "float4 pos : POSITION;\n";
	if(vs_key.host.nnrm) {
		if(!(vs_key.FVF & D3DFVF_NORMAL))
			throw hardware_fatal_exception("GP missing normals!");
		if(vs_key.host.nnrm >= 2)
			throw hardware_fatal_exception("GP Binormals unemulated!");
		SWRITE "float3 normal : NORMAL;\n";
	}
	if(vs_key.host.ncol) {
		if(!(vs_key.FVF & D3DFVF_DIFFUSE))
			throw hardware_fatal_exception("GP missing diffuse color!");
		if(vs_key.host.ncol == 2 && !(vs_key.FVF & D3DFVF_SPECULAR))
			throw hardware_fatal_exception("GP missing specular color!");
		SWRITE "float4 color["<< (int)vs_key.host.ncol <<"] : COLOR;\n";
	}
	if(vs_key.host.ntex) {
		//the host texcoord count must equal the count encoded in the FVF
		if(vs_key.host.ntex != ((vs_key.FVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT))
			throw hardware_fatal_exception("GP missing texture coordinates!");
		SWRITE "float2 texcoord["<< (int)vs_key.host.ntex <<"] : TEXCOORD;\n";
	}
	if(vs_key.FVF & D3DFVF_LASTBETA_UBYTE4) {
		//per-vertex matrix indices
		SWRITE "int4 mat_index : BLENDINDICES;\n";
	}
	level--; SWRITE "};\n";
	//--- output structure, and the matching output FVF ---
	SWRITE "struct VS_OUTPUT {\n"; level++;
	SWRITE "float4 pos : POSITION;\n";
	FVFout = D3DFVF_XYZRHW;
	if(vs_key.ncol) {
		FVFout |= D3DFVF_DIFFUSE | (vs_key.ncol == 2 ? D3DFVF_SPECULAR : 0);
		SWRITE "float4 color["<< (int)vs_key.ncol <<"] : COLOR;\n";
	}
	if(vs_key.ntex) {
		FVFout |= vs_key.ntex << D3DFVF_TEXCOUNT_SHIFT;
		SWRITE "float2 texcoord["<< (int)vs_key.ntex <<"] : TEXCOORD;\n";
	}
	//--- helper structs and the uniform constants, bound to fixed register offsets ---
	level--; SWRITE "};\n"
		"struct CHANNEL {\n"
		"  float4 ambient, material;\n"
		"};\n"
		"struct LIGHT {\n"
		"  float4 color;\n"
		"  float3 a, k, pos, dir_ha;\n"
		"};\n";
	SWRITE "uniform CHANNEL channel[2] : register(c"<< VSCOFF_COLORS <<");\n";
	SWRITE "uniform LIGHT light[8] : register(c"<< VSCOFF_LIGHTS <<");\n";
	SWRITE "uniform float4x4 pos_mtx["<< NMATRICES <<"] : register(c"<<
		VSCOFF_PMATRIX <<");\n";
	SWRITE "uniform float3x3 nrm_mtx["<< NMATRICES <<"] : register(c"<<
		VSCOFF_NMATRIX <<");\n";
	SWRITE "uniform float4x3 wv_mtx["<< NMATRICES <<"] : register(c"<<
		VSCOFF_WVMATRIX <<");\n";
	SWRITE "uniform float4x3 t_mtx["<< NMATRICES <<"] : register(c"<<
		VSCOFF_TMATRIX <<");\n";
	SWRITE "uniform float4x3 dt_mtx["<< DUALTEX_NMATRICES <<"] : register(c"<<
		VSCOFF_DTMATRIX <<");\n";

	SWRITE "VS_OUTPUT main(const VS_INPUT input) { VS_OUTPUT output;\n"; level++;

	//--- position: matrix chosen per vertex when the MI_GEO bit of midx is set,
	//    otherwise the fixed index from the key ---
	if(getbitr(vs_key.midx, MI_GEO)) {
		SWRITE "output.pos = mul(input.pos, pos_mtx[input.mat_index.x]);\n";
	} else {
		SWRITE "output.pos = mul(input.pos, pos_mtx["<<
			(int)vs_key.matrix_index[MI_GEO] <<"]);\n";
	}

	//--- colors: both channels are always computed as locals color0/color1 (the
	//    color-based texgens below read them), but only ncol of them are output ---
	for(int i=0; i<2; i++) {
		SWRITE "float4 color"<< i <<";\n";
		SWRITE "{\n"; level++;
		if(vs_key.force_white) { //Force all colors to white
			SWRITE "color"<< i <<" = 1;\n";
		} else {
			vs_do_channel(sstr, "rgb", vs_key.color[i], i, vs_key);
			vs_do_channel(sstr, "a", vs_key.alpha[i], i, vs_key);
		}
		level--; SWRITE "}\n";
	}
	for(int i=0; i<vs_key.ncol; i++) {
		SWRITE "output.color["<< i <<"] = color"<< i <<";\n";
	}

	//--- texture coordinate generation ---
	//tgo tracks the kind of the latest texgen seen; the checks below enforce the order
	//regular -> emboss -> STRGBC0 -> STRGBC1 and throw when it is violated.
	enum TexGenOrder { TGO_REGULAR, TGO_BUMP, TGO_SRTG0, TGO_SRTG1 } tgo = TGO_REGULAR;
	if(vs_key.ntex) {
		//component of input.mat_index used for the next per-vertex texture matrix;
		//moves from 'y' to 'z' after its first use and may not be used after that
		char apart = 'y';
		for(size_t i=0; i<vs_key.ntex; i++) {
			const VS_KEY::TEXGEN &texgen = vs_key.texgen[i];
			SWRITE "{\n"; level++;
			if(texgen.type == gx::TGT_REGULAR) {
				if(tgo != TGO_REGULAR)
					throw hardware_fatal_exception("GP Invalid TexGenOrder!");
				//HLSL expression for the float4 source of this texgen; the constant
				//"1" is substituted when the host does not supply the attribute
				string input_reg;
				switch(texgen.source) {
	case gx::TGS_POS:
		input_reg = "input.pos";
		break;
	case gx::TGS_NORMAL:
		if(!vs_key.host.nnrm)
			input_reg = "1";
		else
			input_reg = "float4(input.normal, 1)";
		break;
	case gx::TGS_TEX0:
	case gx::TGS_TEX1:
	case gx::TGS_TEX2:
	case gx::TGS_TEX3:
	case gx::TGS_TEX4:
	case gx::TGS_TEX5:
	case gx::TGS_TEX6:
	case gx::TGS_TEX7:
		if(vs_key.host.ntex > texgen.source - gx::TGS_TEX0) {
			ostringstream str;
			str <<"float4(input.texcoord["<<
				(texgen.source - gx::TGS_TEX0) <<"], 1, 1)";
			input_reg = str.str();
		} else
			input_reg = "1";
		break;
	default:
		throw hardware_fatal_exception("GP Unemulated TexGen input!");
				}
				//magic number, bad.
				//NOTE(review): matrix index 10 is treated as "no transform" (the input
				//is passed straight through) - presumably an identity slot; the bit
				//index 1 of midx is likewise hard-coded. Confirm both.
				if(getbitr(vs_key.midx, 1) || vs_key.matrix_index[MI_TEX(i)] != 10) {
					string matrix;
					{
						ostringstream str;
						if(getbitr(vs_key.midx, 1))
							str <<"t_mtx[input.mat_index."<< apart <<"]";
						else
							str <<"t_mtx["<< (int)vs_key.matrix_index[MI_TEX(i)] <<"]";
						matrix = str.str();
					}
					if(getbitr(vs_key.midx, 1)) {
						MYASSERT(apart != 'z');
						apart = 'z';
					}
					if(vs_key.texgen[i].projection) {
						//projective texgen: divide s,t by the third component
						/*ADD_INSTR "dp4 r0.x, v%i, %s\n", input_reg, meh[2]);
						ADD_INSTR "rcp r2.x, r0.x\n");
						ADD_INSTR "dp4 r1.x, v%i, %s\n", input_reg, meh[0]);
						ADD_INSTR "dp4 r1.y, v%i, %s\n", input_reg, meh[1]);
						ADD_INSTR "mul oT%i, r1, r2.x\n", i);*/
						SWRITE "float3 temptc = mul("<< input_reg <<", "<< matrix <<");\n";
						SWRITE "output.texcoord["<< i <<"] = temptc.xy / temptc.z;\n";
					} else {
						SWRITE "output.texcoord["<< i <<"] = mul("<<
							input_reg <<", "<< matrix <<");\n";
					}
				} else {
					SWRITE "output.texcoord["<< i <<"] = "<< input_reg <<";\n";
				}
			} else if(texgen.type == gx::TGT_EMBOSS_MAP) {
				//emboss must follow the regular texgens and refer to an earlier texcoord
				if(tgo > TGO_BUMP || vs_key.texgen[i].emboss_source >= i)
					throw hardware_fatal_exception("GP Invalid TexGenOrder!");
				tgo = TGO_BUMP;

				if(getbitr(vs_key.midx, MI_GEO)) {
					SWRITE "float3 vpos = mul(input.pos, wv_mtx[input.mat_index.x]);\n";
				} else {
					SWRITE "float3 vpos = mul(input.pos, wv_mtx["<<
						(int)vs_key.matrix_index[MI_GEO] <<"]);\n";
				}
				SWRITE "float3 ldir = normalize(light["<<
					(int)texgen.emboss_light <<"].pos - vpos);\n";
				//NOTE(review): the emitted code reads input.tangent and input.binormal,
				//but the VS_INPUT struct written above never declares those members,
				//so a shader taking this branch looks like it cannot compile.
				SWRITE "output.texcoord["<< i <<"].xy = output.texcoord["<<
					(int)texgen.emboss_source <<"] + "
					"mul(ldir, float3x3(input.tangent, input.binormal, input.normal));\n";
			} else if(texgen.type == gx::TGT_COLOR_STRGBC0) {
				//texcoord taken from the red/green of color channel 0
				if(tgo >= TGO_SRTG0)
					throw hardware_fatal_exception("GP Invalid TexGenOrder!");
				tgo = TGO_SRTG0;
				SWRITE "output.texcoord["<< i <<"].xy = color0.rg;\n";
			} else if(texgen.type == gx::TGT_COLOR_STRGBC1) {
				//texcoord taken from the red/green of color channel 1; only accepted
				//directly after a STRGBC0 texgen
				if(tgo != TGO_SRTG0)
					throw hardware_fatal_exception("GP Invalid TexGenOrder!");
				tgo = TGO_SRTG1;
				SWRITE "output.texcoord["<< i <<"].xy = color1.rg;\n";
			} else {
				throw hardware_fatal_exception("GP Unemulated TexGen!");
			}
			level--; SWRITE "}\n";
		}
	}

	SWRITE "return output;\n";
	level--; SWRITE "}\n";
}
//Emits the HLSL statements that compute one part (rgb or alpha) of color channel i
//into the shader-local variable "color<i>".
//  sstr:    receives the shader text.
//  chstr:   swizzle of the part being computed, "rgb" or "a".
//  channel: lighting/material settings for this part of the channel.
//  i:       channel number (0 or 1); used for color<i>, channel[i] and input.color[i].
//  vs_key:  full shader key; supplies the FVF, matrix indices and host color count.
//Emitted computation: when unlit the value starts at 1; when lit it is the sum over
//the enabled lights of intensity * light color, plus the ambient term, saturated.
//The result is then multiplied by the material color (register or vertex color).
//Throws hardware_fatal_exception when lighting needs normals, or the ambient source
//needs a vertex color, that the FVF does not provide.
void GP::vs_do_channel(ostream& sstr, const char *chstr, const VS_KEY::CHANNEL &channel,
											 int i, const VS_KEY &vs_key)
{
	//the caller emits this code two scopes deep (main body + per-color block)
	int level = 2;
	if(!channel.lit) {
		SWRITE "color"<< i <<"."<< chstr <<" = 1;\n";
	} else {  //lighting enabled
		SWRITE "color"<< i <<"."<< chstr <<" = 0;\n";
		bool lightmask[8] = { channel.light0, channel.light1, channel.light2, channel.light3,
			channel.light4, channel.light5, channel.light6, channel.light7 };
		//one scoped block per enabled light, so the temporaries can be redeclared
		for(int j=0; j<8; j++) if(lightmask[j]) {
			SWRITE "{ float intensity;\n"; level++;
			//get vertex position in view space
			if(getbitr(vs_key.midx, MI_GEO)) {
				SWRITE "float3 vpos = mul(input.pos, wv_mtx[input.mat_index.x]);\n";
			} else {
				SWRITE "float3 vpos = mul(input.pos, wv_mtx["<<
					(int)vs_key.matrix_index[MI_GEO] <<"]);\n";
			}
			//calculate reverse light direction and the transformed normal; these are
			//only needed by the diffuse and attenuation terms
			if(channel.atten_enabled || channel.diffuse_func != GX_DF_NONE) {
				SWRITE "float3 ldir = normalize(light["<< j <<"].pos - vpos);\n";
				if(!(vs_key.FVF & D3DFVF_NORMAL))
					throw hardware_fatal_exception("nsyNoNormal");
				if(getbitr(vs_key.midx, MI_GEO)) {
					SWRITE "float3 normalt = "
						"normalize(mul(input.normal, nrm_mtx[input.mat_index.x]));\n";
				} else {
					SWRITE "float3 normalt = normalize(mul(input.normal, nrm_mtx["<<
						(int)vs_key.matrix_index[MI_GEO] <<"]));\n";
				}
			}
			//diffuse term
			if(channel.diffuse_func == GX_DF_NONE) {
				SWRITE "intensity = 1;\n";
			} else {
				SWRITE "intensity = dot(normalt, ldir);\n";
				if(channel.diffuse_func == GX_DF_CLAMP) {
					SWRITE "intensity = saturate(intensity);\n";
				}
			}
			//attenuation: intensity *= sat(a.z*aatt^2 + a.y*aatt + a.x) /
			//                          (k.z*d^2 + k.y*d + k.x)
			if(channel.atten_enabled) {
				if(channel.atten_func) {  //spotlight
					SWRITE "float aatt = saturate(dot(light["<< j <<"].dir_ha, ldir));\n";
					SWRITE "float d = distance(light["<< j <<"].pos, vpos);\n";
				} else {  //specular  //normalize(vpos - light[%i].pos)
					SWRITE "float aatt = dot(normalt, ldir) > 0 ? "
						"saturate(dot(normalt, light["<< j <<"].dir_ha)) : 0;\n";
					//terminated with a newline like every other emitted statement, so
					//the following statement starts on its own line in shader listings
					SWRITE "float d = aatt;\n";
				}
				SWRITE "intensity *= saturate(light["<< j <<"].a.z * aatt*aatt + "
					"light["<< j <<"].a.y * aatt + light["<< j <<"].a.x) / "
					"(light["<< j <<"].k.z * d*d + light["<< j <<"].k.y * d + "
					"light["<< j <<"].k.x);\n";
			}
			SWRITE "color"<< i <<"."<< chstr <<" += "
				"intensity * light["<< j <<"].color."<< chstr <<";\n";
			level--; SWRITE "}\n";
		}

		//add the ambient term; the statement is opened here and closed by whichever
		//ambient source is selected below
		SWRITE "color"<< i <<"."<< chstr <<" = "
			"saturate(color"<< i <<"."<< chstr <<" + ";
		if(channel.ambient_source_vertex) {
			if(vs_key.FVF & D3DFVF_DIFFUSE) {
				SWRITE_NOLEVEL "input.color["<< i <<"]."<< chstr <<");\n";
			} else
				throw hardware_fatal_exception("nsyNoColor 2");
		} else {
			SWRITE_NOLEVEL "channel["<< i <<"].ambient."<< chstr <<");\n";
		}
	}
	//material term
	if(channel.material_source_vertex) {
		//when the host supplies no color for this channel the multiply is skipped
		if(vs_key.host.ncol > i) {
			SWRITE "color"<< i <<"."<< chstr <<" *= "
				"input.color["<< i <<"]."<< chstr <<";\n";
		} /*else
			throw hardware_fatal_exception("nsyNoColor 1");*/
	} else {
		SWRITE "color"<< i <<"."<< chstr <<" *= "
			"channel["<< i <<"].material."<< chstr <<";\n";
	}
}

//Brings the pixel pipeline up to date: processes the 4 pending TEV register loads,
//then, if the pixel state is flagged dirty, applies it either as a pixel shader or as
//fixed-function texture stage states, depending on g::gp_pixelshader.
void GP::setPixelProcessing() {
	for(int reg=0; reg!=4; ++reg) {
		handle_tev_reg_load(reg);
	}

	if(ps_control.set) {
		ps_control.set = false;
		//user should be able to select between fixed pipeline and shaders
		if(!g::gp_pixelshader) {
			setTextureStageStates();
		} else {
			setPixelShader();
		}
	}
}
//Binds the pixel shader matching ps_key, generating and compiling it on a cache miss.
//If compilation fails on hardware whose pixel shader profile is below major version 2,
//the user is informed, pixel shaders are switched off (g::gp_pixelshader = false) and
//the fixed-function path setTextureStageStates() is used instead.
void GP::setPixelShader() {
	PSHashMap::iterator itr = ps_map.find(ps_key);
	if(itr != ps_map.end()) {
		stat.ps_cachehit++;
		ps_current = itr;
	} else {
		stat.pixelshaders++;
		PS_DATA data;
		ostringstream sstr;
		generatePixelShader(sstr, ps_key);
		string hlShader = sstr.str();
		if(g::verbose && g::gp_log) {
			DEGUB("Compiling pixel shader:\n");
			fprintShader(df, hlShader.c_str());
		}
		LPD3DXBUFFER pCode=NULL;
		const char *profile = D3DXGetPixelShaderProfile(m.pd3dDevice);
		HRESULT hr = compileShader(hlShader, profile, &pCode);
		//NOTE(review): compileShader() as written returns S_OK on every return path, so
		//this branch is only reachable if that changes - confirm against the THR macro.
		if(FAILED(hr)) {
			MYASSERT(profile != NULL);
			//profile[3] is read as the major version digit and profile[5] as the minor
			//one, i.e. the profile string is assumed to look like "ps_M_m"
			if(profile[3] - '0' >= 2) {
				HWHR2GLE(UE_SHADER_COMPILATION_FAILED, hr);
			} else {	//test this section too!
				ostringstream str;
				str <<"Pixel Shader compilation failed. Your graphics card supports "
					"v"<< profile[3] <<"."<< profile[5] <<", but v2.0 is required for many "
					"(if not most) operations. Pixel shaders will now be disabled. "
					"Expect errors containing \"Unemulated TEV\".";
				//the emulation thread is suspended while the message box is shown
				g_capp.SuspendEmuThread();
				THE_ONE_MESSAGE_BOX(str.str().c_str());
				g_capp.ResumeEmuThread();
				g::gp_pixelshader = false;
				setTextureStageStates();
				return;
			}
		}
		GPHR(m.pd3dDevice->CreatePixelShader((DWORD*)pCode->GetBufferPointer(),
			&data.pPixelShader));
		SAFE_RELEASE(pCode);
		//insert shader into hashmap
		typedef pair<PS_KEY, PS_DATA> PSPair;
		typedef pair<PSHashMap::iterator, bool> PSInsertResult;
		PSInsertResult res = ps_map.insert(PSPair(ps_key, data));
		//the find() above missed, so the insertion is expected to take place
		MYASSERT(res.second);
		ps_current = res.first;
	}
	GPHR(m.pd3dDevice->SetPixelShader(ps_current->second.pPixelShader));
}

//Writes the HLSL source of the pixel shader emulating the TEV setup in ps_key to sstr.
//  sstr:   receives the shader text.
//  ps_key: TEV description (number of stages, per-stage order/color/alpha settings,
//          swap table, konstant selections, alpha function, color/texcoord counts).
//The emitted shader keeps the four TEV registers as locals (prev, c0, c1, c2), runs one
//color and one alpha statement per active stage, applies the alpha test via clip() and
//returns prev.
//Throws hardware_fatal_exception for configurations that are invalid or not emulated.
//`level` is read by the SWRITE macro (defined elsewhere).
void GP::generatePixelShader(ostream& sstr, const PS_KEY &ps_key) {
	int level = 0;

	if(ps_key.ntex == 0 && ps_key.ncol == 0)
		throw hardware_fatal_exception("GP Invalid BP GenMode!");

	GPDEGUB("%i TEV stages active.\n", ps_key.ntev);
	if(ps_key.ntev > 8)
		throw hardware_fatal_exception("GP Unemulated number of TEV stages!");
	{
		GPDEGUB("TEV Swap:  Red\tGreen\tBlue\tAlpha\n");
		for(int i=0; i<4; i++) {
			const PS_KEY::SWAP_ENTRY &swap = ps_key.swap[i];
			GPDEGUB("%i\t  %s\t%s\t%s\t%s\n", i, s_tevchan_degub[swap.r],
				s_tevchan_degub[swap.g], s_tevchan_degub[swap.b], s_tevchan_degub[swap.a]);
		}
	}

	//uniforms: 4 TEV color registers, 4 konstant registers, 8 samplers
	for(int i=0; i<4; i++) {
		SWRITE "float4 const"<< i <<" : register(c"<< PSC_CREG(i) <<");\n";
		SWRITE "float4 konst"<< i <<" : register(c"<< PSC_KREG(i) <<");\n";
	}
	SWRITE "sampler smp[8] : register(s0);\n";

	//entry point signature: only the interpolators that exist are declared
	{
		SWRITE "float4 main(in ";
		bool comma = false;
		if(ps_key.ncol) {
			SWRITE (comma ? "," : "") <<"float4 vcolor["<< (int)ps_key.ncol <<"] : COLOR0";
			comma = true;
		}
		if(ps_key.ntex) {
			SWRITE (comma ? "," : "") <<"float4 uv["<< (int)ps_key.ntex <<"] : TEXCOORD0";
			comma = true;
		}
		SWRITE ") : COLOR {\n"; level++;
		SWRITE "float4 prev=const0, c0=const1, c1=const2, c2=const3,"
			"rastemp, textemp, konsttemp;\n";
	}
	for(int i=0; i<ps_key.ntev; i++) {
		SWRITE "\n";
		const PS_KEY::STAGE &stage = ps_key.stage[i];
		const PS_KEY::STAGE::COLOR &cl = stage.c;
		const PS_KEY::STAGE::ALPHA &al = stage.a;

		//--- debug dump of the stage ---
		GPDEGUB("Stage %i:\n", i);
		{
			const char *colors[8] = { "Channel 0", "Channel 1", TROGDOR, TROGDOR, TROGDOR,
				"Bump", "BumpN", "Zero" };
			GPDEGUB("Order: TexMap %i | TexCoord %i | TexMap %s | Color %s(%i)\n",
				stage.o.texMap, stage.o.texCoord, abled(stage.o.texEnabled),
				colors[stage.o.rasCID], stage.o.rasCID);
		}
		{
			GPDEGUB("Color: a %s | b %s | c %s | d %s | dest %s | clamp %d | ",
				s_tevcsel_degub[cl.a], s_tevcsel_degub[cl.b], s_tevcsel_degub[cl.c],
				s_tevcsel_degub[cl.d], s_tevreg_degub[cl.dest], cl.clamp);
			if(cl.ISCOMP) {  //compare op
				BYTE ccomp = cl.sub | (cl.scale << 1);
				GPDEGUB("comp %s(%d)\n", s_tevccomp_degub[ccomp], ccomp);
			} else {
				GPDEGUB("sub %d | scale %s(%d) | bias %s(%d)\n", cl.sub,
					s_tevscale_degub[cl.scale], cl.scale, s_tevbias_degub[cl.bias], cl.bias);
			}
		}
		{
			GPDEGUB("Alpha: a %s | b %s | c %s | d %s | dest %s | clamp %d | ",
				s_tevasel_degub[al.a], s_tevasel_degub[al.b], s_tevasel_degub[al.c],
				s_tevasel_degub[al.d], s_tevreg_degub[al.dest], al.clamp);
			if(al.ISCOMP) {  //compare op
				//the alpha compare code is built from the ALPHA stage's own fields
				BYTE acomp = al.sub | (al.scale << 1);
				GPDEGUB("comp %s(%d)\n", s_tevacomp_degub[acomp], acomp);
			} else {
				GPDEGUB("sub %d | scale %s(%d) | bias %s(%d)\n", al.sub,
					s_tevscale_degub[al.scale], al.scale, s_tevbias_degub[al.bias], al.bias);
			}
		}
		GPDEGUB("Swap Select: Tex %i | Ras %i\n", stage.a.swapSelTex, stage.a.swapSelRas);
		{
			GPDEGUB("Konstant Select: Color %s | Alpha %s\n",
				s_tevksel_degub[stage.kSelColor], s_tevksel_degub[stage.kSelAlpha]);
		}

		//--- konstant selection ---
		if(stage.kSelAlpha > KSEL_1_8 && stage.kSelAlpha <= KSEL_K3)
			throw hardware_fatal_exception("GP TEV Invalid KSelAlpha!");
		if(stage.kSelColor > KSEL_1_8 && stage.kSelColor < KSEL_K0)
			throw hardware_fatal_exception("GP TEV Invalid KSelColor!");

		if(stage.kSelColor >= KSEL_K0 && stage.kSelColor <= KSEL_K3) {
			//whole konstant register selected: take its rgb as a vector
			SWRITE "konsttemp = float4("<< s_tevksel_shader[stage.kSelColor] <<".rgb,"<<
				s_tevksel_shader[stage.kSelAlpha] <<");\n";
		} else {
			//scalar selection: replicate it into r, g and b
			SWRITE "konsttemp = float4("<< s_tevksel_shader[stage.kSelColor] <<","<<
				s_tevksel_shader[stage.kSelColor] <<","<< s_tevksel_shader[stage.kSelColor] <<
				","<< s_tevksel_shader[stage.kSelAlpha] <<");\n";
		}

		//--- rasterized color input, routed through the swap table ---
		SWRITE "rastemp = ";
		switch(stage.o.rasCID) {
		case 0:
		case 1:
			if(stage.o.rasCID > ps_key.ncol - 1)
				throw hardware_fatal_exception("GP TEV uses a nonexistent color!");
			else {
				const PS_KEY::SWAP_ENTRY &swap = ps_key.swap[stage.a.swapSelRas];
				SWRITE_NOLEVEL "vcolor["<< (int)stage.o.rasCID <<"]."<<
					s_tevchan_shader[swap.r] << s_tevchan_shader[swap.g] <<
					s_tevchan_shader[swap.b] << s_tevchan_shader[swap.a] <<";\n";
			}
			break;
		case 2: case 3: case 4:
			throw hardware_fatal_exception("GP TEV Order invalid!");
		case 7: //Zero
			SWRITE_NOLEVEL "0;\n";
			break;
		default:
			throw hardware_fatal_exception("GP Unemulated TEV Order!");
		}

		//--- texture input: only sampled when enabled and actually referenced ---
		if(stage.o.texEnabled &&
			(stage.c.a == CC_TEXC || stage.c.a == CC_TEXA ||
			stage.c.b == CC_TEXC || stage.c.b == CC_TEXA ||
			stage.c.c == CC_TEXC || stage.c.c == CC_TEXA ||
			stage.c.d == CC_TEXC || stage.c.d == CC_TEXA ||
			stage.a.a == CA_TEXA || stage.a.b == CA_TEXA ||
			stage.a.c == CA_TEXA || stage.a.d == CA_TEXA))
		{
			if(stage.o.texCoord > ps_key.ntex - 1)
				throw hardware_fatal_exception("GP TEV uses a nonexistent texCoord!");
			const PS_KEY::SWAP_ENTRY &swap = ps_key.swap[stage.a.swapSelTex];
			//The sampler is selected by the texture map and the coordinate set by
			//texCoord. uv[] has ntex elements, which is what the check above validates
			//texCoord against, so it must be the index used for uv[].
			SWRITE "textemp = tex2D(smp["<< (int)stage.o.texMap <<"], uv["<<
				(int)stage.o.texCoord <<"])."<< s_tevchan_shader[swap.r] <<
				s_tevchan_shader[swap.g] << s_tevchan_shader[swap.b] <<
				s_tevchan_shader[swap.a] <<";\n";
		}

		//clamp is ignored
		//if(!cl.clamp || !al.clamp)
		//throw hardware_fatal_exception("GP TEV Clamp unemulated!");

		//--- color combiner ---
		if(cl.ISCOMP) {
			//d + ((a op b) ? c : 0);
			string a = sprintCCompArg(s_tevcsel_shader[stage.c.a], stage.c.scale);
			string b = sprintCCompArg(s_tevcsel_shader[stage.c.b], stage.c.scale);
			SWRITE s_tevreg_shader[cl.dest] <<".rgb = "<< s_tevcsel_shader[stage.c.d] <<" + "
				"(("<< a <<" "<< (stage.c.sub ? "==" : ">") <<" "<< b <<") ? "<<
				s_tevcsel_shader[stage.c.c] <<" : 0);\n";
		} else {
			//(d +/- lerp(a, b, c) <bias>) <scale>
			SWRITE s_tevreg_shader[cl.dest] <<".rgb = "
				"("<< s_tevcsel_shader[stage.c.d] <<" "<< (stage.c.sub ? '-' : '+') <<" "
				"lerp("<< s_tevcsel_shader[stage.c.a] <<", "<<
				s_tevcsel_shader[stage.c.b] <<", "<< s_tevcsel_shader[stage.c.c] <<") "<<
				s_tevbias_shader[stage.c.bias] <<") "<< s_tevscale_shader[stage.c.scale] <<";\n";
		}
		//--- alpha combiner ---
		if(al.ISCOMP) {
			string a = sprintACompArg(s_tevasel_shader[stage.a.a], stage.a.scale);
			string b = sprintACompArg(s_tevasel_shader[stage.a.b], stage.a.scale);
			SWRITE s_tevreg_shader[al.dest] <<".a = "<< s_tevasel_shader[stage.a.d] <<" + "
				"(("<< a <<" "<< (stage.a.sub ? "==" : ">") <<" "<< b <<") ? "<<
				s_tevasel_shader[stage.a.c] <<" : 0);\n";
		} else {
			//NOTE(review): unlike the color path, sub/bias/scale are not applied to
			//alpha here - confirm whether that is intentional.
			SWRITE s_tevreg_shader[al.dest] <<".a = "<< s_tevasel_shader[stage.a.d] <<" + "
				"lerp("<< s_tevasel_shader[stage.a.a] <<", "<<
				s_tevasel_shader[stage.a.b] <<", "<< s_tevasel_shader[stage.a.c] <<");\n";
		}
	}
	SWRITE "\n";
	//--- alpha test: discard the pixel unless (comp0) op (comp1) holds ---
	{
		const char *op_str[] = { "&&", "||", "!=", "==" };
		string str0 = sprintAFArg(ps_key.alphafunc.comp0, ps_key.alphafunc.a0);
		string str1 = sprintAFArg(ps_key.alphafunc.comp1, ps_key.alphafunc.a1);
		SWRITE "if(!(("<< str0 <<") "<< op_str[ps_key.alphafunc.op] <<" ("<< str1 <<")))\n";
		SWRITE "  clip(-1);\n";
	}
	SWRITE "return prev;\n"; level--;
	SWRITE "}\n";
}
//Builds the HLSL boolean expression for one half of the TEV alpha function.
//  comp: comparator (gx::C_NEVER .. gx::C_ALWAYS).
//  ref:  8-bit reference value; emitted as "(<ref>/255.0f)".
//Returns "false"/"true" for NEVER/ALWAYS, otherwise "prev.a <op> (<ref>/255.0f)".
//Throws hardware_fatal_exception for an unknown comparator.
string sprintAFArg(BYTE comp, BYTE ref) {
	//indexed with comp-1; this relies on gx::C_LESS..gx::C_GEQUAL having the
	//consecutive values 1..6 in the order of the case labels below
	const char *comp_str[] = { "<", "==", "<=", ">", "!=", ">=" };
	switch(comp) {
	case gx::C_NEVER:
		return "false";
	case gx::C_ALWAYS:
		return "true";
	case gx::C_LESS:
	case gx::C_EQUAL:
	case gx::C_LEQUAL:
	case gx::C_GREATER:
	case gx::C_NEQUAL:
	case gx::C_GEQUAL:
		{
			ostringstream str;
			//ref must be widened before streaming: a byte-sized operand is inserted
			//as a raw character instead of as a decimal number
			str <<"prev.a "<< comp_str[comp-1] <<" ("<< (int)ref <<"/255.0f)";
			return str.str();
		}
	default:
		throw hardware_fatal_exception("GP Internal error: bad TEV AlphaFunc Comparator!");
	};
}
//Builds the HLSL operand for a TEV compare on the low 1, 2 or 3 channels of `sel`.
//  op 0: "<sel>.r"
//  op 1: "(<sel>.g*256 + <sel>.r)"
//  op 2: "(<sel>.b*256*256 + <sel>.g*256 + <sel>.r)"
//Any other op throws hardware_fatal_exception.
string sprintCompArg(const char *sel, BYTE op) {
	if(op > 2)
		throw hardware_fatal_exception("GP Internal error: bad TEV comp op!");

	const string s(sel);
	if(op == 0)
		return s + ".r";

	string expr = "(";
	if(op == 2)
		expr += s + ".b*256*256 + ";
	expr += s + ".g*256 + " + s + ".r)";
	return expr;
}
//Color variant of the compare operand: op 3 compares the whole "<sel>.rgb",
//every other op is delegated to sprintCompArg().
string sprintCCompArg(const char *sel, BYTE op) {
	if(op != 3)
		return sprintCompArg(sel, op);
	return string(sel) + ".rgb";
}
//Alpha variant of the compare operand: op 3 uses `sel` unchanged,
//every other op is delegated to sprintCompArg().
string sprintACompArg(const char *sel, BYTE op) {
	if(op != 3)
		return sprintCompArg(sel, op);
	return string(sel);
}

//Writes `html` to `file` with every "<...>" tag removed, followed by a newline.
//  file: destination stream.
//  html: NUL-terminated text; no entity decoding is performed.
//A '<' that is never closed by a '>' is not treated as a tag: the remaining text,
//including that '<', is written out unchanged.
void fprintHTMLAsText(FILE *file, const char *html) {
	for(;;) {
		const char *open = strchr(html, '<');
		if(!open)
			break;
		const char *close = strchr(open, '>');
		if(!close)
			break;	//unterminated tag: fall through and print the rest verbatim
		//text in front of the tag
		if(html != open)
			fwrite(html, 1, open - html, file);
		html = close + 1;
	}
	fputs(html, file);
	fprintf(file, "\n");
}
//Writes `shader` to `file` as a listing: every line is prefixed with its 1-based line
//number and a tab.
//  file:   destination stream.
//  shader: NUL-terminated source text with '\n' line ends.
//The text after the last '\n' is printed as a final numbered line; when the source
//ends with '\n' that final line is empty.
void fprintShader(FILE *file, const char *shader) {
	int line = 0;
	for(;;) {
		fprintf(file, "%i\t", ++line);
		const char *eol = strchr(shader, '\n');
		if(!eol)
			break;
		fwrite(shader, 1, eol - shader, file);
		fprintf(file, "\n");
		shader = eol + 1;
	}
	//last line, which has no terminating newline of its own
	fputs(shader, file);
	fprintf(file, "\n");
}
//Compiles HLSL source with D3DXCompileShader, using "main" as the entry point.
//  hlShader: shader source text.
//  profile:  target profile string, passed straight to D3DXCompileShader.
//  ppCode:   receives the compiled code buffer.
//Any compiler messages are written to the log, preceded by the shader listing unless
//verbose GP logging is on (in which case the callers in this file have already
//printed it). With verbose GP logging on, the disassembly is logged as well.
//NOTE(review): the function returns S_OK on every return path; a failed hr is handed
//to THR(), which presumably throws (macro not visible here) - confirm.
HRESULT GP::compileShader(const string& hlShader, const char *profile,
													LPD3DXBUFFER *ppCode)
{
	LPD3DXBUFFER pErrorMsgs=NULL;
	HRESULT hr = D3DXCompileShader(hlShader.c_str(), hlShader.size(), NULL, NULL, "main",
		profile, 0, ppCode, &pErrorMsgs, NULL);
	if(pErrorMsgs) {
		if(!(g::verbose && g::gp_log)) {
			DEGUB("Shader:\n");
			fprintShader(df, hlShader.c_str());
		}
		DEGUB("Compilation log:\n%s\n", pErrorMsgs->GetBufferPointer());
	}
	//released before the result is checked, so it is not leaked if THR leaves the function
	SAFE_RELEASE(pErrorMsgs);
	THR(hr);
	if(g::gp_log && g::verbose) {
		LPD3DXBUFFER pBuffer=NULL;
		HR(D3DXDisassembleShader((DWORD*)(*ppCode)->GetBufferPointer(), true, NULL,
			&pBuffer));
		DEGUB("Disassembly:\n");
		//the disassembly is requested with the flag set to true and is stripped of
		//markup before it is logged
		fprintHTMLAsText(df, (char*)pBuffer->GetBufferPointer());
		SAFE_RELEASE(pBuffer);
	}
	return S_OK;
}

/*TevAlphaSel mapTEVCS2AS(TevColorSel cs) {
switch(cs) {
#define PREPAIR(name) case CC_C##name: case CC_A##name: return CA_A##name
#define POSTPAIR(name) case CC_##name##C: case CC_##name##A: return CA_##name##A
/*case CC_CPREV:
case CC_APREV:
return CA_APREV;*//*
PREPAIR(PREV);
PREPAIR(0);
PREPAIR(1);
PREPAIR(2);
POSTPAIR(TEX);
POSTPAIR(RAS);
case CC_KONST:
throw hardware_fatal_exception("GP Unemulated TEV Alpha!");
case CC_ZERO:
return CA_ZERO;
default:
return (TevAlphaSel)-1;
}
}
bool GP::TEV::STAGE::stage_is_combined(const COLOR &color, const ALPHA &alpha) {
#define SIE_EQUAL(id) if(color.id != alpha.id) return false
SIE_EQUAL(dest);
SIE_EQUAL(scale);
SIE_EQUAL(clamp);
SIE_EQUAL(sub);
SIE_EQUAL(bias);
#define SIE_MAP(id) { TevAlphaSel mapped = mapTEVCS2AS(color.id);\
if(mapped != alpha.id) return false; }
SIE_MAP(a);
SIE_MAP(b);
SIE_MAP(c);
SIE_MAP(d);
return true;
}*/

//Fixed-function fallback: maps each active TEV stage in ps_key onto a D3D texture
//stage (color op and alpha op) through setTSS().
//Only a handful of TEV combinations can be expressed this way; anything else throws
//hardware_fatal_exception through TSSFAILURE.
//With the TEV result written as d + a*(1-c) + b*c, the mappings below are:
//  a==b==c==d            -> ADD(a, a)
//  a==b==c==ZERO         -> SELECTARG1(d)
//  a==d==ZERO            -> MODULATE(b, c)
//  d==ZERO,b=TEXC,c=TEXA -> BLENDTEXTUREALPHA(texture, a)
//  a=RASC,b=ONE,c=TEXC,d=ZERO -> ADDSMOOTH(texture, diffuse)
void GP::setTextureStageStates() {
	if(ps_key.ntev > 8)
		throw hardware_fatal_exception("GP Unemulated number of TEV stages!");
	//NOTE: TSSFAILURE is also used by tevCSel2D3DTA/tevASel2D3DTA further down the file.
#define TSSFAILURE(text) throw hardware_fatal_exception(text\
	" You need a better graphics card, one with pixel shaders.")

	for(int i=0; i<ps_key.ntev; i++) {
		const PS_KEY::STAGE &stage = ps_key.stage[i];
		const PS_KEY::STAGE::COLOR &color = stage.c;
		const PS_KEY::STAGE::ALPHA &alpha = stage.a;

		//only clamped stages that write to destination 0 with no scale, subtract or
		//bias are supported
		if(!color.clamp || color.dest || color.scale || color.sub || color.bias)
			TSSFAILURE("GP Unemulated TEV Color0!");
		if(!alpha.clamp || alpha.dest || alpha.scale || alpha.sub || alpha.bias)
			TSSFAILURE("GP Unemulated TEV Alpha0!");

		//Color ops
		if(color.a == color.b && color.a == color.c && color.a == color.d) {
			GPHR(setTSS(i, D3DTSS_COLOROP, D3DTOP_ADD));
			GPHR(setTSS(i, D3DTSS_COLORARG1, tevCSel2D3DTA(color.a)));
			GPHR(setTSS(i, D3DTSS_COLORARG2, tevCSel2D3DTA(color.a)));
		} else if(color.a == CC_ZERO && color.b == CC_ZERO && color.c == CC_ZERO) {
			GPHR(setTSS(i, D3DTSS_COLOROP, D3DTOP_SELECTARG1));
			GPHR(setTSS(i, D3DTSS_COLORARG1, tevCSel2D3DTA(color.d)));
		} else if(color.a == CC_ZERO && color.d == CC_ZERO) {
			GPHR(setTSS(i, D3DTSS_COLOROP, D3DTOP_MODULATE));
			GPHR(setTSS(i, D3DTSS_COLORARG1, tevCSel2D3DTA(color.b)));
			GPHR(setTSS(i, D3DTSS_COLORARG2, tevCSel2D3DTA(color.c)));
		} else if(color.d == CC_ZERO && color.b == CC_TEXC && color.c == CC_TEXA) {
			GPHR(setTSS(i, D3DTSS_COLOROP, D3DTOP_BLENDTEXTUREALPHA));
			GPHR(setTSS(i, D3DTSS_COLORARG1, D3DTA_TEXTURE));
			GPHR(setTSS(i, D3DTSS_COLORARG2, tevCSel2D3DTA(color.a)));
		} else if(color.a == CC_RASC && color.b == CC_ONE &&
			color.c == CC_TEXC && color.d == CC_ZERO)
		{
			GPHR(setTSS(i, D3DTSS_COLOROP, D3DTOP_ADDSMOOTH));
			GPHR(setTSS(i, D3DTSS_COLORARG1, D3DTA_TEXTURE));
			GPHR(setTSS(i, D3DTSS_COLORARG2, D3DTA_DIFFUSE));
		} else
			TSSFAILURE("GP Unemulated TEV Color combination!");

		//Alpha ops
		if(alpha.a == CA_ZERO && alpha.b == CA_ZERO && alpha.c == CA_ZERO) {
			GPHR(setTSS(i, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1));
			GPHR(setTSS(i, D3DTSS_ALPHAARG1, tevASel2D3DTA(alpha.d)));
		} else if(alpha.a == CA_ZERO && alpha.d == CA_ZERO) {
			GPHR(setTSS(i, D3DTSS_ALPHAOP, D3DTOP_MODULATE));
			GPHR(setTSS(i, D3DTSS_ALPHAARG1, tevASel2D3DTA(alpha.b)));
			GPHR(setTSS(i, D3DTSS_ALPHAARG2, tevASel2D3DTA(alpha.c)));
		} else
			TSSFAILURE("GP Unemulated TEV Alpha combination!");
	}
	//NOTE(review): no stage after the last active one is set up here (nothing like
	//D3DTOP_DISABLE) - presumably handled elsewhere; confirm.
}
//Translates a TEV color input selector into the D3DTA_* texture argument used by the
//fixed-function path. Only TEXC, RASC, CPREV and C0 have an equivalent; every other
//selector throws through TSSFAILURE.
DWORD tevCSel2D3DTA(BYTE csel) {
	if(csel == CC_TEXC)
		return D3DTA_TEXTURE;
	if(csel == CC_RASC)
		return D3DTA_DIFFUSE;
	if(csel == CC_CPREV)
		return D3DTA_CURRENT;
	if(csel == CC_C0)
		return D3DTA_TFACTOR;
	TSSFAILURE("GP Unemulated TEV Color source!");
}
//Translates a TEV alpha input selector into the D3DTA_* texture argument used by the
//fixed-function path. Only TEXA, RASA, APREV and A0 have an equivalent; every other
//selector throws through TSSFAILURE.
DWORD tevASel2D3DTA(BYTE asel) {
	if(asel == CA_TEXA)
		return D3DTA_TEXTURE;
	if(asel == CA_RASA)
		return D3DTA_DIFFUSE;
	if(asel == CA_APREV)
		return D3DTA_CURRENT;
	if(asel == CA_A0)
		return D3DTA_TFACTOR;
	TSSFAILURE("GP Unemulated TEV Alpha source!");
}
