// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#include "gp_dlrec.h"
#include "rechelpers.h"
#include "gp_draw_internal.h"

// Record buffer that the REQUEST_DWORD_PTR macro below targets; compileList()
// binds it to its local DLRecBuf through KEEP_POINTER.
static DLRecBuf *cur_recbuf;
// Resources gathered while a list is being recorded (dlr_draw_primitive pushes
// vertex buffers here). The CompiledDisplayList constructor copies them and
// compileList() clears the vector afterwards.
static std::vector<IUnknown*> s_resources;
// DWORD payload gathered while recording (multi-dword XF loads push their data
// here). Copied into the CompiledDisplayList and cleared by compileList().
static std::vector<DWORD> s_dwords;

// Emits "mov ecx, that" followed by a call to func into the current record buffer.
void call_func(void *that, DWORD func);

// Index in s_dwords of the first dword of the XF load being recorded, or
// INVALID_POS when the current load has no data stored in s_dwords.
static size_t s_datapos;
// Identity table handed to handle_xf_multi<XFHandlerRecIndx>; the XFHandlerRecIndx
// handlers read *data from it and use the value as an offset into s_xf_data.
static const DWORD sc_fake_data[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
#define INVALID_POS ((size_t)-1)
// Converts a pointer into s_xf_data to the matching index in s_dwords
// (dword offset from s_xf_data plus s_datapos).
#define DATAPOS(ptr) (((size_t(ptr) - size_t(s_xf_data)) / 4) + s_datapos)

// Registers a pointer-fixup request on the current record buffer.
// Note: the expansion already ends with a semicolon.
#define REQUEST_DWORD_PTR(index) cur_recbuf->request_dword_ptr(index);

/*#define XF_REC_COUNTED_OPS(macro) macro(oxf) macro(oxfa) macro(oxfb) macro(oxfc)\
macro(oxfd) macro(nops) macro(ivcs) macro(odraw_null)
#define DECLARE_XFRCO_STATIC(id) static DWORD s_##id;
XF_REC_COUNTED_OPS(DECLARE_XFRCO_STATIC);*/

void GP::runCompiledList() {
	const DWORD dlsize = m.fifo.writeptr - m.fifo.readptr;
	const DWORD address = m.fifo.readptr;
	CDL_HASHMAP::iterator itr = m_cdl_map.find(address);
	if(itr != m_cdl_map.end()) if(itr->second.dl_size != dlsize) {  //size mismatch, delete
		GPDEGUB("DLR size mismatch: 0x%X -> 0x%X\n", itr->second.dl_size, dlsize);
		m_cdl_map.erase(itr);
		itr = m_cdl_map.end();
	}
	if(itr == m_cdl_map.end()) {
		itr = compileList();
	}
	GPDEGUB("Running Compiled List...\n");
	itr->second.run();
	GPDEGUB("Done\n");
}
// Recompiles the display list between the FIFO read and write pointers into
// machine code and stores it in m_cdl_map under the list's start address.
// Returns an iterator to the newly inserted entry.
GP::CDL_HASHMAP::iterator GP::compileList() {
	GPDEGUB("Display List Recompilation started.\n");
	Timing timing("Display List Recompilation");
	DLRecBuf recbuf;
	//presumably points cur_recbuf at recbuf for this scope - confirm in the macro
	KEEP_POINTER(cur_recbuf, recbuf);
	//captured before the loop below advances the read pointer
	const DWORD dlsize = m.fifo.writeptr - m.fifo.readptr;
	const DWORD address = m.fifo.readptr;
	ZERO_OBJECT(dlrStat);
	//dispatch every opcode of the list to its recording handler
	do {
		BYTE opcode = GP_QUEUE_GET_BYTE;
		(this->*m_rec_opcode[opcode])();
	} while(m.fifo.readptr < m.fifo.writeptr);

	//for every non-zero counter in dlrStat, emit "add dword ptr [&stat.id], dlrStat.id"
	//(81 05 <addr32> <imm32>) so the statistics are applied each time the list runs
#define ADD_STAT(id, str) if(dlrStat.id > 0) { AB(0x81); AB(0x05); AD(&stat.id);\
	AD(dlrStat.id); }
	GP_STATISTICS(ADD_STAT);
	//ret
	AB(0xC3);

	CDL_HASHMAP::_Pairib itr = m_cdl_map.insert(CDL_PAIR(address,
		CompiledDisplayList(*cur_recbuf, dlsize)));
	//the caller only compiles when no entry exists, so the insert must succeed
	MYASSERT(itr.second);

	//reset all temporary structures and variables
	//NOTE(review): these are not cleared if a recording handler throws - confirm
	//whether a failed compile can be followed by another one
	s_dwords.clear();
	s_resources.clear();

	/*ostringstream str;
	str << HEX08(address) <<"."<< HEX(dlsize);
	dumpRecBlock(recbuf, "dlrb" + str.str());
	dumpRecBlock(itr.first->second, "dlr" + str.str());*/

	//GPDEGUB("Display List Recompilation finished.\n");
	return itr.first;
}
// Executes the compiled block by calling the address returned by rcb().
// In non-debug builds all general-purpose registers are saved and restored
// around the call (pusha/popa); debug builds call the block directly.
void CompiledDisplayList::run() {
	sfunc func = (sfunc)rcb();
	__asm {
#ifndef _DEBUG
		pusha //see notes.txt
#endif
			call func
#ifndef _DEBUG
			popa
#endif
	}
}


// Builds a runnable block from a finished record buffer. Extra space is
// requested from the base class for the resource pointers and the dword
// payload collected in s_resources / s_dwords. Both are copied in, and every
// pointer request recorded in the buffer is then patched with the final
// address of its dword.
CompiledDisplayList::CompiledDisplayList(const DLRecBuf &recbuf, DWORD dlsize) :
DynaRecBlockBase(recbuf, s_resources.size()*sizeof(IUnknown*)+s_dwords.size()*4),
dl_size(dlsize), m_nres(s_resources.size()) {
	//resources first
	const size_t resourceCount = s_resources.size();
	for(size_t r = 0; r < resourceCount; ++r)
		m_resources()[r] = s_resources[r];

	//then the dword payload
	const size_t dwordCount = s_dwords.size();
	for(size_t w = 0; w < dwordCount; ++w)
		m_dwords()[w] = s_dwords[w];

	//finally resolve each request: write the address of dword #index at code offset pos
	const size_t requestCount = recbuf.dprv().size();
	for(size_t q = 0; q < requestCount; ++q) {
		POS_INDEX request = recbuf.dprv()[q];
		MAKE(DWORD*, this->rcb()[request.pos]) = m_dwords() + request.index;
	}
}
// Releases the stored resources, but only when refCount() reports 1,
// i.e. when this object is the last one referring to the block.
CompiledDisplayList::~CompiledDisplayList() {
	if(refCount() != 1)
		return;
	for(size_t r = 0; r < m_nres; ++r)
		m_resources()[r]->Release();
}

// Recording handler for NOP: emits no code, only counts the opcode in dlrStat.
void GP::dlr_nop() {
	GPDEGUB("GP NOP\n");
	++dlrStat.nops;
}
// Recording handler for "invalidate vertex cache": emits no code, only counts
// the opcode in dlrStat.
void GP::dlr_invalidate_vertex_cache() {
	GPDEGUB("GP IVC\n");
	++dlrStat.ivcs;
}
// Recording handler for a nested display-list call. Not supported while
// recompiling: always throws hardware_fatal_exception.
void GP::dlr_call_list() {
	throw hardware_fatal_exception("GP Display List tried to call another list!");
}
// Recording handler for "update metrics". Not emulated: always throws
// hardware_fatal_exception.
void GP::dlr_update_metrics() {
	throw hardware_fatal_exception("GP Update Metrics unemulated!");
}

void GP::dlr_load_bp() {
	BPLOAD b;
	b.dword = GP_QUEUE_GET_DWORD;
	GPDEGUB("GP BP Load 0x%02X, 0x%06X\n", b.reg, b.data);
	//bpload(GP_QUEUE_GET_DWORD);
	//push GP_QUEUE_GET_DWORD	//this will be popped by the callee
	AB(0x68);
	AD(b.dword);

	void (GP::*temp)(DWORD) = &GP::bpload;
	call_func(this, MAKE(DWORD, temp));
}

void GP::dlr_load_cp() {
	BYTE reg = GP_QUEUE_GET_BYTE;
	DWORD data = GP_QUEUE_GET_DWORD;
	GPDEGUB("GP CP Load 0x%02X, 0x%08X\n", reg, data);
	m.cp_reg[reg] = data;	//kinda hacky, but this approximates what Dolphin does.
	//cpload(reg, data);
	//push data
	AB(0x68);
	AD(data);
	//push reg
	AB(0x68);
	AD(reg);

	void (GP::*temp)(BYTE, DWORD) = &GP::cpload;
	call_func(this, MAKE(DWORD, temp));
}

void GP::dlr_load_xf() {
	XF_LOG x;
	x.length = GP_QUEUE_GET_WORD + 1;
	x.base = GP_QUEUE_GET_WORD;
	xf_log_start(x.dword);
	if(x.length == 0)
		throw hardware_fatal_exception("GP XP Length is totally whack!");
	int effective_length = MIN(x.length, 16);
	if(effective_length == 1) {
		DWORD d = GP_QUEUE_GET_DWORD;
		if(g::gp_log) {
			//cdl_xflog1(x, d);
			//mov edx, d
			AB(0xBA);
			AD(d);
			//mov ecx, x
			AB(0xB9);
			AD(x.dword);
			//call cdl_xflog
			AB(0xE8);
			REQUEST_CALL(DWORD(xf_log1));
		}
		s_datapos = INVALID_POS;
		handle_xf_single<XFHandlerRec>(x.base, d);
	} else {
		s_datapos = s_dwords.size();
		for(int i=0; i<effective_length; i++) {
			DWORD d = GP_QUEUE_GET_DWORD;
			s_xf_data[i] = d;
			s_dwords.push_back(d);
		}
		if(g::gp_log) {
			//cdl_xflog(x, data);
			//mov edx, data
			AB(0xBA);
			REQUEST_DWORD_PTR(s_datapos);
			//mov ecx, x
			AB(0xB9);
			AD(x.dword);
			//call cdl_xflog
			AB(0xE8);
			REQUEST_CALL(DWORD(xf_log));
		}
		handle_xf_multi<XFHandlerRec>(x.length, x.base, s_xf_data);
	}
	dlrStat.oxf++;
}

void call_func(void *that, DWORD func) {  //10 bytes
	//mov ecx, that
	AB(0xB9);
	AD(that);
	//call func
	AB(0xE8);
	REQUEST_CALL(func);
}

// Remembers that the code position at the current end of the buffer must
// later receive the address of dword number `index` (resolved in the
// CompiledDisplayList constructor).
// NOTE(review): no bytes are appended here - confirm that the four bytes the
// constructor later writes at this position are reserved elsewhere.
void DLRecBuf::request_dword_ptr(size_t index) {
	const POS_INDEX request = { recbuf_size(), index };
	r_dp_v.push_back(request);
}

// Emits code equivalent to (that->*func)(d), with d as an immediate.
void XFHandlerRec::call(GP *that, void (GP::*func)(DWORD), DWORD d) {
	const BYTE kPushImm32 = 0x68;
	const DWORD target = MAKE(DWORD, func);

	//push d
	AB(kPushImm32);
	AD(d);
	//mov ecx, that; call func
	call_func(that, target);
}
// Emits code equivalent to (that->*func)(data), where data points into
// s_xf_data. The pushed pointer is not known yet: a fixup request is recorded
// for the matching entry of s_dwords.
// DATAPOS() is only meaningful while a multi-dword load is being recorded,
// so this overload now checks s_datapos like every other DATAPOS() user.
void XFHandlerRec::call(GP *that, void (GP::*func)(const DWORD*), const DWORD *data) {
	MYASSERT(s_datapos != INVALID_POS);
	//(that->*func)(data);
	//push data
	AB(0x68);
	REQUEST_DWORD_PTR(DATAPOS(data));
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, d), both as immediates.
void XFHandlerRec::call(GP *that, void (GP::*func)(DWORD,DWORD), DWORD index, DWORD d) {
	const BYTE kPushImm32 = 0x68;
	const DWORD target = MAKE(DWORD, func);

	//arguments go on the stack right to left
	AB(kPushImm32);
	AD(d);
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, target);
}
// Emits code equivalent to (that->*func)(index, data). data points into
// s_xf_data; its final address is requested as a fixup against s_dwords.
// Only valid while a multi-dword load is being recorded.
void XFHandlerRec::call(GP *that, void (GP::*func)(DWORD,const DWORD*), DWORD index,
												const DWORD *data)
{
	MYASSERT(s_datapos != INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const size_t dwordIndex = DATAPOS(data);

	//push <address of data, patched in later>
	AB(kPushImm32);
	REQUEST_DWORD_PTR(dwordIndex);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}

// Emits code equivalent to (that->*func)(index, data) for float data. data
// points into s_xf_data; its final address is requested as a fixup against
// s_dwords. Only valid while a multi-dword load is being recorded.
void XFHandlerRec::call(GP *that, void (GP::*func)(DWORD,const float*), DWORD index,
												const float *data)
{
	MYASSERT(s_datapos != INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const size_t dwordIndex = DATAPOS(data);

	//push <address of data, patched in later>
	AB(kPushImm32);
	REQUEST_DWORD_PTR(dwordIndex);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, data, b). data points into
// s_xf_data; its final address is requested as a fixup against s_dwords.
// Only valid while a multi-dword load is being recorded.
//
// Fix: opcode 0x6A is PUSH imm8 and takes exactly one immediate byte. The
// previous code followed it with a 4-byte AD(), leaving three stray bytes
// that would be decoded as instructions. The flag is now emitted as a single
// byte; the CPU still pushes a full (sign-extended) stack slot.
void XFHandlerRec::call(GP *that, void (GP::*func)(DWORD,const float*,bool), DWORD index,
												const float *data, bool b)
{
	MYASSERT(s_datapos != INVALID_POS);
	//(that->*func)(index, data, b);
	//push b (imm8)
	AB(0x6A);
	AB(b ? 1 : 0);
	//push data
	AB(0x68);
	REQUEST_DWORD_PTR(DATAPOS(data));
	//push index
	AB(0x68);
	AD(index);
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, data). The unnamed bool
// condition is ignored here: the call is always recorded. data points into
// s_xf_data; its final address is requested as a fixup against s_dwords.
void XFHandlerRec::call_if(GP *that, bool, void (GP::*func)(DWORD,const float*),
													 DWORD index, const float *data)
{
	MYASSERT(s_datapos != INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const size_t dwordIndex = DATAPOS(data);

	//push <address of data, patched in later>
	AB(kPushImm32);
	REQUEST_DWORD_PTR(dwordIndex);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}

void GP::dlr_load_xf_indx(char name, BYTE array, BYTE stride) {
	//throw hardware_fatal_exception("GP Indexed XF Load in Display List unemulated!");
	XF_INDX_LOG x;
	x.index = GP_QUEUE_GET_WORD;
	x.word2 = GP_QUEUE_GET_WORD;
	xf_indx_log_start(name, x.dword);

	if(m.cp_reg[stride] != x.length * 4u)
		throw hardware_fatal_exception("GP Load Index stride doesn't match length!");

	//This is a proposed architecture for the indexed xf load dlr opcode.
	//length > 1 is assumed for optimization purposes, but not required.

	{ //eax = mem.getp_physical(m.cp_reg[array] + m.cp_reg[stride]*index, m.cp_reg[stride]);
		//mov eax, m.cp_reg[stride]
		AB(0xA1);
		AD(&m.cp_reg[stride]);
		//push eax
		AB(0x50);
		//mov ebx, index
		AB(0xBB);
		AD(x.index);
		//mul eax, ebx
		AB(0xF7);
		AB(0xE3);
		//add eax, m.cp_reg[array]
		AB(0x03);
		AB(0x05);
		AD(&m.cp_reg[array]);
		//push eax
		AB(0x50);

		BYTE* (MemInterface::*func)(DWORD,DWORD) = &MemInterface::getp_physical;
		call_func(&mem, MAKE(DWORD, func));
	}
	{ //load and swap
		//cld
		AB(0xFC);
		//mov esi, eax
		AB(0x89);
		AB(0xC6);
		//mov edi, s_xf_data
		AB(0xBF);
		AD(s_xf_data);
		//mov ecx, length
		AB(0xB9);
		AD(x.length);

		//add_label("loop1");
		//lodsd	//mov eax, [esi]; esi+=4;
		AB(0xAD);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//stosd	//mov [edi], eax; edi+=4;
		AB(0xAB);
		//loop loop1
		AB(0xE2);
		//request_label("loop1");
		AB(-4); //hacky
	}
	//edi == s_xf_data + length*4
	if(g::gp_log) {
		//cdl_xf_indx_log(name, x, data);
		//push s_xf_data
		AB(0x68);
		AD(s_xf_data);
		//mov edx, x
		AB(0xBA);
		AD(x.dword);
		//mov ecx, name
		AB(0xB9);
		AD(name);
		//call cdl_xflog
		AB(0xE8);
		REQUEST_CALL(DWORD(xf_indx_log));
		//edi is now invalidated
	}
	handle_xf_multi<XFHandlerRecIndx>(x.length, x.base, sc_fake_data);
}
void GP::dlr_load_xf_indx_a() {
	dlr_load_xf_indx('A', 0xAC, 0xBC);
	dlrStat.oxfa++;
}
void GP::dlr_load_xf_indx_b() {
	dlr_load_xf_indx('B', 0xAD, 0xBD);
	dlrStat.oxfb++;
}
void GP::dlr_load_xf_indx_c() {
	dlr_load_xf_indx('C', 0xAE, 0xBE);
	dlrStat.oxfc++;
}
void GP::dlr_load_xf_indx_d() {
	dlr_load_xf_indx('D', 0xAF, 0xBF);
	dlrStat.oxfd++;
}

// Emits code equivalent to (that->*func)(s_xf_data[d]): here d is an offset
// into s_xf_data, and the value is read from memory when the list runs.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(DWORD), DWORD d) {
	const DWORD target = MAKE(DWORD, func);

	//push dword ptr [s_xf_data + d]	(FF 35 <addr32>, pointer arithmetic)
	AB(0xFF);
	AB(0x35);
	AD(s_xf_data + d);
	//mov ecx, that; call func
	call_func(that, target);
}
// Emits code equivalent to (that->*func)(s_xf_data + *data): *data is the
// offset into s_xf_data, and the resulting address is pushed as an immediate.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(const DWORD*), const DWORD *data) {
	const BYTE kPushImm32 = 0x68;
	const DWORD offset = *data;

	//push s_xf_data + offset
	AB(kPushImm32);
	AD(s_xf_data + offset);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, s_xf_data[d]): d is an offset
// into s_xf_data, read from memory when the list runs; index is an immediate.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(DWORD,DWORD), DWORD index,
														DWORD d)
{
	const BYTE kPushImm32 = 0x68;
	const DWORD target = MAKE(DWORD, func);

	//push dword ptr [s_xf_data + d]	(FF 35 <addr32>)
	AB(0xFF);
	AB(0x35);
	AD(s_xf_data + d);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, target);
}
// Emits code equivalent to (that->*func)(index, s_xf_data + *data): *data is
// the offset into s_xf_data. Expects s_datapos to be INVALID_POS.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(DWORD,const DWORD*), DWORD index,
														const DWORD *data)
{
	MYASSERT(s_datapos == INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const DWORD offset = *data;

	//push s_xf_data + offset
	AB(kPushImm32);
	AD(s_xf_data + offset);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}

// Emits code equivalent to (that->*func)(index, s_xf_data + offset), where the
// offset is the raw dword stored at *data (reinterpreted, not converted).
// Expects s_datapos to be INVALID_POS.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(DWORD,const float*), DWORD index,
														const float *data)
{
	MYASSERT(s_datapos == INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const DWORD offset = MAKE(DWORD, *data);

	//push s_xf_data + offset
	AB(kPushImm32);
	AD(s_xf_data + offset);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, s_xf_data + offset, b), where
// the offset is the raw dword stored at *data. Expects s_datapos to be
// INVALID_POS.
//
// Fix: opcode 0x6A is PUSH imm8 and takes exactly one immediate byte. The
// previous code followed it with a 4-byte AD(), leaving three stray bytes
// that would be decoded as instructions. The flag is now emitted as a single
// byte; the CPU still pushes a full (sign-extended) stack slot.
void XFHandlerRecIndx::call(GP *that, void (GP::*func)(DWORD,const float*,bool),
														DWORD index, const float *data, bool b)
{
	MYASSERT(s_datapos == INVALID_POS);
	//(that->*func)(index, data, b);
	//push b (imm8)
	AB(0x6A);
	AB(b ? 1 : 0);
	//push s_xf_data + *data
	AB(0x68);
	AD(s_xf_data + MAKE(DWORD, *data));
	//push index
	AB(0x68);
	AD(index);
	call_func(that, MAKE(DWORD, func));
}
// Emits code equivalent to (that->*func)(index, s_xf_data + offset), where the
// offset is the raw dword stored at *data. The unnamed bool condition is
// ignored: the call is always recorded. Expects s_datapos to be INVALID_POS.
void XFHandlerRecIndx::call_if(GP *that, bool, void (GP::*func)(DWORD,const float*),
															 DWORD index, const float *data)
{
	MYASSERT(s_datapos == INVALID_POS);
	const BYTE kPushImm32 = 0x68;
	const DWORD offset = MAKE(DWORD, *data);

	//push s_xf_data + offset
	AB(kPushImm32);
	AD(s_xf_data + offset);
	//push index
	AB(kPushImm32);
	AD(index);
	//mov ecx, that; call func
	call_func(that, MAKE(DWORD, func));
}

// Records a draw command for primitive type T.
//
// Reads the vertex count from the queue (the vertex-format index comes from
// the low three bits of the opcode byte just consumed), converts the vertices
// at recording time through getConvertices(), keeps the resulting vertex
// buffer in s_resources, and emits code that at run time
//   - skips the draw when vs_control.cull_all (surface primitives only) or
//     vs_control.lo_noop is set,
//   - calls do_stuff_before_draw(FVF, midx), setStreamSource(buffer, size)
//     and T::Draw(this, primitiveCount).
// Throws hardware_fatal_exception on a vertex count T rejects.
//
// Fixes:
//  * the two "jnz end" displacements (45 and 35) did not match the code that
//    follows them and landed inside the setStreamSource call sequence; they
//    are now derived from the sizes of the emitted instructions.
//  * a zero-vertex draw was counted in `stat` once at recording time; it is
//    now counted in dlrStat like every other recorded statistic, so it is
//    applied each time the compiled list runs.
template<class T> void GP::dlr_draw_primitive() {
	BYTE vat = queue_get_past_byte(1) & 0x07;
	WORD numvertices = GP_QUEUE_GET_WORD;
	if(numvertices == 0) {
		GPDEGUB("GP Draw %s 0\n", T::name);
		dlrStat.odraw_null++;
		return;
	}
	if(T::VertexCountIsBad(numvertices)) {
		DEGUB("%i vertices!\n", numvertices);
		throw hardware_fatal_exception("GP Draw "+ string(T::name) +" bad vertex count!");
	}

	CONVERTICES c;
	getConvertices(vat, numvertices, T::name, c);
	//the compiled list takes over this buffer and releases it on destruction
	s_resources.push_back(c.vertexBuffer);

	WORD primitiveCount = T::PrimitiveCount(numvertices);
	T::StatPrimitives(dlrStat) += primitiveCount;

	//Sizes, in bytes, of the instruction sequences emitted below.
	const int kPushImm32 = 5;			//68 imm32
	const int kMovRegImm32 = 5;			//B8+r imm32
	const int kCallRel32 = 5;			//E8 rel32
	const int kCallFunc = kMovRegImm32 + kCallRel32;	//see call_func()
	const int kTestMem8Imm8 = 7;		//F6 05 addr32 imm8
	const int kJccRel8 = 2;				//75 rel8
	//do_stuff_before_draw + setStreamSource + T::Draw
	const int kDrawBody = (2*kPushImm32 + kCallFunc) + (2*kPushImm32 + kCallFunc) +
		(2*kMovRegImm32 + kCallRel32);
	//the lo_noop test that sits between the cull_all jump and the draw body
	const int kLoNoopCheck = kTestMem8Imm8 + kJccRel8;

	//if(!((vs_control.cull_all && T::isSurface) || vs_control.lo_noop)) {
	{
#pragma warning(push)
#pragma warning(disable:4127)
		if(T::isSurface) {
			//if(vs_control.cull_all) goto end;
			MYASSERT(sizeof(bool) == 1);
			//test vs_control.cull_all, 0xFF
			AB(0xF6);
			AB(0x05);
			AD(&vs_control.cull_all);
			AB(0xFF);
			//jnz end
			AB(0x75);
			AB(kLoNoopCheck + kDrawBody);
		}
#pragma warning(pop)
		//if(vs_control.lo_noop) goto end;
		//test vs_control.lo_noop, 0xFF
		AB(0xF6);
		AB(0x05);
		AD(&vs_control.lo_noop);
		AB(0xFF);
		//jnz end
		AB(0x75);
		AB(kDrawBody);

		{ //do_stuff_before_draw(c.FVF, c.midx);
			void (GP::*func)(DWORD,WORD) = &GP::do_stuff_before_draw;
			//push c.midx
			AB(0x68);
			AD(c.midx);
			//push c.FVF
			AB(0x68);
			AD(c.FVF);
			call_func(this, MAKE(DWORD, func));
		}

		{ //setStreamSource(c.vertexBuffer, c.convertexSize);
			void (GP::*func)(LPDIRECT3DVERTEXBUFFER9,UINT) = &GP::setStreamSource;
			//push c.convertexSize
			AB(0x68);
			AD(c.convertexSize);
			//push c.vertexBuffer
			AB(0x68);
			AD(c.vertexBuffer);
			call_func(this, MAKE(DWORD, func));
		}
		{ //T::Draw(this, primitiveCount);
			void (__fastcall *func)(GP*,UINT) = &T::Draw;
			//mov edx, primitiveCount
			AB(0xBA);
			AD(primitiveCount);
			//mov ecx, this
			AB(0xB9);
			AD(this);
			//call func
			AB(0xE8);
			REQUEST_CALL(MAKE(DWORD, func));
		}
		//end:
	}
	T::StatOps(dlrStat)++;
}

// Binds vertexBuffer to stream 0 of the D3D device with a zero byte offset and
// convertexSize as the stride; the result is checked through GPHR.
// Called from the code emitted by dlr_draw_primitive.
void GP::setStreamSource(LPDIRECT3DVERTEXBUFFER9 vertexBuffer, UINT convertexSize) {
	GPHR(m.pd3dDevice->SetStreamSource(0, vertexBuffer, 0, convertexSize));
}

void GP::dlr_draw_quads() {
	dlr_draw_primitive<DrawQuads>();
}
void GP::dlr_draw_triangles() {
	dlr_draw_primitive<DrawTriangles>();
}
void GP::dlr_draw_triangle_strip() {
	dlr_draw_primitive<DrawTriangleStrip>();
}
void GP::dlr_draw_triangle_fan() {
	dlr_draw_primitive<DrawTriangleFan>();
}
void GP::dlr_draw_lines() {
	dlr_draw_primitive<DrawLines>();
}
void GP::dlr_draw_line_strip() {
	dlr_draw_primitive<DrawLineStrip>();
}
void GP::dlr_draw_points() {
	dlr_draw_primitive<DrawPoints>();
}
