#include "fpse.h"

#undef PRINTF
#undef rs
#undef rt
#undef rd

/* Trace hook: the condition is constant-false, so the printf never runs. */
#define PRINTF  if (1==0) printf

/*
 * Register-file offsets (register number times 4) of the rs/rt/rd fields of
 * the instruction being compiled.  rsno/rtno/rdno come from outside this
 * file.  Every expansion is parenthesized so the macros behave as single
 * operands in any expression.
 */
#define	rs	(rsno*4)
#define	rt	(rtno*4)
#define	rd	(rdno*4)

/*
 * Offsets of the other register groups, used as [esi+offset] displacements
 * by the emitters.  Presumably the COP0/COP2 data and control banks, PC, LO
 * and HI, going by the names -- confirm against the register structure.
 */
#define	idxCPR0	(35*4)
#define	idxCCR0	((35+32)*4)
#define	idxCPR2	((35+64)*4)
#define	idxCCR2	((35+96)*4)
#define	rPC	(32*4)
#define	idxrLO	(33*4)
#define	idxrHI	(34*4)

#define	PP	0	/* Pentium optimize */

/* Size in bytes of the buffer that receives the generated code. */
#define COMPILERSIZE    (1024*1024)

/* Generic function pointer used to pass runtime helpers to the emitters. */
typedef void (*funcptr)();

/* runtime function */

/*
 * Debugging aids.  In the visible code `breakpoint` is referenced only from
 * commented-out code in compile1() and `writebreak` is not referenced at all.
 * The trailing comments look like sample addresses used while debugging.
 */
int breakpoint = 0; /* 0x1a1c; */
int writebreak = 0; /*xa000e28c;*/
/*
void watch(void)
{
static char *regname[32] = {
	"zero","at","v0","v1","a0","a1","a2","a3",
	"t0","t1","t2","t3","t4","t5","t6","t7",
	"s0","s1","s2","s3","s4","s5","s6","s7",
	"t8","t9","k0","k1","gp","sp","fp","ra"
};

	int i;
	for(i=0;i<32;i++) 
		printf("%-4s = %08x ",regname[i],(int)reg.r[i]);
	printf("\n");
}
*/
/*
 * Runtime helper for the MIPS SWL instruction.
 *
 * Reads the aligned word containing adr, replaces its low-order bytes with
 * the high-order bytes of r (how many depends on adr&3 and the target byte
 * order) and writes the word back.
 */
static void swl(UINT32 adr,UINT32 r)
{
	/* Bits of the memory word that survive when r is shifted right by k bytes. */
	static const UINT32 keep[4] = { 0x00000000, 0xff000000, 0xffff0000, 0xffffff00 };
	UINT32 word = read32(adr&~3);
#if TARGET_BIG_ENDIAN
	unsigned k = adr&3;
#else
	unsigned k = 3-(adr&3);
#endif

	word = (word & keep[k]) | (r >> (8*k));
	write32(adr&~3,word);
}

/*
 * Runtime helper for the MIPS SWR instruction.
 *
 * Reads the aligned word containing adr, replaces its high-order bytes with
 * the low-order bytes of r (how many depends on adr&3 and the target byte
 * order) and writes the word back.
 */
static void swr(UINT32 adr,UINT32 r)
{
	/* Bits of the memory word that survive when r is shifted left by k bytes. */
	static const UINT32 keep[4] = { 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff };
	UINT32 word = read32(adr&~3);
#if TARGET_BIG_ENDIAN
	unsigned k = 3-(adr&3);
#else
	unsigned k = adr&3;
#endif

	word = (word & keep[k]) | (r << (8*k));
	write32(adr&~3,word);
}

/*
 * Runtime helper for the MIPS LWL instruction.
 *
 * Reads the aligned word containing adr and merges it into the high-order
 * bytes of r; the low-order bytes of r that the load does not cover are
 * kept.  Returns the merged register value.
 */
static UINT32 lwl(UINT32 adr,UINT32 r)
{
	/* Bits of the register that survive when memory is shifted left by k bytes. */
	static const UINT32 keep[4] = { 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff };
	UINT32 word = read32(adr&~3);
#if TARGET_BIG_ENDIAN
	unsigned k = adr&3;
#else
	unsigned k = 3-(adr&3);
#endif

	return (r & keep[k]) | (word << (8*k));
}

/*
 * Runtime helper for the MIPS LWR instruction.
 *
 * Reads the aligned word containing adr and merges it into the low-order
 * bytes of r; the high-order bytes of r that the load does not cover are
 * kept.  Returns the merged register value.
 */
static UINT32 lwr(UINT32 adr,UINT32 r)
{
	/* Bits of the register that survive when memory is shifted right by k bytes. */
	static const UINT32 keep[4] = { 0x00000000, 0xff000000, 0xffff0000, 0xffffff00 };
	UINT32 word = read32(adr&~3);
#if TARGET_BIG_ENDIAN
	unsigned k = 3-(adr&3);
#else
	unsigned k = adr&3;
#endif

	return (r & keep[k]) | (word >> (8*k));
}

/*
 * Reports an opcode for which the recompiler has no handler.  Relies on a
 * variable named `code` being in scope at the point of use.  The expansion
 * already ends in ';', so `NOT_IMPREMENT(x);` yields an extra empty statement.
 */
#define	NOT_IMPREMENT(cd)	printf("not imprement %s: %08x\n",cd,(int)code);

/*
 * Register offset last moved through eax by load_ax()/store_ax(), or -1 when
 * unknown.  The visible code only ever writes this value; nothing reads it.
 */
static int lastax = -1;

/* Number of compiled-block records kept in MemBlocks. */
#define MAXMEMBLKS      1024

/* Next record SearchFreeSlot() evicts when no free record exists. */
static UINT32  MemBlocksIdx=0;
/*
 * Block records stored as pairs: [2*i] is the start and [2*i+1] the end PSX
 * address of a compiled block, both masked with 0x1FFFFFFF (see jmppc()).
 * A start value above 0x1FFFFFFF marks the record as free.
 */
static UINT32  MemBlocks[MAXMEMBLKS*2];
/* Buffer of COMPILERSIZE bytes that receives the generated code. */
static UINT8  *compilebuf;
/* Per-instruction table of host code addresses; 0 means "not compiled". */
static UINT32 *realpc;
/* Current write position inside compilebuf. */
static UINT8  *pcptr;
/* Set by compile1() when the block being compiled must end. */
static char    endflg;
/* Checked by jmppc() when the PSX PC becomes 0; never set in the visible code. */
static UINT8   exitflg=0;

/*
 * Selects the realpc entry for PSX address adr:
 *   adr >= 0xbfc00000                  -> byte offset 0x200000 + (adr-0xbfc00000)
 *   0x1f000000 <= adr < 0x1f020000     -> byte offset 0x280000 + (adr-0x1f000000)
 *   anything else                      -> byte offset adr & 0x1fffff
 * The byte offset is divided by 4 to index the table.  adr is evaluated
 * several times and is not parenthesized, so pass a plain variable or a
 * simple lvalue.
 */
#define	COMPILED(adr)	realpc[( (adr<0xbfc00000) ?                          \
                                ((adr>=0x1f000000 && adr<0x1f020000) ?       \
                                 (adr-0x1f000000+0x280000):(adr&0x1fffff)) : \
                                 (adr-0xbfc00000+0x200000) )/4]

static int SearchFreeSlot()
{
    int x;

    for (x=0;x<MAXMEMBLKS;x++)
        if (MemBlocks[x*2] > 0x1FFFFFFF) return x;

    x = MemBlocksIdx;
    if (++MemBlocksIdx >= MAXMEMBLKS)
        MemBlocksIdx = 0;
    if (MemBlocks[x*2] < 0x1FFFFFFF)
    {
        memset(&COMPILED(MemBlocks[x*2]),0,
               MemBlocks[x*2+1] - MemBlocks[x*2] );
    }

    return x;
}

// int dmact = 0;

/*
 * Invalidates every compiled block that overlaps the PSX address range
 * [start, end].
 *
 * Does nothing when the global `compile` is zero.  Both addresses are masked
 * with 0x1FFFFFFF before being compared against the block records.  For each
 * overlapping record the realpc entries of the block are cleared and the
 * record is marked free.
 *
 * The free marker is 0xFFFFFFFF, the same value compile_init() and jmppc()
 * produce with memset(0xFF).  It must be above 0x1FFFFFFF so that
 * SearchFreeSlot() recognises the record as reusable.
 */
void CompileFlush(UINT32 start, UINT32 end)
{
    UINT32 x,mstart,mend;

    if (!compile) return;

    start &= 0x1FFFFFFF;
    end   &= 0x1FFFFFFF;

    for (x=0; x<MAXMEMBLKS; x++)
    {
        mstart = MemBlocks[x*2];
        mend   = MemBlocks[x*2+1];

        /* Free records hold 0xFFFFFFFF and can never satisfy either test. */
        if ((start >= mstart && start <= mend) ||
            (mstart >= start && mstart <= end))
        {
            MemBlocks[x*2]   = 0xFFFFFFFF;
            MemBlocks[x*2+1] = 0xFFFFFFFF;
            memset(&COMPILED(mstart),0,mend-mstart);
        }
    }
}

/*
 * Resolves a PSX program counter to the host address of its compiled code,
 * compiling a new block first when none exists yet.  The generated code calls
 * this through jump() and then jumps to the returned address.
 *
 * Steps:
 *   1. update_hw() is polled; when it reports nonzero and both bits of mask
 *      0x401 are set in SR, EPC/CAUSE/SR are updated and execution is
 *      redirected to 0x80000080 (presumably interrupt delivery -- confirm).
 *   2. A few special addresses are handled: 0 (diagnostic, optional exit),
 *      0xa0/0xb0/0xc0 (biosprint) and 0x80000080 (exception_handler).  The
 *      last two groups may change PC, which is read back into Pc.
 *   3. If COMPILED(Pc) is already set, it is returned.
 *   4. Otherwise a block is compiled with compile1() until endflg is set, its
 *      address range is recorded in MemBlocks and the new entry is returned.
 *
 * NOTE(review): the 1024-byte headroom is checked once per block only; a very
 * long block could presumably emit more than that -- confirm.
 * NOTE(review): `startptr = (int)pcptr` assumes pointers fit in an int.
 */
UINT32 jmppc(UINT32 Pc)
{
    int pc,startptr,x;

    if (update_hw()) {
        if ((SR&0x401)==0x401) {
            EPC = Pc;
            CAUSE = 0x400;
            /* Shift the low six SR bits left by two, keeping the rest. */
            SR = (SR & ~0x3f)| ((SR<<2)&0x3f);
            Pc = 0x80000080;
        }
    }
    switch(Pc) {
    case 0:
// Oooppsss!! We need also to release everything
// (otherwise DirectDraw will surely kill your PC!)
        printf("PC = 0 at $%08x\n",(int)(PC));
        if (exitflg)
        {
            hw_close();
            memfree();
            exit(-1);
        }
        break;
    case 0xa0:
    case 0xb0:
    case 0xc0:
        /* Publish Pc, let biosprint() run, then pick up whatever PC it left. */
        PC = Pc;
        biosprint(Pc);
        Pc = PC;
        break;
    case 0x80000080:
        PC = Pc;
        exception_handler();
        Pc = PC;
        break;
    }

    /* Fast path: the target has been compiled already. */
    if (COMPILED(Pc)) return COMPILED(Pc);

    SETPC(Pc);
    endflg = 0;
    pc = Pc;

    if (pcptr > compilebuf+COMPILERSIZE-1024) // End-of-buffer is near
    {                                         // Flush all
        memset(MemBlocks,0xFF,MAXMEMBLKS*2*sizeof(UINT32));
        memset(realpc,0,0x200000+0x80000+0x20000);
        pcptr = compilebuf;
    }
    startptr = (int)pcptr;
    x = SearchFreeSlot();

    /* compile1() returns the address of the next instruction to compile. */
    while(!endflg)
        pc = compile1(pc);

    /* Record the block's range so CompileFlush() can invalidate it later. */
    MemBlocks[x*2]   = (Pc & 0x1FFFFFFF);
    MemBlocks[x*2+1] = (pc & 0x1FFFFFFF);

    PRINTF("compile : psx = %08x-%08x(%08x) pc = %08x-%08x(%08x)\n",
           (int)Pc,(int)pc,(int)(pc-Pc),(int)startptr,(int)pcptr,(int)(pcptr-startptr));

    return COMPILED(Pc);
}

#if 1

/*
 * Code emitters: append raw bytes (and, for the "i" variants, one immediate)
 * at pcptr and advance it.
 *
 * Each macro is wrapped in do { } while (0) and parenthesizes its arguments,
 * so it acts as one statement after an unbraced if/else and accepts
 * arbitrary expressions.  Arguments may refer to pcptr: it is advanced only
 * after all of them have been stored.
 *
 * NOTE(review): the immediate is stored through a `long *`, which writes
 * sizeof(long) bytes at a possibly unaligned address, while pcptr advances by
 * four bytes.  That is only correct where long is 32 bits wide.
 */
#define	GENCODE1(c0)	do { *pcptr++=(c0); } while (0)
#define	GENCODE2(c0,c1)	do { pcptr[0]=(c0);pcptr[1]=(c1); pcptr+=2; } while (0)
#define	GENCODE3(c0,c1,c2)	do { pcptr[0]=(c0);pcptr[1]=(c1);pcptr[2]=(c2); pcptr+=3; } while (0)
#define	GENCODE4(c0,c1,c2,c3)	do { pcptr[0]=(c0);pcptr[1]=(c1);pcptr[2]=(c2);pcptr[3]=(c3);pcptr+=4; } while (0)
#define	GENCODE1i(c0,imm)	do { pcptr[0]=(c0); *(long*)&pcptr[1]=(long)(imm); pcptr+=5; } while (0)
#define	GENCODE2i(c0,c1,imm)	do { pcptr[0]=(c0);pcptr[1]=(c1); *(long*)&pcptr[2]=(long)(imm); pcptr+=6; } while (0)
#define	GENCODE3i(c0,c1,c2,imm)	do { pcptr[0]=(c0);pcptr[1]=(c1);pcptr[2]=(c2); *(long*)&pcptr[3]=(long)(imm); pcptr+=7; } while (0)

//#define	DISASM(code,pc)
/* Prints a disassembly line for the instruction when `verbose` is nonzero. */
#define	DISASM	if (verbose) disasm2
static void disasm2(long code,long addr)
{
	char buf[256];
	disasm(buf,code,addr);
	printf(";%08lx %08lx   %s\n",addr,code,buf);
}

#else

/*
 * Alternative build: the emitters are functions that also trace every byte
 * sequence through PRINTF.
 */
#define	DISASM	if (verbose) disasm2
static void disasm2(long code,long addr)
{
	char buf[256];
	disasm(buf,code,addr);
	printf(";%08lx %08lx   %s\n",addr,code,buf);
}

static void GENCODE1(unsigned char  c0)
{
	PRINTF(" %p %02x                ",pcptr,c0);
	*pcptr++=c0;
}
static void GENCODE2(unsigned char  c0,unsigned char  c1)
{
	PRINTF(" %p %02x %02x             ",pcptr,c0,c1);
	pcptr[0]=c0;pcptr[1]=c1;
	pcptr+=2;
}
static void GENCODE3(unsigned char  c0,unsigned char  c1,unsigned char  c2)
{
	PRINTF(" %p %02x %02x %02x          ",pcptr,c0,c1,c2);
	pcptr[0]=c0;pcptr[1]=c1;pcptr[2]=c2;
	pcptr+=3;
}
static void GENCODE4(unsigned char  c0,unsigned char  c1,unsigned char  c2,unsigned char  c3)
{
	PRINTF(" %p %02x %02x %02x %02x       ",pcptr,c0,c1,c2,c3);
	pcptr[0]=c0;pcptr[1]=c1;pcptr[2]=c2;pcptr[3]=c3;
	pcptr+=4;
}

static void GENCODE1i(unsigned char  c0,int imm)
{
	PRINTF(" %p %02x %08x       ",pcptr,c0,imm);
	pcptr[0]=c0;
	*(long*)&pcptr[1] = imm;
	pcptr+=5;
}
static void GENCODE2i(unsigned char  c0,unsigned char  c1,int imm)
{
	PRINTF(" %p %02x %02x %08x    ",pcptr,c0,c1,imm);
	pcptr[0]=c0;pcptr[1]=c1;
	*(long*)&pcptr[2] = imm;
	pcptr+=6;
}
static void GENCODE3i(unsigned char  c0,unsigned char  c1,unsigned char  c2,int imm)
{
	PRINTF(" %p %02x %02x %02x %08x ",pcptr,c0,c1,c2,imm);
	pcptr[0]=c0;pcptr[1]=c1;pcptr[2]=c2;
	*(long*)&pcptr[3] = imm;
	pcptr+=7;
}


#endif

/*
 * Allocates and resets the recompiler state: the code buffer (COMPILERSIZE
 * bytes), the realpc lookup table (0x200000+0x80000+0x20000 bytes, cleared
 * to 0 = "not compiled") and the block records (all bytes 0xFF = free).
 *
 * Both allocations are checked.  Without them the recompiler cannot work, so
 * on failure a message is printed and the process exits with the same
 * status jmppc() uses.
 */
void compile_init(void)
{
    compilebuf = malloc(COMPILERSIZE);
    realpc = malloc(0x200000+0x80000+0x20000);
    if (!compilebuf || !realpc) {
        printf("compile_init: out of memory\n");
        exit(-1);
    }
    memset(realpc,0,0x200000+0x80000+0x20000);
    memset(MemBlocks,0xFF,MAXMEMBLKS*2*sizeof(UINT32));
    pcptr = compilebuf;
}

/*
 * Emits code that loads the register at offset r into eax.  Offset 0 is
 * handled with "xor eax,eax" instead of a memory read.  Records r in lastax.
 */
static void load_ax(int r)
{
	lastax = r;

	if (r) {
		GENCODE3(0x8b,0x46,r);
		PRINTF("mov eax,[esi+%d]\n",r);
		return;
	}

	GENCODE2(0x31,0xc0);
	PRINTF("xor eax,eax\n");
}

/*
 * Disabled: emitter for "mov edx,[esi+r]".  Its only callers are the
 * direct-memory-access fast paths in compile1(), which are disabled as well.
 */
#if 0
static void load_dx(int r)
{
	GENCODE3(0x8b,0x56,r);
	PRINTF("mov edx,[esi+%d]\n",r);
}
#endif

/*
 * Emits "mov [esi+r],eax".  Offset 0 is never written, so nothing is emitted
 * for it and lastax is left alone.
 */
static void store_ax(int r)
{
	if (r) {
		GENCODE3(0x89,0x46,r);
		PRINTF("mov [esi+%d],eax\n",r);
		lastax = r;
	}
}

/*
 * Emits "mov dword [esi+r],imm" and forgets what eax holds (lastax = -1).
 * No check for r == 0 is made here; callers are expected to do it.
 */
static void store_imm(int r,int imm)
{
	lastax = -1;

	GENCODE3i(0xc7,0x46,r,imm);
	PRINTF("mov dword [esi+%d],0%xh\n",r,imm);
}

/* Emits "push imm" with a 32-bit immediate operand. */
static void push_imm(int imm)
{
	const int value = imm;

	GENCODE1i(0x68,value);
	PRINTF("push 0%xh\n",value);
}

/* Emits the one-byte instruction "push eax". */
static void push_ax(void)
{
	enum { OP_PUSH_EAX = 0x50 };

	GENCODE1(OP_PUSH_EAX);
	PRINTF("push eax\n");
}

/*
 * Emits code that pushes the register at offset r.  Offset 0 (and every
 * offset when PP is set) goes through eax; otherwise a single
 * "push dword [esi+r]" is emitted.
 */
static void push_r(int r)
{
#if PP
	load_ax(r);
	push_ax();
#else
	if (r) {
		GENCODE3(0xff,0x76,r);
		PRINTF("push dword [esi+%d]\n",r);
		return;
	}

	load_ax(0);
	push_ax();
#endif
}

/* offset > 127: the variants below encode a 32-bit displacement */
/* Emits "mov eax,[esi+r]" using a 32-bit displacement. */
static void load2_ax(int r)
{
	const int disp = r;

	GENCODE2i(0x8b,0x86,disp);
	PRINTF("mov eax,[esi+%d]\n",disp);
}

/* Emits "mov [esi+r],eax" using a 32-bit displacement. */
static void store2_ax(int r)
{
	const int disp = r;

	GENCODE2i(0x89,0x86,disp);
	PRINTF("mov [esi+%d],eax\n",disp);
}

/* Emits "mov [esi+r],edx" using a 32-bit displacement. */
static void store2_dx(int r)
{
	const int disp = r;

	GENCODE2i(0x89,0x96,disp);
	PRINTF("mov [esi+%d],edx\n",disp);
}

/* Emits "push dword [esi+r]" using a 32-bit displacement. */
static void push2_r(int r)
{
	const int disp = r;

	GENCODE2i(0xff,0xb6,disp);
	PRINTF("push dword [esi+%d]\n",disp);
}


/*
 * Emits code for a logical immediate instruction (ANDI/ORI/XORI):
 * rt = rs <op> immU.
 *
 *   code  instruction word; rs, rt and immU are decoded from it
 *   str   mnemonic, used only for tracing
 *   op_r  opcode byte of the "<op> eax,imm32" form
 *   op_m  ModRM byte selecting <op> in the "81 /r [esi+disp8],imm32" form
 *
 * Encodings used:
 *	25 ii ii ii ii and	eax,imm
 *	81 66 xx ii ii ii ii and	dword [esi+rt],imm
 *	0d ii ii ii ii or	eax,imm
 *	81 4e xx ii ii ii ii or	dword [esi+rt],imm
 *	35 ii ii ii ii xor	eax,imm
 *	81 76 xx ii ii ii ii xor	dword [esi+rt],imm
 *	c7 46 xx ii ii ii ii mov	dword [esi+rt],imm
 *
 * Nothing is emitted when rt is register 0.
 */
static void alu_i(UINT32 code,char *str,int op_r,int op_m)
{
	int imm = immU;

	if (!rt) return;

	if (rs) {
		if (rs==rt) {
			/* Operate on the register in place. */
			GENCODE3i(0x81,op_m,rs,imm);
			PRINTF("%s dword [esi+%d],0%xh\n",str,(int)rs,imm);
			/*if (rt==lastax)*/ lastax=-1;
		} else {
			load_ax(rs);
			GENCODE1i(op_r,imm);
			PRINTF("%s eax,%d\n",str,imm);
			store_ax(rt);
		}
	} else {
		/*
		 * rs is register 0, so the result is a constant: 0 & imm is 0,
		 * whereas 0 | imm and 0 ^ imm are imm.  0x25 is the "and eax,imm"
		 * opcode that identifies ANDI.
		 */
		store_imm(rt,(op_r==0x25) ? 0 : imm);
	}
}

/*
 * Emits the shortest instruction that adds imm to eax: nothing for 0,
 * inc/dec for +1/-1, "lea eax,[eax+imm8]" for other values in -128..127 and
 * "add eax,imm32" for everything else.
 */
static void add_imm(int imm)
{
	switch (imm) {
	case 0:
		return;
	case 1:
		GENCODE1(0x40);
		PRINTF("inc eax\n");
		return;
	case -1:
		GENCODE1(0x48);
		PRINTF("dec eax\n");
		return;
	default:
		break;
	}

	if (imm >= -128 && imm <= 127) {
		GENCODE3(0x8d,0x40,imm);
		PRINTF("lea eax,[eax+%d]\n",imm);
	} else {
		GENCODE1i(0x05,imm);
		PRINTF("add eax,0%xh\n",imm);
	}
}

/*
 * Emits code for ADDI/ADDIU: rt = rs + immS.
 *
 * Nothing is emitted when rt is register 0.  With rs == register 0 the
 * result is the constant immS.  With rs == rt the register is updated in
 * memory (inc, dec, or "81 46 xx ii ii ii ii  add dword [esi+rt],imm");
 * otherwise the sum is built in eax.
 */
static void add_i(UINT32 code)
{
	int imm = immS;

	if (!rt) return;

	if (!rs) {
		store_imm(rt,imm);
		return;
	}

	if (rs!=rt) {
		load_ax(rs);
		add_imm(imm);
		store_ax(rt);
		return;
	}

	/* rs == rt: modify the register where it lives. */
	switch (imm) {
	case 0:
		break;
	case 1:
		GENCODE3(0xff,0x46,rs);
		PRINTF("inc [esi+%d]\n",(int)rs);
		break;
	case -1:
		GENCODE3(0xff,0x4e,rs);
		PRINTF("dec [esi+%d]\n",(int)rs);
		break;
	default:
		GENCODE3i(0x81,0x46,rs,imm);
		PRINTF("add dword [esi+%d],0%xh\n",(int)rs,imm);
		break;
	}
}

/*
 * Emits code for a three-register ALU instruction: rd = rs <op> rt.  The
 * special cases assume an operation for which "x <op> 0" is x, as with the
 * add/or/xor callers.
 *
 *   code  instruction word; rs, rt and rd are decoded from it
 *   str   mnemonic, used only for tracing
 *   op_1  opcode byte of "<op> eax,[esi+xx]"
 *   op_2  opcode byte of "<op> [esi+xx],eax"
 *
 * Opcode pairs (op_1 / op_2): add 03/01, sub 2b/29, and 23/21, or 0b/09,
 * xor 33/31, cmp 3b/39.
 */
static void alu_r(UINT32 code,char *str,int op_1,int op_2)
{
	if (!rd) return;

	/* Both sources are register 0: the result is the constant 0. */
	if (!rs && !rt) {
		store_imm(rd,0);
		return;
	}

#if !PP
	/* rd is nonzero here, so rs == rd also means rs is nonzero. */
	if (rs==rd) {
		load_ax(rt);
		GENCODE3(op_2,0x46,rd);
		PRINTF("%s [esi+%d],eax\n",str,(int)rd);
		return;
	}
#endif

	if (rs) {
		load_ax(rs);
		if (rt) {
			GENCODE3(op_1,0x46,rt);
			PRINTF("%s eax,[esi+%d]\n",str,(int)rt);
		}
	} else {
		load_ax(rt);
	}
	store_ax(rd);
}

/*
 * Emits code for SUB/SUBU: rd = rs - rt.
 *
 * rs == rd subtracts in place (nothing to do when rt is register 0),
 * rs == register 0 negates rt, and both sources being register 0 stores the
 * constant 0.  Nothing is emitted when rd is register 0.
 */
static void subu_r(UINT32 code)
{
	if (!rd) return;

	/* rd is nonzero here, so rs == rd also means rs is nonzero. */
	if (rs==rd) {
		if (rt) {
			load_ax(rt);
			GENCODE3(0x29,0x46,rd);
			PRINTF("sub [esi+%d],eax\n",(int)rd);
		}
		return;
	}

	if (!rs && !rt) {
		store_imm(rd,0);
		return;
	}

	if (rs) {
		load_ax(rs);
		if (rt) {
			GENCODE3(0x2b,0x46,rt);
			PRINTF("sub eax,[esi+%d]\n",(int)rt);
		}
	} else {
		load_ax(rt);
		GENCODE2(0xf7,0xd8);
		PRINTF("neg eax\n");
	}
	store_ax(rd);
}

/*
 * Emits code for NOR: rd = ~(rs | rt).  With both sources being register 0
 * the constant -1 is stored.  Nothing is emitted when rd is register 0.
 */
static void nor_r(UINT32 code)
{
	if (!rd) return;

	if (!rs && !rt) {
		store_imm(rd,-1);
		return;
	}

	if (rs) {
		load_ax(rs);
		if (rt) {
			GENCODE3(0x0b,0x46,rt);
			PRINTF("or eax,[esi+%d]\n",(int)rt);
		}
	} else {
		load_ax(rt);
	}

	/* f7 d0 not eax */
	GENCODE2(0xf7,0xd0);
	PRINTF("not eax\n");
	store_ax(rd);
}

/*
 * Emits code for AND: rd = rs & rt.  If either source is register 0 the
 * result is the constant 0.  Nothing is emitted when rd is register 0.
 */
static void and_r(UINT32 code)
{
	if (!rd) return;

	if (!rs || !rt) {
		store_imm(rd,0);
		return;
	}

	load_ax(rs);
	GENCODE3(0x23,0x46,rt);
	PRINTF("and eax,[esi+%d]\n",(int)rt);
	store_ax(rd);
}

/*
	50 push	eax
	ff 76 xx push	dword [esi+xx]
	40 inc	eax
	48 dec	eax
	ff 46 xx inc	dword [esi+rs]
	ff 4e xx dec	dword [esi+rs]
	8d 40 xx lea	eax,[eax+imm]
	31 c0 xor	eax,eax
	f7 d8 neg	eax
*/

/*
	9c pushf
	9d popf
	e9 xx xx xx xx jmp
	eb xx jmp	last
	74 xx je	last
	75 xx jne	last
	7c xx jl	last
	7d xx jge	last
	7e xx jle	last
	7f xx jg	last
*/

/*
 * Emits "cmp dword [esi+rs],imm" (81 7e xx imm32), comparing the rs register
 * of instruction `code` with a constant.
 */
static void cmp_i(UINT32 code,int imm)
{
	const int value = imm;

	GENCODE3i(0x81,0x7e,rs,value);
	PRINTF("cmp [esi+%d],0%xh\n",(int)rs,value);
}

/*
 * Emits a comparison of rs with rt for instruction `code`.  When rt is
 * register 0 the compare is against the constant 0 and eax is not loaded;
 * otherwise rs is loaded into eax and compared with [esi+rt].
 */
static void cmp_r(UINT32 code)
{
	if (rt) {
		load_ax(rs);
		GENCODE3(0x3b,0x46,rt);
		PRINTF("cmp eax,[esi+%d]\n",(int)rt);
	} else {
		cmp_i(code,0);
	}
}

/*
 * Compiles the single instruction at Pc as a branch delay slot: IN_SLOT is
 * set in FPSE_Flags for the duration of the compile1() call and cleared
 * again afterwards.  The address returned by compile1() is discarded.
 */
static void compile2(UINT32 Pc)
{
	FPSE_Flags |= IN_SLOT;
	(void)compile1(Pc);
	FPSE_Flags &= ~IN_SLOT;
}

/*
 * Emits code that pushes the effective address rs + immS of a load/store
 * instruction.  A constant is pushed when rs is register 0, the register
 * itself when the displacement is 0, and otherwise the sum computed in eax.
 */
static void push_addr(UINT32 code)
{
	if (rs==0) {
		push_imm(immS);
		return;
	}

	if (!immS) {
		push_r(rs);
		return;
	}

	load_ax(rs);
	add_imm(immS);
	push_ax();
}

/*
 * Emits a relative call to a runtime helper, followed by code that removes
 * the helper's arguments from the stack.
 *
 *   str   helper name, used only for tracing
 *   func  helper to call
 *   argn  number of argument BYTES the emitted code pushed beforehand
 *
 * Encodings used:
 *	e8 xx xx xx xx    call xx (offset relative to the next instruction)
 *	5a                pop edx
 *	81 c4 xx xx xx xx add esp,xx
 *
 * One or two "pop edx" are used for 4 or 8 bytes, "add esp,argn" for any
 * other nonzero amount.  The content of eax is unknown after the call, so
 * lastax is reset.
 */
static void call(char *str,funcptr func,int argn)
{
	GENCODE1i(0xe8,(UINT32)func-(UINT32)(pcptr+5));
	PRINTF("call %s\n",str);

	lastax = -1;

	switch (argn) {
	case 0:
		break;
	case 8:
		GENCODE1(0x5a);
		PRINTF("pop edx\n");
		/* fallthrough: the second pop is shared with the 4-byte case */
	case 4:
		GENCODE1(0x5a);
		PRINTF("pop edx\n");
		break;
	default:
		GENCODE2i(0x81,0xc4,argn);
		PRINTF("add esp,%d\n",argn);
		break;
	}
}

static void jump(void)
{
	call("jmppc",(funcptr)jmppc,4); /* 5 + 5 */
	GENCODE2(0xff,0xe0); /* 2 */
	PRINTF("jmp eax\n");
	//endflg = 1;
}

/* Emits a block exit to the constant PSX address newpc. */
static void jump_imm(UINT32 newpc)
{
	const UINT32 target = newpc;

	push_imm(target);
	jump();
}

/*
 * Emits a conditional branch.  The caller has already emitted the compare
 * that sets the x86 flags.
 *
 *   code  branch instruction word (immS is decoded from it)
 *   Pc    address of the delay-slot instruction (branch address + 4)
 *   str   mnemonic of the skip jump, used only for tracing
 *   op    opcode of a short conditional jump that is taken when the MIPS
 *         branch is NOT taken
 *
 * Layout of the emitted code:
 *   [pushf; <delay slot>; popf]   omitted when the delay slot is a nop
 *   jcc  @skip                    8-bit displacement, patched afterwards
 *   push newpc; call jmppc; pop edx; jmp eax
 *   @skip:
 */
static void bxx(UINT32 code,UINT32 Pc,char *str,int op)
{
	UINT32 newpc;
	char *base;

	if (FETCH(Pc)==0) {
		/* nop */
		PRINTF("%08x %08x %s\n",(int)Pc,0,"nop");
	} else { 
		/* Keep the compare result intact across the delay-slot code. */
		GENCODE1(0x9c);
		PRINTF("pushf\n");
		compile2(Pc);
		GENCODE1(0x9d);
		PRINTF("popf\n");
	} 

	/* Displacement byte is written as 0 and patched once the size is known. */
	GENCODE2(op,0);
	PRINTF("%s @skip\n",str);
	base = pcptr;

	/* Branch target is relative to the delay-slot address. */
	newpc = Pc + immS*4;
#if 0
	if (COMPILED(newpc)) {
		GENCODE1i(0xe9,COMPILED(newpc)-(UINT32)(pcptr+5));
		PRINTF("jmp %p\n",COMPILED(newpc));
	} else 
#endif
	{
		push_imm(newpc); /* 5 */
		jump(); /* 5 + 1 + 2 */
	}
	/* Patch the jcc displacement: bytes emitted since `base`. */
	base[-1] = (UINT32)pcptr-(UINT32)base;
	PRINTF("@skip:\n");
/*
		push_imm(PC+4);
		jump();
		endflg = 1;
*/
}

/*
 * Emits "mov dword [esi+rPC],pc-4".  Callers pass the address following the
 * instruction being compiled, so pc-4 is that instruction's own address.
 * The displacement is emitted through GENCODE2i and the 32-bit immediate is
 * appended by hand.
 */
static void store_pc(int pc)
{
	const int insn_pc = pc-4;

	/* fobOp  */
	GENCODE2i(0xc7,0x86,rPC);
	PRINTF("mov dword [esi+%d],0%xh\n",rPC,insn_pc);
	PRINTF(" %p %08x\n",pcptr,insn_pc);
	*(long*)pcptr = insn_pc;
	pcptr += 4;
}

/*
 * Emits a call to a memory-write helper for a store instruction: the value
 * of rt is pushed first, then the effective address, then func is called
 * with 8 bytes of arguments.  str is the helper name for tracing.
 */
static void store(UINT32 code,char *str,funcptr func)
{
	enum { ARG_BYTES = 8 };

	push_r(rt);
	push_addr(code);
	call(str,func,ARG_BYTES);
}

/*
 * Emits a call to a memory-read helper for a load instruction: the effective
 * address is pushed and func is called with 4 bytes of arguments.  The
 * caller emits the code that uses the result in eax.  str is the helper
 * name for tracing.
 */
static void load(UINT32 code,char *str,funcptr func)
{
	enum { ARG_BYTES = 4 };

	push_addr(code);
	call(str,func,ARG_BYTES);
}

/*
 * Emits code for a shift by a constant amount (SLL/SRL/SRA):
 * rd = rt <shift> shamt.
 *
 *   code  instruction word; rt, rd and the shift amount (bits 6..10) are
 *         decoded from it
 *   str   mnemonic, used only for tracing
 *   op_r  ModRM byte of the "c1 /r eax,imm8" form
 *   op_m  ModRM byte of the "c1 /r [esi+disp8],imm8" form
 *
 * Encodings used:
 *	8b 46 xx    mov eax,[esi+xx]
 *	c1 e0 xx    shl eax,xx
 *	c1 e8 xx    shr eax,xx
 *	c1 f8 xx    sar eax,xx
 *	c1 66 xx yy shl [esi+xx],yy
 *	c1 6e xx yy shr [esi+xx],yy
 *	c1 7e xx yy sar [esi+xx],yy
 *	89 46 xx    mov [esi+xx],eax
 *
 * Nothing is emitted when rd is register 0.  A shift amount of 0 is a plain
 * register move and is emitted as such; it is a no-op only when rd == rt.
 */
void sxx(UINT32 code,char *str,int op_r,int op_m)
{
	int shamt = (code>>6)&31;

	if (!rd) return;

	if (!shamt) {
		/* "sll rd,rt,0" still has to copy rt into rd. */
		if (rd!=rt) {
			load_ax(rt);
			store_ax(rd);
		}
		return;
	}

	if (rd==rt) {
		GENCODE4(0xc1,op_m,rd,shamt);
		PRINTF("%s dword [esi+%d],%d\n",str,(int)rd,shamt);
		/*if (rd==lastax)*/ lastax = -1;
	} else {
		load_ax(rt);
		GENCODE3(0xc1,op_r,shamt);
		PRINTF("%s eax,%d\n",str,shamt);
		store_ax(rd);
	}
}

/*
 * Emits code for a shift by a register amount (SLLV/SRLV/SRAV):
 * rd = rt <shift> rs.
 *
 *   code  instruction word; rs, rt and rd are decoded from it
 *   str   mnemonic, used only for tracing
 *   op_r  ModRM byte of the "d3 /r eax,cl" form
 *   op_m  ModRM byte of the "d3 /r [esi+disp8],cl" form
 *
 * Encodings used:
 *	8b 46 xx mov eax,[esi+xx]
 *	8b 4e xx mov ecx,[esi+xx]
 *	d3 e0    shl eax,cl
 *	d3 e8    shr eax,cl
 *	d3 f8    sar eax,cl
 *	d3 66 xx shl [esi+xx],cl
 *	d3 6e xx shr [esi+xx],cl
 *	d3 7e xx sar [esi+xx],cl
 *	89 46 xx mov [esi+xx],eax
 *
 * Nothing is emitted when rd is register 0.  When the count register is
 * register 0 the shift amount is 0, so the instruction is a plain move and
 * is emitted as such; it is a no-op only when rd == rt.
 */
void sxxv(UINT32 code,char *str,int op_r,int op_m)
{
	if (!rd) return;

	if (!rs) {
		/* Shifting by register 0 still has to copy rt into rd. */
		if (rd!=rt) {
			load_ax(rt);
			store_ax(rd);
		}
		return;
	}

	GENCODE3(0x8b,0x4e,rs);
	PRINTF("mov ecx,[esi+%d]\n",(int)rs);
	if (rd==rt) {
		GENCODE3(0xd3,op_m,rd);
		PRINTF("%s dword [esi+%d],cl\n",str,(int)rd);
		/*if (rd==lastax)*/ lastax = -1;
	} else {
		load_ax(rt);
		GENCODE2(0xd3,op_r);
		PRINTF("%s eax,cl\n",str);
		store_ax(rd);
	}
}

/*
 * Emits a call to a COP2 helper, passing the address esi+idxCPR2 as its only
 * argument.  Nothing is emitted when func is null.
 */
static void cop2gen(cop2func func)
{
	if (func) {
		GENCODE2i(0x8d,0x86,idxCPR2);
		PRINTF("lea eax,[esi+CPR2]\n");
		push_ax();
		call("cop2func",(funcptr)func,4);
	}
}

/*
 * Compiles the instruction at PSX address Pc into x86 code at pcptr.
 *
 * Returns the address of the next instruction to compile.  That is Pc+4,
 * or Pc+8 when the instruction was a branch/jump whose delay slot was
 * compiled with it, or when a LUI was fused with the instruction after it.
 * endflg is set when the block must end here (J, JR, an always-taken BEQ, or
 * a hit on already compiled code).
 *
 * Outside a delay slot:
 *   - if Pc has been compiled already, a jump to the existing code is
 *     emitted and the block ends;
 *   - otherwise the host address for Pc is recorded in the realpc table.
 * Inside a delay slot the instruction is always compiled inline.  Jumping to
 * an existing block there would leave the pending branch (and whatever it
 * has pushed on the host stack) behind.
 *
 * The first call ever also emits "mov esi,reg" and sets SET_ESI; every
 * emitted memory operand is relative to esi.
 *
 * NOTE(review): idiv/div raise an x86 divide error for a zero divisor; no
 * guard is emitted.
 * NOTE(review): LBU/LHU store eax as returned by read8/read16 without an
 * explicit zero extension; that is only right if the helpers return a full
 * 32-bit value -- confirm.
 */
int compile1(UINT32 Pc)
{
	UINT32 code;

	if (!in_slot && COMPILED(Pc)) {
		PRINTF("JUMP %x %x %x\n",(int)Pc,(int)pcptr,(int)COMPILED(Pc));
		GENCODE1i(0xe9,COMPILED(Pc)-(UINT32)(pcptr+5));
		PRINTF("jmp %p\n",(void *)COMPILED(Pc));
		Pc+=4;
		endflg = 1;
		return Pc;
	}

	if (!in_slot) COMPILED(Pc) = (long)pcptr;
	if (!(FPSE_Flags & SET_ESI)) {
		GENCODE1i(0xBE,(int)&reg);
		PRINTF("mov esi,reg\n");
		FPSE_Flags |= SET_ESI;
	}

	code = FETCH(Pc);

	DISASM(code,Pc);

	Pc+=4;

	/* An all-zero word is a nop: nothing to emit. */
	if (!code) return Pc;

	switch(code>>26) {

	case SPECIAL:
		switch(code&63) {
		/* Shifts by a constant; see sxx() for the encodings. */
		case SLL:	sxx(code,"shl",0xe0,0x66); break;
		case SRL:	sxx(code,"shr",0xe8,0x6e); break;
		case SRA:	sxx(code,"sar",0xf8,0x7e); break;

		/* Shifts by a register; see sxxv() for the encodings. */
		case SLLV:	sxxv(code,"shl",0xe0,0x66); break;
		case SRLV:	sxxv(code,"shr",0xe8,0x6e); break;
		case SRAV:	sxxv(code,"sar",0xf8,0x7e); break;

		/* Three-register ALU operations; see alu_r() for the encodings. */
		case ADD:	alu_r(code,"add",0x03,0x01); break;
		case ADDU:	alu_r(code,"add",0x03,0x01); break;
		case SUBU:	subu_r(code); break;
		case SUB:	subu_r(code); break;
		case AND:	and_r(code); break;
		case OR:	alu_r(code,"or" ,0x0b,0x09); break;
		case XOR:	alu_r(code,"xor",0x33,0x31); break;
		case NOR:	nor_r(code); break;

		case SLT:
			if (rd==0) break;
			cmp_r(code);
			GENCODE3(0xf,0x9c,0xc0);
			PRINTF("setl al\n");
			/* Only al was written; mask away the rest of eax. */
			GENCODE1i(0x25,1);
			PRINTF("and eax,1\n");
			store_ax(rd);
			break;
		case SLTU:
			if (rd==0) break;
			cmp_r(code);
			GENCODE3(0xf,0x92,0xc0);
			PRINTF("setb al\n");
			GENCODE1i(0x25,1);
			PRINTF("and eax,1\n");
			store_ax(rd);
			break;
		case DIV:
			load_ax(rs);
			GENCODE1(0x99);
			PRINTF("cdq\n");
			GENCODE3(0xf7,0x7e,rt);
			PRINTF("idiv dword [esi+%d]\n",(int)rt);
			store2_ax(idxrLO);
			store2_dx(idxrHI);
			lastax = -1;
			break;
		case DIVU:
			load_ax(rs);
			GENCODE2(0x31,0xd2);
			PRINTF("xor edx,edx\n");
			GENCODE3(0xf7,0x76,rt);
			PRINTF("div dword [esi+%d]\n",(int)rt);
			store2_ax(idxrLO);
			store2_dx(idxrHI);
			lastax = -1;
			break;
		case MULT:
			load_ax(rs);
			GENCODE3(0xf7,0x6e,rt);
			PRINTF("imul dword [esi+%d]\n",(int)rt);
			store2_ax(idxrLO);
			store2_dx(idxrHI);
			lastax = -1;
			break;
		case MULTU:
			load_ax(rs);
			GENCODE3(0xf7,0x66,rt);
			PRINTF("mul dword [esi+%d]\n",(int)rt);
			store2_ax(idxrLO);
			store2_dx(idxrHI);
			lastax = -1;
			break;
		case MFHI: if (rd) { load2_ax(idxrHI); store_ax(rd); } break;
		case MFLO: if (rd) { load2_ax(idxrLO); store_ax(rd); } break;
		case MTHI: load_ax(rs); store2_ax(idxrHI); break;
		case MTLO: load_ax(rs); store2_ax(idxrLO); break;

		case JALR:
			/*
			 * The target is pushed before the link is written so that
			 * rs == rd still jumps to the old value.  The link goes to
			 * rd, the register the instruction names, not to a fixed r31.
			 */
			push_r(rs);
			if (rd) store_imm(rd,Pc+4);
			compile2(Pc);
			jump();
			Pc+=4;
			break;
		case JR	:
			endflg = 1;
			push_r(rs);
			compile2(Pc);
			jump();
			Pc+=4;
			break;
		case BREAK:
			push_imm(Pc-4);
			push_imm(E_Bp*4);
			call("exception",(funcptr)exception,8);
			GENCODE2(0xff,0xe0);
			PRINTF("jmp eax\n");
			break;
		case SYSCALL:
			push_imm(Pc-4);
			push_imm(E_Sys*4);
			call("exception",(funcptr)exception,8);
			GENCODE2(0xff,0xe0);
			PRINTF("jmp eax\n");
			break;
		default:	NOT_IMPREMENT("special"); break;
		}
		break;

	case BCOND:
		switch(rtno){
		case BLTZAL:store_imm(31*4,Pc+4);
			/* fallthrough: the link is written unconditionally */
		case BLTZ:	cmp_i(code,0); bxx(code,Pc,"jge",0x7d); Pc+=4; break;
		case BGEZAL:store_imm(31*4,Pc+4);
			/* fallthrough: the link is written unconditionally */
		case BGEZ:	cmp_i(code,0); bxx(code,Pc,"jl",0x7c); Pc+=4; break;
		default:	NOT_IMPREMENT("bcond"); break;
		}
		break;

	/* BRANCH/JUMP */
	case J:
		endflg = 1;
		compile2(Pc);
		jump_imm((Pc&0xf0000000)|((code&0x03ffffff)<<2));
		Pc+=4;
		break;
	case JAL:
		store_imm(31*4,Pc+4);
		compile2(Pc);
		jump_imm((Pc&0xf0000000)|((code&0x03ffffff)<<2));
		Pc+=4;
		break;
	case BNE:	cmp_r(code); bxx(code,Pc,"je",0x74); Pc+=4; break;
	case BEQ:
		if (rs==0 && rt==0) {
			/* Comparing register 0 with itself: always taken. */
			compile2(Pc);
			jump_imm(Pc+immS*4);
			endflg = 1;
		} else {
			cmp_r(code); bxx(code,Pc,"jne",0x75);
		}
		Pc+=4;
		break;
	case BLEZ:	cmp_i(code,0); bxx(code,Pc,"jg",0x7f); Pc+=4; break;
	case BGTZ:	cmp_i(code,0); bxx(code,Pc,"jle",0x7e); Pc+=4; break;

	/* ALU with immediate; see alu_i() and add_i() for the encodings. */
	case ADDI:	add_i(code); break;
	case ADDIU:	add_i(code); break;
	case ANDI:	alu_i(code,"and",0x25,0x66); break;
	case ORI:	alu_i(code,"or" ,0x0d,0x4e); break;
	case XORI:	alu_i(code,"xor",0x35,0x76); break;
	case LUI:
		{
		UINT32 imm = code<<16;
		if (!in_slot) {
			/*
			 * Fuse "lui rt,hi" with a following ori/xori/addi/addiu whose
			 * source and destination are both rt: the pair becomes one
			 * constant store and the second instruction is skipped.
			 */
			UINT32 code2 = FETCH(Pc);
			if (((code2>>16)&0x3ff) == ((rtno<<5)|rtno)) {
				switch(code2>>26) {
				case ORI:
				case XORI:
					/* The low half of imm is 0, so or and xor agree. */
					DISASM(code2,Pc);
					Pc+=4;
					imm |= code2&0xffff;
					break;
				case ADDI:
				case ADDIU:
					DISASM(code2,Pc);
					Pc+=4;
					imm += (INT16)code2;
					break;
				}
			}
		}
		/* Register 0 must never be written. */
		if (rt) store_imm(rt,imm);
		}
		break;
	case SLTI:
		cmp_i(code,immS);
		GENCODE3(0xf,0x9c,0xc0);
		PRINTF("setl al\n");
		GENCODE1i(0x25,1);
		PRINTF("and eax,1\n");
		store_ax(rt);
		break;
	case SLTIU:
		cmp_i(code,immS);
		GENCODE3(0xf,0x92,0xc0);
		PRINTF("setb al\n");
		GENCODE1i(0x25,1);
		PRINTF("and eax,1\n");
		store_ax(rt);
		break;

	/* COP */
	case COP0:
		switch(rsno) {
		case MFC: load2_ax(idxCPR0+rd); store_ax(rt); break; /* MFC */
		case CFC: load2_ax(idxCCR0+rd); store_ax(rt); break; /* CFC */
		case MTC: load_ax(rt); store2_ax(idxCPR0+rd); break; /* MTC */
		case CTC: load_ax(rt); store2_ax(idxCCR0+rd); break; /* CTC */
		case 16:
			if ((code&31)==16) { /* RFE */
				call("rfe",(funcptr)rfe,0);
				break;
			}
			/* fallthrough: any other function code is unsupported */
		default: NOT_IMPREMENT("COP0");	break;
		}
		break;
	case COP2:
		switch(rsno) {
		case MFC:
			cop2gen(cop2readfuncptr(rdno));
			load2_ax(idxCPR2+rd); store_ax(rt);
			break; /* MFC */
		case CFC:
			cop2gen(cop2readfuncptr(rdno+32));
			load2_ax(idxCCR2+rd); store_ax(rt);
			break; /* CFC */
		case MTC:
			load_ax(rt); store2_ax(idxCPR2+rd);
			cop2gen(cop2writefuncptr(rdno));
			break; /* MTC */
		case CTC:
			load_ax(rt); store2_ax(idxCCR2+rd);
			cop2gen(cop2writefuncptr(rdno+32));
			break; /* CTC */
		default:
			cop2gen(cop2funcptr(code&0x1ffffff));
			break;
		}
		break;

	/*
	 * LOAD/STORE: every access goes through a runtime helper.  store_pc()
	 * first records the address of the accessing instruction.
	 */
	case SB:
		store_pc(Pc);
		store(code,"write8",(funcptr)write8); break;
	case SH:
		store_pc(Pc);
		store(code,"write16",(funcptr)write16); break;
	case SW:
		store_pc(Pc);
		store(code,"write32",(funcptr)write32); break;
	case SWL:	store(code,"swl",(funcptr)swl); break;
	case SWR:	store(code,"swr",(funcptr)swr); break;

	case LB:
		store_pc(Pc);
		load(code,"read8",(funcptr)read8);
		if (rt) {
			GENCODE3(0x0f,0xbe,0xc0); PRINTF("movsx eax,al\n");
			store_ax(rt);
		}
		break;
	case LBU:
		store_pc(Pc);
		load(code,"read8",(funcptr)read8);
		store_ax(rt);
		break;
	case LH:
		store_pc(Pc);
		load(code,"read16",(funcptr)read16);
		if (rt) {
			GENCODE3(0x0f,0xbf,0xc0); PRINTF("movsx eax,ax\n");
			store_ax(rt);
		}
		break;
	case LHU:
		store_pc(Pc);
		load(code,"read16",(funcptr)read16);
		store_ax(rt);
		break;

	case LW:
		store_pc(Pc);
		load(code,"read32",(funcptr)read32);
		store_ax(rt);
		break;
	case LWL:
		/* The helper merges memory into the current value of rt. */
		push_r(rt);
		push_addr(code);
		call("lwl",(funcptr)lwl,8);
		store_ax(rt);
		break;
	case LWR:
		push_r(rt);
		push_addr(code);
		call("lwr",(funcptr)lwr,8);
		store_ax(rt);
		break;
	case SWC0:
		push2_r(rt+idxCPR0);
		push_addr(code);
		call("write32",(funcptr)write32,8);
		lastax = -1;
		break;
	case LWC0:
		load(code,"read32",(funcptr)read32);
		store2_ax(rt+idxCPR0);
		break;
	case SWC2:
		cop2gen(cop2readfuncptr(rtno));
		push2_r(rt+idxCPR2);
		push_addr(code);
		call("write32",(funcptr)write32,8);
		lastax = -1;
		break;
	case LWC2:
		load(code,"read32",(funcptr)read32);
		store2_ax(rt+idxCPR2);
		cop2gen(cop2writefuncptr(rtno));
		break;
	default:	NOT_IMPREMENT("??"); break;
	}
	return Pc;
}

/*
 * Disabled older entry points, kept for reference only.  They use pcbase and
 * isimm, neither of which is defined in the visible code; jmppc() is the
 * live path that drives compile1().
 */
#if 0
int compiled(UINT32 Pc)
{
	return COMPILED(Pc);
}

void compile(UINT32 Pc)
{
	if (COMPILED(Pc)) return;
	pcbase = baseaddr(Pc);
	endflg = 0;
	memset(isimm,0,sizeof(isimm));
	while(!endflg) {
		Pc = compile1(Pc);
	}
}

#endif
