/*-----------------------------------------------------------------------------
	[VDC.h]
		Implements the VDC (video display controller).

	Copyright (C) 2004 Ki

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.
-----------------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include "CPU.h"
#include "VDC.h"
#include "VCE.h"

#include "IntCtrl.h"


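/*
	A raster-compare interrupt can only occur for 64 <= RCR <= 325 (= 64+261).
	Subtracting 64 from every term gives 0 <= RCR-64 <= 261,
	which means that one frame consists of 262 lines.

	The number of CPU cycles per line is therefore

		21.47727 * 1000000 / 3 / 60 / 262 ~= 455.4128

	.
*/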

// Cycles per scanline used by VDC_AdvanceClock (455.41... rounded up to 456).
#define VDC_CYCLESPERLINE	456		//.41284987277353689567430025445
#define MAX_SCANLINE		262		// _ScanLine wraps to 0 when it reaches this value
#define NUM_SPRITES			64		// number of 4-word entries scanned in _SpRam
#define NUM_SP_PERLINE		32		// sprites drawn on one line before the overflow flag is raised

// Shorthand for the 16-bit VDC registers stored in _Regs[].
#define MAWR	_Regs[VDC_MAWR]
#define MARR	_Regs[VDC_MARR]
#define VWR		_Regs[VDC_VWR]
#define VRR		_Regs[VDC_VRR]
#define CR		_Regs[VDC_CR]
#define RCR		_Regs[VDC_RCR]
#define BXR		_Regs[VDC_BXR]
#define BYR		_Regs[VDC_BYR]
#define MWR		_Regs[VDC_MWR]
#define HSR		_Regs[VDC_HSR]
#define HDR		_Regs[VDC_HDR]
#define VPR		_Regs[VDC_VPR]
#define VDW		_Regs[VDC_VDW]
#define VCR		_Regs[VDC_VCR]
#define DCR		_Regs[VDC_DCR]
#define SOUR	_Regs[VDC_SOUR]
#define DESR	_Regs[VDC_DESR]
#define LENR	_Regs[VDC_LENR]
#define SATB	_Regs[VDC_SATB]

// Bit fields extracted from the sync/display registers.
#define HSW		(HSR & 0x1f)		// h-sync(?)
#define VSW		(VPR & 0x1f)		// v-sync(?)
#define HDS		((HSR & 0x7f00) >> 8)	// bits 8-14 of HSR
#define HDE		((HDR & 0x7f00) >> 8)	// bits 8-14 of HDR
#define VDS		(VPR >> 8)				// high byte of VPR

// _DisplayCounter values bounding the visible area:
// lines in [DISPLAY_STARTLINE, DISPLAY_ENDLINE) are rendered, all others are blanked.
#define DISPLAY_STARTLINE	(VSW+1+VDS+2)
#define DISPLAY_ENDLINE		(DISPLAY_STARTLINE+VDW+1)

// Field accessors for sprite attribute entry I; each entry occupies four
// consecutive 16-bit words of _SpRam:
//   word 0: Y (10 bits)   word 1: X (10 bits)   word 2: pattern index (10 bits)
//   word 3: attribute bits (palette, priority, size and flip flags)
// The argument is parenthesized so that expressions such as SP_Y(n+1)
// index the intended entry.
#define SP_INDEX(I)			(_SpRam[(I)*4+2] & 0x3ff)
#define SP_UCOLOR(I)		(_SpRam[(I)*4+3] & 0xf)
#define SP_Y(I)				(_SpRam[(I)*4] & 0x3ff)
#define SP_X(I)				(_SpRam[(I)*4+1] & 0x3ff)
#define SP_PRIORITY(I)		((_SpRam[(I)*4+3] & 0x80) >> 7)
#define SP_WIDTH(I)			((_SpRam[(I)*4+3] & 0x100) >> 8)
#define SP_HEIGHT(I)		((_SpRam[(I)*4+3] & 0x3000) >> 12)
#define SP_HFLIP(I)			((_SpRam[(I)*4+3] & 0x800) >> 11)
#define SP_VFLIP(I)			((_SpRam[(I)*4+3] & 0x8000) >> 15)

static Uint16			_Regs[32];				// VDC registers, indexed by _AR
static Uint32			_AR;					// currently selected register number (0-31)

static Uint8			_VideoRam[65536];		// VRAM 64kB 
static Uint16*			_pwVideoRam;			// word view of _VideoRam (set up in VDC_Init)

static Sint32			_ScreenW;				// display width in pixels
static Sint32			_ScreenH;				// display height in lines
static Uint32			_BGH;					// BG map height in tiles (derived from MWR)
static Uint32			_BGW;					// BG map width in tiles (derived from MWR)

static Uint16			_VdcAddrInc = 1;		// added to MAWR/MARR after each VRAM word access
static Uint8			_VdcStatus;				// status flags returned by VDC_Read(0)

static Uint32 			_PatternLUT[256][2];	// bit-plane byte -> 8 pixels, one byte per pixel

static Uint8			_LineBuf[16 + 1024 + 16];		// scanline buffer (_ScreenW <= 1024)
static Uint8			_SpBgBuf[16 + 1024 + 16];		// sp/bg selection / palette selection buffer

static Sint32			_ScanLine;
static Sint32			_RasterCounter;
static Sint32			_DisplayCounter;

static Sint32			_LineCounter;
static Sint32			_LineCountMask = 0xff;

static BOOL				_bUpdateSATB;					// a SATB copy was requested by a write to the SATB register
static Uint32			_SpDmaCount;					// set to 1 when the SATB transfer starts; the transfer ends when it reaches 4
static BOOL				_bDmaIrqRequested = FALSE;
static BOOL				_bBurstMode = FALSE;

//static BOOL				_bVblankIntPending = FALSE;

// Decoded tile caches: one byte per pixel (0-15).  Bit 0x20 of the first
// byte of an entry marks the entry as stale.
static Uint8			_BgTileCache[2048][8*8];
static Uint8			_SpTileCache[512][16*16];

static Uint16			_SpRam[64*4];					// sprite attribute copy filled by update_satb()

static BOOL				_bSpOver;						// set when more than NUM_SP_PERLINE sprites hit one line

static Uint32			_ClockCounter;					// cycles accumulated towards the next scanline


/*-----------------------------------------------------------------------------
	[invalidate_tile_cache]
		Flags every BG and sprite tile cache entry as stale (0x20 in the
		first byte) so that it is decoded again before its next use.
-----------------------------------------------------------------------------*/
static
inline
void
invalidate_tile_cache()
{
	unsigned	tile;

	tile = 2048;
	while (tile-- > 0)
		_BgTileCache[tile][0] = 0x20;

	tile = 512;
	while (tile-- > 0)
		_SpTileCache[tile][0] = 0x20;
}


/*-----------------------------------------------------------------------------
	[fill_tile_cache]
		Decodes one 8-pixel row of a 4-bit-plane pattern into eight bytes
		(one byte per pixel, value 0-15) stored at pPixel.

	pPixel   destination; exactly 8 bytes are written
	D0..D3   the row's byte for bit planes 0..3 (each must be 0-255);
	         plane n supplies bit n of every pixel

	Because the decoded values never exceed 0x0f, writing the first row of a
	cache entry also removes the 0x20 "stale" marker kept in its first byte.
-----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(USE_INLINE_ASM)
static
inline
void
fill_tile_cache(
	Uint8*		pPixel,
	Uint32		D0,
	Uint32		D1,
	Uint32		D2,
	Uint32		D3)
{	
	__asm__ __volatile__ (
		"movq		(%1),  %%mm0	\n\t"
		"movq		(%2),  %%mm1	\n\t"
		"movq		(%3),  %%mm2	\n\t"
		"movq		(%4),  %%mm3	\n\t"

		"psllq		$1,    %%mm1	\n\t"
		"psllq		$2,    %%mm2	\n\t"
		"psllq		$3,    %%mm3	\n\t"

		"por		%%mm1, %%mm0	\n\t"
		"por		%%mm2, %%mm3	\n\t"
		"por		%%mm3, %%mm0	\n\t"

		"movq		%%mm0, (%0)		\n\t"
		"emms						\n\t"
		: 
		: "r" (pPixel), "r" (&_PatternLUT[D0][0]), "r" (&_PatternLUT[D1][0]),
		  "r" (&_PatternLUT[D2][0]), "r" (&_PatternLUT[D3][0])
		// The asm stores 8 bytes through pPixel; without this clobber the
		// compiler may assume the tile cache is unchanged across the asm.
		: "memory"
	);
}
#else
static
inline
void
fill_tile_cache(
	Uint8*		pPixel,
	Uint32		D0,
	Uint32		D1,
	Uint32		D2,
	Uint32		D3)
{	
	// Each LUT word holds four pixels with the plane bit in bit 0 of every
	// byte; shifting by the plane number and OR-ing builds the 4-bit pixels.
	const Uint32	pixel0_3 =  _PatternLUT[D0][0]
							 | (_PatternLUT[D1][0] << 1)
							 | (_PatternLUT[D2][0] << 2)
							 | (_PatternLUT[D3][0] << 3);
	const Uint32	pixel4_7 =  _PatternLUT[D0][1]
							 | (_PatternLUT[D1][1] << 1)
							 | (_PatternLUT[D2][1] << 2)
							 | (_PatternLUT[D3][1] << 3);

	// memcpy rather than a Uint32* cast: the destination is declared as a
	// byte array, so storing through a Uint32 lvalue is undefined behaviour.
	// The bytes written are the same as the native-order word store.
	memcpy(pPixel,     &pixel0_3, sizeof(pixel0_3));
	memcpy(pPixel + 4, &pixel4_7, sizeof(pixel4_7));
}
#endif


/*-----------------------------------------------------------------------------
	[update_bg_tile_cache]
		Decodes a BG pattern into _BgTileCache if the cache entry is
		flagged stale; does nothing otherwise.

	cg_addr   pattern number (only the low 11 bits are used)
-----------------------------------------------------------------------------*/
static
void
update_bg_tile_cache(
	Uint32		cg_addr)
{
	Uint8*			pCache;
	const Uint8*	pPlane01;		// bytes for D0 / D1
	const Uint8*	pPlane23;		// bytes for D2 / D3
	int				row;

	cg_addr &= 2047;
	pCache = &_BgTileCache[cg_addr][0];

	// 0x20 in the first byte means "pattern was modified": only then is
	// the entry decoded again.
	if (!(pCache[0] & 0x20))
		return;

	pPlane01 = &_VideoRam[cg_addr*32];
	pPlane23 = pPlane01 + 16;

	// each iteration decodes the 8 pixels of one row
	for (row = 0; row < 8; row++)
	{
		fill_tile_cache(pCache + row*8,
						pPlane01[row*2], pPlane01[row*2 + 1],
						pPlane23[row*2], pPlane23[row*2 + 1]);
	}
}


/*-----------------------------------------------------------------------------
	[update_sp_tile_cache]
		Decodes a 16x16 sprite pattern into _SpTileCache if the cache
		entry is flagged stale; does nothing otherwise.

	pc   pattern number (only the low 9 bits are used)
-----------------------------------------------------------------------------*/
static
void
update_sp_tile_cache(
	Uint32		pc)
{
	Uint8*			pOut;
	const Uint8*	pVram;
	int				row;

	pc &= 0x1ff;
	pOut = &_SpTileCache[pc][0];

	if (!(pOut[0] & 0x20))
		return;

	pVram = &_VideoRam[pc*2*64];

	// each iteration decodes the 16 pixels of one row: the odd byte of each
	// plane word gives pixels 0-7, the even byte gives pixels 8-15
	for (row = 0; row < 16; row++)
	{
		const Uint8*	pSrc = pVram + row*2;

		fill_tile_cache(pOut,     pSrc[1], pSrc[33], pSrc[65], pSrc[97]);
		fill_tile_cache(pOut + 8, pSrc[0], pSrc[32], pSrc[64], pSrc[96]);

		pOut += 16;
	}
}


/*-----------------------------------------------------------------------------
	[write_vram]
		Stores one word in VRAM and keeps the tile caches coherent.

	addr   VRAM word address; writes at 0x8000 and above are ignored
	data   word to store

	When the stored value actually changes, both the BG tile and the sprite
	tile covering the address are flagged stale.  The word may not be pattern
	data at all, but flagging it anyway is harmless.
-----------------------------------------------------------------------------*/
static
inline
void
write_vram(
	Uint16		addr,
	Uint16		data)
{
	if (addr >= 0x8000)
		return;

	if (_pwVideoRam[addr] == data)
		return;			// unchanged: caches stay valid

	_pwVideoRam[addr] = data;

	_BgTileCache[addr >> 4][0] |= 0x20;		// 16 words per BG tile
	_SpTileCache[addr >> 6][0] |= 0x20;		// 64 words per sprite tile
}


/*-----------------------------------------------------------------------------
	[update_satb]
		Copies the sprite attribute table from VRAM into _SpRam.

	addr   VRAM word address of the table; up to 256 words are copied and
	       the copy stops early when the address reaches 0x8000
-----------------------------------------------------------------------------*/
static
inline
void
update_satb(
	Uint16		addr)
{
	Uint32		n;

	for (n = 0; n < 256 && addr < 0x8000; n++, addr++)
	{
		_SpRam[n] = _pwVideoRam[addr];
	}
}


/*-----------------------------------------------------------------------------
	[do_vram_vram_dma]
		VRAM-VRAM Ԃ̂cl`]sȂ܂B

	[NOTE]
		PoCg]̂ɉbotTCN͕̂sB
		VDC_STAT_DV  (DCR & 2) == 0 łZbĝǂ͖mFB
		(Zbg)
-----------------------------------------------------------------------------*/
static
void
do_vram_vram_dma()
{
	Uint32		srcInc;
	Uint32		dstInc;

	srcInc = (DCR & 4) ? -1 : 1;
	dstInc = (DCR & 8) ? -1 : 1;

//	printf("vram_vram_transfer: src=%04x dst=%04x len=%04x irq=%d\n",
//		SOUR, DESR, LENR, DCR&2);

	do
	{
		write_vram(DESR, _pwVideoRam[SOUR]);

		SOUR += srcInc;
		DESR += dstInc;
	} while (LENR--);

	_VdcStatus |= VDC_STAT_DV;

	if (DCR & 2)
	{
		_bDmaIrqRequested = TRUE;
	}
}


/*-----------------------------------------------------------------------------
	[VDC_Write]
		ucbւ̏ݓLq܂B
-----------------------------------------------------------------------------*/
void
VDC_Write(
	Uint32		regNum,
	Uint8		data)
{
//	printf("VDCWR: %d, %02X\n", regNum, data);
	switch(regNum)
	{
		case 0: // VDC ̃WX^ԍw肷B
			_AR = data & 0x1f;	// AR = Address Register 
			return;

		case 1: // AR ɏʃrbg͂ȂB 
			return;

		case 2: // ʃoCg̏ 
			_Regs[_AR] = (_Regs[_AR] & ~0xff) | data;

			// WIȏ݈ȊOɓʂȏKvȏꍇ͈ȉɋLq 
			switch (_AR)
			{
				case VDC_BYR:  // BG Y scroll
					if (_DisplayCounter >= DISPLAY_STARTLINE && _DisplayCounter < DISPLAY_ENDLINE)
						_LineCounter = BYR + 1;
					else
						_LineCounter = BYR;

					_LineCounter &= _LineCountMask;
					return;

				case VDC_MWR:  // Memory width
				{
					static const Uint8 bgw[] = { 32, 64, 128, 128 };

					_BGH = (data & 0x40) ? 64 : 32;
					_BGW = bgw[(data >> 4) & 3];
					_LineCountMask = (data & 0x40) ? 0x1ff : 0xff;

					_LineCounter &= _LineCountMask;

					invalidate_tile_cache();
					return;
				}

				case VDC_HDR:  // horizontal display
					_ScreenW = ((data & 0x7f) + 1) * 8;
					return;
			}
			return;

		case 3: // ʃoCg̏
			_Regs[_AR] = (_Regs[_AR] & ~0xff00) | (data << 8);

			// WIȏ݈ȊOɓʂȏKvȏꍇ͈ȉɋLq 
			switch (_AR)
			{
				case VDC_VWR:  // vram data write
//					if (_DisplayCounter >= DISPLAY_STARTLINE && _DisplayCounter < DISPLAY_ENDLINE && !_bBurstMode)
//						write_vram(MAWR + (BYR/8)*_BGW + (BXR/8), VWR);
//					else
						write_vram(MAWR, VWR);
					MAWR += _VdcAddrInc;
					return;

				case VDC_CR:  // control
				{
					static const Uint8 incsize[]={1, 32, 64, 128};
					_VdcAddrInc = incsize[(data >> 3) & 3];
//					printf("CR = %04X\n", CR);
					return;
				}

				case VDC_RCR:  // raster detection
					RCR &= 0x3ff;
//					printf("RCR = %04X\n", RCR);
					return;

				case VDC_BXR:  // bg x scroll
//					printf("BXR = %04X\n", BXR);
					BXR &= 0x3ff;
					return;

				case VDC_BYR:  // bg y scroll
					BYR &= 0x1ff;

					if (_DisplayCounter >= DISPLAY_STARTLINE && _DisplayCounter < DISPLAY_ENDLINE)
						_LineCounter = BYR + 1;
					else
						_LineCounter = BYR;

					_LineCounter &= _LineCountMask;
//					printf("BYR = %04X\n", BYR);
					return;

				case VDC_VDW:  // vertical display width
					_ScreenH = (VDW & 511) + 1;
					return;

				case VDC_LENR:
					do_vram_vram_dma();
					return;

				case VDC_SATB:
					_bUpdateSATB = TRUE;
					return;
			}
	}
}


/*-----------------------------------------------------------------------------
	[VDC_Read]
		Handles a CPU read from one of the VDC ports.

	regNum   0: status; returns the flags as they were before the read, then
	            acknowledges (clears) at most ONE pending flag, in the order
	            CR, OR, RR, DS, DV, VD.  IRQ1 is cancelled once no flag is
	            left.
	         2: low byte of the VRAM word at MARR when VRR is selected
	         3: high byte of the VRAM word at MARR when VRR is selected;
	            MARR is then advanced by the address increment
	Everything else reads as 0 (deliberately not 0xff), including VRAM
	addresses of 0x8000 and above.

	[note] (original author)
		The VRAM address increment selected by CR D11-D12 applies to both
		MAWR and MARR (confirmed 01.10.2004).
-----------------------------------------------------------------------------*/
Uint8
VDC_Read(
	Uint32		regNum)
{
	if (regNum == 0)
	{
		const Uint32	ackOrder[6] =
		{
			VDC_STAT_CR, VDC_STAT_OR, VDC_STAT_RR,
			VDC_STAT_DS, VDC_STAT_DV, VDC_STAT_VD
		};
		const Uint8		status = _VdcStatus;
		int				k;

		for (k = 0; k < 6; k++)
		{
			if (_VdcStatus & ackOrder[k])
			{
				_VdcStatus &= ~ackOrder[k];
				break;
			}
		}

		if (_VdcStatus == 0)
			INTCTRL_Cancel(INTCTRL_IRQ1);

		return status;
	}

	if (_AR != VDC_VRR)
		return 0;

	if (regNum == 2)
	{
		return (MARR < 0x8000) ? (Uint8)_pwVideoRam[MARR] : 0;
	}

	if (regNum == 3)
	{
		const Uint8		high = (MARR < 0x8000) ? (Uint8)(_pwVideoRam[MARR] >> 8) : 0;

		MARR += _VdcAddrInc;
		return high;
	}

	return 0;
}


/*-----------------------------------------------------------------------------
** [createPatternLUT]
**	Builds the lookup table that expands one bit-plane byte into eight
**	bytes, one per pixel.
**
**	For an input byte with bits D7..D0 the eight output bytes are
**
**	byte	00 01 02 03 04 05 06 07
**	data	D7 D6 D5 D4 D3 D2 D1 D0   (each bit moved to bit 0 of its byte)
**
**	Bytes 00-03 are packed into _PatternLUT[i][0] and bytes 04-07 into
**	_PatternLUT[i][1], lowest byte first.
**
**	[2004.3]
**	Only this plane-0 table is built; the tables for planes 1-3 are the same
**	data shifted left by 1-3 bits, so users shift the entries instead.
**---------------------------------------------------------------------------*/
static
void
createPatternLUT()
{
	Uint32	value;

	for (value = 0; value < 256; value++)
	{
		Uint32	left  = 0;		// pixels 0-3 (bits 7..4)
		Uint32	right = 0;		// pixels 4-7 (bits 3..0)
		Uint32	px;

		for (px = 0; px < 4; px++)
		{
			left  |= ((value >> (7 - px)) & 1) << (8 * px);
			right |= ((value >> (3 - px)) & 1) << (8 * px);
		}

		_PatternLUT[value][0] = left;
		_PatternLUT[value][1] = right;
	}
}


/*-----------------------------------------------------------------------------
	[VDC_Init]
		ucb܂B
-----------------------------------------------------------------------------*/
Sint32
VDC_Init()
{
	_pwVideoRam = (Uint16*)&_VideoRam[0];

	createPatternLUT();

	_ScreenW = 256;
	_ScreenH = 240;

	VDW = 239;
	VCR = 4;
	VPR = 0x0f02;

	// ͕̏Kv  FIXED 2004.09.11 
	_VdcAddrInc = 1;

	invalidate_tile_cache();

	return 0;
}


/*-----------------------------------------------------------------------------
	[drawBgLine]
		Draws one line of the BG (background).

	[DEV NOTES]

	LN^TCYF 8x8
	LN^WFl[^ɒ`łLN^̍ő吔FSOXU
	a`s͂PUrbg^LN^Œ`B
	D0	- D11 F LN^R[h
	D12 - D15 F bfJ[(pbg̏ʂSrbg)
	[] LN^R[hPQrbgȂ̂ŁCőLN^SOXU

	VRAM  BAT ̈̑傫́C MWR Ō肳̂ƎvB
	Ⴆ MWR  WxH=32x32 Ɛݒ肳ꂽꍇ́CBAT 32x32x2 = 2048 oCg
	ƂȂB܂ VRAM $0000 - $07FF  BAT ̈ƂȂBp^[`
	̈(bfAhX)́CBAT ̈̒($800)n܂B

	[XLCx[Xɂ]
	XLCO`QRXňړ̂Ƃ(mF)B
	^C 8x8 Ȃ̂ŁCXLCɂ^Ĉx̃ItZbǵC

		tileOffsetY = scanline / 8;

	ŋ܂BāCXLCŎw肳ꂽC̕`sȂɂ́C

		BATWord = (VRAM[tileOffsetY*BGW*2] | (VRAM[tileOffsetY*BGW*2+1] << 8))

	̂QoCgǂݏoB̉ʂPQrbgRQŁCbfAhX
	B

		CGAddr = (BATWord & 0xfff) * 32

	sNZ̏ʂSrbg(J[ubN)́CBATWord ̏ʂSrbgŎw肳B

		ColorBlock = (BATWord & 0xf000) >> 8

	ʂSrbgɂ́Cp^[̂ŁCColorBlock͏ʂSrbgɕێĂB
	bff[^́CRQoCg琬C^ĈPCSoCgŒ`B
	܂CPsNZSrbgō\B̏ڍׂ́C

		CG[0] ̂Wrbg: line 0  D0	|  CG[10] ̂Wrbg: line 0  D2
		CG[1] ̂Wrbg: line 0  D1	|  CG[11] ̂Wrbg: line 0  D3
		CG[2] ̂Wrbg: line 1  D0	|  CG[12] ̂Wrbg: line 1  D2
		CG[3] ̂Wrbg: line 1  D1	|  CG[13] ̂Wrbg: line 1  D3
		CG[4] ̂Wrbg: line 2  D0	|  CG[14] ̂Wrbg: line 2  D2
		CG[5] ̂Wrbg: line 2  D1	|  CG[15] ̂Wrbg: line 2  D3
		CG[6] ̂Wrbg: line 3  D0	|  CG[16] ̂Wrbg: line 3  D2
		CG[7] ̂Wrbg: line 3  D1	|  CG[17] ̂Wrbg: line 3  D3
		CG[8] ̂Wrbg: line 4  D0	|  CG[18] ̂Wrbg: line 4  D2
		CG[9] ̂Wrbg: line 4  D1	|  CG[19] ̂Wrbg: line 4  D3
		CG[A] ̂Wrbg: line 5  D0	|  CG[1A] ̂Wrbg: line 5  D2
		CG[B] ̂Wrbg: line 5  D1	|  CG[1B] ̂Wrbg: line 5  D3
		CG[C] ̂Wrbg: line 6  D0	|  CG[1C] ̂Wrbg: line 6  D2
		CG[D] ̂Wrbg: line 6  D1	|  CG[1D] ̂Wrbg: line 6  D3
		CG[E] ̂Wrbg: line 7  D0	|  CG[1E] ̂Wrbg: line 7  D2
		CG[F] ̂Wrbg: line 7  D1	|  CG[1F] ̂Wrbg: line 7  D3

	ƂȂĂBline ͂WsNZō\CWrbg D0 E[̃hbg
	ɂȂB

	02/05/2004: []

	1. 8bpp łȂH

	p^[ɂSrbgAJ[ubNłSrbgWrbgB
	bfJ[̓^CƂɐݒ\B
	^C͂a`sɂSOXUݒ\B
	^CSOXUݒ肵ꍇAgp VRAM ̗̈́A

		4096 * 2 = 8192 [bytes]

	CG ͂P^C̃p^[łRQoCg邽߁A

		(65536 - 8192) / 32 = 1792 [tiles] 

	1792 ̃j[Nȃp^[`łB
	̃p^[͂PsNZSrbgȂ̂Ŏgppbg̈͂PUD
	łȂ...

	2. Oɂł邱Ƃ͂Ă܂

	BG ̕`ɂ͎̂R̏ȂB

		a. p^[ƂbfJ[pbgwWvZ(GR[h)
		b. pbg GRB333 9 rbgJ[擾
		c. GRB333  RGB555 / RGB565 ɕϊ

		a. GR[h̍
			^C̃p^[f[^̓t[ɕω邱Ƃ
			܂Ȃ(݂ǂݏo̕)߁A
			VRAM ɂbff[^܂ꂽƂɃGR[hsȂA
			ێĂƈȍ~̓ǂݏoɂȂB
			bfJ[͂a`sœƗɐݒ肳̂ŁA
			GR[hł͂bff[^(4bits/pixel)݂̂̑ΏۂƂB

			ۂɂ͂bf͂RQoCg琬邽߁Af[^܂邽т
			GR[hsȂƍőRQGR[hsȂƂɂȂ()B
			ŁÂقȂl݂̏PoCgł
			uXVtOv𗧂Ă悤ɂA`撼OɍXVtOĂ
			̂GR[hsȂB

			MWR Őݒł鉼zXN[̍ŏTCY 32 x 32B
			̂Ƃbff[^̂߂̂uq`l̈ł傫āA

				65536 - 32 x 32 x 2 = 1984 [patterns]

			1984 ̃p^[`łBbff[^ 8x8 sNZ
			GR[hȂ̂ŁÂ悤ȃe[upӂB

				Uint8 decodedpixel[1984][8*8];

			GR[h̃sNZf[^͂SrbgȂ̂ŁA
			ʂSrbg͏Ƀ[ɂȂĂB̃rbggȂ̂
			Ȃ̂ŁA8x8 sNZz̍ŏ̗vf
			(decodedpixel[xxxx][0]) ̍ŏʃrbgɂ́uXVtOv
			蓖Ă邱ƂɂBȂAɗĂuXVtOv́A
			`̒OɃGR[hȂăNÂ
			bfJ[Ɗ邱Ƃ͂ȂB

		b. GRB333 擾̍
		c. GRB333  RGB555 / RGB565 ϊ̍
			b  c ͓ɏĂ܂B
			^Cf[^ tile (0 <= tile <= 255),
			pbg palette[] (9 bits),
			GRB333  RGB555 / RGB565 ϊe[u table (16-bits)
			ƂƁAۂɕ`悷sNZ pixel ́A

				pixel = table[palette[tile]];

			œB table  palette ̂QdQƂPɌ炷Ƃ͉\B
			palette  GRB333 Ԃe[u RGB555 / RGB565 Ԃe[u
			ɂ邾ł悢B
-----------------------------------------------------------------------------*/
/*
	drawBgLine: renders one BG line into _LineBuf.

	scanLine      not used by this function
	lineCounter   BG line to render (selects the map row and the row inside
	              each tile)

	Output starts at _LineBuf[16 - (BXR & 7)], so that the first visible
	pixel lands at _LineBuf[16]; when BXR is not a multiple of 8 one extra
	tile is drawn to cover the right edge.  Each output byte is the decoded
	4-bit pixel OR-ed with the tile's palette number in the upper 4 bits.
*/
#if defined(__GNUC__) && defined(USE_INLINE_ASM)
static
void
drawBgLine(
	Uint32		scanLine,
	Uint32		lineCounter)
{
	Uint32		vramAddr;
	Uint32		vramData;
	Uint32		offsetX;		// tile column inside the BG map
	Uint32		offsetY;		// tile row inside the BG map
	Uint32		tileX;			// x inside the tile (0-7)
	Uint32		tileY;			// y inside the tile (0-7)
	Uint32		color32;
	Uint8*		pDst;
	Sint32		end;

	(void)scanLine;

	offsetX = (BXR / 8) & (_BGW-1);
	offsetY = (lineCounter	 / 8) & (_BGH-1);

	tileX = BXR & 7;
	tileY = lineCounter & 7;

	pDst  = &_LineBuf[16-tileX];
	end = (tileX == 0) ? _ScreenW/8 : _ScreenW/8 + 1;

	vramAddr = offsetY * _BGW;

	while (end-- > 0)
	{
		// vramData = hi4color | pattern address
		vramData = _pwVideoRam[vramAddr + offsetX];
		color32 = (vramData & 0xf000) >> 8;
		vramData &= 0x7ff;
		update_bg_tile_cache(vramData);

		__asm__ __volatile__ (
			"movd		(%2),  %%mm0	\n\t"
			"punpcklbw	%%mm0, %%mm0	\n\t"
			"punpcklbw	%%mm0, %%mm0	\n\t"
			"punpcklbw	%%mm0, %%mm0	\n\t"		// mm0 = 8 * cgcolor

			"movq		(%1),  %%mm1	\n\t"
			"por		%%mm0, %%mm1	\n\t"		// mm1 = hhggffeeddccbbaa

			"movq		%%mm1, (%0)		\n\t"
		: 
		: "r" (pDst), "r" (&_BgTileCache[vramData][tileY*8]), "r" (&color32)
		// The asm reads color32 and the tile cache and writes _LineBuf through
		// pointers; the clobber makes the compiler store/reload around it.
		: "memory");

		pDst += 8;

		offsetX = (offsetX + 1) & (_BGW-1);
	}

	__asm__ __volatile__ ("emms");
}
#else
static
void
drawBgLine(
	Uint32		scanLine,
	Uint32		lineCounter)
{
	Uint32		vramAddr;
	Uint32		vramData;
	Uint32		offsetX;		// tile column inside the BG map
	Uint32		offsetY;		// tile row inside the BG map
	Uint32		tileX;			// x inside the tile (0-7)
	Uint32		tileY;			// y inside the tile (0-7)
	Uint8*		pDst;
	Sint32		end;

	(void)scanLine;

	offsetX = (BXR / 8) & (_BGW-1);
	offsetY = (lineCounter	 / 8) & (_BGH-1);

	tileX = BXR & 7;
	tileY = lineCounter & 7;

	// Written byte by byte: the start offset 16-tileX is generally not
	// 4-byte aligned, so 32-bit stores through a cast pointer would be
	// misaligned (and would alias a byte array as Uint32).
	pDst  = &_LineBuf[16-tileX];
	end = (tileX == 0) ? _ScreenW/8 : _ScreenW/8 + 1;

	vramAddr = offsetY * _BGW;

	while (end-- > 0)
	{
		const Uint8*	pSrc;
		Uint8			hiColor;
		int				k;

		// vramData = hi4color | pattern address
		vramData = _pwVideoRam[vramAddr + offsetX];
		hiColor = (Uint8)((vramData & 0xf000) >> 8);
		vramData &= 0x7ff;
		update_bg_tile_cache(vramData);

		pSrc = &_BgTileCache[vramData][tileY*8];
		for (k = 0; k < 8; k++)
		{
			pDst[k] = pSrc[k] | hiColor;
		}
		pDst += 8;

		offsetX = (offsetX + 1) & (_BGW-1);
	}
}
#endif


/*
	draw_sp_line_normal: draws one 16-pixel row of a sprite tile, not mirrored.

	pc       pattern number (index into _SpTileCache)
	ucolor   upper 4 bits of the palette index (already shifted into place)
	bSP      non-zero: the sprite has priority over the BG
	x        x position on the line (may be negative or run past the right
	         edge; such pixels are clipped)
	tileY    row inside the tile (0-15)

	Return value: TRUE if any pixel of this row landed on a pixel already
	claimed by an earlier sprite.  The flag is sticky for the whole row (it
	used to be overwritten per pixel in the BG-priority branch, so a later
	pixel could erase an earlier overlap).
*/
static
BOOL
draw_sp_line_normal(
	Uint32		pc,				// pattern number
	Uint32		ucolor,			// upper 4 bits of the palette index
	BOOL		bSP,			// SP has priority
	Sint32		x,				// x position on the scanline
	Uint32		tileY)			// y inside the tile
{
	const Uint8*	pSrc = &_SpTileCache[pc][tileY*16];
	BOOL			bOverlap = FALSE;
	int				i;

	for (i = 0; i < 16; i++)
	{
		const Sint32	px = x + i;
		Uint8*			pDst;
		Uint8*			pSpBg;
		Uint8			data;

		if (px < 0)				continue;
		if (px >= _ScreenW)		break;

		data = pSrc[i] | ucolor;

		if ((data & 0xf) == 0)		// transparent sprite pixel: skip
			continue;

		// Index only after clipping, so no pointer is ever formed before the
		// start of the buffers.
		pDst  = &_LineBuf[16+px];
		pSpBg = &_SpBgBuf[16+px];

		if (bSP)	// SP has priority
		{
			if (!(*pSpBg & 0x80))
			{
				// No sprite pixel here yet: the sprite goes in front of the
				// BG and claims the pixel.
				*pDst = data;
				*pSpBg = 0x81;
			}
			else
			{
				// An earlier (higher-priority) sprite already owns the pixel.
				bOverlap = TRUE;
			}
		}
		else	// BG has priority
		{
			if (*pDst & 0xf)
			{
				// An opaque pixel is already there: keep it, record an
				// overlap if it belongs to a sprite, and claim the pixel.
				if (*pSpBg & 0x80)
					bOverlap = TRUE;
				*pSpBg |= 0x80;
			}
			else
			{
				// Nothing opaque underneath: the sprite pixel is visible
				// even though it sits behind the BG.
				*pDst = data;
				*pSpBg = 0x81;
			}
		}
	}

	return bOverlap;
}


/*
	draw_sp_line_hflip: draws one 16-pixel row of a sprite tile, mirrored
	horizontally (source pixel 15-i goes to screen position x+i).

	pc       pattern number (index into _SpTileCache)
	ucolor   upper 4 bits of the palette index (already shifted into place)
	bSP      non-zero: the sprite has priority over the BG
	x        x position on the line (may be negative or run past the right
	         edge; such pixels are clipped)
	tileY    row inside the tile (0-15)

	Return value: TRUE if any pixel of this row landed on a pixel already
	claimed by an earlier sprite.  The flag is sticky for the whole row (it
	used to be overwritten per pixel in the BG-priority branch, so a later
	pixel could erase an earlier overlap).
*/
static
BOOL
draw_sp_line_hflip(
	Uint32		pc,				// pattern number
	Uint32		ucolor,			// upper 4 bits of the palette index
	BOOL		bSP,			// SP has priority
	Sint32		x,				// x position on the scanline
	Uint32		tileY)			// y inside the tile
{
	const Uint8*	pSrc = &_SpTileCache[pc][tileY*16];
	BOOL			bOverlap = FALSE;
	int				i;

	for (i = 0; i < 16; i++)
	{
		const Sint32	px = x + i;
		Uint8*			pDst;
		Uint8*			pSpBg;
		Uint8			data;

		if (px < 0)				continue;
		if (px >= _ScreenW)		break;

		data = pSrc[15-i] | ucolor;

		if ((data & 0xf) == 0)		// transparent sprite pixel: skip
			continue;

		// Index only after clipping, so no pointer is ever formed before the
		// start of the buffers.
		pDst  = &_LineBuf[16+px];
		pSpBg = &_SpBgBuf[16+px];

		if (bSP)	// SP has priority
		{
			if (!(*pSpBg & 0x80))
			{
				*pDst = data;
				*pSpBg = 0x81;
			}
			else
			{
				// An earlier (higher-priority) sprite already owns the pixel.
				bOverlap = TRUE;
			}
		}
		else	// BG has priority
		{
			if (*pDst & 0xf)
			{
				// An opaque pixel is already there: keep it, record an
				// overlap if it belongs to a sprite, and claim the pixel.
				if (*pSpBg & 0x80)
					bOverlap = TRUE;
				*pSpBg |= 0x80;
			}
			else
			{
				*pDst = data;
				*pSpBg = 0x81;
			}
		}
	}

	return bOverlap;
}


/*-----------------------------------------------------------------------------
	[draw_sp_line]

	When width = 32, drawing starts from the pattern address with D0 of index cleared to zero.


	+---+
	|	|
	+---+

	+---+
	|	|
	+---+
	|	|
	+---+

	+---+
	|	|
	+---+
	|	|
	+---+
	|	|
	+---+
	|	|
	+---+

	+---+---+
	|	|	|
	+---+---+

	+---+---+
	|	|	|
	+---+---+
	|	|	|
	+---+---+

	+---+---+
	|	|	|
	+---+---+
	|	|	|
	+---+---+
	|	|	|
	+---+---+
	|	|	|
	+---+---+

-----------------------------------------------------------------------------*/
/*
	draw_sp_line: draws every sprite that intersects one raster line.

	line   current raster counter value; compared directly with the sprites'
	       Y attribute

	Sprites are processed in table order (entry 0 first).  Entries with
	X or Y of 0, or lying beyond the right/bottom limit, are skipped.  When
	a sprite is found after NUM_SP_PERLINE sprites have already been drawn
	on the line, _bSpOver is set, the overflow status/IRQ is raised if CR
	enables it, and the remaining sprites are not drawn.

	Return value: the overlap result reported for sprite 0 only.  It is no
	longer overwritten by the sprites processed after sprite 0.
	NOTE(review): sprite 0 is drawn before any other sprite, so the row
	drawers have no earlier sprite pixel to report against; confirm whether
	collision detection can actually trigger with this drawing order.
*/
static
BOOL
draw_sp_line(
	Uint32		line)		// current raster counter value
{
	static const Uint32	heightTable[4] = { 16, 32, 64, 64 };
	Uint32		i;
	Sint32		x;
	Sint32		y;
	Uint32		tileY;
	Uint32		width;
	Uint32		height;
	Uint32		ucolor;
	Uint32		nSpPerLine = 0;
	Uint32		index;
	BOOL		bSP;
	BOOL		bOverlap = FALSE;
	BOOL		b;

	for (i = 0; i < NUM_SPRITES; i++)
	{
		y = SP_Y(i);
		x = SP_X(i);

		if (y == 0 || x == 0 || y >= 240+64 || x >= _ScreenW+32)
			continue;

		// the X attribute is offset by 32 from the screen position
		x -= 32;

		height = heightTable[SP_HEIGHT(i)];

		if (line >= y && line < y+height)
		{
			if (nSpPerLine == NUM_SP_PERLINE)
			{
				_bSpOver = TRUE;
				if (CR & VDC_CTRL_OC)
				{
					_VdcStatus |= VDC_STAT_OR;
					INTCTRL_Request(INTCTRL_IRQ1);
				}
				break;
			}

			width = (SP_WIDTH(i) + 1) * 16;
			index = SP_INDEX(i) / 2;

			tileY = line - y;

			// Adjust index for width, height, hflip and vflip so that it
			// selects the 16x16 cell containing this row.
			if (width == 32)
			{
				// bit 0 selects the left/right cell; start with the cell
				// shown on the left
				index &= ~1;
				index |= SP_HFLIP(i);
			}

			if (height == 32)
			{
				// bit 1 selects the upper/lower cell
				index &= ~2;
				index |= SP_VFLIP(i) << 1;
				if (tileY >= 16)
					index ^= 2;
			}
			else if (height == 64)
			{
				// bits 1-2 select one of four vertically stacked cells
				index &= ~6;
				index |= SP_VFLIP(i) << 1;
				index |= SP_VFLIP(i) << 2;
				if (tileY >= 16 && tileY < 32)		index ^= 2;
				else if (tileY >= 32 && tileY < 48)	index ^= 4;
				else if (tileY >= 48)				index ^= 6;
			}

			tileY &= 15;
			if (SP_VFLIP(i))
				tileY = 15 - tileY;

			ucolor = SP_UCOLOR(i) << 4;
			bSP    = SP_PRIORITY(i);

			update_sp_tile_cache(index);
			if (SP_HFLIP(i))	b = draw_sp_line_hflip(index, ucolor, bSP, x, tileY);
			else				b = draw_sp_line_normal(index, ucolor, bSP, x, tileY);

			if (width == 32 && x+16 < _ScreenW)
			{
				// second (right-hand) cell of a 32-pixel-wide sprite
				index ^= 1;
				update_sp_tile_cache(index);
				if (SP_HFLIP(i))	b |= draw_sp_line_hflip(index, ucolor, bSP, x+16, tileY);
				else				b |= draw_sp_line_normal(index, ucolor, bSP, x+16, tileY);
			}

			// Only sprite 0 takes part in overlap detection; keep its result
			// instead of letting later sprites reset it.
			if (i == 0 && b)
				bOverlap = TRUE;

			++nSpPerLine;
		}
	}

	return bOverlap;
}


/*-----------------------------------------------------------------------------
	[drawLine]
		Renders one line (BG and sprites) into _LineBuf / _SpBgBuf.

	lineCounter   BG line passed on to drawBgLine
	scanLine      scanline number passed on to drawBgLine

	Outside the display area, or in burst mode, the line is cleared and
	_SpBgBuf is filled with 1.  Otherwise the BG is drawn when CR bit 7 is
	set (cleared to 0 if not) and sprites are drawn when CR bit 6 is set.

	Returns the value of draw_sp_line() when sprites were drawn, else FALSE.
-----------------------------------------------------------------------------*/
static
BOOL
drawLine(
	Sint32		lineCounter,
	Sint32		scanLine)
{
	const BOOL	bBlank = (_DisplayCounter < DISPLAY_STARTLINE)
					  || (_DisplayCounter >= DISPLAY_ENDLINE)
					  || _bBurstMode;

	if (bBlank)
	{
		memset(&_LineBuf[16], 0, _ScreenW);
		memset(&_SpBgBuf[16], 1, _ScreenW);
		return FALSE;
	}

	memset(&_SpBgBuf[16], 0, _ScreenW);

	if (CR & 0x80)
		drawBgLine(scanLine, lineCounter);
	else
		memset(&_LineBuf[16], 0, _ScreenW);

	if (CR & 0x40)
		return draw_sp_line(_RasterCounter);

	return FALSE;
}


/*-----------------------------------------------------------------------------
	[vblank]
		Work done when the display period ends: raises the vertical-blank
		status/IRQ if enabled and performs the VRAM -> SATB transfer.

	[note] (original author, summarized)
		In this implementation a raster interrupt and the VBlank interrupt
		can fall on the same IRQ, in which case one of them would be lost.
		To keep them apart, 150 CPU clocks are run before VBlank is raised.
		The real delay between the two is unknown; 150 is a provisional
		value.
-----------------------------------------------------------------------------*/
static
inline
void
vblank()
{
	CPU_AdvanceClock(150);	// simulate hsync period

	// NOTE(original author): whether VDC_STAT_VD should be set only when
	// CR & VDC_CTRL_VC is set still needs testing.
	if (CR & VDC_CTRL_VC)
	{
		_VdcStatus |= VDC_STAT_VD;
		INTCTRL_Request(INTCTRL_IRQ1);
	}

	// DMA from VRAM to SATB: runs when a SATB write requested it or when
	// DCR bit 4 is set.
	if (!_bUpdateSATB && !(DCR & 0x10))
		return;

	_bUpdateSATB = FALSE;
	_SpDmaCount = 1;		// counted up by VDC_AdvanceLine

	update_satb(SATB);

	// CPU clocks charged for the DMA
	CPU_DelayClock(1024);
}


/*-----------------------------------------------------------------------------
	[VDC_AdvanceClock]
		Advances the VDC by the given number of clocks.  The clocks are
		accumulated in _ClockCounter and VDC_AdvanceLine() is run once for
		every VDC_CYCLESPERLINE accumulated.

	clock   number of clocks to add

	Returns VDC_OK OR-ed with the results of every VDC_AdvanceLine() call.
-----------------------------------------------------------------------------*/
Uint32
VDC_AdvanceClock(
	Sint32		clock)
{
	Uint32		status = VDC_OK;

	_ClockCounter += clock;

	if (_ClockCounter < VDC_CYCLESPERLINE)
		return status;

	do
	{
		// consume the line's clocks before the line is processed
		_ClockCounter -= VDC_CYCLESPERLINE;
		status |= VDC_AdvanceLine();
	} while (_ClockCounter >= VDC_CYCLESPERLINE);

	return status;
}


/*-----------------------------------------------------------------------------
	[VDC_AdvanceLine]
		Advances the VDC by exactly one scanline: raster compare, line
		rendering, counter updates, SATB-DMA completion, burst-mode decision
		and end-of-display / end-of-frame handling.

	_ScanLine --- the TV scanline
	_DisplayCounter --- the VDC's line counter
	_RasterCounter --- counter used for the raster compare
	_LineCounter --- called "line counter", really the VDC's VRAM (BG) line address

	Returns VDC_OK OR-ed with VDC_DISPLAY_END, VDC_VBL_END and/or
	VDC_VRAMDMA_END for the events that occurred on this line.
-----------------------------------------------------------------------------*/
Uint32
VDC_AdvanceLine()
{
	Uint32		ret = VDC_OK;

	// perform the raster compare
	if (_RasterCounter == (RCR & 1023) && _RasterCounter >= 64)
	{
		if (CR & VDC_CTRL_RC)
		{
			// NOTE(original author): is the status flag also left unset when
			// the enable bit in CR is clear?  Needs confirmation.
			_VdcStatus |= VDC_STAT_RR;
			INTCTRL_Request(INTCTRL_IRQ1);
//			printf("rcr @ rc=%d\n", _RasterCounter);
		}
	}

	++_DisplayCounter;

	// render the line; a TRUE result means a sprite overlap was reported
	if (drawLine(_LineCounter, _ScanLine))
	{
		if (CR & VDC_CTRL_CC)
		{
			_VdcStatus |= VDC_STAT_CR;
			INTCTRL_Request(INTCTRL_IRQ1);
		}
	}

	// the BG line address does not advance in burst mode
	if (!_bBurstMode)
	{
		++_LineCounter;
		_LineCounter &= _LineCountMask;
	}
	++_RasterCounter;
	++_ScanLine;

	// SATB DMA in progress: started at 1 by vblank(), finished when it reaches 4
	if (_SpDmaCount > 0)
	{
		if (++_SpDmaCount == 4)
		{
			_SpDmaCount = 0;

			// request the end-of-transfer interrupt when bit 0 of DCR is set
			if (DCR & 0x1)
			{
				_VdcStatus |= VDC_STAT_DS;
				INTCTRL_Request(INTCTRL_IRQ1);
			}
		}
	}

	if (_ScanLine == 4)
	{
		/* Burst mode is decided once per frame, when _ScanLine reaches 4:
		   it is entered when both CR bit 6 and bit 7 are clear. */
		if ((CR & 0xc0) == 0)
			_bBurstMode = TRUE;
		else// if ((CR & 0xc0) == 0xc0)
			_bBurstMode = FALSE;
	}

	if (_DisplayCounter == DISPLAY_STARTLINE-1)
	{
		// one line before the display area: reload the BG line address
		// and restart the raster counter at 64
		if (!_bBurstMode)
			_LineCounter = BYR;

		_RasterCounter = 64;
	}
	else
	{
		if (_DisplayCounter == DISPLAY_ENDLINE)
		{
			ret |= VDC_DISPLAY_END;
			vblank();
		}

		if (_DisplayCounter == DISPLAY_ENDLINE + VCR)
		{
			_DisplayCounter = 0;
		}
	}

	if (_ScanLine == MAX_SCANLINE)
	{
		// end of the frame: run vblank() here if the display area had not
		// ended before the last scanline
		if ((_DisplayCounter >= VSW+1 + VDS+2 && _DisplayCounter <= 242) || DISPLAY_ENDLINE > 262)
			vblank();

		_DisplayCounter = 0;
		_ScanLine = 0;

		ret |= VDC_VBL_END;
	}

//	printf("rc=%d sl=%d dc=%d lc=%d cr=%04X rcr=%d bm=%d\n", _RasterCounter, _ScanLine, _DisplayCounter, _LineCounter, CR, (int)RCR, _bBurstMode);
//	printf("w=%d sl=%d\n", _ScreenW, _ScanLine);
//	printf("_BGW=%d _BGH=%d\n", _BGW, _BGH);
//	printf("VSW=%d VDS=%d VDW=%d VCR=%d DCR=%04X\n", VSW, VDS, VDW, VCR, DCR);
//	printf("HSW=%d HDS=%d HDE=%d\n", HSW, HDS, HDE);
//	printf("HSR=%04X HDR=%04X\n", HSR, HDR);

	// set IRQ flags
	// (a VRAM-VRAM DMA requested its completion interrupt)
	if (_bDmaIrqRequested)
	{
		_bDmaIrqRequested = FALSE;
		ret |= VDC_VRAMDMA_END;
		INTCTRL_Request(INTCTRL_IRQ1);
	}

	return ret;
}


/* [VDC_GetScanLine]  Returns the current scanline counter (_ScanLine). */
const Sint32 VDC_GetScanLine()
{
	return _ScanLine;
}


/* [VDC_GetLineBuffer]  Returns the rendered line, skipping the 16-byte
   margin at the start of _LineBuf. */
const Uint8* VDC_GetLineBuffer()
{
	return &_LineBuf[16];
}


/* [VDC_GetSpBgBuffer]  Returns the sp/bg selection buffer for the rendered
   line, skipping the 16-byte margin at the start of _SpBgBuf. */
const Uint8* VDC_GetSpBgBuffer()
{
	return &_SpBgBuf[16];
}


/* [VDC_GetDisplayWidth]  Returns the display width in pixels (_ScreenW). */
const Sint32 VDC_GetDisplayWidth()
{
	return _ScreenW;
}


/* [VDC_GetDisplayHeight]  Returns the display height in lines (_ScreenH). */
const Sint32 VDC_GetDisplayHeight()
{
	return _ScreenH;
}


/* [VDC_GetTopOffset]  Returns DISPLAY_STARTLINE, the first _DisplayCounter
   value of the display area, computed from the current VPR register. */
const Sint32 VDC_GetTopOffset()
{
	const Sint32	firstLine = DISPLAY_STARTLINE;

	return firstLine;
}


/* [VDC_GetDisplayEnd]  Returns DISPLAY_ENDLINE, the first _DisplayCounter
   value after the display area, computed from the current VPR/VDW registers. */
const Uint32 VDC_GetDisplayEnd()
{
	const Uint32	endLine = DISPLAY_ENDLINE;

	return endLine;
}


/* [VDC_GetLeftOffset]  Returns the left offset of the display, which is
   always 0 in this implementation. */
const Sint32 VDC_GetLeftOffset()
{
	return 0;
}


/*-----------------------------------------------------------------------------
** [VDC_Deinit]
**	 ucbj܂B
**---------------------------------------------------------------------------*/
void
VDC_Deinit()
{
	_VdcStatus = 0;
	_ScanLine = 0;
	_LineCounter = 0;
	_LineCountMask = 0xff;
	_bUpdateSATB = FALSE;
	_SpDmaCount = 0;
	_bDmaIrqRequested = FALSE;
	_bSpOver = FALSE;

	_RasterCounter = 0;
	_DisplayCounter = 0;

	_ScreenW = 0;
	_ScreenH = 0;

	_BGH = 0;
	_BGW = 0;

	_VdcAddrInc = 0;

	memset(_PatternLUT, 0, sizeof(_PatternLUT));
	memset(_LineBuf, 0, sizeof(_LineBuf));
	memset(_BgTileCache, 0, sizeof(_BgTileCache));
	memset(_SpTileCache, 0, sizeof(_SpTileCache));
	memset(_VideoRam, 0, sizeof(_VideoRam));
	memset(_Regs, 0, sizeof(_Regs));

	INTCTRL_Cancel(INTCTRL_IRQ1);
}


// Save-state helpers.  Each one transfers a whole object to/from the FILE*
// named `p` in the calling function and makes that function return FALSE
// when the transfer fails.  They are wrapped in do { } while (0) so that a
// use behaves as a single statement (safe inside if/else), and the argument
// is parenthesized.
// save / load a variable
#define SAVE_V(V)	do { if (fwrite(&(V), sizeof(V), 1, p) != 1) return FALSE; } while (0)
#define LOAD_V(V)	do { if (fread(&(V), sizeof(V), 1, p) != 1) return FALSE; } while (0)
// save / load an array (must be a real array, not a pointer)
#define SAVE_A(A)	do { if (fwrite((A), sizeof(A), 1, p) != 1) return FALSE; } while (0)
#define LOAD_A(A)	do { if (fread((A), sizeof(A), 1, p) != 1) return FALSE; } while (0)
/*-----------------------------------------------------------------------------
	[SaveState]
		Writes the VDC state to a file.

	p   open output stream; NULL is rejected

	Returns TRUE on success, FALSE if p is NULL or any write fails.
	The order of the table below defines the layout read by VDC_LoadState.
-----------------------------------------------------------------------------*/
BOOL
VDC_SaveState(
	FILE*		p)
{
	static const struct
	{
		const void*		pData;
		size_t			size;
	} fields[] =
	{
		{ _Regs,				sizeof(_Regs)				},
		{ _VideoRam,			sizeof(_VideoRam)			},
		{ _SpRam,				sizeof(_SpRam)				},

		{ &_AR,					sizeof(_AR)					},

		{ &_ScreenW,			sizeof(_ScreenW)			},
		{ &_ScreenH,			sizeof(_ScreenH)			},
		{ &_BGH,				sizeof(_BGH)				},
		{ &_BGW,				sizeof(_BGW)				},

		{ &_VdcAddrInc,			sizeof(_VdcAddrInc)			},
		{ &_VdcStatus,			sizeof(_VdcStatus)			},

		{ &_ScanLine,			sizeof(_ScanLine)			},
		{ &_RasterCounter,		sizeof(_RasterCounter)		},
		{ &_DisplayCounter,		sizeof(_DisplayCounter)		},

		{ &_LineCounter,		sizeof(_LineCounter)		},
		{ &_LineCountMask,		sizeof(_LineCountMask)		},

		{ &_bUpdateSATB,		sizeof(_bUpdateSATB)		},
		{ &_SpDmaCount,			sizeof(_SpDmaCount)			},
		{ &_bDmaIrqRequested,	sizeof(_bDmaIrqRequested)	},
		{ &_bBurstMode,			sizeof(_bBurstMode)			},

		{ &_bSpOver,			sizeof(_bSpOver)			},
	};
	size_t		n;

	if (p == NULL)
		return FALSE;

	for (n = 0; n < sizeof(fields) / sizeof(fields[0]); n++)
	{
		if (fwrite(fields[n].pData, fields[n].size, 1, p) != 1)
			return FALSE;
	}

	return TRUE;
}


/*-----------------------------------------------------------------------------
	[LoadState]
		Ԃt@Cǂݍ݂܂B 
-----------------------------------------------------------------------------*/
BOOL
VDC_LoadState(
	FILE*		p)
{
	if (p == NULL)
		return FALSE;

	LOAD_A(_Regs);
	LOAD_A(_VideoRam);
	LOAD_A(_SpRam);

	LOAD_V(_AR);

	LOAD_V(_ScreenW);
	LOAD_V(_ScreenH);
	LOAD_V(_BGH);
	LOAD_V(_BGW);

	LOAD_V(_VdcAddrInc);
	LOAD_V(_VdcStatus);

	LOAD_V(_ScanLine);
	LOAD_V(_RasterCounter);
	LOAD_V(_DisplayCounter);

	LOAD_V(_LineCounter);
	LOAD_V(_LineCountMask);

	LOAD_V(_bUpdateSATB);
	LOAD_V(_SpDmaCount);
	LOAD_V(_bDmaIrqRequested);
	LOAD_V(_bBurstMode);

	LOAD_V(_bSpOver);

	// remake all the bg and sp tiles
	invalidate_tile_cache();

	return TRUE;
}

// The save-state helper macros are only meant for the two functions above.
#undef SAVE_V
#undef SAVE_A
#undef LOAD_V
#undef LOAD_A


// Debug function: exposes the raw 64kB VRAM array (writable by the caller;
// direct writes bypass the tile-cache invalidation done by write_vram).
Uint8* VDC_GetVideoRam()
{
	return &_VideoRam[0];
}


