#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * BITSWAP8(n, bit07, ..., bit00) - assemble a value from eight chosen bits of n.
 *
 * The position arguments are listed from the most significant output bit
 * down to the least significant one: output bit 7 is a copy of bit number
 * `bit07` of n, output bit 6 is a copy of bit number `bit06`, and so on down
 * to output bit 0, which is a copy of bit number `bit00`.  Only bits 0-7 of
 * the result can ever be set.
 *
 * n is expanded once per output bit, so it must not have side effects.
 */
#define BITSWAP8_PICK(n, from, to)	((((n) >> (from)) & 1) << (to))

#define BITSWAP8(n,	\
	bit07, bit06, bit05, bit04, bit03, bit02, bit01, bit00)	\
	(BITSWAP8_PICK(n, bit07, 7) |	\
	 BITSWAP8_PICK(n, bit06, 6) |	\
	 BITSWAP8_PICK(n, bit05, 5) |	\
	 BITSWAP8_PICK(n, bit04, 4) |	\
	 BITSWAP8_PICK(n, bit03, 3) |	\
	 BITSWAP8_PICK(n, bit02, 2) |	\
	 BITSWAP8_PICK(n, bit01, 1) |	\
	 BITSWAP8_PICK(n, bit00, 0))

/* Code by IQ_132.  You may use this in any way you wish, just leave this line intact. */

/*
 * Read up to `size` bytes from `in` into `dst`.
 * A file shorter than `size` is accepted (the caller pre-fills the buffer);
 * only a stream error counts as failure.  Returns 0 on success, -1 on error.
 */
static int read_rom(FILE *in, unsigned char *dst, size_t size)
{
	size_t got = fread(dst, 1, size, in);

	return (got < size && ferror(in)) ? -1 : 0;
}

/* Write exactly `size` bytes from `data` to `out`.  Returns 0 on success, -1 otherwise. */
static int write_rom(FILE *out, const unsigned char *data, size_t size)
{
	return (fwrite(data, 1, size, out) == size) ? 0 : -1;
}

/* Close an output stream if it was opened.  Returns 0 on success, -1 if fclose() reports an error. */
static int close_output(FILE *out)
{
	return (out != NULL && fclose(out) != 0) ? -1 : 0;
}

/*
 * Converts the kf02m-p1.bin, kf02m-p2.bin and kf02m-s1.bin files in the
 * current directory into kf02m-p1d.bin, kf02m-p2d.bin and kf02m-s1d.bin.
 *
 * Steps:
 *   1. p1 and p2 are loaded back to back into one 0x800000-byte buffer that
 *      is pre-filled with 0xFF; s1 is loaded into a 0x020000-byte buffer,
 *      also pre-filled with 0xFF so a short file never exposes
 *      uninitialized memory.
 *   2. Inside every 0x80-byte chunk of the large buffer the 16-bit words are
 *      reordered: the word at index j is taken from index
 *      BITSWAP8(j, 6,7,2,3,4,5,0,1).
 *   3. Every byte of the s1 buffer has its bits 5 and 0 exchanged
 *      (BITSWAP8(b, 7,6,0,4,3,2,1,5)).
 *   4. 0x100000 bytes starting at offset 0x200000, 0x400000 bytes starting
 *      at offset 0x400000, and the whole s1 buffer are written to the three
 *      output files.
 *
 * Returns 0 on success and 1 on any failure; a message is printed to stderr
 * on failure and "Finished" to stdout on success.
 */
int main(void)
{
	enum {
		P_ROM_SIZE = 0x400000,	/* bytes loaded from each of p1 / p2 */
		SRC_SIZE   = 0x800000,	/* p1 followed by p2 */
		S1_SIZE    = 0x020000,	/* bytes loaded from s1 */
		CHUNK_SIZE = 0x80	/* span within which words are reordered */
	};

	int status = 1;
	int i, j;
	FILE *p1_rom_in = NULL, *p2_rom_in = NULL, *s1_rom_in = NULL;
	FILE *p1_rom_out = NULL, *p2_rom_out = NULL, *s1_rom_out = NULL;
	unsigned char *src = NULL;
	unsigned char *srm = NULL;
	unsigned char chunk[CHUNK_SIZE];	/* scratch copy of one reordered chunk */

	/* A failed fopen() returns NULL; there is nothing to close in that case. */
	if ((p1_rom_in = fopen("kf02m-p1.bin", "rb")) == NULL)
	{
		fprintf(stderr, "Error: cannot read kf02m-p1.bin");
		goto cleanup;
	}
	if ((p2_rom_in = fopen("kf02m-p2.bin", "rb")) == NULL)
	{
		fprintf(stderr, "Error: cannot read kf02m-p2.bin");
		goto cleanup;
	}
	if ((s1_rom_in = fopen("kf02m-s1.bin", "rb")) == NULL)
	{
		fprintf(stderr, "Error: cannot read kf02m-s1.bin");
		goto cleanup;
	}

	if ((p1_rom_out = fopen("kf02m-p1d.bin", "wb")) == NULL)
	{
		fprintf(stderr, "Error: cannot write to kf02m-p1d.bin");
		goto cleanup;
	}
	if ((p2_rom_out = fopen("kf02m-p2d.bin", "wb")) == NULL)
	{
		fprintf(stderr, "Error: cannot write to kf02m-p2d.bin");
		goto cleanup;
	}
	if ((s1_rom_out = fopen("kf02m-s1d.bin", "wb")) == NULL)
	{
		fprintf(stderr, "Error: cannot write to kf02m-s1d.bin");
		goto cleanup;
	}

	src = malloc(SRC_SIZE);
	srm = malloc(S1_SIZE);
	if (src == NULL || srm == NULL)
	{
		fprintf(stderr, "Error: out of memory");
		goto cleanup;
	}

	/* Bytes not supplied by a (short) input file stay 0xFF. */
	memset(src, 0xFF, SRC_SIZE);
	memset(srm, 0xFF, S1_SIZE);

	if (read_rom(p1_rom_in, src, P_ROM_SIZE) != 0 ||
	    read_rom(p2_rom_in, src + P_ROM_SIZE, P_ROM_SIZE) != 0 ||
	    read_rom(s1_rom_in, srm, S1_SIZE) != 0)
	{
		fprintf(stderr, "Error: failed while reading the input files");
		goto cleanup;
	}

	/* Reorder the 16-bit words inside each chunk via the scratch buffer. */
	for (i = 0; i < SRC_SIZE; i += CHUNK_SIZE)
	{
		for (j = 0; j < CHUNK_SIZE / 2; j++)
		{
			int ofst = BITSWAP8( j, 6, 7, 2, 3, 4, 5, 0, 1 );
			memcpy(chunk + j * 2, src + i + ofst * 2, 2);
		}
		memcpy(src + i, chunk, CHUNK_SIZE);
	}

	/* Exchange bits 5 and 0 of every s1 byte. */
	for (i = 0; i < S1_SIZE; i++)
	{
		srm[i] = (unsigned char)BITSWAP8(srm[i],7,6,0,4,3,2,1,5);
	}

	if (write_rom(p1_rom_out, src + 0x200000, 0x100000) != 0 ||
	    write_rom(p2_rom_out, src + 0x400000, 0x400000) != 0 ||
	    write_rom(s1_rom_out, srm + 0x000000, 0x020000) != 0)
	{
		fprintf(stderr, "Error: failed while writing the output files");
		goto cleanup;
	}

	status = 0;

cleanup:
	free(src);
	free(srm);

	if (p1_rom_in != NULL) fclose(p1_rom_in);
	if (p2_rom_in != NULL) fclose(p2_rom_in);
	if (s1_rom_in != NULL) fclose(s1_rom_in);

	/* fclose() flushes buffered output, so a failure here means lost data. */
	{
		int close_failed = 0;

		if (close_output(p1_rom_out) != 0) close_failed = 1;
		if (close_output(p2_rom_out) != 0) close_failed = 1;
		if (close_output(s1_rom_out) != 0) close_failed = 1;

		if (close_failed)
		{
			if (status == 0)
			{
				fprintf(stderr, "Error: failed while closing the output files");
			}
			status = 1;
		}
	}

	if (status == 0)
	{
		printf("Finished");
	}
	return status;
}
