/*
    MDEC decoder engine
    ===================

    Written by BERO
*/

#include "fpse.h"

/* Selects the order of the colour components inside a 24-bit pixel:
   0 gives R,G,B byte order, any other value gives B,G,R. */
#define INVERT_RGB      0

/* Byte offset of each colour component within a 3-byte pixel
   (used as indices by yuv2rgb24). */
#if INVERT_RGB==0
#define R   0
#define G   1
#define B   2
#else
#define R   2
#define G   1
#define B   0
#endif

/* Pointer into the `ram` array for a given address; the address is masked
   with 0x1fffff before indexing.  `ram` is not declared in this file
   (presumably it comes from fpse.h). */
#define   RAMADDR(addr) ((void*)&ram[(addr)&0x1fffff])
/* Number of coefficients in one block; also the stride between consecutive
   blocks inside a macroblock buffer. */
#define   DCTSIZE2  64

/* Forward declarations of the file-local helpers defined further down. */
/* Decodes one macroblock (6 blocks) of run/level data; returns the advanced stream pointer. */
static UINT16 *rl2blk(BLOCK *blk,UINT16 *mdec_rl);
/* Builds a scaled de-quantisation table from 64 raw quantiser bytes. */
static void iqtab_init(int *iqtab,UINT8 *iq_y);
/* Fills the saturation table used by the ROUND() macro. */
static void round_init(void);
/* Colour conversion of one decoded macroblock to 24-bit / 15-bit pixels. */
static void yuv2rgb24(BLOCK *blk,UINT8  *image);
static void yuv2rgb15(BLOCK *blk,UINT16 *image);

/* Decoder state shared by the register and DMA handlers below. */
static struct {
     UINT32 command;     /* last word written through mdec0_write() */
     UINT32 status;      /* value returned by mdec1_read(); only ever set to 0 in this file */
     UINT16 *rl;         /* current position in the run/level input stream */
     int rlsize;         /* low 16 bits of the last decode command; not read anywhere in this file */
} mdec;

/* De-quantisation tables, filled by iqtab_init() when a quantize-table DMA
   arrives.  rl2blk() uses iq_uv for the first two blocks of a macroblock and
   iq_y for the remaining four. */
static int iq_y[DCTSIZE2],iq_uv[DCTSIZE2];

/* Resets the decoder registers and builds the pixel saturation table. */
void mdec_init(void)
{
    /* Saturation lookup used by the colour converters. */
    round_init();

    /* Registers start out cleared. */
    mdec.status = 0;
    mdec.command = 0;

    /* Give the run/level pointer a valid target until a DMA supplies one. */
    mdec.rl = RAMADDR(0x100000);
}

/* Write to MDEC register 0: latches the command word.  For a decode command
   (pattern 0x30000000 under mask 0xf5ff0000) the low 16 bits are also stored
   as the run/level size. */
void mdec0_write(UINT32 data)
{
    const int is_decode = ((data & 0xf5ff0000) == 0x30000000);

    mdec.command = data;
    if (is_decode)
        mdec.rlsize = data & 0xffff;
}

/* Write to MDEC register 1: the value is accepted and discarded. */
void mdec1_write(UINT32 data)
{
    (void)data;     /* intentionally ignored */
}

/* Read of MDEC register 0: always yields zero. */
UINT32 mdec0_read(void)
{
    const UINT32 value = 0;

    return value;
}

/* Read of MDEC register 1: yields the stored status word. */
UINT32 mdec1_read(void)
{
    const UINT32 status = mdec.status;

    return status;
}

/*
 * DMA channel 0 (data going into the decoder).
 *
 *   adr   start address of the transfer; wrapped into RAM by RAMADDR()
 *   bcr   block control word; not needed here, nothing is copied
 *   chcr  channel control word; only the value 0x01000201 is acted on
 *
 * What is done with the data depends on the command last written through
 * mdec0_write():
 *   0x60000000                  cosine table        - ignored
 *   0x40000001                  quantize table      - 64 bytes for iq_y
 *                                                     followed by 64 for iq_uv
 *   (cmd&0xf5ff0000)==0x30000000 run/level data     - only the stream
 *                                                     pointer is recorded
 * Any other command is silently ignored.
 */
void dma0_exec(UINT32 adr,UINT32 bcr,UINT32 chcr)
{
     UINT32 cmd = mdec.command;

     (void)bcr;     /* the transfer length is never needed */

     if (chcr!=0x01000201) return;

     if (cmd==0x60000000) {
          /* cosine table: nothing to do */
     } else if (cmd==0x40000001) {
          /* quantize table */
          unsigned char *p = RAMADDR(adr);
          iqtab_init(iq_y,p);
          iqtab_init(iq_uv,p+64);
     } else if ((cmd&0xf5ff0000)==0x30000000) {
          /* run/value data: decoded later, when dma1_exec() asks for output */
          mdec.rl = RAMADDR(adr);
     }
}

/*
 * DMA channel 1 (decoded pixels coming out of the decoder).
 *
 *   adr   destination address; wrapped into RAM by RAMADDR()
 *   bcr   block control word; (bcr>>16)*(bcr&0xffff) is the transfer
 *         length in 32-bit words
 *   chcr  channel control word; only the value 0x01000200 is acted on
 *
 * Macroblocks are decoded from mdec.rl one after another until the requested
 * number of words has been produced.  Bit 0x08000000 of the current command
 * selects 15-bit output; otherwise 24-bit output is written.
 */
void dma1_exec(UINT32 adr,UINT32 bcr,UINT32 chcr)
{
     BLOCK coeffs[DCTSIZE2*6];
     UINT16 *out;
     int words_left;
     int step;          /* output of one macroblock, in UINT16 units */
     int want15;

     if (chcr!=0x01000200) return;

     words_left = (bcr>>16)*(bcr&0xffff);

     /* the destination range is about to be overwritten */
     CompileFlush(adr,adr+words_left*4);

     out = RAMADDR(adr);
     want15 = (mdec.command&0x08000000)!=0;
     step = want15 ? 16*16 : 24*16;

     while (words_left>0) {
          mdec.rl = rl2blk(coeffs,mdec.rl);
          if (want15) {
               yuv2rgb15(coeffs,out);
          } else {
               yuv2rgb24(coeffs,(unsigned char *)out);
          }
          words_left -= step/2;
          out += step;
     }
}

/* de-quantize / inverse-dct / yuv->rgb */


/* A run/level word holds a 6-bit field in bits 15..10 and a signed 10-bit
   value in bits 9..0. */
/* Upper field: run length (or the quantiser scale in the first word of a block). */
#define   RUNOF(a)  ((a)>>10)
/* Lower field, sign-extended from 10 bits.  Done with mask/xor/subtract so
   that no signed value is shifted into or out of the sign bit. */
#define   VALOF(a)  ((int)((((a)&0x3ff)^0x200))-0x200)

/* Scan-order table: zscan[k] is the index inside a 64-entry block at which
   the k-th coefficient of the run/level stream is stored (see rl2blk). */
static int zscan[DCTSIZE2] = {
     0 ,1 ,8 ,16,9 ,2 ,3 ,10,
     17,24,32,25,18,11,4 ,5 ,
     12,19,26,33,40,48,41,34,
     27,20,13,6 ,7 ,14,21,28,
     35,42,49,56,57,50,43,36,
     29,22,15,23,30,37,44,51,
     58,59,52,45,38,31,39,46,
     53,60,61,54,47,55,62,63
};

/* Per-coefficient scale factors that iqtab_init() multiplies into the raw
   quantiser values.  NOTE(review): the name suggests these are the scale
   factors of an AAN-style fast IDCT - confirm against the idct()
   implementation, which is not part of this file section. */
static int aanscales[DCTSIZE2] = {
       /* precomputed values scaled up by 14 bits */
       16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
       22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
       21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
       19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
       16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
       12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
        8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
        4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/*
 * Builds a de-quantisation table.
 *
 *   iqtab  receives DCTSIZE2 entries
 *   iq_y   DCTSIZE2 raw quantiser bytes
 *
 * Each output entry is the quantiser byte multiplied by the matching
 * aanscales[] factor, shifted right by CONST_BITS-IFAST_SCALE_BITS.
 */
static void iqtab_init(int *iqtab,unsigned char *iq_y)
{
#define CONST_BITS 14
#define   IFAST_SCALE_BITS 2
     const int shift = CONST_BITS-IFAST_SCALE_BITS;
     const int *scale = aanscales;
     int *out = iqtab;
     int * const end = iqtab+DCTSIZE2;

     while (out<end) {
          const int product = (*iq_y++)*(*scale++);
          *out++ = product>>shift;
     }
}


/*
 * Decodes one macroblock (six consecutive blocks) of run/level data.
 *
 *   blk      output buffer of 6*DCTSIZE2 entries; it is cleared first and
 *            each block is passed through idct() after being filled
 *   mdec_rl  input stream of 16-bit words
 *
 * Returns the stream pointer advanced past everything that was consumed.
 * No bound is placed on the input: reading continues until each of the six
 * blocks has been terminated.
 */
static UINT16 *rl2blk(BLOCK *blk, UINT16 *mdec_rl)
{
     int i,k,q_scale,rl;
     int *iqtab;
/* stream word that terminates a block */
#define   EOB  0xfe00

     memset(blk,0,6*DCTSIZE2*sizeof(BLOCK));
     for(i=0;i<6;i++) {
          /* the first two blocks use the iq_uv table, the other four iq_y */
          if (i<2) iqtab = iq_uv;
          else iqtab = iq_y;
          /* first word of a block: upper field = quantiser scale,
             lower field = first coefficient (stored at index 0) */
          rl = SWAP16(*mdec_rl++);
     //   if (rl==EOB) printf("err");
          q_scale = RUNOF(rl);
          blk[0] = iqtab[0]*VALOF(rl);
          k = 0;
          for(;;) {
               rl = SWAP16(*mdec_rl++);
               if (rl==EOB) break;
               /* the run field is the number of positions skipped */
               k += RUNOF(rl)+1;
                        /* stop rather than index past the 64-entry tables */
                        if (k > 63) break;
                blk[zscan[k]] = iqtab[zscan[k]]*q_scale*VALOF(rl)/8;
     //        blk[zscan[k]] = iqtab[k]*q_scale*VALOF(rl);
          }

          /* NOTE(review): idct() is defined elsewhere; its second argument
             appears to be the number of scan positions in use.  When the
             loop above ends through the k > 63 check the value passed here
             can exceed DCTSIZE2 - confirm idct() tolerates that. */
          idct(blk,k+1);
          
          blk+=DCTSIZE2;
     }
     return mdec_rl;
}

/* Fixed-point helpers for the colour conversion: SHIFT fractional bits. */
#define   SHIFT       12
#define   toFIX(a)    (int)((a)*(1<<SHIFT))
#define   toINT(a)    ((a)>>SHIFT)
#define   FIX_1       toFIX(1)
/* Chroma contributions to each channel.  The converters apply MULR and MULG2
   to the Cr sample, MULG and MULB to the Cb sample. */
#define   MULR(a)     toINT((a)*toFIX(1.402))
#define   MULG(a)     toINT((a)*toFIX(-0.3437))
#define   MULG2(a)    toINT((a)*toFIX(-0.7143))
#define   MULB(a)     toINT((a)*toFIX(1.772))

/* Packs three 8-bit components into 15 bits: r in bits 14..10, g in bits
   9..5, b in bits 4..0. */
#define   RGB15(r,g,b)   ( (((r)>>3)<<10)|(((g)>>3)<<5)|((b)>>3) )

/* ROUND(c) maps a signed sample c to an 8-bit value through roundtbl[]:
   c+128 saturated to 0..255.  The table covers c in [-384,383]; the index
   is clamped to the table bounds so that wildly out-of-range samples (for
   instance from a corrupt stream) saturate as well instead of reading
   outside the table.  The argument is evaluated more than once, so it must
   not have side effects. */
#define   ROUNDTBL_SIZE  (256*3)
#define   ROUND_INDEX(c) ((c)+128+256)
#define   ROUND(c)  roundtbl[ ROUND_INDEX(c)<0 ? 0 : \
                    (ROUND_INDEX(c)>=ROUNDTBL_SIZE ? ROUNDTBL_SIZE-1 : ROUND_INDEX(c)) ]

/* Saturation table indexed through ROUND(): three 256-entry sections. */
static UINT8 roundtbl[256*3];

/* Fills roundtbl: the first section is all 0, the middle section is the
   identity 0..255, the last section is all 255. */
static void round_init(void)
{
     UINT8 *below = roundtbl;
     UINT8 *inside = roundtbl+256;
     UINT8 *above = roundtbl+512;
     int v;

     for(v=0;v<256;v++) {
          below[v] = 0;
          inside[v] = v;
          above[v] = 255;
     }
}

/*
 * Converts one decoded macroblock to a 16x16 tile of 15-bit pixels.
 *
 *   blk    six blocks of DCTSIZE2 samples: Cb, Cr, then four luma blocks
 *          (top-left, top-right, bottom-left, bottom-right)
 *   image  receives 16 rows of 16 UINT16 pixels, stored contiguously
 *
 * Every chroma sample is shared by a 2x2 group of pixels.
 */
static void yuv2rgb15(BLOCK *blk, UINT16 *image)
{
     /* offsets of the 2x2 group inside a luma block / inside the tile */
     static const int lum_ofs[4] = { 0, 1, 8, 9 };
     static const int pix_ofs[4] = { 0, 1, 16, 17 };
     int row,col,n;

     for(row=0;row<8;row++) {                /* chroma row = pixel row pair */
          const BLOCK *cb_row = blk+row*8;
          const BLOCK *cr_row = cb_row+DCTSIZE2;
          /* rows 0..3 read the upper two luma blocks, rows 4..7 the lower two */
          const BLOCK *y_row = blk+DCTSIZE2*2+(row/4)*DCTSIZE2*2+(row%4)*16;
          UINT16 *out_row = image+row*32;

          for(col=0;col<8;col++) {
               /* columns 0..3 read the left luma block, 4..7 the right one */
               const BLOCK *y_src = y_row+(col/4)*DCTSIZE2+(col%4)*2;
               UINT16 *px = out_row+col*2;
               int r0 = MULR(cr_row[col]);
               int g0 = MULG(cb_row[col])+MULG2(cr_row[col]);
               int b0 = MULB(cb_row[col]);

               for(n=0;n<4;n++) {
                    int y = y_src[lum_ofs[n]];
                    px[pix_ofs[n]] = SWAP16(RGB15(ROUND(r0+y),ROUND(g0+y),ROUND(b0+y)));
               }
          }
     }
}

/*
 * Converts one decoded macroblock to a 16x16 tile of 24-bit pixels.
 *
 *   blk    six blocks of DCTSIZE2 samples: Cb, Cr, then four luma blocks
 *          (top-left, top-right, bottom-left, bottom-right)
 *   image  receives 16 rows of 16 three-byte pixels, stored contiguously;
 *          the byte order inside a pixel is given by the R/G/B macros
 *
 * Every chroma sample is shared by a 2x2 group of pixels.
 */
static void yuv2rgb24(BLOCK *blk, UINT8 *image)
{
     /* offsets of the 2x2 group inside a luma block / inside the tile */
     static const int lum_ofs[4] = { 0, 1, 8, 9 };
     static const int pix_ofs[4] = { 0, 1, 16, 17 };
     int row,col,n;

     for(row=0;row<8;row++) {                /* chroma row = pixel row pair */
          const BLOCK *cb_row = blk+row*8;
          const BLOCK *cr_row = cb_row+DCTSIZE2;
          /* rows 0..3 read the upper two luma blocks, rows 4..7 the lower two */
          const BLOCK *y_row = blk+DCTSIZE2*2+(row/4)*DCTSIZE2*2+(row%4)*16;
          UINT8 *out_row = image+row*32*3;

          for(col=0;col<8;col++) {
               /* columns 0..3 read the left luma block, 4..7 the right one */
               const BLOCK *y_src = y_row+(col/4)*DCTSIZE2+(col%4)*2;
               UINT8 *px = out_row+col*2*3;
               int r0 = MULR(cr_row[col]);
               int g0 = MULG(cb_row[col])+MULG2(cr_row[col]);
               int b0 = MULB(cb_row[col]);

               for(n=0;n<4;n++) {
                    UINT8 *dst = px+pix_ofs[n]*3;
                    int y = y_src[lum_ofs[n]];
                    dst[R] = ROUND(r0+y);
                    dst[G] = ROUND(g0+y);
                    dst[B] = ROUND(b0+y);
               }
          }
     }
}