
/************************************

  PicchioEngine

  Copyright(c)2008 Emanuele Bettidi

************************************/

/* VDC.cpp */

/* HuC6270 - VDC (Video Display Controller) */

#include <emmintrin.h>
#include <SDL/SDL.h>
#include "Types.h"
#include "Video.h"
#include "CPU.h"
#include "IntCtrl.h"
#include "VCE.h"
#include "VDC.h"

namespace VDC
{
 /* Forward declarations of the helpers defined further down in this file. */
 void reset();

 void fill_pattern_lut();
 void update_bg_sp_caches(uint16 addr);
 void draw_line();
 void draw_background();
 void draw_sprites();
 void update_vb_line();
 void do_vram_satb_dma();

 /* Timing state used by resync():
    clock   - copy of CPU::clock taken at the last resync()
    counter - ticks left before the next draw_line(); reloaded in steps of 455 */
 int32 clock;
 int32 counter;

 /* registers */
  /* 0x1FE000 - 0x1FE3FF */
   static uint8 AR;  // -/W 0x1FE000 - address register
   // SR bits set by this file: 0x01 (draw_sprites, sprite 0 overlap), 0x02 (draw_sprites,
   // more than 16 sprite slots on a line), 0x04 (draw_line, line_counter == RCR),
   // 0x08 (do_vram_satb_dma), 0x10 (write, VRAM-VRAM DMA), 0x20 (draw_line, end of VDW sector).
   static uint8 SR;  // R/- 0x1FE000 - status register
   // uint16 DATA ?     // R/W 0x1FE002 - ? Low and High Data Register
  /* registers selectable with AR */
   static uint16 MAWR;   // ?/W 0x00 - memory address write register
   static uint16 MARR;   // ?/W 0x01 - memory address read register
   static uint16 VWR;    // -/W 0x02 - VRAM data write register
   static uint16 VRR;    // R/- 0x02 - VRAM data read register
   static uint16 CR;     // ?/W 0x05 - control register
   static uint16 RCR;    // ?/W 0x06 - raster compare register
   static uint16 BXR;    // ?/W 0x07 - BGX scroll register
   static uint16 BYR;    // ?/W 0x08 - BGY scroll register
   static uint16 MWR;    // ?/W 0x09 - memory width register
   static uint16 HSR;    // ?/W 0x0A - horizontal synchronism register
   static uint16 HDR;    // ?/W 0x0B - horizontal display register
   static uint16 VSR;    // ?/W 0x0C - vertical synchronism register
   static uint16 VDR;    // ?/W 0x0D - vertical display register
   static uint16 VCR;    // ?/W 0x0E - vertical display end position register
   static uint16 DCR;    // ?/W 0x0F - DMA control register
   static uint16 SOUR;   // ?/W 0x10 - DMA source address register
   static uint16 DESR;   // ?/W 0x11 - DMA destination address register
   static uint16 LENR;   // ?/W 0x12 - DMA block length register
   static uint16 DVSSR;  // ?/W 0x13 - DMA VRAM-SATB source address register
   //                    // ?/- 0x03-0x04 and 0x14-0x1F unused

   /* Bit-field accessors over the timing registers above. Each expands to a
      cast expression, so it is re-evaluated from the register on every use.
      HSW/HDS come from HSR, HDW/HDE from HDR, VSW/VDS from VSR, VDW from VDR
      and VDE from VCR. */
   #define HSW (uint)(HSR & 0x1F)
   #define HDS (uint)((HSR >> 8) & 0x7F)
   #define HDW (uint)(HDR & 0x7F)
   #define HDE (uint)((HDR >> 8) & 0x7F)
   #define VSW (uint)(VSR & 0x1F)
   #define VDS (uint)(VSR >> 8)
   #define VDW (uint)(VDR & 0x1FF)
   #define VDE (uint)(VCR & 0xFF)

 // uint16 rw_buffer;  // CPU Read/Write Buffer
 // ?                  // Background Shift Register
 // ?                  // Sprite Shift Register
 uint16 satb[0x100] __attribute__ ((aligned (16)));  // Sprite Attribute Table Buffer
 uint16 vram[0x10000] __attribute__ ((aligned (16)));  // Video RAM - NOTE: 128kB for now, to avoid problems

 // Step added to MAWR/MARR after each VRAM access; chosen from CR bits 11-12 in write().
 uint8 addr_inc;

 // Vertical sectors that draw_line() steps through, in this order.
 const uint VSW_SECTOR = 0;
 const uint VDS_SECTOR = 1;
 const uint VDW_SECTOR = 2;
 const uint VDE_SECTOR = 3;
 uint sector = VSW_SECTOR;
 uint sector_counter = 0;  // lines already spent in the current sector

 // Indexed by (VCE::CR & 3).
 const uint display_width_lut[4] = {341, 455, 682, 682};
 const uint dot_len_lut[4] = {4, 3, 2, 2};
 // Indexed by MWR bits 4-5.
 const uint bat_width_lut[4] = {32, 64, 128, 128};
 const uint bat_w_shift_lut[4] = {5, 6, 7, 7};
 // Indexed by sprite attribute bits 12-13. The literals are unsigned so the
 // complement is already an unsigned value: brace-initialising an unsigned
 // array from a negative int constant is a narrowing conversion (ill-formed
 // since C++11).
 const uint sp_h_mask_lut[4] = {~15u, ~31u, ~63u, ~63u};

 uint display_width;            // refreshed from display_width_lut at the top of draw_line()
 uint scan_line = 0;            // line within the frame; wrapped in draw_line()
 uint line_counter = 0x40;      // value compared against RCR; restarted at 0x40 when the VDW sector begins
 uint bg_y_offset = 0;          // background row used by draw_background(); reloaded from BYR
 int satb_line_counter = -1;    // lines left before do_vram_satb_dma() runs; negative when idle
 bool satb_dma_pending = false; // set when DVSSR is written
 bool burst_mode = true;        // latched once per frame from CR bits 6-7

 uint32 pattern_lut[256];   // built by fill_pattern_lut()
 uint32 bg_cache[0x8000];  // NOTE: 128kB for now, to avoid problems
 uint32 sp_cache[0x8000];  // NOTE: 128kB for now, to avoid problems
 uint16 line_buf[1024];     // palette indices of the line being composed
 uint16 sp_line_buf[1024];  // sprite-only layer for the same line

 // One 16-pixel sprite slice selected for the current line by draw_sprites().
 struct taken_sprite
 {
  uint32 index;    // SATB entry number the slice came from
  uint16 x_pos;    // X position taken from the SATB (second half of a wide sprite is +16)
  uint16 code;     // attribute word (fourth SATB word)
  uint32 data[2];  // two sp_cache words, 8 pixels each, 4 bits per pixel
 };
 taken_sprite sp_list[16];

 void init()
 {
  clock = 0;
  counter = 455;
  AR = 0;     // NOT TESTED
  SR = 0;     // NOT TESTED
  MAWR = 0;   // NOT TESTED
  MARR = 0;   // NOT TESTED
  VWR = 0;    // NOT TESTED
  VRR = 0;    // NOT TESTED
  CR = 0;     // NOT TESTED
  RCR = 0;    // NOT TESTED
  BXR = 0;    // NOT TESTED
  BYR = 0;    // NOT TESTED
  MWR = 0;    // NOT TESTED
  HSR = 0;    // NOT TESTED
  HDR = 0;    // NOT TESTED
  VSR = 0;    // NOT TESTED
  VDR = 0;    // NOT TESTED
  VCR = 0;    // NOT TESTED
  DCR = 0;    // NOT TESTED
  SOUR = 0;   // NOT TESTED
  DESR = 0;   // NOT TESTED
  LENR = 0;   // NOT TESTED
  DVSSR = 0;  // NOT TESTED
  addr_inc = 0;  // NOT TESTED
  fill_pattern_lut();
  for (uint32 i = 0; i < 0x100; i++)
  {
   satb[i] = 0x0000;  // NOT TESTED
  }
  for (uint32 i = 0; i < 0x8000; i++)
  {
   vram[i] = 0xFFFF;  // NOT TESTED
   update_bg_sp_caches(i);
  }
  // ...
  reset();
 }

 /* Reset hook, called at the end of init(). Currently a placeholder: the body
    is empty, so calling it has no effect. */
 void reset()
 {
  // ...
 }

 /* Catches the VDC up with the CPU clock. The difference between the stored
    clock and CPU::clock is taken off the line counter; once the counter is
    used up, one line is drawn and the counter is topped up by 455.
    At most one line is drawn per call. */
 void resync()
 {
  counter -= (clock - CPU::clock);
  clock = CPU::clock;

  if (counter > 0) return;  // still inside the current line

  draw_line();
  counter += 455;
 }

 void fill_pattern_lut()
 {
  for (uint32 i = 0; i < 256; i++)
  {
   pattern_lut[i] = 0;
   pattern_lut[i] |= (i & 0x80) >> 7;
   pattern_lut[i] |= (i & 0x40) >> 2;
   pattern_lut[i] |= (i & 0x20) << 3;
   pattern_lut[i] |= (i & 0x10) << 8;
   pattern_lut[i] |= (i & 0x08) << 13;
   pattern_lut[i] |= (i & 0x04) << 18;
   pattern_lut[i] |= (i & 0x02) << 23;
   pattern_lut[i] |= (i & 0x01) << 28;
  }
 }

 /* Re-derives the bg_cache and sp_cache entries affected by VRAM word `addr`.
    Call after vram[addr] has been changed.

    Background cache: address bit 3 selects which pair of bit positions inside
    each 4-bit pixel the word provides (bits 0-1 when clear, bits 2-3 when
    set); the low byte feeds the lower bit of the pair, the high byte the
    upper one.

    Sprite cache: address bits 4-5 select the single bit position the word
    provides; the high byte goes to the first cache word, the low byte to the
    second. */
 void update_bg_sp_caches(uint16 addr)
 {
  const uint32 low_bits = pattern_lut[vram[addr] & 0xFF];
  const uint32 high_bits = pattern_lut[vram[addr] >> 8];

  // --- background tiles ---
  const uint16 bg_index = ((addr & 0xFFF0) >> 1) | (addr & 7);
  const uint bg_shift = ((addr & 8) == 0) ? 0 : 2;
  uint32 bg_word = bg_cache[bg_index];
  bg_word &= ~((uint32)0x33333333 << bg_shift);  // drop the two bits being replaced
  bg_word |= low_bits << bg_shift;
  bg_word |= high_bits << (bg_shift + 1);
  bg_cache[bg_index] = bg_word;

  // --- sprites ---
  const uint16 sp_index = ((addr & 0xFFC0) >> 1) | ((addr & 0xF) << 1);
  const uint plane = (addr >> 4) & 3;
  const uint32 keep = ~((uint32)0x11111111 << plane);  // every bit except this plane
  sp_cache[sp_index + 0] = (sp_cache[sp_index + 0] & keep) | (high_bits << plane);
  sp_cache[sp_index + 1] = (sp_cache[sp_index + 1] & keep) | (low_bits << plane);
 }

 /* Processes one scanline.
    1. Fills line_buf according to the current vertical sector (only the VDW
       sector draws background and sprites; every other sector writes palette
       index 0x100 to the whole line) and hands it to update_vb_line().
    2. Advances the sector state machine.
    3. Runs the delayed VRAM->SATB copy, checks the raster compare register
       and, at the end of the frame, calls Video::update() and restarts the
       sector sequence.
    Interrupts are signalled by setting a bit in SR together with bit 0x02 of
    IntCtrl::status and CPU::IRQ_lines. */
 void draw_line()
 {
  display_width = display_width_lut[VCE::CR & 3];
  switch(sector)
  {
   case VSW_SECTOR:
   {
    for (uint x = 0; x < display_width; x++)
    {
     line_buf[x] = 0x100;
    }
    update_vb_line();
    sector_counter++;
    // sector lasts VSW + 1 lines
    if (sector_counter > VSW)
    {
     sector_counter = 0;
     sector = VDS_SECTOR;
    }
    break;
   }
   case VDS_SECTOR:
   {
    for (uint x = 0; x < display_width; x++)
    {
     line_buf[x] = 0x100;
    }
    update_vb_line();
    sector_counter++;
    // sector lasts VDS + 2 lines
    if (sector_counter > (VDS + 1))
    {
     sector_counter = 0;
     sector = VDW_SECTOR;
    }
    break;
   }
   case VDW_SECTOR:
   {
    // first line of the sector: reload the vertical scroll and restart the raster counter
    if (sector_counter == 0)
    {
     bg_y_offset = BYR;
     line_counter = 0x40;
    }
    // burst_mode is latched at the end of each frame (see below); while set, nothing is rendered
    if (burst_mode == true)
    {
     for (uint x = 0; x < display_width; x++)
     {
      line_buf[x] = 0x100;
     }
    }
    else
    { 
     draw_background();
     draw_sprites();
    }
    update_vb_line();
    sector_counter++;
    // leave the sector after VDW + 1 lines, or when the frame's last scanline has been reached
    if ((sector_counter > VDW) || ((scan_line + 1) > (uint)(261 + ((VCE::CR & 4) >> 2))))
    {
     // CR bit 3 enables the interrupt raised at the end of this sector (SR bit 0x20)
     if ((CR & 8) != 0)
     {
      SR |= 0x20;
      IntCtrl::status |= 0x02;
      CPU::IRQ_lines |= 0x02;
     }
     // schedule the VRAM->SATB copy: either DCR bit 4 is set or DVSSR was written since the last copy
     if (((DCR & 0x10) != 0) || (satb_dma_pending == true))
     {
      satb_dma_pending = false;
      satb_line_counter = 3;
     }
     sector_counter = 0;
     if (VDE != 0) sector = VDE_SECTOR; else sector = VSW_SECTOR;
    }
    break;
   }
   case VDE_SECTOR:
   {
    for (uint x = 0; x < display_width; x++)
    {
     line_buf[x] = 0x100;
    }
    update_vb_line();
    sector_counter++;
    // sector lasts VDE lines (it is skipped entirely when VDE is 0, see VDW_SECTOR)
    if (sector_counter >= VDE)
    {
     sector_counter = 0;
     sector = VSW_SECTOR;
    }
    break;
   }
  }

  bg_y_offset++;

  // the copy runs on the line where the countdown reaches 0; the counter then parks at -1
  if (satb_line_counter == 0) do_vram_satb_dma();
  if (satb_line_counter >= 0) satb_line_counter--;

  // raster compare: SR bit 0x04, enabled by CR bit 2
  if ((line_counter == RCR) && ((CR & 4) != 0))
  {
   SR |= 0x04;
   IntCtrl::status |= 0x02;
   CPU::IRQ_lines |= 0x02;
  }
  line_counter = (line_counter + 1) & 0x3FF;

  scan_line++;
  // a frame is 262 lines, or 263 when VCE::CR bit 2 is set
  if (scan_line > (uint)(261 + ((VCE::CR & 4) >> 2)))
  {
   Video::update();
   scan_line = 0;
   sector_counter = 0;
   sector = VSW_SECTOR;
   // with CR bits 6 and 7 both clear, the next frame is not rendered
   if ((CR & 0xC0) == 0) burst_mode = true; else burst_mode = false;
  }
 }

 void draw_background()
 {
  uint start = 16 + (HDS * 8);
  if (start > display_width) start = display_width;
  uint end = start + ((HDW + 1) * 8);
  if (end > display_width) end = display_width;

  for (uint x = 0; x < start; x++)
  {
   line_buf[x] = 0x100;
  }

  if ((CR & 0x80) == 0)
  {
   for (uint x = start; x < end; x++)
   {
    line_buf[x] = 0x000;
   }
   for (uint x = end; x < display_width; x++)
   {
    line_buf[x] = 0x100;
   }
   return;
  }

  uint32 cg_mask = 0xFFFFFFFF;
  if ((MWR & 3) == 3)
  {
   if ((MWR & 0x80) == 0)
   {
    cg_mask = 0x33333333;
   }
   else
   {
    cg_mask = 0xCCCCCCCC;
   }
  }

  uint bat_width = bat_width_lut[(MWR & 0x30) >> 4];
  uint bat_w_mask = bat_width - 1;
  uint bat_h_mask = ((MWR & 0x40) >> 1) + 31;
  uint bat_w_shift = bat_w_shift_lut[(MWR & 0x30) >> 4];

  uint bat_y_pos = ((bg_y_offset >> 3) & bat_h_mask) << bat_w_shift;
  uint bat_x_pos = (BXR >> 3) & bat_w_mask;
  uint line = bg_y_offset & 7;

  uint temp = start;
  if ((BXR & 7) != 0)
  {
   temp += (8 - (BXR & 7));
   uint16 vr_ptr = bat_y_pos | bat_x_pos;
   uint16 pattern = vram[vr_ptr] & 0x0FFF;
   uint16 palette = (vram[vr_ptr] & 0xF000) >> 8;
   uint32 tile_line = bg_cache[(pattern << 3) | line] & cg_mask;
   tile_line >>= (BXR & 7) << 2;
   for (uint x = start; x < temp; x++)
   {
    line_buf[x] = palette | (tile_line & 0xF); tile_line >>= 4;
   }
   bat_x_pos = (bat_x_pos + 1) & bat_w_mask;
  }

  for (uint x = temp; x < end; x += 8)
  {
   uint16 vr_ptr = bat_y_pos | bat_x_pos;
   uint16 pattern = vram[vr_ptr] & 0x0FFF;
   uint16 palette = (vram[vr_ptr] & 0xF000) >> 8;
   uint32 tile_line = bg_cache[(pattern << 3) | line] & cg_mask;
   line_buf[x + 0] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 1] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 2] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 3] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 4] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 5] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 6] = palette | (tile_line & 0xF); tile_line >>= 4;
   line_buf[x + 7] = palette | (tile_line & 0xF);
   bat_x_pos = (bat_x_pos + 1) & bat_w_mask;
  }

  for (uint x = end; x < display_width; x++)
  {
   line_buf[x] = 0x100;
  }
 }

 /* Renders the sprites of the current line on top of line_buf.
    Does nothing unless CR bit 6 is set.
    Pass 1 scans the 64 SATB entries in order and collects up to 16 slices of
    16 pixels into sp_list (a wide sprite takes two slices).
    Pass 2 paints the slices into sp_line_buf from last to first, so earlier
    SATB entries end up on top.
    Pass 3 merges sp_line_buf into line_buf inside the active window.
    Each SATB entry is four words: Y position, X position, pattern, attributes. */
 void draw_sprites()
 {
  if ((CR & 0x40) == 0) return;

  uint stored_sprites = 0;
  for (uint i = 0; i < 64; i++)
  {
   uint16 y_pos = satb[i << 2] & 0x3FF;
   uint16 code = satb[(i << 2) + 3];
   // attribute bits 12-13 select the height mask
   uint height_mask = sp_h_mask_lut[(code >> 12) & 3];

   // unsigned subtraction: a sprite that starts below this line wraps to a huge value and fails the test
   uint line = line_counter - y_pos;
   if ((line & height_mask) == 0)
   {
    // all 16 slots taken: flag SR bit 0x02 (interrupt enabled by CR bit 1) and stop scanning
    if (stored_sprites == 16)
    {
     if ((CR & 2) != 0)
     {
      SR |= 2;
      IntCtrl::status |= 0x02;
      CPU::IRQ_lines |= 0x02;
     }
     break;
    }

    // attribute bit 15: mirror the row index within the sprite height
    if ((code >> 15) != 0) line ^= ~height_mask;

    uint16 x_pos = satb[(i << 2) + 1] & 0x3FF;
    uint16 pattern = (satb[(i << 2) + 2] >> 1) & 0x3FF;
    // attribute bit 8: sprite is two slices wide
    uint16 width = (code >> 8) & 1;
    // clear the pattern bits that are implied by the sprite height and, for wide sprites, bit 0
    pattern &= (height_mask >> 3) | (width ^ 1);
    // wide sprite with attribute bit 11 set: start from the other half
    pattern |= ((code >> 11) & 1) & width;

    uint ptr = (pattern << 5) | ((line & 0xF) << 1) | ((line & ~0xF) << 2);

    // executed once for a narrow sprite, twice (via goto) for a wide one
    sp_loop:
    {
     sp_list[stored_sprites].index = i;
     sp_list[stored_sprites].x_pos = x_pos;
     sp_list[stored_sprites].code = code;

     if ((MWR & 0xC) != 4)
     {
      sp_list[stored_sprites].data[0] = sp_cache[ptr];
      sp_list[stored_sprites].data[1] = sp_cache[ptr + 1]; 
     }
     else
     {
      // MWR bits 2-3 == 01: only two bit planes are used; bit 0 of the pattern word picks the pair
      if ((satb[(i << 2) + 2] & 1) == 0)
      {
       sp_list[stored_sprites].data[0] = sp_cache[ptr] & 0x33333333;
       sp_list[stored_sprites].data[1] = sp_cache[ptr + 1] & 0x33333333;
      }
      else
      {
       sp_list[stored_sprites].data[0] = (sp_cache[ptr] & 0xCCCCCCCC) >> 2;
       sp_list[stored_sprites].data[1] = (sp_cache[ptr + 1] & 0xCCCCCCCC) >> 2;
      }
     }
     stored_sprites++;
     if (width != 0)
     {
      // no slot left for the second half: same overflow handling as above (break leaves the SATB scan)
      if (stored_sprites == 16)
      {
       if ((CR & 2) != 0)
       {
        SR |= 2;
        IntCtrl::status |= 0x02;
        CPU::IRQ_lines |= 0x02;
       }
       break;
      }
      width = 0;
      x_pos += 16;
      ptr ^= 0x20;
      goto sp_loop;
     }
    }
   }
  }

  if (stored_sprites == 0) return;

  // same active window as draw_background()
  uint start = 16 + (HDS * 8);
  if (start > display_width) start = display_width;
  uint end = start + ((HDW + 1) * 8);
  if (end > display_width) end = display_width;

  for (uint x = start; x < end; x++) sp_line_buf[x] = 0;

  for (int i = (stored_sprites - 1); i >= 0; i--)
  {
   // sprite X 32 maps to the left edge of the window; smaller values wrap around (unsigned)
   uint x_pos = start + (sp_list[i].x_pos - 32);
   // attribute bit 11: mirror the 16 pixels of the slice
   uint rev_x_mask = 0;
   if ((sp_list[i].code & 0x0800) != 0) rev_x_mask = 0xF;
   // attribute bits 0-3 become palette bits 4-7; attribute bit 7 becomes bit 0x100 (tested in the merge below)
   uint16 palette = ((sp_list[i].code & 0xF) << 4) | ((sp_list[i].code & 0x80) << 1);

   // plain painting: any slice not from SATB entry 0, or overlap detection disabled (CR bit 0 clear)
   if ((sp_list[i].index != 0) || ((CR & 1) == 0))
   {
    uint32 sp_line = sp_list[i].data[0];
    for (uint j = 0; j < 8; j++)
    {
     uint16 pixel = sp_line & 0xF; sp_line >>= 4;
     if (pixel != 0)
     {
      uint x = x_pos + (j ^ rev_x_mask);
      // positions left of the window are not rejected here; they are written but never merged
      if (x >= end) continue;
      sp_line_buf[x] = palette | pixel;
     }
    }
    sp_line = sp_list[i].data[1];
    for (uint j = 8; j < 16; j++)
    {
     uint16 pixel = sp_line & 0xF; sp_line >>= 4;
     if (pixel != 0)
     {
      uint x = x_pos + (j ^ rev_x_mask);
      if (x >= end) continue;
      sp_line_buf[x] = palette | pixel;
     }
    }
   }
   else
   {
    // SATB entry 0 with CR bit 0 set: painting over an already occupied pixel raises SR bit 0x01
    uint32 sp_line = sp_list[i].data[0];
    for (uint j = 0; j < 8; j++)
    {
     uint16 pixel = sp_line & 0xF; sp_line >>= 4;
     if (pixel != 0)
     {
      uint x = x_pos + (j ^ rev_x_mask);
      if ((x < start) || (x >= end)) continue;
      if (sp_line_buf[x] != 0)
      {
       SR |= 0x01;
       IntCtrl::status |= 0x02;
       CPU::IRQ_lines |= 0x02;
      }
      sp_line_buf[x] = palette | pixel;
     }
    }
    sp_line = sp_list[i].data[1];
    for (uint j = 8; j < 16; j++)
    {
     uint16 pixel = sp_line & 0xF; sp_line >>= 4;
     if (pixel != 0)
     {
      uint x = x_pos + (j ^ rev_x_mask);
      if ((x < start) || (x >= end)) continue;
      if (sp_line_buf[x] != 0)
      {
       SR |= 0x01;
       IntCtrl::status |= 0x02;
       CPU::IRQ_lines |= 0x02;
      }
      sp_line_buf[x] = palette | pixel;
     }
    }
   }
  }

  // merge: a sprite pixel wins where the background pixel value is 0 or the sprite carries bit 0x100
  for (uint x = start; x < end; x++)
  {
   if (sp_line_buf[x] != 0)
   {
    if (((line_buf[x] & 0xF) == 0) || ((sp_line_buf[x] & 0x100) != 0))
    {
     line_buf[x] = 0x100 | sp_line_buf[x];
    }
   }
  }
 }

 /* VDC -> VCE -> Video */
 void  update_vb_line()
 {
  if ((scan_line < 17) || (scan_line > 258)) return;
  if (scan_line == 258)
  {
   if ((VCE::CR & 4) == 0)
   {
    #ifdef __SSE2__
     int *vb_ptr = (int*)&Video::buffer[(258 * 1365) + 207];
     for (uint32 i = 0; i < 1128; i++)
     {
      *vb_ptr = 0xFF000000; vb_ptr++;
     }
    #else
     uint32 vb_ptr = (258 * 1365) + 207;
     for (uint32 i = 0; i < 1128; i++)
     {
      Video::buffer[vb_ptr] = 0xFF000000; vb_ptr++;
     }
    #endif
    return;
   }
  }
  uint dot_len = dot_len_lut[VCE::CR & 3];
  #ifdef __SSE2__
   int *vb_ptr = (int*)&Video::buffer[(scan_line * 1365) + 207];
  #else
   uint32 vb_ptr = (scan_line * 1365) + 207;
  #endif
  switch(dot_len)
  {
   case 2:
   {
    for (uint32 i = 80; i < 644; i++)
    {
     uint32 pixel = VCE::palette_lut[line_buf[i]];
     #ifdef __SSE2__
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
     #else
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
     #endif
    }
    break;
   }
   case 3:
   {
    for (uint32 i = 32; i < 408; i++)
    {
     uint32 pixel = VCE::palette_lut[line_buf[i]];
     #ifdef __SSE2__
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
     #else
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
     #endif
    }
    break;
   }
   case 4:
   {
    for (uint32 i = 21; i < 303; i++)
    {
     uint32 pixel = VCE::palette_lut[line_buf[i]];
     #ifdef __SSE2__
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
      _mm_stream_si32 (vb_ptr, pixel); vb_ptr++;
     #else
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
      Video::buffer[vb_ptr] = pixel; vb_ptr++;
     #endif
    }
    break;
   }
  }
 }

 void do_vram_satb_dma()
 {
  if ((DCR & 1) != 0)
  {
   SR |= 0x08;
   IntCtrl::status |= 0x02;
   CPU::IRQ_lines |= 0x02;
  }
  uint16 addr = DVSSR;
  for (uint i = 0; i < 0x100; i++)
  {
   satb[i] = vram[addr & 0x7FFF]; addr++;
  }
 }

 /* CPU read from one of the four VDC ports (selected by reg & 3).
    Advances the CPU clock and resynchronises the VDC first.
      port 0 - returns SR, then clears it together with the bits of
               IntCtrl::status / CPU::IRQ_lines outside mask 0x05
      port 1 - always 0
      port 2 - low byte of VRR
      port 3 - high byte of VRR; if AR selects register 0x02, VRR is then
               reloaded from vram[MARR] and MARR advances by addr_inc */
 uint8 read(uint32 reg)
 {
  CPU::adv_clk();
  resync();

  const uint32 port = reg & 0x03;

  if (port == 0)
  {
   const uint8 status = SR;
   SR = 0x00;
   IntCtrl::status &= 0x05;
   CPU::IRQ_lines &= 0x05;
   return status;
  }

  if (port == 2)
  {
   return (uint8)VRR;
  }

  if (port == 3)
  {
   const uint8 high = (uint8)(VRR >> 8);
   if (AR == 0x02)
   {
    VRR = vram[MARR];
    MARR += addr_inc;  // increment here? (unverified)
   }
   return high;
  }

  return 0x00;  // port 1
 }

 void write(uint32 reg, uint8 data)
 {
  CPU::adv_clk();
  resync();
  switch (reg & 0x03)
  {
    case 0:
     AR = data & 0x1F; break;
    case 1:
     break;
    case 2:
     switch(AR)
     {
      case 0x00: /* MAWR low */ MAWR &= 0xFF00; MAWR |= (uint16)data; break;
      case 0x01: /* MARR low */ MARR &= 0xFF00; MARR |= (uint16)data; break;
      case 0x02: /* VWR low */ VWR &= 0xFF00; VWR |= (uint16)data; break;
      case 0x03: /* not used */ break;
      case 0x04: /* not used */ break;
      case 0x05: /* CR */ CR &= 0xFF00; CR |= (uint16)data; break;
      case 0x06: /* RCR */ RCR &= 0xFF00; RCR |= (uint16)data; break;
      case 0x07: /* BXR low */ BXR &= 0xFF00; BXR |= (uint16)data; break;
      case 0x08: /* BYR low */
       BYR &= 0xFF00; BYR |= (uint16)data;
       bg_y_offset = BYR + 1;
       break;
      case 0x09: /* MWR */ MWR &= 0xFF00; MWR |= (uint16)data; break;
      case 0x0A: /* HSR */ HSR &= 0xFF00; HSR |= (uint16)data; break;
      case 0x0B: /* HDR */ HDR &= 0xFF00; HDR |= (uint16)data; break;
      case 0x0C: /* VSR */ VSR &= 0xFF00; VSR |= (uint16)data; break;
      case 0x0D: /* VDR */ VDR &= 0xFF00; VDR |= (uint16)data; break;
      case 0x0E: /* VCR */ VCR &= 0xFF00; VCR |= (uint16)data; break;
      case 0x0F: /* DCR */ DCR &= 0xFF00; DCR |= (uint16)data; break;
      case 0x10: /* SOUR low */ SOUR &= 0xFF00; SOUR |= (uint16)data; break;
      case 0x11: /* DESR low */ DESR &= 0xFF00; DESR |= (uint16)data; break;
      case 0x12: /* LENR low */ LENR &= 0xFF00; LENR |= (uint16)data; break;
      case 0x13: /* DVSSR low */
       DVSSR &= 0xFF00;
       DVSSR |= (uint16)data;
       satb_dma_pending = true;
       break;
      case 0x14: /* not used */ break;
      case 0x15: /* not used */ break;
      case 0x16: /* not used */ break;
      case 0x17: /* not used */ break;
      case 0x18: /* not used */ break;
      case 0x19: /* not used */ break;
      case 0x1A: /* not used */ break;
      case 0x1B: /* not used */ break;
      case 0x1C: /* not used */ break;
      case 0x1D: /* not used */ break;
      case 0x1E: /* not used */ break;
      case 0x1F: /* not used */ break;
     }
     break;
    case 3:
     switch(AR)
     {
      case 0x00: /* MAWR high */ MAWR &= 0x00FF; MAWR |= (uint16)data << 8; break;
      case 0x01: /* MARR high */
       MARR &= 0x00FF;
       MARR |= (uint16)data << 8;
       VRR = vram[MARR];
       MARR += addr_inc;  // *qui?
       break;
      case 0x02: /* VWR high */
       VWR &= 0x00FF;
       VWR |= (uint16)data << 8;
       vram[MAWR] = VWR;
       update_bg_sp_caches(MAWR);
       MAWR += addr_inc;
       break;
      case 0x03: /* not used */ break;
      case 0x04: /* not used */ break;
      case 0x05: /* CR high */
       CR &= 0x00FF; 
       CR |= (uint16)data << 8;
       switch ((CR >> 11) & 3)
       {
        case 0: addr_inc = 0x01; break;
        case 1: addr_inc = 0x20; break;
        case 2: addr_inc = 0x40; break;
        case 3: addr_inc = 0x80; break;
       }
       break;
      case 0x06: /* RCR high */ RCR &= 0x00FF; RCR |= (uint16)(data & 0x03) << 8; break;
      case 0x07: /* BXR high */ BXR &= 0x00FF; BXR |= (uint16)(data & 0x03) << 8; break;
      case 0x08: /* BYR high */
       BYR &= 0x00FF; BYR |= (uint16)(data & 0x01) << 8;
       bg_y_offset = BYR + 1;
       break;
      case 0x09: /* MWR */ MWR &= 0x00FF; MWR |= (uint16)data << 8; break;
      case 0x0A: /* HSR */ HSR &= 0x00FF; HSR |= (uint16)data << 8; break;
      case 0x0B: /* HDR */ HDR &= 0x00FF; HDR |= (uint16)data << 8; break;
      case 0x0C: /* VSR */ VSR &= 0x00FF; VSR |= (uint16)data << 8; break;
      case 0x0D: /* VDR */ VDR &= 0x00FF; VDR |= (uint16)data << 8; break;
      case 0x0E: /* VCR */ VCR &= 0x00FF; VCR |= (uint16)data << 8; break;
      case 0x0F: /* DCR high */ DCR &= 0x00FF; DCR |= (uint16)data << 8; break;
      case 0x10: /* SOUR high */ SOUR &= 0x00FF; SOUR |= (uint16)data << 8; break;
      case 0x11: /* DESR high */ DESR &= 0x00FF; DESR |= (uint16)data << 8; break;
      case 0x12: /* LENR high */
       LENR &= 0x00FF; LENR |= (uint16)data << 8; break;
	 uint16 src_inc = (DCR & 4) ? -1 : 1;
	 uint16 dst_inc = (DCR & 8) ? -1 : 1;
       do
       {
        vram[DESR] = vram[SOUR];  // attenzione!: valido solo con 128kB di VRAM!
        update_bg_sp_caches(DESR);  // NOTA: esiste una soluzione migliore!
        SOUR += src_inc;
        DESR += dst_inc;
        LENR--;
       } while (LENR != 0xFFFF);
       if ((DCR & 2) != 0)
       {
        SR |= 0x10;
        IntCtrl::status |= 0x02;
        CPU::IRQ_lines |= 0x02;
       }
       break;
      case 0x13: /* DVSSR high */
       DVSSR &= 0x00FF;
       DVSSR |= (uint16)data << 8;
       satb_dma_pending = true;
       break;
      case 0x14: /* not used */ break;
      case 0x15: /* not used */ break;
      case 0x16: /* not used */ break;
      case 0x17: /* not used */ break;
      case 0x18: /* not used */ break;
      case 0x19: /* not used */ break;
      case 0x1A: /* not used */ break;
      case 0x1B: /* not used */ break;
      case 0x1C: /* not used */ break;
      case 0x1D: /* not used */ break;
      case 0x1E: /* not used */ break;
      case 0x1F: /* not used */ break;
     break;
     }
  }
  //...
 }
}
