/* Mednafen - Multi-system Emulator
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <math.h>
#include "pcfx.h"
#include "../video.h"
#include "vdc.h"
#include "interrupt.h"

// Flag bit in a line-buffer pixel.  DrawSprites() pre-fills its scratch line
// with this value and treats any entry that still has the bit set as
// "no sprite pixel here".
#define ALPHA_MASK	0x200

// When true, DrawSprites() keeps collecting sprites after the 16th entry on
// a scanline instead of stopping.  Loaded from a setting in FXVDC_Init().
static bool unlimited_sprites;

// Some virtual vdc macros to make code simpler to read.
// All of them expect a variable named "vdc" to be in scope at the point of use.
#define M_vdc_HSW	(vdc->HSR & 0x1F)	// Horizontal Synchro Width
#define M_vdc_HDS	((vdc->HSR >> 8) & 0x7F) // Horizontal Display Start
#define M_vdc_HDW	(vdc->HDR & 0x7F)	// Horizontal Display Width (in 8-pixel units, minus one; see DrawBG)
#define M_vdc_HDE	((vdc->HDR >> 8) & 0x7F) // Horizontal Display End

#define M_vdc_VSW	(vdc->VSR & 0x1F)	// Vertical synchro width
#define M_vdc_VDS	((vdc->VSR >> 8) & 0xFF) // Vertical Display Start
#define M_vdc_VDW	(vdc->VDR & 0x1FF)	// Vertical Display Width (i.e. height, in scanlines)
#define M_vdc_VCR	(vdc->VCR & 0xFF)	// Low 8 bits of the VCR register

// Amount added to MAWR/MARR after a VRAM data-port access, indexed by
// bits 11-12 of CR.
static const unsigned int vram_inc_tab[4] = { 1, 32, 64, 128 };

// Bits of vdc->status.  FXVDC_Read() clears the low six bits (CR..VD) when
// the status port is read.
#define VDCS_CR		0x01 // Sprite #0 collision interrupt occurred
#define VDCS_OR		0x02 // Sprite overflow interrupt occurred
#define VDCS_RR		0x04 // Raster compare (RCR) interrupt occurred
#define VDCS_DS		0x08 // VRAM to SAT DMA completion interrupt occurred
#define VDCS_DV		0x10 // VRAM to VRAM DMA completion interrupt occurred
#define VDCS_VD		0x20 // Vertical blank interrupt occurred
#define VDCS_BSY	0x40 // VDC is waiting for a CPU access slot during the active display area??  (not set anywhere in this file)

/*
 * Rebuild one 8-pixel row of the decoded background tile cache after the
 * VRAM word at address A has changed.
 *
 * A >> 4 selects the tile and A & 7 the row inside it; bit 3 of A is ignored,
 * so the two VRAM words that describe the same row (offsets row and row + 8
 * within the 16-word tile) both refresh the same cache row.
 * Each cached byte is a 4-bit pixel value assembled from the low and high
 * bytes of those two words, stored left-to-right (bit 7 first).
 */
static INLINE void FixTileCache(fx_vdc_t *vdc, uint16 A)
{
 const uint32 tile = (A >> 4);
 const uint32 row = (A & 0x7);

 const uint32 planes01 = vdc->VRAM[tile * 16 + row];
 const uint32 planes23 = vdc->VRAM[tile * 16 + row + 8];

 uint8 *cache_row = vdc->bg_tile_cache[tile][row];

 for(int col = 0; col < 8; col++)
 {
  const int bit = 7 - col;	// leftmost pixel lives in the highest bit
  uint32 pixel;

  pixel  = ((planes01 >> bit) & 1);
  pixel |= ((planes01 >> (bit + 8)) & 1) << 1;
  pixel |= ((planes23 >> bit) & 1) << 2;
  pixel |= ((planes23 >> (bit + 8)) & 1) << 3;

  cache_row[col] = pixel;
 }
}

// REGSETP: replace one byte of a 16-bit register with _data.
//   _msb != 0 -> the high byte is replaced, the low byte is kept.
//   _msb == 0 -> the low byte is replaced, the high byte is kept.
// Every argument is parenthesized so compound expressions expand correctly,
// and the body is wrapped in do { } while(0) so the macro behaves as a single
// statement (safe in an unbraced if/else).  _reg and _msb are evaluated twice.
#define REGSETP(_reg, _data, _msb) do { (_reg) &= 0xFF << ((_msb) ? 0 : 8); (_reg) |= (_data) << ((_msb) ? 8 : 0); } while(0)

// REGGETP: fetch the high (_msb != 0) or low (_msb == 0) byte of a register.
#define REGGETP(_reg, _msb) (((_reg) >> ((_msb) ? 8 : 0)) & 0xFF)

/*
 * Handle a CPU byte read from the VDC.  Only the low two bits of A are
 * decoded:
 *   0    - status port: returns vdc->status, then clears its low six bits
 *          and drops the interrupt line via PCFXIRQ_Assert(ilevel, 0).
 *   1    - nothing is mapped here; reads return 0.
 *   2, 3 - low / high byte of the VRAM read buffer.  When the VRAM read
 *          register (0x02) is selected, reading the high byte advances MARR
 *          by the CR-selected increment and refills the buffer.
 */
uint8 FXVDC_Read(fx_vdc_t *vdc, uint32 A)
{
 const int msb = A & 1;
 const uint32 port = A & 0x3;

 if(port == 0x0)
 {
  const uint8 status = vdc->status;

  vdc->status &= ~0x3F;
  PCFXIRQ_Assert(vdc->ilevel, 0);

  return(status);
 }

 if(port == 0x1)
  return(0);

 // Ports 2 and 3: data register.
 const uint8 data = REGGETP(vdc->read_buffer, msb);

 if(msb && vdc->select == 0x2) // VRR - VRAM Read Register
 {
  vdc->MARR += vram_inc_tab[(vdc->CR >> 11) & 0x3];
  vdc->read_buffer = vdc->VRAM[vdc->MARR & 0x7FFF];
 }

 return(data);
}

/*
 * Run up to 455 steps of the VRAM-to-VRAM DMA engine.  Steps alternate
 * between a read phase (latch VRAM[SOUR] into DMAReadBuffer) and a write
 * phase (store the latch at VRAM[DESR]), tracked by vdc->DMAReadWrite.
 *
 * After each write, DESR and SOUR step by +1, or by -1 when DCR bit 2
 * (destination) / bit 3 (source) is set, and LENR is decremented.  When LENR
 * wraps to 0xFFFF the transfer is finished: DMARunning is cleared and, if
 * DCR bit 1 is set, the DV status flag is raised and the IRQ asserted.
 */
static void DoDMA(fx_vdc_t *vdc)
{
 // Assuming one cycle for reads, one cycle for write, with DMA?
 for(int step = 0; step < 455; step++)
 {
  if(vdc->DMAReadWrite)
  {
   // Write phase.
   vdc->VRAM[vdc->DESR & 0x7FFF] = vdc->DMAReadBuffer;
   FixTileCache(vdc, vdc->DESR & 0x7FFF);

   vdc->DESR += (vdc->DCR & 0x4) ? -1 : 1;
   vdc->SOUR += (vdc->DCR & 0x8) ? -1 : 1;
   vdc->LENR--;

   if(vdc->LENR == 0xFFFF)  // DMA is done.
   {
    vdc->DMARunning = 0;

    if(vdc->DCR & 0x02)
    {
     vdc->status |= VDCS_DV;
     PCFXIRQ_Assert(vdc->ilevel, 1);
    }
    // The phase flag is intentionally left untouched on completion,
    // exactly as when breaking out of the loop.
    return;
   }
  }
  else
  {
   // Read phase.
   vdc->DMAReadBuffer = vdc->VRAM[vdc->SOUR & 0x7FFF];
  }

  vdc->DMAReadWrite ^= 1;
 }
}

/*
 * Handle a CPU byte write to the VDC.  Only the low two bits of A are
 * decoded:
 *   0    - register select (low 5 bits of V).
 *   2, 3 - low / high byte of the currently selected register.
 * Writes to port 1 and to unhandled register numbers are ignored.
 *
 * Side effects worth knowing about:
 *   - MARR (0x01): writing the high byte refills the VRAM read buffer.  The
 *     address is masked to 0x7FFF, matching the read path in FXVDC_Read().
 *   - VWR  (0x02): the low byte is latched; the high-byte write commits the
 *     word to VRAM (only when MAWR < 0x8000), refreshes the tile cache and
 *     always advances MAWR by the CR-selected increment.
 *   - BYR  (0x08): BG_YOffset is reloaded on both byte writes.
 *   - LENR (0x12): the high-byte write starts a VRAM-to-VRAM DMA.
 *   - SATB (0x13): any write flags a pending VRAM-to-SAT transfer.
 */
void FXVDC_Write(fx_vdc_t *vdc, uint32 A, uint8 V)
{
 int msb = A & 1;

 A &= 0x3;

 switch(A)
 {
  case 0x0: vdc->select = V & 0x1F; break;
  case 0x2:
  case 0x3:
	   switch(vdc->select & 0x1F)
	   {
	    case 0x00: REGSETP(vdc->MAWR, V, msb); break;
	    case 0x01: REGSETP(vdc->MARR, V, msb);
		       if(msb)
			vdc->read_buffer = vdc->VRAM[vdc->MARR & 0x7FFF]; // mask: MARR is a full 16-bit value
		       break;
	    case 0x02: if(!msb) vdc->write_latch = V;
		       else
		       {
			if(vdc->MAWR < 0x8000)
			{
			 vdc->VRAM[vdc->MAWR & 0x7fff] = (V << 8) | vdc->write_latch;
			 FixTileCache(vdc, vdc->MAWR & 0x7FFF);
			}
			vdc->MAWR += vram_inc_tab[(vdc->CR >> 11) & 0x3];
		       }
		       break;
	    case 0x05: REGSETP(vdc->CR, V, msb); break;
	    case 0x06: REGSETP(vdc->RCR, V, msb); vdc->RCR &= 0x3FF; break;
	    case 0x07: REGSETP(vdc->BXR, V, msb); break;
	    case 0x08: REGSETP(vdc->BYR, V, msb);
		       vdc->BG_YOffset = vdc->BYR; // Set it on LSB and MSB writes(only changing on MSB breaks Youkai Douchuuki)
		       break;
	    case 0x09: REGSETP(vdc->MWR, V, msb); break;
	    case 0x0a: REGSETP(vdc->HSR, V, msb); break;
	    case 0x0b: REGSETP(vdc->HDR, V, msb); break;
	    case 0x0c: REGSETP(vdc->VSR, V, msb); break;
	    case 0x0d: REGSETP(vdc->VDR, V, msb); break;
	    case 0x0e: REGSETP(vdc->VCR, V, msb); break;
	    case 0x0f: REGSETP(vdc->DCR, V, msb); break;
	    case 0x10: REGSETP(vdc->SOUR, V, msb); break;
	    case 0x11: REGSETP(vdc->DESR, V, msb); break;
	    case 0x12: REGSETP(vdc->LENR, V, msb);
		       if(msb)
		       {
			vdc->DMARunning = 1;
			vdc->DMAReadWrite = 0;
			if(vdc->burst_mode && !(vdc->DCR & 0x02))
			 DoDMA(vdc);	// Do one line's worth of DMA transfers
					// because Cosmic Fantasy 4 is evil
					// and uses timed writes to the DMA
					// start register, rather than waiting until
					// the machine says we're done,
					// which would require cycle-accurate VDC emulation...like that's
					// going to happen when I don't even have accurate values
					// for HuC6280 instruction timings. :b
		       }
		       break;
	    case 0x13: REGSETP(vdc->SATB, V, msb); vdc->SATBPending = 1; break;
	    default: break; // unhandled register numbers are ignored
	   }
	   break;
 }
}

// Background attribute table (BAT) geometry, in tiles.
// Width and its log2 are indexed by MWR bits 4-5, height by MWR bit 6.
static const unsigned int bat_width_tab[4] = { 32, 64, 128, 128 };
static const unsigned int bat_width_shift_tab[4] = { 5, 6, 7, 7 };
static const unsigned int bat_height_tab[2] = { 32, 64 };
// Output line width in pixels, indexed by vdc->dot_clock.
static const unsigned int ClockModeWidths[2] = { 288, 384};

/*
 * Render the background layer of the current scanline into target.
 *
 * The displayed width is (HDW + 1) * 8 pixels, clamped to the line width of
 * the current dot clock and centred inside it; everything outside
 * [start, end) is filled with the overscan colour 0x100.
 *
 * Each output pixel is (palette << 4) | 4-bit tile pixel, where the palette
 * is the top nibble of the BAT entry and the tile pixel comes from the
 * decoded bg_tile_cache.  When the background is disabled (CR bit 7 clear)
 * the whole line is filled with 0x100 if sprites are enabled (CR bit 6),
 * otherwise with 0x000.
 *
 * vdc->BG_XOffset is advanced while drawing; the caller reloads it from BXR
 * before every line.
 *
 * Tile rows are copied byte by byte.  The previous implementation read them
 * through uint64 / uint32 pointer casts under LSB_FIRST, which violates
 * strict aliasing and assumes alignment; the per-byte form produces the same
 * pixels on every platform.
 */
static void DrawBG(fx_vdc_t *vdc, uint32 *target)
{
 unsigned int width = (M_vdc_HDW + 1) * 8;

 if(width > ClockModeWidths[vdc->dot_clock])
  width = ClockModeWidths[vdc->dot_clock];

 int start = (ClockModeWidths[vdc->dot_clock] - width) / 2;
 int end = start + width;
 int bat_width = bat_width_tab[(vdc->MWR >> 4) & 3];
 int bat_width_mask = bat_width - 1;
 int bat_width_shift = bat_width_shift_tab[(vdc->MWR >> 4) & 3];
 int bat_height_mask = bat_height_tab[(vdc->MWR >> 6) & 1] - 1;

 // Pseudo-hack for Asuka 120%'s odd video timings
 if(vdc->dot_clock == 1 && M_vdc_HDS == 5 && M_vdc_HDE == 6 && M_vdc_HDW == 43 && M_vdc_HSW == 2)
  start += 8;
 else if(vdc->dot_clock == 0 && M_vdc_HDS == 2 && M_vdc_HDE == 3 && M_vdc_HDW == 33 && M_vdc_HSW == 2)
  start += 4;

 if(!(vdc->CR & 0x80)) // BG is disabled
 {
  uint32 color = (vdc->CR & 0x40) ? 0x100 : 0x000;

  MDFN_FastU32MemsetM8(target, color, ClockModeWidths[vdc->dot_clock]);
  for(int x = start; x < end; x++)
   target[x] = color;
  return;
 }

 uint32 overscan_color = 0x100;

 int bat_y = ((vdc->BG_YOffset >> 3) & bat_height_mask) << bat_width_shift;
 int first_end = start + 8 - (vdc->BG_XOffset & 7);

 // Clear the left overscan area
 MDFN_FastU32MemsetM8(target, overscan_color, 50);

 // Leading partial tile: draw pixel by pixel until BG_XOffset reaches a
 // tile boundary.
 for(int x = start; x < first_end; x++)
 {
  int bat_x = (vdc->BG_XOffset >> 3) & bat_width_mask;

  uint16 bat = vdc->VRAM[bat_x | bat_y];
  int palette_index = ((bat >> 12) & 0x0F) << 4;
  uint32 raw_pixel;

  raw_pixel = vdc->bg_tile_cache[bat & 0x7FF][vdc->BG_YOffset & 7][vdc->BG_XOffset & 0x7];
  target[x] = palette_index | raw_pixel;

  vdc->BG_XOffset++;
 }

 int bat_boom = (vdc->BG_XOffset >> 3) & bat_width_mask;
 int line_sub = vdc->BG_YOffset & 7;

 // Whole tiles, 8 pixels at a time.
 // This will draw past the right side of the displayed area, but since our
 // pitch is 1024, and max width is ~512, we're safe.  Also, any overflow that
 // lands on the visible screen is hidden by the overscan fill below.
 for(int x = first_end; x < end; x += 8)
 {
  uint16 bat = vdc->VRAM[bat_boom | bat_y];
  uint32 pix_or = (bat >> 8) & 0xF0;
  const uint8 *pix_lut = vdc->bg_tile_cache[bat & 0x7FF][line_sub];

  for(int p = 0; p < 8; p++)
   target[x + p] = pix_lut[p] | pix_or;

  bat_boom = (bat_boom + 1) & bat_width_mask;
  vdc->BG_XOffset++;	// NOTE(review): advances by one per tile, not eight; the value is not read again in this function
 }

 // Left and right overscan.
 MDFN_FastU32MemsetM8(target, overscan_color, (start + 1) & ~1);
 {
  uint32 end_begin = ((end + 1) & ~ 1);
  MDFN_FastU32MemsetM8(target + end_begin, overscan_color, ClockModeWidths[vdc->dot_clock] - end_begin);
 }
}

// Sprite flag bits.  The first three are tested against the fourth word of a
// SAT entry; SPRF_SPRITE0 does not fit in that 16-bit word and is added by
// DrawSprites() to mark line-list entries that came from SAT entry 0.
#define SPRF_PRIORITY	0x00080
#define SPRF_HFLIP	0x00800
#define SPRF_VFLIP	0x08000
#define SPRF_SPRITE0	0x10000

// Sprite height in pixels, indexed by flag bits 12-13.
static const unsigned int sprite_height_tab[4] = { 16, 32, 64, 64 };
// Mask applied to the sprite pattern number for each height setting; it
// clears the bits that DrawSprites() then fills in from the row offset.
static const unsigned int sprite_height_no_mask[4] = { ~0, ~2, ~6, ~6 };
// Sprite width in pixels, indexed by flag bit 8.
static const unsigned int sprite_width_tab[2] = { 16, 32 };

// One 16-pixel-wide sprite slice on the current scanline.  A 32-pixel-wide
// sprite produces two of these.
typedef struct
{
	uint32 x;		// X position as read from the SAT (second half: +16)
	uint32 flags;		// SAT flag word, plus SPRF_SPRITE0 where applicable
	uint8 palette_index;	// (flags & 0xF) << 4
	uint16 pattern_data[4];	// One 16-bit row per bitplane
} SPRLE;

/*
 * Render the sprites that intersect the current scanline (vdc->RCRCount).
 *
 * Phase 1 walks all 64 SAT entries and collects 16-pixel-wide slices into a
 * local list; a 32-pixel-wide sprite contributes two slices.  Once 16 slices
 * are present, the overflow status/IRQ is raised if CR bit 1 is set, and
 * collection stops unless unlimited_sprites is enabled.
 *
 * Phase 2 paints the slices into a scratch line, last-collected first so
 * that earlier SAT entries end up on top.  For slices of SAT entry 0, and
 * only when CR bit 0 is set, painting over a pixel that another sprite has
 * already produced raises the collision status/IRQ.
 *
 * Phase 3 (only when enabled is non-zero) merges the scratch line into
 * target: a sprite pixel replaces the target pixel if the target pixel has
 * ALPHA_MASK set or the sprite carries the priority flag.
 * NOTE(review): DrawBG() in this file never appears to set ALPHA_MASK in
 * target, so non-priority sprites would never be copied - confirm whether
 * that is intended.
 *
 * Status flags and IRQs are produced even when enabled is zero.
 */
static void DrawSprites(fx_vdc_t *vdc, uint32 *target, int enabled)
{
 SPRLE SpriteList[64 * 2];	// 64 SAT entries, two slices each for 32-pixel-wide sprites (see unlimited_sprites)
 int active_sprites = 0;

 // Phase 1: collect the slices that cover this scanline.
 for(int i = 0; i < 64; i++)
 {
  int16 y = (vdc->SAT[i * 4 + 0] & 0x3FF) - 0x40;
  uint16 x = (vdc->SAT[i * 4 + 1] & 0x3FF);
  uint16 no = (vdc->SAT[i * 4 + 2] >> 1) & 0x1FF;	// Todo, cg mode bit
  uint16 flags = (vdc->SAT[i * 4 + 3]);

  uint32 palette_index = (flags & 0xF) << 4;
  uint32 height = sprite_height_tab[(flags >> 12) & 3];
  uint32 width = sprite_width_tab[(flags >> 8) & 1];

  // Skip sprites that do not cover this line.
  if((int32)vdc->RCRCount < y || (int32)vdc->RCRCount >= (int32)(y + height))
   continue;

  bool second_half = 0;
  uint32 y_offset = vdc->RCRCount - y;

  if(y_offset > height)
   continue;

  next_slice:

  if(active_sprites == 16)
  {
   if(vdc->CR & 0x2)
   {
    vdc->status |= VDCS_OR;
    PCFXIRQ_Assert(vdc->ilevel, 1);
   }
   if(!unlimited_sprites)
    break;
  }

  if(flags & SPRF_VFLIP)
   y_offset = height - 1 - y_offset;

  // Work out which 16x16 pattern cell this slice reads from.
  no &= sprite_height_no_mask[(flags >> 12) & 3];
  no |= (y_offset & 0x30) >> 3;
  if(width == 32)
   no &= ~1;
  if(second_half)
   no |= 1;
  if((flags & SPRF_HFLIP) && width == 32)
   no ^= 1;

  SPRLE *slice = &SpriteList[active_sprites];
  const uint32 row_addr = no * 64 + (y_offset & 15);

  slice->flags = flags | (i ? 0 : SPRF_SPRITE0);
  slice->x = x;
  slice->palette_index = palette_index;

  if((vdc->MWR & 0xC) == 4)
  {
   // Two-plane mode: bit 0 of the pattern word selects which pair of
   // planes is used; the other pair reads as zero.
   if(vdc->SAT[i * 4 + 2] & 1)
   {
    slice->pattern_data[0] = vdc->VRAM[row_addr + 32];
    slice->pattern_data[1] = vdc->VRAM[row_addr + 48];
   }
   else
   {
    slice->pattern_data[0] = vdc->VRAM[row_addr];
    slice->pattern_data[1] = vdc->VRAM[row_addr + 16];
   }
   slice->pattern_data[2] = 0;
   slice->pattern_data[3] = 0;
  }
  else
  {
   slice->pattern_data[0] = vdc->VRAM[row_addr];
   slice->pattern_data[1] = vdc->VRAM[row_addr + 16];
   slice->pattern_data[2] = vdc->VRAM[row_addr + 32];
   slice->pattern_data[3] = vdc->VRAM[row_addr + 48];
  }

  active_sprites++;

  if(width == 32 && !second_half)
  {
   second_half = 1;
   x += 16;
   y_offset = vdc->RCRCount - y;	// Fix the y offset so that sprites that are hflipped + vflipped display properly
   goto next_slice;
  }
 }

 // Horizontal window, computed the same way as in DrawBG.
 uint32 display_width = (M_vdc_HDW + 1) * 8;

 if(display_width > ClockModeWidths[vdc->dot_clock])
  display_width = ClockModeWidths[vdc->dot_clock];

 uint32 start = (ClockModeWidths[vdc->dot_clock] - display_width) / 2;

 if(vdc->dot_clock == 1 && M_vdc_HDS == 5 && M_vdc_HDE == 6 && M_vdc_HDW == 43 && M_vdc_HSW == 2)
  start += 8;
 else if(vdc->dot_clock == 0 && M_vdc_HDS == 2 && M_vdc_HDE == 3 && M_vdc_HDW == 33 && M_vdc_HSW == 2)
  start += 4;

 uint32 end = start + display_width;

 if(end > ClockModeWidths[vdc->dot_clock])
  end = ClockModeWidths[vdc->dot_clock];

 // Phase 2: paint the slices into a scratch line.
 uint32 sprite_line_buf[1024] __attribute__ ((aligned (16)));

 MDFN_FastU32MemsetM8(sprite_line_buf, ALPHA_MASK, (end + 1) & ~1);

 for(int i = (active_sprites - 1); i >= 0; i--)
 {
  const SPRLE *slice = &SpriteList[i];
  const int32 pos = slice->x - 0x20 + start;
  const bool hflip = (slice->flags & SPRF_HFLIP) != 0;
  const bool detect_hit = (slice->flags & SPRF_SPRITE0) && (vdc->CR & 0x01);

  // Every sprite pixel carries ALPHA_MASK << 2 so that sprite #0 can detect
  // overlap; ALPHA_MASK << 1 marks sprites with the priority flag.
  uint32 prio_or = (ALPHA_MASK << 2);
  if(slice->flags & SPRF_PRIORITY)
   prio_or |= ALPHA_MASK << 1;

  const uint32 pixel_base = slice->palette_index | 0x100 | prio_or;

  for(uint32 x = 0; x < 16; x++)
  {
   const uint32 bit = hflip ? x : (15 - x);
   uint32 raw_pixel;

   raw_pixel = (slice->pattern_data[0] >> bit) & 1;
   raw_pixel |= ((slice->pattern_data[1] >> bit) & 1) << 1;
   raw_pixel |= ((slice->pattern_data[2] >> bit) & 1) << 2;
   raw_pixel |= ((slice->pattern_data[3] >> bit) & 1) << 3;

   if(!raw_pixel)	// colour 0 is transparent
    continue;

   const uint32 tx = pos + x;

   if(tx >= end) // Covers negative and overflowing the right side.
    continue;

   if(detect_hit && (sprite_line_buf[tx] & (ALPHA_MASK << 2)))
   {
    vdc->status |= VDCS_CR;
    PCFXIRQ_Assert(vdc->ilevel, 1);
   }

   sprite_line_buf[tx] = pixel_base | raw_pixel;
  }
 }

 // Phase 3: merge into the output line.
 if(!enabled)
  return;

 for(unsigned int x = start; x < end; x++)
 {
  const uint32 spr_pixel = sprite_line_buf[x];

  if(spr_pixel & ALPHA_MASK)	// nothing was painted here
   continue;

  if((target[x] & ALPHA_MASK) || (spr_pixel & (ALPHA_MASK << 1)))
   target[x] = spr_pixel;
 }
}

/*
 * Advance the VDC by one scanline.
 *
 *   frame_counter - scanline index within the frame.  On line 0 the vertical
 *                   timing registers are latched into the *_cache fields,
 *                   display_counter is reset, and burst mode is entered when
 *                   both CR bit 6 and bit 7 are clear.  Pixels are only
 *                   produced for lines 14 .. 255.
 *   linebuffer    - destination for this line's pixels.
 *   skip          - non-zero: do not write any pixels to linebuffer.  Sprite
 *                   processing still runs so that its status flags and IRQs
 *                   are generated.
 *
 * Per-line work, in order: reset RCRCount on the first active line, run
 * pending VRAM-to-VRAM DMA outside the active display, start the VRAM-to-SAT
 * copy on the vblank line, check the raster-compare interrupt, draw, raise
 * the vblank interrupt, advance the counters, and complete the SAT DMA
 * countdown.
 *
 * Changes from the previous revision: a debug printf that fired on every
 * raster-compare match was removed, the overscan clear now honours skip like
 * every other pixel write in this function, and the unused ClockPixelWidths
 * table was dropped.
 */
void FXVDC_DoLine(fx_vdc_t *vdc, uint32 frame_counter, uint32 *linebuffer, int skip)
{
 int need_vbi = 0;

 if(frame_counter == 0)
 {
  vdc->VDS_cache = M_vdc_VDS;
  vdc->VSW_cache = M_vdc_VSW;
  vdc->VDW_cache = M_vdc_VDW;
  vdc->VCR_cache = M_vdc_VCR;
  vdc->VBlankFL_cache = vdc->VDS_cache + vdc->VSW_cache + vdc->VDW_cache + 1;

  if(vdc->VBlankFL_cache > 261)
   vdc->VBlankFL_cache = 261;

  vdc->display_counter = 0;
  vdc->burst_mode = !(vdc->CR & 0xC0);
 }

 // None of the values these depend on change before the drawing step below,
 // so they are evaluated once.
 const int first_active_line = (vdc->display_counter == (vdc->VDS_cache + vdc->VSW_cache));
 const int in_active_display = !vdc->burst_mode &&
	vdc->display_counter >= (vdc->VDS_cache + vdc->VSW_cache) &&
	vdc->display_counter < (vdc->VDS_cache + vdc->VSW_cache + vdc->VDW_cache + 1);
 const int in_visible_frame = (frame_counter >= 14 && frame_counter < (14 + 242));

 if(first_active_line && (vdc->burst_mode || in_active_display))
  vdc->RCRCount = 0;

 // We're outside of the active display area (or in burst mode), so the bus
 // is free for VRAM-to-VRAM DMA.
 if(!in_active_display && vdc->DMARunning)
  DoDMA(vdc);

 if(vdc->display_counter == vdc->VBlankFL_cache)
 {
  need_vbi = 1;
  if(vdc->SATBPending || (vdc->DCR & 0x10))
  {
   vdc->SATBPending = 0;
   vdc->sat_dma_slcounter = 2;

   if(vdc->SATB < 0x8000)
   {
    // Copy up to 256 words, truncated at the end of VRAM.
    uint32 len = 256;
    if(vdc->SATB > 0x7F00)
     len = 0x8000 - vdc->SATB;
    memcpy(vdc->SAT, &vdc->VRAM[vdc->SATB], len * sizeof(uint16));
   }
  }
 }

 // Raster compare interrupt.
 if((int)vdc->RCRCount == ((int)vdc->RCR - 0x40) && (vdc->CR & 0x04))
 {
  vdc->status |= VDCS_RR;
  PCFXIRQ_Assert(vdc->ilevel, 1);
 }

 if(in_active_display)
 {
  if(first_active_line)
   vdc->BG_YOffset = vdc->BYR;
  else
   vdc->BG_YOffset++;
  vdc->BG_XOffset = vdc->BXR;

  if(in_visible_frame)
  {
   if(!skip)
    DrawBG(vdc, linebuffer);
   if(vdc->CR & 0x40)
    DrawSprites(vdc, linebuffer, skip ? 0 : 1);
  }
 }
 else if(in_visible_frame && !skip)
 {
  // Burst mode or vertical overscan: blank line.
  MDFN_FastU32MemsetM8(linebuffer, 0x000, 512);
 }

 if((vdc->CR & 0x08) && need_vbi)
 {
  vdc->status |= VDCS_VD;
  PCFXIRQ_Assert(vdc->ilevel, 1);
 }

 vdc->RCRCount++;
 vdc->display_counter++;

 // The SAT DMA completion interrupt fires when the countdown started on the
 // vblank line reaches zero.
 if(vdc->sat_dma_slcounter)
 {
  vdc->sat_dma_slcounter--;
  if(!vdc->sat_dma_slcounter)
  {
   if(vdc->DCR & 0x01)
   {
    vdc->status |= VDCS_DS;
    PCFXIRQ_Assert(vdc->ilevel, 1);
   }
  }
 }

 if(vdc->display_counter == (vdc->VDS_cache + vdc->VSW_cache + vdc->VDW_cache + vdc->VCR_cache + 3))
 {
  vdc->display_counter = 0;
 }
}

/*
 * Put the VDC into its reset state: the VRAM read buffer is set to 0xFFFF
 * and the five display timing registers to 0xFF.  No other field is touched.
 */
void FXVDC_Reset(fx_vdc_t *vdc)
{
 vdc->read_buffer = 0xFFFF;

 // Needed for Body Conquest 2 -_-
 vdc->VCR = 0xFF;
 vdc->VDR = 0xFF;
 vdc->VSR = 0xFF;
 vdc->HDR = 0xFF;
 vdc->HSR = 0xFF;
}

/*
 * Power-on initialization.  At present this is identical to a reset; the
 * structure is not cleared here.
 */
void FXVDC_Power(fx_vdc_t *vdc)
{
 FXVDC_Reset(vdc);
}

/*
 * Allocate a zero-initialized VDC instance.
 *
 *   ilevel - interrupt level this chip passes to PCFXIRQ_Assert().
 *
 * Also refreshes the file-wide unlimited_sprites flag from the
 * "pce.nospritelimit" setting.
 *
 * Returns the new instance, or NULL if the allocation failed (previously a
 * failed allocation was dereferenced).  Nothing in this file frees the
 * instance; release is left to the caller.
 */
fx_vdc_t *FXVDC_Init(int ilevel)
{
 fx_vdc_t *ret = (fx_vdc_t *)calloc(1, sizeof(fx_vdc_t));

 if(!ret)
  return(NULL);

 ret->ilevel = ilevel;
 unlimited_sprites = MDFN_GetSettingB("pce.nospritelimit");

 return(ret);
}

/*
 * Save or load the VDC's state through MDFNSS_StateAction().
 *
 *   sm, load, data_only - passed through unchanged.
 *   vdc                 - chip instance whose fields are registered.
 *   name                - section name passed to MDFNSS_StateAction().
 *
 * The decoded background tile cache is not stored; after a load it is
 * rebuilt from VRAM.
 *
 * Returns whatever MDFNSS_StateAction() returned.
 *
 * VSW_cache used to be registered under the name "VDS_cache", duplicating
 * the entry for VDS_cache; it now has its own name.
 * NOTE(review): states written before this fix carry no "VSW_cache" entry -
 * confirm how the loader treats a missing variable.
 */
int FXVDC_StateAction(StateMem *sm, int load, int data_only, fx_vdc_t *vdc, const char *name)
{
 SFORMAT VDC_StateRegs[] =
 {
	SFVARN(vdc->display_counter, "display_counter"),
	SFVARN(vdc->sat_dma_slcounter, "sat_dma_slcounter"),

	SFVARN(vdc->select, "select"),
	SFVARN(vdc->MAWR, "MAWR"),
	SFVARN(vdc->MARR, "MARR"),
	SFVARN(vdc->CR, "CR"),
	SFVARN(vdc->RCR, "RCR"),
	SFVARN(vdc->BXR, "BXR"),
	SFVARN(vdc->BYR, "BYR"),
	SFVARN(vdc->MWR, "MWR"),

	SFVARN(vdc->HSR, "HSR"),
	SFVARN(vdc->HDR, "HDR"),
	SFVARN(vdc->VSR, "VSR"),
	SFVARN(vdc->VDR, "VDR"),

	SFVARN(vdc->VCR, "VCR"),
	SFVARN(vdc->DCR, "DCR"),
	SFVARN(vdc->SOUR, "SOUR"),
	SFVARN(vdc->DESR, "DESR"),
	SFVARN(vdc->LENR, "LENR"),
	SFVARN(vdc->SATB, "SATB"),

	SFVARN(vdc->RCRCount, "RCRCount"),

	SFVARN(vdc->read_buffer, "read_buffer"),
	SFVARN(vdc->write_latch, "write_latch"),
	SFVARN(vdc->status, "status"),
	SFARRAY16N(vdc->SAT, 0x100, "SAT"),

	SFARRAY16N(vdc->VRAM, 32768, "VRAM"),
	SFVARN(vdc->DMAReadBuffer, "DMAReadBuffer"),
	SFVARN(vdc->DMAReadWrite, "DMAReadWrite"),
	SFVARN(vdc->DMARunning, "DMARunning"),
	SFVARN(vdc->SATBPending, "SATBPending"),
	SFVARN(vdc->burst_mode, "burst_mode"),

	SFVARN(vdc->BG_YOffset, "BG_YOffset"),
	SFVARN(vdc->BG_XOffset, "BG_XOffset"),
	SFVARN(vdc->VDS_cache, "VDS_cache"),
	SFVARN(vdc->VSW_cache, "VSW_cache"),
	SFVARN(vdc->VDW_cache, "VDW_cache"),
	SFVARN(vdc->VCR_cache, "VCR_cache"),
	SFVARN(vdc->VBlankFL_cache, "VBlankFL_cache"),
	SFEND
 };
 int ret = MDFNSS_StateAction(sm, load, data_only, VDC_StateRegs, name);

 if(load)
 {
  // Re-decode every VRAM word into the tile cache.
  for(int x = 0; x < 32768; x++)
   FixTileCache(vdc, x);
 }

 return(ret);
}

