/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2001 Kevin P. Lawton
 *
 *  dt.h: Dynamic Translation header file
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#ifndef __DT_H__
#define __DT_H__

// xxx Clean up #if 0 clauses


/* ===== DT CONFIGURATION OPTIONS ===== */

/* 1 = print debug dump, 0 = quiet. */
#define DT_DEBUG 0

/*
 *  1 = use the DT engine to run guest code,
 *  0 = run guest code native (for speed comparison).
 */
#define DT_ON 1

/*
 *  This is how many loop iterations the major body of code takes.
 *  (2000000 is an alternative setting.)
 */
#define DT_MacroLoops 400000

/*
 *  Workload selector:
 *    0 = NOP
 *    1 = cascading add loop (DT_MicroLoops)
 */
#define DT_Workload 1

/*
 *  This is how many tight-loop iterations each code section takes.
 *  Must be between 1 and 255; the check below rejects anything else
 *  at compile time.  (100 and 10 are alternative settings.)
 */
#define DT_MicroLoops 5

#if (DT_MicroLoops < 1) || (DT_MicroLoops > 255)
#error "DT_MicroLoops must be between 1 and 255"
#endif

/* NOTE(review): presumably 1 = run on a separate r3h stack -- confirm. */
#define DT_UseR3hStack 1

/*
 *  1 = use backpatch scheme for static out-of-page branches,
 *  0 = always use lookup function.
 */
#define DT_UseBackpatch 1

/*
 *  Simulate guest context switches @ N usec intervals.  Since the
 *  system timer is used, the lower bound of this is determined by
 *  the setitimer() resolution.  (10000 is an alternative setting.)
 */
#define DT_GuestTimeslice 500000

/*
 *  Selects which DT_G2THashSelect() variant is compiled:
 *    0      = hash on (l >> 5)
 *    else   = hash on l directly
 */
#define DT_G2THashMethod 1

/* Only set to 1 if testing sparse table logic. */
#define TestSparseTables 0

/* 1 = do some extra sparse table related sanity checks. */
#define STExtraSanityChecks 1

/*
 *  G2T hash instrumentation hooks.  With DT_DEBUG off they expand to an
 *  empty statement expression; with DT_DEBUG on they bump the
 *  instrG2THit[] / instrG2TMiss counters, which must be declared by the
 *  code that uses these macros.
 */
#if !DT_DEBUG
#  define InstrG2THit(i)  ({})
#  define InstrG2TMiss()  ({})
#else
#  define InstrG2THit(i)  ({instrG2THit[i]++;})
#  define InstrG2TMiss()  ({instrG2TMiss++;})
#endif

/*
 *  Cache line alignment value.  32 matches the hash block sizes in this
 *  file that are described as fitting in one Pentium+ cache line.
 */
#define CacheLineAlignment 32

/* Uncomment to make InstrEmulateOpcode() call instrEmulateOpcode(). */
//#define DoInstrEmulateOpcode

/* ===== END OF DT CONFIGURATION OPTIONS ===== */

/*
 *  Opcode emulation instrumentation hook: an empty statement expression
 *  unless DoInstrEmulateOpcode is defined.
 */
#ifndef DoInstrEmulateOpcode
#  define InstrEmulateOpcode(vm) ({})
#else
#  define InstrEmulateOpcode(vm) instrEmulateOpcode(vm)
#endif


/* Opcode byte of the x86 INT3 instruction. */
#define INT3OP 0xcc

/* ================================================================= */
/* Bit definitions for elements in vOpcodeMap_t.
 *
 *   bit 4 (VOpcodeRunNative): signals if this instruction needs
 *                             virtualization
 *        0: virtualize this instruction
 *        1: well behaved; no need to virtualize
 *   bit 5 (VOpcodeTranslate): NOTE(review): meaning is not described
 *                             in this header -- confirm.
 */
#define VOpcodeRunNative  (1 << 4)   /* 0x10 */
#define VOpcodeTranslate  (1 << 5)   /* 0x20 */

/* Shorthand for VOpcodeRunNative. */
#define RN  VOpcodeRunNative


/* NOTE: everything down to the matching #endif is compiled out by
 * `#if 0`, so none of the Meta* macros below are currently defined.
 */
#if 0
/* ================================================================= */
/* Bit definitions for fields in instruction meta cache. */
/*   3..0: instruction length (1..15) */
/*      4: run native;   1=run native, 0=virtualize */
/*      5: available:    (not used) */
/*      6: opcode-byte:  1=instruction opcode byte, 0=not */
/*      7: opcode-start: 1=scanned instruction starts here, 0=not */

#define MetaGetILen(m)     ((m) & 0x0f)
#define MetaRunNative      VOpcodeRunNative
#define MetaOpcodeByte     0x40
#define MetaOpcodeStart    0x80
#endif




/*
 *  The guest Linear to Meta index Hash table.  We need an efficient
 *  hash table to store translations from guest linear page
 *  addresses (upper 20 bits) to the DT meta page for that code page.
 *  This is quite similar to an i-TLB used in the CPU, except that
 *  rather than translating to physical addresses, we translate to
 *  the index of the DT meta page.
 *
 *  The DT meta page holds a lookup table and other data specific
 *  to that particular code page, which can be accessed to find the
 *  address of a specific translated instruction if it exists.  Thus
 *  when we encounter new guest instruction addresses which are not
 *  in the G2T table, we have an efficient way to look up the meta
 *  info for that code page.
 *
 *  This translation only concerns the upper 20 bits, as the lower
 *  12 bits are the page offset.  Bits 15..12 (4 bits) of the linear
 *  address are used to select the hash block used.  Bits 31..16 (16 bits)
 *  are stored in one of the translation pairs along with the
 *  corresponding meta index (which is also 16 bits).  Since both
 *  quantities of each pair are 16 bits, 8 pairs were chosen as the size of
 *  the hash block, because this fits neatly into 1 cache line on the
 *  Pentium+.  Thus DO NOT CHANGE the dimensions of this structure without
 *  considering the data size issues.
 *
 *    bits 31..16: stored as tag
 *    bits 15..12: selects hash block [0..15]
 *    bits 11..00: (page offset not used)
 */

/* Table dimensions: DT_L2MHashHeight hash blocks, each holding
 * DT_L2MHashWidth (tag, meta index) pairs.
 */
#define DT_L2MHashWidth    8
#define DT_L2MHashHeight  16

/* Split a linear page address (lpa) in two: the low 4 bits select the
 * hash block, the next 16 bits form the tag stored in the table.
 */
#define DT_LPAToMIHash(lpa)  ((lpa) & (DT_L2MHashHeight - 1))
#define DT_LPAToMITag(lpa)   (((lpa) >> 4) & 0xffff)

/* Index value meaning "this entry is available and does not point to
 * any construct".
 */
#define MetaIndexNone  0xffff

/* The L2M hash table itself: DT_L2MHashHeight hash blocks of
 * DT_L2MHashWidth packed 4-byte pairs.  16*8*4 = 512 bytes.
 */
typedef struct {
  Bit16u tag;    /* linear address bits 31..16 */
  Bit16u metai;  /* meta index paired with the tag */
} __attribute__ ((packed)) dtL2MHash_t[DT_L2MHashHeight][DT_L2MHashWidth];



/*
 *  The guest Linear to Translated address Hash table.  Once instructions
 *  have been translated and stored in the DT buffer, the address pairing
 *  (guest and translated instruction addresses) can be stored in this
 *  hash table.  For branch handling, this makes an efficient way to
 *  determine the associated translation buffer address for a given
 *  branch target address.  No extra protection checks are necessary
 *  before the branch is executed.  To allow for this, the following
 *  actions must occur:
 *
 *    - Buffer is completely invalidated for user<-->supervisor transitions
 *    - Buffer is completely invalidated for CS segment reloads
 */

#define DT_G2THashWidth      4   /* Fits in 1 Pentium+ cache line */
#define DT_G2THashHeight  8192   /* Need to tune this value */

/*
 *  NOTE: This hash select function needs to be coordinated with the
 *  hand coded assembly & generated tcode.
 *
 *  Both variants produce a 13-bit row index (0..8191); method 0 first
 *  discards the low 5 bits of the address, any other method uses the
 *  address bits as they are.
 */

#if DT_G2THashMethod != 0
#  define DT_G2THashSelect(l)  ( (l) & 0x00001fff )         /* Need to tune this */
#else
#  define DT_G2THashSelect(l)  ( ((l)>>5) & 0x00001fff )    /* Need to tune this */
#endif

/* Table size: 8192*4*8 = 256k */

/* NOTE(review): presumably the "no translation" marker for a tcode
 * offset -- confirm against the users of this constant.
 */
#define TcodeOffsetNone  0xffffffff

/* The G2T hash table: DT_G2THashHeight rows of DT_G2THashWidth packed
 * 8-byte address pairs.
 */
typedef struct {
  Bit32u gOff;  /* guest side of the address pairing */
  Bit32u tOff;  /* translated-code side of the address pairing */
} __attribute__ ((packed)) dtG2THash_t[DT_G2THashHeight][DT_G2THashWidth];




/* ============================
 * Sparse table lookup features
 * ============================
 */

/* A sparse table is used to efficiently maintain associations
 * between guest instruction addresses and corresponding translated
 * code sequence addresses, both in forward and reverse directions.
 *
 * L0: bits 11..8 (4bits)
 * L1: bits  7..5 (3bits)
 * L2: bits  4..0 (5bits)
 */


/* Level 2 (L2) of the sparse lookup is actually a linked
 * list, where each node contains STL2N entries.  Using
 * a full (array) frame to cover this part of the address space
 * would consume a lot of space because a lot of addresses will not
 * contain the start of scanned instructions.  The value of
 * STL2N can be 1 or more.  Storing more in each
 * quantum increases the search efficiency, but may consume
 * more space when elements are not used.  Values of
 * 1, 2, or 3 may make sense.
 */

/* Fan-out of each level of the forward (iaddr -> tcode) sparse table.
 * L0 and L1 are fixed by the address split: 4 bits (11..8) give 16
 * entries, 3 bits (7..5) give 8 entries.
 */
#define STForwardL0N  16   /* Dont change */
#define STForwardL1N   8   /* Dont change */
#define STForwardL2N   3   /* (configurable) */

/* One node of the L2 linked list.  Each node carries STForwardL2N
 * elements; every element is a packed 32-bit word (5 + 7 + 20 bits).
 */
typedef struct stForwardL2Cluster_tag {
  union {
    struct {
      Bit32u addr4_0:5; /* For address match of bits 4..0 */
      Bit32u attributes:7;
      Bit32u tcodeOffset:20;
      } __attribute__ ((packed)) fields;
    Bit32u raw; /* access to all bits at once */
    } __attribute__ ((packed)) element[STForwardL2N];
  struct stForwardL2Cluster_tag *next; /* next node in the L2 list */
  } __attribute__ ((packed)) stForwardL2Cluster_t;


/* Levels 0 and 1 (L0 and L1) are simple arrays, one element for
 * each combination of the address bits for the corresponding
 * address range.  Elements of an L0 frame point to the frame of
 * the next level (L1).  Elements of an L1 frame point to an
 * L2 linked list structure.
 *
 * The array sizes use the STForwardL*N constants so the frame types
 * cannot drift from the fan-out values defined above.
 */

typedef stForwardL2Cluster_t *stForwardL1Frame_t[STForwardL1N];
typedef stForwardL1Frame_t   *stForwardL0Frame_t[STForwardL0N];


/* Fan-out of the reverse (tcode address -> instruction address) table. */
#define STReverseL1N   4   /* Dont change */
#define STReverseL2N   3   /* (configurable) */

/* One node of the reverse L2 linked list.  Each node carries
 * STReverseL2N elements; every element is a packed 32-bit word
 * (6 + 8 + 12 + 6 bits).
 */
typedef struct stReverseL2Cluster_tag {
  union {
    struct {
      Bit32u addr5_0:6;     /* For address match of bits 5..0 */
      Bit32u tcodeLen:8;    /* length of tcode sequence */
      Bit32u pageOffset:12; /* iaddr page offset (bits 11..0) */
      Bit32u notUsed:6;
    } __attribute__ ((packed)) fields;
    Bit32u raw;             /* access to all bits at once */
  } __attribute__ ((packed)) element[STReverseL2N];

  struct stReverseL2Cluster_tag *next; /* next node in the L2 list */
} __attribute__ ((packed)) stReverseL2Cluster_t;


/* ====================
 * Tcode chunk features
 * ====================
 */

#define TCodeChunkN    256 /* (configurable) Must be multiple of 8 */
#define TCodeChunkSize 256 /* Dont change this! */

/* A tcode chunk is TCodeChunkSize raw bytes.  Being a union, the
 * bookkeeping header shares storage with the start of those bytes.
 */
typedef union tcodeChunk_tag {
  struct {
    /* Reverse lookup (tcode address -> instruction address) uses a
     * table similar to the forward lookup mechanisms.  Each tcode
     * chunk is 256 bytes (8 bits of address), an address space
     * analogous to the L1 & L2 components of forward lookup, so for
     * simplicity and consistency numbering starts with L1.  The top
     * level frame is embedded here; its 4 entries equal STReverseL1N.
     */
    stReverseL2Cluster_t *t2iL1[4];

    /* Next chunk used by the associated guest code page.  The meta
     * info for the code page points to the first chunk; subsequent
     * chunks, allocated as needed, are chained through this pointer.
     */
    union tcodeChunk_tag *next;

    /* The index of the next available data offset in this chunk. */
    Bit16u head;
    /* NOTE(review): 'tail' is not described here -- confirm its role. */
    Bit16u tail;

    /* Index into the meta array, identifying the code page that owns
     * this tcode chunk.
     */
    Bit32u ownerMetaIndex;
  } __attribute__ ((packed)) header;

  Bit8u raw[TCodeChunkSize];
} __attribute__ ((packed)) tcodeChunk_t;

#define MaxTcodeSnippets 32

/* Describes one tcode sequence located inside a tcode buffer. */
typedef struct {
  Bit32u   pOff;         /* The guest instruction page offset. */
  Bit32u   tcodeBuffOff; /* The offset into the tcode buffer passed. */
  unsigned tcodeLen;     /* Length of this individual tcode sequence */
} tcodeSnippet_t;


/* =======================
 * Page Meta Info features
 * =======================
 */

#define DTPageMetaTableN 16 /* (configurable) Should be multiple of 8. */

/* Meta information kept for one guest code page. */
typedef struct {
  /* Level0 frame of the iaddr->tcode sparse lookup. */
  stForwardL0Frame_t i2tL0;

  Bit32u lpi;  /* Linear Page Index (addr>>12). */
  Bit32u ppi;  /* Physical Page Index (addr>>12). */

  /* Code segment info. */
  descriptor_cache_t cs;
  unsigned           cpl;  /* CPL of guest code this was scanned for */

  /* +++ other constraints need to be added here */

  /* Time stamps. */
  struct {
    /* Last time guest code wrote data from this page.
     * NOTE(review): wording kept from the original; possibly means
     * "to this page" -- confirm. */
    Bit64u guest_write;
    /* Last time monitor allowed guest code in this page to execute. */
    Bit64u guest_executed;

    Bit64u mon_created;  /* 1st time monitor saw use of this page. */
    Bit64u mon_prescan;  /* last time of new prescanning activity */
  } ts;

  tcodeChunk_t *tcodeChunkHead;    /* ptr to 1st tcode chunk in list */
  tcodeChunk_t *tcodeChunkCurrent; /* ptr to current tcode chunk in list */
} dtPageMeta_t; /* xxx Should I bother using __attribute__ ((packed)) */



/* R3H -> monitor request codes.
 * NOTE(review): presumably the values carried in r3hRequest -- confirm.
 */
#define R3HToMonRequestNone        0
#define R3HToMonRequestG2T         1
#define R3HToMonRequestPanic       2
#define R3HToMonRequestTerminate   3

/* NOTE: everything down to the matching #endif is compiled out by
 * `#if 0`; none of these declarations are visible to includers.
 */
#if 0
extern void __mon2r3h(void);
extern void __r3h2mon(void);

extern Bit32u mon_ESP;

extern unsigned r3h_request;
extern Bit32u r3h_data;
extern Bit32u idPatchDispl;
extern Bit32u jmpPatchDispl;


/* Variable declarations */

extern descriptor_t CS;
extern unsigned CPL;
#endif



/* NOTE: This structure mirrors the values in 'stubs-r3.S'.  Make
 * sure to keep synchronized.
 *
 * The structure is packed, so the fields are laid out back to back in
 * exactly the order written here; do not reorder, resize or insert
 * fields without making the matching change on the assembly side.
 *
 * Every data field has a same-named extern in this header carrying a
 * leading "__" (hashID <-> __hashID, r3hESP <-> __r3hESP, ...).
 *
 * NOTE(review): r3hRequest is 'unsigned' here while __r3hRequest is
 * declared Bit32u -- presumably the same width on the target; confirm.
 */
typedef struct {
  /* Data fields. */
  Bit32u   hashID; /* xxx was globalID */
  Bit32u   r3hESP;
  Bit32u   r3hSS;
  unsigned r3hRequest;
  Bit32u   r3hData;
  Bit32u   r3hTargetEIP;
  Bit32u   guestESP;
  Bit32u   guestSS;
  Bit32u   r3hBranchDataAddr;
  Bit32u   r3hAsyncEvent;

  /* Structure pointers. */
  dtG2THash_t  *dtG2THash; /* pointer to the G2T hash table */
  } __attribute__ ((packed)) r3hData_t;



/* Size in bytes of TCodeChunkN tcode chunks. */
#define SizeOfTcodeBuffer  (TCodeChunkN * sizeof(tcodeChunk_t))

/* Page counts for the DT tables.
 * NOTE(review): presumably upper bounds on the pages backing each
 * table -- confirm against the allocation code.
 */
#define DTL2MMaxPages              (1)
#define DTG2TMaxPages              (64)
#define DTMetaTableMaxPages        (16)
#define DTMetaTableUsageMaxPages   (1)
#define DTTcodeChunkMaxPages       (64)
#define DTTcodeChunkUsageMaxPages  (1)

  /* Branch handlers; both are placed in the "r3hSection" section. */
  void
r3hBranchStatic(void) __attribute__ ((section ("r3hSection")));
  void
r3hBranchDynamic(void) __attribute__ ((section ("r3hSection")));

/* NOTE(review): the following look like labels/entry points exported
 * by the r3h stubs rather than ordinary C functions -- confirm.
 */
void __r3hStubsDataStart(void);
void __r3hStubsCodeStart(void);
void __r3hBranchStatic(void);
void __r3hNewEIP(void);
void __r3hSectionStart(void);
void __r3hSectionEnd(void);

/* Code emitters.  Each takes an output pointer 'p' and a 'remain'
 * count.
 * NOTE(review): presumably 'remain' is the space left at 'p' and the
 * return value is the number of bytes emitted -- confirm.
 * NOTE(review): offsetPtr is Bit8u** in dtEmitIDCheck but Bit32u** in
 * dtEmitJmp/dtEmitCall -- confirm this difference is intended.
 */
unsigned dtEmitPushImm32(Bit8u *p, unsigned remain, Bit32u imm32);

unsigned dtEmitUseR3hStack(Bit8u *p, unsigned remain);
unsigned dtEmitUseGuestStack(Bit8u *p, unsigned remain);

unsigned dtEmitPushf(Bit8u *p, unsigned remain);
unsigned dtEmitPopf(Bit8u *p, unsigned remain);

unsigned dtEmitIDCheck(Bit8u *p,
                       unsigned remain,
                       Bit8u **offsetPtr,
                       Bit8u **idPtr);

unsigned dtEmitJmp(Bit8u *p, unsigned remain, Bit32u **offsetPtr);
unsigned dtEmitCall(Bit8u *p, unsigned remain, Bit32u **offsetPtr);

/* NOTE(review): presumably the hash ID value meaning "none" -- confirm. */
#define HashIDNone 0

/* Externally defined 32-bit variables.  Their names and order match
 * the data fields of r3hData_t, each prefixed with "__".
 */
extern Bit32u  __hashID;
extern Bit32u  __r3hESP;
extern Bit32u  __r3hSS;
extern Bit32u  __r3hRequest;
extern Bit32u  __r3hData;
extern Bit32u  __r3hTargetEIP;
extern Bit32u  __guestESP;
extern Bit32u  __guestSS;
extern Bit32u  __r3hBranchDataAddr;
extern Bit32u  __r3hAsyncEvent;

#endif /* __DT_H__ */
