/* ===== DT CONFIGURATION OPTIONS =====*/

/* DT_ON selects how the guest code is run:
 *   1 - through the DT engine
 *   0 - natively, for speed comparison
 * Exactly one of the two definitions below is meant to be active; the
 * other is kept commented out so the mode can be flipped by hand.
 */
#define DT_ON 1 /* Use DT engine to run guest code */
//#define DT_ON 0 /* Run guest code native for speed comparison */

/* Loop count for the test run.
 * NOTE(review): presumably the number of iterations executed by the guest
 * code; the consumer of this value is not visible in this file -- confirm.
 */
#define DT_LOOP_COUNT 2000000


/* ===== END OF DT CONFIGURATION OPTIONS =====*/



/*
 * Fixed-width integer aliases used by the DT data structures.
 *
 * The 32-bit aliases are built on 'int', not 'long': 'long' is 64 bits
 * wide on LP64 targets (e.g. x86-64 Linux/BSD), which would silently
 * double the size of every Bit32u field in the packed hash tables and
 * break the cache-line/size assumptions documented for them.  'int' is
 * 32 bits on both ILP32 and LP64.
 */
typedef unsigned int   Bit32u;
typedef unsigned short Bit16u;
typedef unsigned char  Bit8u;
typedef   signed int   Bit32s;

/*
 * Compile-time width checks (portable to pre-C11 compilers): a false
 * condition yields a negative array size and stops the build instead of
 * letting a mis-sized type corrupt the table layouts.
 */
typedef char dtAssertBit32uIs4Bytes[(sizeof(Bit32u) == 4) ? 1 : -1];
typedef char dtAssertBit32sIs4Bytes[(sizeof(Bit32s) == 4) ? 1 : -1];
typedef char dtAssertBit16uIs2Bytes[(sizeof(Bit16u) == 2) ? 1 : -1];

/*
 * gc_t: a saved register image -- flags, the eight general register
 * slots and two segment register slots, one 'unsigned' per slot.
 *
 * The struct is packed and its field order is part of the layout; do not
 * reorder or insert members.
 * NOTE(review): the edi..eax order equals the in-memory order left by an
 * x86 PUSHA (lowest address first), and 'dummy_esp' occupies the slot
 * PUSHA uses for ESP; presumably assembly code fills this in -- confirm.
 */
typedef struct __attribute__ ((packed)) {
  unsigned eflags;

  /* General register slots. */
  unsigned edi, esi, ebp;
  unsigned dummy_esp;
  unsigned ebx, edx, ecx, eax;

  /* Segment register slots. */
  unsigned es, ds;
} gc_t;


/* Capacities of the per-page tables inside dtMetaEntry_t. */
#define DT_OffsetLookupMax 32   /* entries in offsetLookup[] */
#define DT_TcodeBufferMax 1024  /* bytes in tcodeBuffer[] */

/*
 * dtMetaEntry_t: DT meta information for one guest code page.
 *
 * Both the lookup table and the tcode buffer are deliberately simple
 * placeholders ("for now" in the original notes).
 * NOTE(review): the two *FreeIndex members presumably hold the next unused
 * slot/byte of their respective arrays -- confirm against the code that
 * fills them.
 */
typedef struct {
  /* Linear Page Address */
  Bit32u lpa;
  /* +++ other constraints need to be here */

  /* +++ For now, a simple offset->tcode lookup table */
  struct {
    Bit32u pOff;
    Bit32u tcode;
  } offsetLookup[DT_OffsetLookupMax];
  unsigned offsetLookupFreeIndex;

  /* +++ For now, a simple per-page tcode buffer */
  Bit8u tcodeBuffer[DT_TcodeBufferMax];
  unsigned tcodeBufferFreeIndex;
} dtMetaEntry_t;

/* Free index associated with the DT meta table; defined elsewhere.
 * NOTE(review): presumably the index of the next unused dtMetaTable[]
 * entry -- confirm at the definition.
 * The identical declaration appears again further down in this file next
 * to dtMetaTable[]; the repetition is redundant but legal C.
 */
extern unsigned dtMetaFreeIndex;


/* Code entry points defined outside this file (no C body is visible here).
 * NOTE(review): presumably assembly labels for the guest test sequences
 * (__seq0000..__seq0004) and for the success/failure exit paths
 * (__exit_ok/__exit_bad) -- confirm against the assembly source.
 * Identifiers starting with a double underscore are reserved for the
 * implementation in ISO C; renaming them would require changing whatever
 * defines these symbols as well.
 */
extern void __seq0000(void);
extern void __seq0001(void);
extern void __seq0002(void);
extern void __seq0003(void);
extern void __seq0004(void);
extern void __exit_ok(void);
extern void __exit_bad(void);


/* Area for storing handler values.
 * All four are 32-bit cells defined elsewhere.
 * NOTE(review): "r3h" presumably stands for "ring-3 handler"; going by the
 * names these hold the handler's data segment, its stack pointer, the
 * stack pointer value for an empty handler stack, and a branch target
 * EIP -- confirm against the code that reads/writes them.
 */
extern Bit32u r3h_DS;
extern Bit32u r3h_ESP;
extern Bit32u r3h_ESP_empty;
extern Bit32u r3h_target_EIP;

/* Area for storing guest values.
 * NOTE(review): going by the names, the guest's stack segment, stack
 * pointer and instruction pointer -- confirm.
 */
extern Bit32u guest_SS;
extern Bit32u guest_ESP;
extern Bit32u guest_EIP;

/* Handler stack.  The array is declared with an incomplete type, so its
 * real size is fixed by the definition elsewhere.
 * NOTE(review): presumably R3H_STACK_SIZE bytes -- confirm at the definition.
 */
#define R3H_STACK_SIZE 4096
extern unsigned char r3h_stack[];

/* Guest memory areas, also declared with incomplete array types.
 * NOTE(review): presumably guest_page0/guest_page1 are the guest's pages and
 * guest_stack is GUEST_STACK_SIZE bytes -- confirm at the definitions.
 */
#define GUEST_STACK_SIZE 4096
extern unsigned char guest_page0[];
extern unsigned char guest_page1[];
extern Bit8u guest_stack[];

/* Guest-code helpers implemented elsewhere; both take no arguments and
 * return nothing.
 * NOTE(review): presumably hack_guest_code() prepares/patches the guest
 * code before a run and __execute_guest_code_native() is the native
 * (non-DT) execution path selected when DT_ON is 0 -- confirm against the
 * implementations.
 */
void hack_guest_code(void);
void __execute_guest_code_native(void);

/* Symbols declared as byte arrays so that C code can take their addresses.
 * NOTE(review): presumably these are code labels inside the handler
 * (branch, prime and return entry points) rather than data buffers --
 * confirm against the source that defines them.
 */
extern Bit8u __r3h_branch[];
extern Bit8u __r3h_prime[];
extern Bit8u __r3h_ret[];


/* The guest Linear to Meta index Hash table.  We need an efficient
 * hash table to store translations from guest linear page
 * addresses (upper 20 bits) to the DT meta page for that code page.
 * This is quite similar to the i-TLB used in a CPU, except that
 * rather than translating to physical addresses, we translate to
 * the index of the DT meta page.
 *
 * The DT meta page holds a lookup table and other data specific
 * to that particular code page, which can be consulted to find the
 * address of a specific translated instruction, if one exists.  Thus
 * when we encounter a new guest instruction address which is not
 * in the G2T table, we have an efficient way to look up the meta
 * info for its code page.
 *
 * This translation only concerns the upper 20 bits, as the lower
 * 12 bits are the page offset.  Bits 15..12 (4 bits) of the linear address
 * are used to select the hash block used.  Bits 31..16 (16 bits) are
 * stored in one of the translation pairs along with the corresponding
 * meta index (which is also 16 bits).  Since both quantities of each
 * pair are 16 bits, 8 pairs were chosen as the size of the hash block,
 * because 8 pairs * 4 bytes = 32 bytes fits neatly into 1 cache line on
 * the Pentium+.  Thus DO NOT CHANGE the dimensions of this structure
 * without considering the data size issues.
 *
 * Note that the DT_LPAToMI* macros below are written for the 20-bit page
 * number (linear address >> 12): they take bits 3..0 of their argument as
 * the hash block selector and bits 19..4 as the tag.
 *
 *   bits 31..16: stored as tag
 *   bits 15..12: selects hash block [0..15]
 *   bits 11..00: (page offset not used)
 */

/* Number of (tag, metai) pairs in one hash block. */
#define DT_L2MHashWidth 8
/* Hash block selector: the low 4 bits of the argument, i.e. [0..15]. */
#define DT_LPAToMIHash(pageNum) (0xf & (pageNum))
/* Tag: the 16 bits of the argument directly above the selector bits. */
#define DT_LPAToMITag(pageNum) (0xffff & ((pageNum) >> 4))

/*
 * dtL2MHash_t: the guest Linear to Meta index hash table -- 16 hash
 * blocks, each holding DT_L2MHashWidth packed (tag, metai) pairs.
 * Size: 16 blocks * 8 pairs * 4 bytes = 512 bytes.
 */
typedef struct __attribute__ ((packed)) {
  Bit16u tag;   /* tag part of the page address (see DT_LPAToMITag) */
  Bit16u metai; /* index of the corresponding DT meta page */
} dtL2MHash_t[16][DT_L2MHashWidth];



/* The guest Linear to Translated address Hash table.  Once instructions
 * have been translated and stored in the DT buffer, the address pairing
 * (guest and translated instruction addresses) can be stored in this
 * hash table.  For branch handling, this makes an efficient way to
 * determine the associated translation buffer address for a given
 * branch target address.  No extra protection checks are necessary
 * before the branch is executed.  To allow for this, the following
 * actions must occur:
 *
 *   - Buffer is completely invalidated for user<-->supervisor transitions
 *   - Buffer is completely invalidated for CS segment reloads
 */

/*
 * Geometry of the guest->translated address hash table (dtG2THash_t).
 * Size: 8192 rows * 4 pairs * 8 bytes = 256k.
 */
#define DT_G2THashWidth 4     /* Pairs per row; fits in 1 Pentium+ cache line */
#define DT_G2THashHeight 8192 /* Rows; need to tune this value */
/* Row selector: 13 bits of the address starting at bit 5.  Need to tune this. */
#define DT_G2THashSelect(addr) ( 0x1fff & ((addr) >> 5) )

/*
 * dtG2THash_t: the guest Linear to Translated address hash table --
 * DT_G2THashHeight rows of DT_G2THashWidth packed address pairs.
 * NOTE(review): going by the table description, gOff is the guest
 * instruction address and tOff the matching translated-code address --
 * confirm whether these are absolute addresses or offsets.
 */
typedef struct __attribute__ ((packed)) {
  Bit32u gOff; /* guest side of the pair */
  Bit32u tOff; /* translated side of the pair */
} dtG2THash_t[DT_G2THashHeight][DT_G2THashWidth];

/*
 * descriptor_t: a base/limit pair of 32-bit values.
 * NOTE(review): presumably a simplified segment descriptor -- confirm
 * against its users, which are not visible in this file.
 */
typedef struct {
  Bit32u base, limit;
} descriptor_t;


/* ----------------------------------------------------------- */


/* Number of DT meta table elements (a temporary value, per the "+++ for now"
 * marker).
 * NOTE(review): presumably the dimension used where dtMetaTable[] is
 * defined; the declaration below leaves the array size unspecified --
 * confirm at the definition.
 */
#define DT_MetaTableElements 2 /* +++ for now */

/* DT tables and bookkeeping, defined elsewhere:
 *   dtL2MHash       - guest linear page -> meta index hash table
 *   dtG2THash       - guest address -> translated address hash table
 *   dtMetaTable     - array of per-page meta entries
 *   dtMetaFreeIndex - free index for dtMetaTable (already declared once
 *                     earlier in this file; the repeat is harmless)
 */
extern dtL2MHash_t   dtL2MHash;
extern dtG2THash_t   dtG2THash;
extern dtMetaEntry_t dtMetaTable[];
extern unsigned dtMetaFreeIndex;

/* Request codes passed as 'req' to r3hToMonRequest().
 * NOTE(review): going by the names -- G2T asks for a guest->translated
 * address lookup, Panic reports a fatal condition, Terminate ends the
 * run; confirm against the request dispatcher.
 */
#define R3HToMonRequestG2T       10
#define R3HToMonRequestPanic     11
#define R3HToMonRequestTerminate 12
/* Issue request 'req' (one of the R3HToMonRequest* codes) with a 32-bit
 * argument 'data'; returns a 32-bit result.
 * NOTE(review): the meaning of 'data' and of the return value per request
 * is not visible here -- confirm against the implementation.
 */
Bit32u r3hToMonRequest(unsigned req, Bit32u data);

/* Transition entry points, defined outside this file.
 * NOTE(review): presumably assembly routines that switch from the monitor
 * to the r3h handler and back -- confirm against the assembly source.
 */
extern void __mon2r3h(void);
extern void __r3h2mon(void);

/* NOTE(review): presumably the monitor's saved stack pointer across a
 * transition -- confirm. */
extern Bit32u mon_ESP;

/* NOTE(review): presumably the mailbox for the request code and data
 * passed via r3hToMonRequest() -- confirm against the implementation. */
extern unsigned r3h_request;
extern Bit32u   r3h_data;
