// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o1_jit/cmp_branch.cpp,v 1.2 2001/08/13 09:59:51 xhshi Exp $
//

#include "defines.h"
#include <iostream.h>
#include "code_emitter.h"
#include "stack.h"
#include "operand.h"
#include "lazy_code_selector.h"
#include "cmp_branch.h"
#include "fp_compatibility.h"

extern X86_CC branch_cc[];


//
// Branch conditions for a long compare that is fused with the following
// if<cond> bytecode.  emit_lcmp_br selects a row with (bytecode - 0x99),
// so the rows are ifeq, ifne, iflt, ifge, ifgt, ifle in that order.
//
//   column 0: tested after the high-word compare (signed);
//             branches to the bytecode's target
//   column 1: tested after the high-word compare (signed);
//             branches to the fall-through bytecode
//   column 2: tested after the low-word compare (unsigned);
//             branches to the bytecode's target
//
// n_cc marks a slot for which gen_lcmp_br emits no branch.
// The comment on each row appears to show a source-level condition that
// compiles to that bytecode (the bytecode jumps when the condition fails).
//
X86_CC cc_lcmp_map[n_cc][3] = {
	{n_cc,  cc_ne, cc_eq},  // if (x != y)  bytecode ifeq
	{cc_ne, n_cc,  cc_ne},  // if (x == y)  bytecode ifne
	{cc_lt, cc_gt, cc_lt},  // if (x >= y)  bytecode iflt
	{cc_gt, cc_lt, cc_ge},  // if (x < y)   bytecode ifge
	{cc_gt, cc_lt, cc_gt},  // if (x <= y)  bytecode ifgt
	{cc_lt, cc_gt, cc_le},  // if (x > y)   bytecode ifle
};

//
// Emit a "cmp" for the pair (op1, op2) and release both operands.
//
//   op1, op2  operands to compare.  They are references because this
//             routine may exchange them; the caller sees the exchange.
//   commute   out: non-zero when the emitted cmp tests the operands in the
//             opposite order from the one requested, so the caller has to
//             use the commuted condition code.
//   reg       scratch register, used when neither operand is in a register.
//
// In the comments below r = register, m = memory, i = immediate.
//
void emit_cmp(Code_Emitter& emitter, Stack& stack,
              Operand*& op1, Operand*& op2,
              unsigned& commute, Reg_Operand *reg) {
    //
    // When op1 is not a register, try to bring a register (or at least a
    // memory operand) to the front:
    //     m r --> r m      i m --> m i
    //     m m --> m m      i r --> r i
    // "m i" and "i i" stay as they are.
    //
    const bool exchange =
        !op1->is_reg() && (op2->is_reg() || op2->is_mem());
    commute = exchange ? 1 : 0;
    if (exchange) {
        Operand *held = op1;
        op1 = op2;
        op2 = held;
    }

    if (op1->is_reg()) {
        // r r, r m, r i : compare directly against op1's register
        Reg_Operand *lhs_reg = (Reg_Operand*)op1;
        op2->emit_alu_inst(emitter,&lhs_reg->opnd,cmp_opc);
    } else if (op1->is_mem()) {
        // m m, m i : put op2 in the scratch register, then compare the
        // scratch register with op1.  That reverses the operand order
        // once more, so the commute flag is flipped.
        op2->emit_mov_to_reg(emitter,&reg->opnd);
        op1->emit_alu_inst(emitter,&reg->opnd,cmp_opc);
        commute = commute ? 0 : 1;
    } else {
        // i i : put op1 in the scratch register and compare with op2
        op1->emit_mov_to_reg(emitter,&reg->opnd);
        op2->emit_alu_inst(emitter,&reg->opnd,cmp_opc);
    }

    op1->free_opnd(&stack.reg_manager);
    op2->free_opnd(&stack.reg_manager);
}

//
// Emit one conditional branch of a fused long-compare-and-branch sequence.
//
//   code_patch_list  head of the patch list; updated for forward branches
//   map              per-bytecode table holding code offsets and patch chains
//   cc               condition to test; n_cc means "no branch in this slot"
//                    and nothing is emitted
//   bwd_branch       true for a backward branch, whose target offset is
//                    read from map and needs no patching
//   label_bc_index   index into map of the target bytecode
//   commute          non-zero when the preceding cmp tested its operands in
//                    reverse order; cc is then replaced by cc_commute_map[cc]
//   is_signed        passed to the emitter together with cc
//
void gen_lcmp_br(Mem_Manager& mem_manager,
                 Code_Emitter& emitter,
                 Code_Patch*&  code_patch_list,
                 Map_Entry     *map,
                 X86_CC        cc,
                 bool          bwd_branch,
                 int           label_bc_index,
                 int           commute,
                 bool          is_signed) {
    if (cc == n_cc) return;

    if (commute)
        cc = cc_commute_map[cc];
    if (bwd_branch) {
        //
        // backward branches do not require patching
        //
        unsigned target_offset = map[label_bc_index].offset;
        int disp = target_offset - emitter.get_offset();
        emitter.emit_branch(cc,disp,is_signed);
    } else {
        //
        // emit a branch with a 32-bit offset that is patched later.
        // The placeholder is a named object: taking the address of a
        // temporary (&Imm_Opnd(0)) is not valid ISO C++.
        //
        Imm_Opnd placeholder(0);
        emitter.emit_branch32(cc,&placeholder,is_signed);
        //
        // create a patch entry for the 4 displacement bytes just emitted
        // and link it into both the global list and the label's chain
        //
        unsigned patch_offset = emitter.get_offset() - 4;
        Branch_Patch *p = new(mem_manager)
            Branch_Patch(code_patch_list,patch_offset,map[label_bc_index].patch);
        code_patch_list = map[label_bc_index].patch = p;
    }
}
//
// Long compare fused with the following if<cond> bytecode.
//
// The two 64-bit values are compared 32 bits at a time: first the high
// words (signed), then the low words (unsigned).
// e.g.,  if (x > y) {   bytecode: lload(x)
//           ...                   lload(y)
//        } else {                 lcmp
//           ...                   ifle  --> else_label
//        }
//
// With Xh/Xl the high/low 32 bits of X, the code emitted for ifle is
//        cmp Xh, Yh
//        jl  --> else_label      (signed)
//        jg  --> if_label        (signed)
//        cmp Xl, Yl
//        jbe --> else_label      (unsigned "less or equal")
//
//   bytecode         the if<cond> bytecode; (bytecode - 0x99) selects the
//                    row of cc_lcmp_map
//   bwd_branch       true when the branch to target_bc_index is backward
//   target_bc_index  bytecode index the if<cond> jumps to
//   fall_thru_index  bytecode index that follows the if<cond>
//
void emit_lcmp_br(Mem_Manager&  mem_manager,
                  Code_Emitter& emitter, Stack& stack,
                  Code_Patch*&  code_patch_list,
                  Map_Entry     *map,
                  unsigned char bytecode,
                  bool          bwd_branch,
                  unsigned      target_bc_index,
                  unsigned      fall_thru_index) {
    //
    // Claim the scratch register for cmp before popping the operands.
    //
    Reg_Operand *scratch = stack.reg_manager.get_reg();
    Operand *lhs_lo, *lhs_hi, *rhs_lo, *rhs_hi;
    stack.pop64(rhs_lo, rhs_hi);
    stack.pop64(lhs_lo, lhs_hi);
    //
    // This is the end of a block, so everything left on the stack is
    // spilled.  home_all may need a temp register, so the scratch register
    // is handed back for the duration of the call and claimed again after.
    //
    scratch->free_opnd(&stack.reg_manager);
    stack.home_all();
    scratch = stack.reg_manager.get_reg();

    X86_CC *conds = cc_lcmp_map[bytecode - 0x99];
    unsigned swapped;
    //
    // high words: signed compare; may branch to the target or decide for
    // the fall-through path
    //
    emit_cmp(emitter,stack,lhs_hi,rhs_hi,swapped,scratch);
    gen_lcmp_br(mem_manager,emitter,code_patch_list,map,
                conds[0],bwd_branch,target_bc_index,swapped,true);
    gen_lcmp_br(mem_manager,emitter,code_patch_list,map,
                conds[1],false,fall_thru_index,swapped,true);
    //
    // low words: unsigned compare; only reached when the high words tie
    //
    emit_cmp(emitter,stack,lhs_lo,rhs_lo,swapped,scratch);
    gen_lcmp_br(mem_manager,emitter,code_patch_list,map,
                conds[2],bwd_branch,target_bc_index,swapped,false);

    scratch->free_opnd(&stack.reg_manager);
}

//
// Emit a conditional branch to the bytecode at target_bc_index.
//
//   code_patch_list  head of the patch list; updated for forward branches
//   map              per-bytecode table holding code offsets and patch chains
//   cc               condition to test
//   target_bc_index  index into map of the target bytecode
//   is_bwd_branch    true for a backward branch, whose target offset is
//                    read from map and needs no patching
//   is_signed        passed to the emitter together with cc
//
void emit_br(Mem_Manager&  mem_manager,
             Code_Emitter& emitter,
             Code_Patch*&  code_patch_list,
             Map_Entry     *map,
             X86_CC        cc,
             unsigned      target_bc_index,
             bool          is_bwd_branch,
             bool          is_signed) {
    if (is_bwd_branch) {
        //
        // backward branches do not require patching
        //
        unsigned target_offset = map[target_bc_index].offset;
        int disp = target_offset - emitter.get_offset();
        emitter.emit_branch(cc,disp,is_signed);
    } else {
        //
        // emit a branch with a 32-bit offset that is patched later.
        // The placeholder is a named object: taking the address of a
        // temporary (&Imm_Opnd(0)) is not valid ISO C++.
        //
        Imm_Opnd placeholder(0);
        emitter.emit_branch32(cc,&placeholder,is_signed);
        //
        // create a patch entry for the 4 displacement bytes just emitted
        // and link it into both the global list and the target's chain
        //
        unsigned patch_offset = emitter.get_offset() - 4;
        Branch_Patch *p = new(mem_manager)
            Branch_Patch(code_patch_list,patch_offset,map[target_bc_index].patch);
        code_patch_list = map[target_bc_index].patch = p;
    }
}


//
// Floating-point compare fused with the following if<cond> bytecode.
//
// Typical sequence when both operands are in memory:
//  fld    [eax + 4]   --- load src1 onto the fp stack
//  fcomp  [eax + 8]   --- compare src1 with src2 and pop
//  fnstsw ax          --- store the fp status word in ax without checking
//                         for unmasked floating-point error conditions
//  sahf               --- load SF, ZF, AF, PF and CF from ah
//  jcc    ...         --- unsigned conditions; jp tests the NaN case
//
//   curr_bc          the if<cond> bytecode, 0x99 (ifeq) .. 0x9e (ifle)
//   prev_bc          the compare bytecode: 0x95 fcmpl, 0x96 fcmpg,
//                    0x97 dcmpl, 0x98 dcmpg
//   is_bwd_br        true when the branch to target_bc_index is backward
//   target_bc_index  bytecode index the if<cond> jumps to
//   fall_bc_index    bytecode index that follows the if<cond>
//   op_pool          not used here
//
void emit_fdcmp_br(Mem_Manager&            mem_manager,
                   Code_Emitter&           emitter,
                   Stack&                  stack,
                   Code_Patch*&            code_patch_list,
                   Map_Entry               *map,
                   unsigned char           curr_bc,
                   unsigned char           prev_bc,
                   bool                    is_bwd_br,
                   unsigned                target_bc_index,
                   unsigned                fall_bc_index,
                   Pre_Alloc_Operand_Pool& op_pool) {
    bool is_double = prev_bc == 0x97 || prev_bc == 0x98; // dcmpl, dcmpg
    Operand *src1, *src2, *src1_hi, *src2_hi;
    if (!is_double) {
        src2 = stack.pop();
        src1 = stack.pop();
    } else {
        stack.pop64(src2,src2_hi);
        stack.pop64(src1,src1_hi);
    }
    //
    // spill all operands at the end of block
    //
    stack.home_all();

    if (stack.fp_strict_mode)
    {
        // the casts below treat both operands as memory operands
        src1->free_opnd(&stack.reg_manager);
        src2->free_opnd(&stack.reg_manager);
        Mem_Operand *m_src1 = (Mem_Operand*)src1;
        Mem_Operand *m_src2 = (Mem_Operand*)src2;
        emitter.emit_fld(m_src1->mem_opnd(),is_double);
        emitter.emit_fp_op_mem(fcomp_opc,m_src2->mem_opnd(),is_double);
    }
    else
    {
        //
        // may need to consider using fcomip which sets ZF, PF, CF
        //
        load_onto_fp_stack(stack, src1, is_double);
        if (src2->is_mem()) {
            src2->free_opnd(&stack.reg_manager);
            Mem_Operand *m_src2 = (Mem_Operand*)src2;
            emitter.emit_fp_op_mem(fcomp_opc,m_src2->mem_opnd(),is_double);
        } else {
            assert(src2->kind == Operand::Fp);
            //
            // if both src1 and src2 are FP_Operands, src2 is on the top
            // of the fp stack, st(0).
            //
            if (src1->kind == Operand::Fp)
                emitter.emit_fxch(1);  // exchange st(0) and st(1)
            emitter.emit_fcompp();
        }
    }

    //
    // Claim eax BEFORE fnstsw overwrites ax.  If claiming the register has
    // to move a live value out of eax, that must happen while the value is
    // still intact.
    // NOTE(review): the eax operand is never released here, unlike the
    // scratch registers in emit_lcmp_br / emit_ifxx -- confirm that the
    // register manager does not need a matching free_opnd.
    //
    stack.reg_manager.get_reg(eax_reg);
    emitter.emit_fnstsw();
    emitter.emit_sahf();

    bool is_cmpg = prev_bc == 0x96 || prev_bc == 0x98; // fcmpg, dcmpg
    X86_CC cc = branch_cc[curr_bc-0x99];
    switch (curr_bc)
    {
        case 0x99: // ifeq
            // jp fallthrough
            // je target
            emit_br(mem_manager,emitter,code_patch_list,map,cc_p,fall_bc_index,false,false);
            emit_br(mem_manager,emitter,code_patch_list,map,cc,target_bc_index,is_bwd_br,false);
            break;
        case 0x9a: // ifne
            // jne target
            // jp  target
            emit_br(mem_manager,emitter,code_patch_list,map,cc,target_bc_index,is_bwd_br,false);
            emit_br(mem_manager,emitter,code_patch_list,map,cc_p,target_bc_index,is_bwd_br,false);
            break;
        case 0x9b: // iflt
        case 0x9e: // ifle
            // jp     fallthrough (cmpg)
            // jb/jbe target
            if (is_cmpg)
                emit_br(mem_manager,emitter,code_patch_list,map,cc_p,fall_bc_index,false,false);
            emit_br(mem_manager,emitter,code_patch_list,map,cc,target_bc_index,is_bwd_br,false);
            break;
        case 0x9c: // ifge
        case 0x9d: // ifgt
            // jae/ja target
            // jp  target (cmpg)
            emit_br(mem_manager,emitter,code_patch_list,map,cc,target_bc_index,is_bwd_br,false);
            if (is_cmpg)
                emit_br(mem_manager,emitter,code_patch_list,map,cc_p,target_bc_index,is_bwd_br,false);
            break;
        default:
            assert(0);
    }
}


extern void emit_cmp_zero(Code_Emitter& emitter,Stack& stack,unsigned char bytecode);

//
// Compare-with-zero followed by a signed conditional branch.
//
// The compare itself is produced by emit_cmp_zero (defined elsewhere).
// The branch condition is looked up in branch_cc with (bytecode - 0x99).
//
//   bwd_branch       true when the branch to target_bc_index is backward
//   target_bc_index  bytecode index to jump to
//
void emit_cmp0_br(Mem_Manager& mem_manager,
                  Code_Emitter& emitter, Stack& stack,
                  Code_Patch*& code_patch_list,
                  Map_Entry *map,
                  unsigned char bytecode,
                  bool bwd_branch,
                  unsigned target_bc_index) {
    emit_cmp_zero(emitter,stack,bytecode);

    const X86_CC cond = branch_cc[bytecode-0x99];
    emit_br(mem_manager,emitter,code_patch_list,map,
            cond,target_bc_index,bwd_branch,true);
}

//
// lcmp: pop two longs, value2 (top) and value1, and push an int.
// If value1 >  value2, then push 1  onto the stack.
// If value1 == value2, then push 0  onto the stack.
// If value1 <  value2, then push -1 onto the stack.
//
// The high words are compared signed, the low words unsigned:
//          cmp v1h,v2h
//          jg  push_1
//          jl  push_m1
//          cmp v1l,v2l
//          ja  push_1
//          jb  push_m1
//          mov reg,0
//          jmp cont
// push_1:  mov reg,1
//          jmp cont
// push_m1: mov reg,-1
// cont:
//
// All branches are emitted with a zero displacement and patched in place:
// the byte at get_next() - 1 right after emission is the last byte of the
// instruction, and the offset recorded at the same moment is the base the
// displacement is measured from.
//
static void emit_lcmp(Code_Emitter& emitter,
                      Stack&        stack)
{
    //
    // make sure that we have one register for generating cmp; the same
    // register later receives the result
    //
    Reg_Operand *reg = stack.reg_manager.get_reg();
    Operand *opnd1_lo, *opnd1_hi, *opnd2_lo, *opnd2_hi;
    stack.pop64(opnd2_lo, opnd2_hi);
    stack.pop64(opnd1_lo, opnd1_hi);

    unsigned commute;
    X86_CC cc;
    //
    // compare hi 32 bits (signed)
    //
    emit_cmp(emitter,stack,opnd1_hi,opnd2_hi,commute,reg);
    //
    // jg  --> push 1
    //
    cc = (commute) ? cc_commute_map[cc_gt] : cc_gt;
    emitter.emit_branch(cc,0,true); // signed
    int   push_1_off1 = emitter.get_offset();
    char *push_1_pos1 = emitter.get_next() - 1;
    //
    // jl --> push -1
    //
    cc = (commute) ? cc_commute_map[cc_lt] : cc_lt;
    emitter.emit_branch(cc,0,true); // signed
    int   push_m1_off1 = emitter.get_offset();
    char *push_m1_pos1 = emitter.get_next() - 1;
    //
    // compare low 32 bits (unsigned); emit_cmp recomputes commute
    //
    emit_cmp(emitter,stack,opnd1_lo,opnd2_lo,commute,reg);
    //
    // ja  --> push 1
    //
    cc = (commute) ? cc_commute_map[cc_gt] : cc_gt;
    emitter.emit_branch(cc,0,false); // unsigned
    int   push_1_off2 = emitter.get_offset();
    char *push_1_pos2 = emitter.get_next() - 1;
    //
    // jb --> push -1
    //
    cc = (commute) ? cc_commute_map[cc_lt] : cc_lt;
    emitter.emit_branch(cc,0,false); // unsigned
    int   push_m1_off2 = emitter.get_offset();
    char *push_m1_pos2 = emitter.get_next() - 1;
    //
    // value1 and value2 are equal
    // reg = 0;
    //
    // Immediates are named objects: taking the address of a temporary
    // (&Imm_Opnd(0)) is not valid ISO C++.
    //
    Imm_Opnd imm_zero(0);
    emitter.emit_mov(&reg->opnd,&imm_zero);
    Imm_Opnd eq_disp(0);
    emitter.emit_jump8(&eq_disp);
    int   jmp_eq_off = emitter.get_offset();
    char *jmp_eq_pos = emitter.get_next() - 1;
    //
    // value1 > value2
    // reg = 1;
    //
    // patch both "greater" branches to land here
    //
    int curr_off = emitter.get_offset();
    *push_1_pos1 = curr_off - push_1_off1;
    *push_1_pos2 = curr_off - push_1_off2;
    Imm_Opnd imm_one(1);
    emitter.emit_mov(&reg->opnd,&imm_one);
    Imm_Opnd gt_disp(0);
    emitter.emit_jump8(&gt_disp);
    int   jmp_gt_off = emitter.get_offset();
    char *jmp_gt_pos = emitter.get_next() - 1;
    //
    // value1 < value2
    // reg = -1
    //
    // patch both "less" branches to land here
    //
    curr_off = emitter.get_offset();
    *push_m1_pos1 = curr_off - push_m1_off1;
    *push_m1_pos2 = curr_off - push_m1_off2;
    Imm_Opnd imm_minus_one(-1);
    emitter.emit_mov(&reg->opnd,&imm_minus_one);
    //
    // patch the two jumps that skip to the end of the sequence
    //
    curr_off = emitter.get_offset();
    *jmp_eq_pos = curr_off - jmp_eq_off;
    *jmp_gt_pos = curr_off - jmp_gt_off;

    stack.push(reg); // push result onto the stack
}

//
// Pop two float/double operands, compare them (src1 against src2, where
// src2 was on top of the stack) and leave the outcome in the CPU flags.
// Nothing is pushed.
//
// Typical sequence when both operands are in memory:
//  fld    [eax + 4]   --- load src1 onto the fp stack
//  fcomp  [eax + 8]   --- compare src1 with src2 and pop
//  fnstsw ax          --- store the fp status word in ax without checking
//                         for unmasked floating-point error conditions
//  sahf               --- load SF, ZF, AF, PF and CF from ah
//
//   is_double  true for 64-bit operands (popped with pop64)
//   op_pool    not used here
//
void emit_fdcmp(Code_Emitter& emitter,
                Stack&        stack,
                bool          is_double,
                Pre_Alloc_Operand_Pool& op_pool) {
    Operand *src1, *src2, *src1_hi, *src2_hi;
    if (!is_double) {
        src2 = stack.pop();
        src1 = stack.pop();
    } else {
        stack.pop64(src2,src2_hi);
        stack.pop64(src1,src1_hi);
    }

    if (stack.fp_strict_mode)
    {
        // the casts below treat both operands as memory operands
        src1->free_opnd(&stack.reg_manager);
        src2->free_opnd(&stack.reg_manager);
        Mem_Operand *m_src1 = (Mem_Operand*)src1;
        Mem_Operand *m_src2 = (Mem_Operand*)src2;
        emitter.emit_fld(m_src1->mem_opnd(),is_double);
        emitter.emit_fp_op_mem(fcomp_opc,m_src2->mem_opnd(),is_double);
    }
    else
    {
        load_onto_fp_stack(stack, src1, is_double);

        if (src2->is_mem()) {
            src2->free_opnd(&stack.reg_manager);
            Mem_Operand *m_src2 = (Mem_Operand*)src2;
            emitter.emit_fp_op_mem(fcomp_opc,m_src2->mem_opnd(),is_double);
        } else {
            assert(src2->kind == Operand::Fp);
            //
            // If src1 was already an fp-stack operand too, src2 sits on
            // top of it (st(0) = src2, st(1) = src1).  Comparing st(0)
            // with st(1) as-is would compare src2 against src1 and
            // reverse the sign of the result, so the two are exchanged
            // first -- the same handling emit_fdcmp_br uses.
            //
            if (src1->kind == Operand::Fp)
                emitter.emit_fxch(1);  // exchange st(0) and st(1)
            emitter.emit_fcompp();     // compare st(0) with st(1), pop both
        }
    }

    //
    // Claim eax BEFORE fnstsw overwrites ax.  If claiming the register has
    // to move a live value out of eax, that must happen while the value is
    // still intact.
    // NOTE(review): the eax operand is never released here -- confirm that
    // the register manager does not need a matching free_opnd.
    //
    stack.reg_manager.get_reg(eax_reg);
    emitter.emit_fnstsw();
    emitter.emit_sahf();
}

//
// fcmpl / fcmpg / dcmpl / dcmpg: compare two floating-point values and
// push an int result.
// If value1 >  value2, then push 1  onto the stack.
// If value1 == value2, then push 0  onto the stack.
// If value1 <  value2, then push -1 onto the stack.
// If the parity branch is taken (the NaN case), push 1 when is_cmpg is
// true and -1 otherwise.
//
//   is_double  true for the double variants
//   is_cmpg    true for fcmpg / dcmpg
//
// All branches are emitted with a zero displacement and patched in place
// once their destination is known.
//
static void emit_fdcmp(Code_Emitter& emitter,
                       Stack&        stack,
                       Pre_Alloc_Operand_Pool& op_pool,
                       bool          is_double,
                       bool          is_cmpg)
{
    //
    // do floating-point comparison and set status flags
    //
    emit_fdcmp(emitter,stack,is_double,op_pool);
    //
    // Obtain the result register before any conditional branch is emitted.
    // Were the register manager to emit code while handing out the
    // register, that code has to run on every path, not only on the
    // "equal" path that follows the branches.
    // NOTE(review): this relies on such code leaving the flags untouched
    // (true for plain mov) -- confirm against the register manager.
    //
    Reg_Operand *reg = stack.reg_manager.get_reg();
    //
    // determine the result (1, 0, or -1)
    //
    emitter.emit_branch(cc_p,0,false); // test NaN case
    int   nan_br_off = emitter.get_offset();
    char *nan_br_pos = emitter.get_next() - 1;
    //
    // test 'value1 > value2'
    //
    emitter.emit_branch(cc_gt,0,false); // test > case
    int   gt_br_off = emitter.get_offset();
    char *gt_br_pos = emitter.get_next() - 1;
    //
    // test 'value1 < value2'
    //
    emitter.emit_branch(cc_lt,0,false); // test < case
    int   lt_br_off = emitter.get_offset();
    char *lt_br_pos = emitter.get_next() - 1;
    //
    // value1 == value2 (result = 0)
    //
    // Immediates are named objects: taking the address of a temporary
    // (&Imm_Opnd(0)) is not valid ISO C++.
    //
    Imm_Opnd imm_zero(0);
    emitter.emit_mov(&reg->opnd,&imm_zero);
    Imm_Opnd eq_disp(0);
    emitter.emit_jump8(&eq_disp);
    int   jmp_eq_off = emitter.get_offset();
    char *jmp_eq_pos = emitter.get_next() - 1;
    //
    // set result = 1 ('>' case, and the NaN case for cmpg)
    //
    *gt_br_pos = emitter.get_offset() - gt_br_off; // patch correct branch off
    if (is_cmpg)
        *nan_br_pos = emitter.get_offset() - nan_br_off;
    Imm_Opnd imm_one(1);
    emitter.emit_mov(&reg->opnd,&imm_one);
    Imm_Opnd gt_disp(0);
    emitter.emit_jump8(&gt_disp);
    int   jmp_gt_off = emitter.get_offset();
    char *jmp_gt_pos = emitter.get_next() - 1;
    //
    // set result = -1 ('<' case, and the NaN case for cmpl)
    //
    *lt_br_pos = emitter.get_offset() - lt_br_off; // patch correct branch off
    if (!is_cmpg)
        *nan_br_pos = emitter.get_offset() - nan_br_off;
    Imm_Opnd imm_minus_one(-1);
    emitter.emit_mov(&reg->opnd,&imm_minus_one);
    //
    // patch all jump offsets
    //
    int curr_off = emitter.get_offset();
    *jmp_eq_pos = curr_off - jmp_eq_off;
    *jmp_gt_pos = curr_off - jmp_gt_off;

    stack.push(reg); // push result onto the stack
}

//
// Dispatch a stand-alone compare bytecode (0x94 .. 0x98).  The selected
// routine pushes the result (1, -1, or 0) onto the Java stack.
// Any other bytecode trips the assert.
//
void emit_compare(Code_Emitter& em,
                  Stack&        stack,
                  unsigned char bc,
                  Pre_Alloc_Operand_Pool& op_pool)
{
    switch (bc)
    {
        case 0x94: // lcmp
            emit_lcmp(em,stack);
            break;
        case 0x95: // fcmpl
            emit_fdcmp(em,stack,op_pool,false,false);
            break;
        case 0x96: // fcmpg
            emit_fdcmp(em,stack,op_pool,false,true);
            break;
        case 0x97: // dcmpl
            emit_fdcmp(em,stack,op_pool,true,false);
            break;
        case 0x98: // dcmpg
            emit_fdcmp(em,stack,op_pool,true,true);
            break;
        default:
            assert(0);
    }
}

//
// if{eq,ne,lt,ge,gt,le}: compare the int on top of the stack with zero
// and branch on the result.
//
//   cc               condition to branch on (before any commuting)
//   is_bwd_br        true when the branch to target_bc_index is backward
//   target_bc_index  bytecode index to jump to
//
void emit_ifxx(Mem_Manager&  mm,
               Code_Emitter& emitter,
               Stack&        stack,
               Code_Patch*&  code_patch_list,
               Map_Entry     *map,
               X86_CC        cc,
               bool          is_bwd_br,
               unsigned      target_bc_index)
{
    Operand *value = stack.pop();
    Imm_Operand zero_imm(0);
    Operand *zero = &zero_imm;
    //
    // end of block: spill whatever is still on the stack
    //
    stack.home_all();
    //
    // compare against 0; the scratch register is only needed by emit_cmp
    //
    Reg_Operand *scratch = stack.reg_manager.get_reg();
    unsigned swapped;
    emit_cmp(emitter,stack,value,zero,swapped,scratch);
    scratch->free_opnd(&stack.reg_manager);
    //
    // emit the (signed) branch, with the condition commuted when emit_cmp
    // reports that it tested the operands in reverse order
    //
    const X86_CC br_cc = swapped ? cc_commute_map[cc] : cc;
    emit_br(mm,emitter,code_patch_list,map,br_cc,target_bc_index,is_bwd_br,true);
}
