// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/fpspill.cpp,v 1.2 2001/08/13 09:54:55 xhshi Exp $
//


#include "defines.h"
#include <assert.h>
#include "fpspill.h"

// Reports whether "inst" is marked as the last use of the source operand
// "opnd".  For a globally allocated FP register the answer is read from
// inst->live_ranges_ended; for every other register it is read from
// inst->region_live_ranges_ended.
// "opnd" must be a register operand that is one of the sources of "inst".
static bool is_last_use_region(Inst *inst, Operand *opnd)
{
    // Find the first source slot that holds this operand.
    unsigned idx = 0;
    while (idx < inst->n_srcs && inst->src(idx) != opnd)
        idx++;
    assert(idx < inst->n_srcs);
    assert(opnd->is_reg());

    // Pick the bit vector that applies to this kind of register.
    unsigned char ended_bits;
    if (((Reg_Operand *)opnd)->is_globally_allocated_fp())
        ended_bits = inst->live_ranges_ended;
    else
        ended_bits = inst->region_live_ranges_ended;

    // Source slot i is tracked by bit (2*i + 2).
    const unsigned slot_bit = 1u << (2 * idx + 2);
    return (ended_bits & slot_bit) != 0;
}

// Reports whether inst->live_ranges_ended marks the source operand "opnd" as
// ended.  "opnd" must be one of the sources of "inst".
static bool is_last_use(Inst *inst, Operand *opnd)
{
    // Find the first source slot that holds this operand.
    unsigned idx = 0;
    while (idx < inst->n_srcs)
    {
        if (inst->src(idx) == opnd)
            break;
        idx++;
    }
    assert(idx < inst->n_srcs);

    // Source slot i is tracked by bit (2*i + 2).
    const unsigned slot_bit = 1u << (2 * idx + 2);
    return (inst->live_ranges_ended & slot_bit) != 0;
}

// Bookkeeping for an instruction created by the spiller.
//   new_inst     - the freshly created instruction.
//   old_inst     - the instruction it was generated for.
//   is_prepended - true if new_inst was created as code preceding old_inst,
//                  false if it follows it (only recorded under PRINTABLE_O3).
static void SET_BC(Inst *new_inst, Inst *old_inst, bool is_prepended)
{
    // Generated assignments are flagged via dont_eliminate().
    if (new_inst->is_assignment())
        new_inst->dont_eliminate();
#ifdef PRINTABLE_O3
    // Carry over the bytecode index and tag the instruction as spill code:
    // -1 for prepended code, +1 for appended code.
    if (is_prepended)
        new_inst->is_spill = -1;
    else
        new_inst->is_spill = 1;
    new_inst->bc_index = old_inst->bc_index;
#endif // PRINTABLE_O3
}

// Decides whether "inst" has to be processed by the FP spiller.
// Returns true for:
//   - a push whose operand has type JIT_TYPE_DOUBLE,
//   - an instruction that writes the status flags from an FP/double source,
//   - an instruction whose destination has an FP/double type.
// Returns false if the instruction was already processed (fp_spilled()), has
// no sources, or has no destination (other than the push case above).
static bool needs_spilling(Inst *inst)
{
    if (inst->fp_spilled())
        return false;
    if (inst->n_srcs == 0)
        return false;
    if (inst->is_push() && inst->src(0)->type == JIT_TYPE_DOUBLE)
        return true;
    // Everything below inspects the destination, so an instruction without
    // one is never spilled.
    Operand *dst = inst->dst();
    if (dst == NULL)
        return false;
    // FP compare: the destination is the status flags, the sources are FP.
    if (dst->is_status_flags() && IS_FP_DBL_TYPE(inst->src(0)->type))
        return true;
    if (!IS_FP_DBL_TYPE(dst->type))
        return false;
    // The spiller only knows how to handle one or two source operands.
    assert(inst->n_srcs < 3);
    return true;
}

// Creates a non-popping store of st(0) into the operand that the mimic stack
// records at location 0, registers it as code prepended to "inst", and then
// calls set_dirty(false) on the mimic stack.
static void spill_stk0_without_popping(Inst *inst, Spill_Closure *sc)
{
    Reg_Operand *top_of_stack = sc->fpstk0;
    Reg_Operand *home = sc->fpstk.stack_loc(0);
    Assign_Inst *store = new(sc->mm) Assign_Inst(home, top_of_stack, inst->exp, inst);
    // Keep the value on the FP stack: store without popping.
    store->set_no_pop_on_fp_store();
    SET_BC(store, inst, true);
    // The value has been written back, so it is clean now.
    sc->fpstk.set_dirty(false);
}

// Writes "opnd" back from the FP stack if it is a register operand that is
// on the FP stack, is dirty, has its last use in the region at "inst", and
// is still live globally.  The value is first exchanged into st(0) and then
// stored without popping; both instructions are created as code prepended
// to "inst".
static void spill_if_dirty_and_last_use(Operand *opnd, Inst *inst, Spill_Closure *sc)
{
    // Only register operands can live on the FP stack.
    if (!opnd->is_reg())
        return;
    Reg_Operand *reg = (Reg_Operand *) opnd;

    // Nothing to do unless it is on the FP stack and dirty.
    unsigned where = sc->fpstk.find_operand(reg);
    if (where == -1 || !sc->fpstk.is_dirty(where))
        return;

    // Nothing to do unless this is the last use in the region while the
    // value is still live globally.
    if (!is_last_use_region(inst, opnd) || is_last_use(inst, opnd))
        return;

    // The operand is expected to be somewhere below st(0).
    // Possible optimization: if stk(0) contains the other operand,
    // then swap stk(0) and stk(1) first.
    assert(where != 0);
    Fp_Stk *st0 = sc->fpstk0;
    Fp_Stk *stn = sc->exprs.fp_stk_opnd(where);

    // Exchange st(0) and st(where), in the mimic stack and in the code.
    sc->fpstk.swap(where);
    Fxch_Inst *exchange = new(sc->mm) Fxch_Inst(st0, stn, inst->exp, inst);
    SET_BC(exchange, inst, true);

    // Store st(0) to the operand without popping, then mark it clean.
    Assign_Inst *store = new(sc->mm) Assign_Inst(opnd, st0, inst->exp, inst);
    store->set_no_pop_on_fp_store();
    SET_BC(store, inst, true);
    sc->fpstk.set_dirty(false);
}

// Writes back any source operand of "inst" that is dirty on the FP stack and
// whose last use in the region is this instruction, but which is still live
// globally.  Does nothing when the mimic stack is empty.
static void spill_dirty_operands(Inst *inst, Spill_Closure *sc)
{
    if (sc->fpstk.is_empty())
        return;

    Operand *first = inst->src(0);
    Operand *second = (inst->n_srcs > 1 ? inst->src(1) : NULL);

    // Handle a source that is already sitting in st(0): it can be stored
    // directly, with no exchange needed.
    Reg_Operand *top = sc->fpstk.stack_loc(0);
    const bool top_is_source = (top == first || top == second);
    if (top_is_source &&
        sc->fpstk.is_dirty(0) &&
        is_last_use_region(inst, top) &&
        !is_last_use(inst, top))
    {
        spill_stk0_without_popping(inst, sc);
    }

    // Handle sources located elsewhere on the stack.
    spill_if_dirty_and_last_use(first, inst, sc);
    if (second != NULL)
        spill_if_dirty_and_last_use(second, inst, sc);
}

extern bool O3_is_PPro;
// Picks the source operand, if any, that the instruction will operate on
// directly from memory instead of from the FP stack (the "mdo").
// Returns that operand, or NULL when every source has to be on the FP stack.
static Operand *memory_designated_operand(Inst *inst, Spill_Closure *sc)
{
    // The unary instructions (fneg, fsqrt, etc.) and the fprem instruction
    // require all their operands to be on the FP stack, so none can be
    // used from memory.
    if (inst->is_fprem() || inst->info()->is_unary)
        return NULL;
    const bool is_cmp = inst->is_compare();
    if (O3_is_PPro && is_cmp)
        return NULL;
    Operand *src1 = inst->src(0);
    // Usually, if the first source operand is a Mem_Operand, it can go
    // ahead and be the mdo.  However, a compare instruction requires the
    // first source operand to be st(0).  (Unless we commute the condition...)
    if (!is_cmp && !src1->is_reg())
        return src1;
    Operand *src2 = NULL;
    if (inst->n_srcs > 1)
        src2 = inst->src(1);
    // Second source operand is mdo if it's a Mem_Operand.
    if (src2 != NULL && !src2->is_reg())
        return src2;
    // If both operands are Reg_Operands, and they are the same, we definitely
    // have to load one onto the stack.
    if (src1 == src2)
        return NULL;
    // For a reg=reg assignment, make sure the RHS is on the stack.
    if (inst->is_assignment())
        return NULL;
    // If one of the operands is not on the FP stack, and this use is the last
    // use in the region, then there's no point loading it onto the FP stack.
    //
    // src1 is never the mdo of a compare (it must be in st(0)), so it is only
    // examined for non-compares.  For a non-compare src1 is known to be a
    // register here; for a compare it may still be a non-register operand,
    // which must not be handed to is_last_use_region() or cast to Reg_Operand.
    if (!is_cmp)
    {
        Reg_Operand *rsrc1 = (Reg_Operand *) src1;
        if (is_last_use_region(inst, src1) && sc->fpstk.find_operand(rsrc1) == -1)
            return src1;
    }
    // src2 is either NULL or a register at this point.
    if (src2 != NULL)
    {
        Reg_Operand *rsrc2 = (Reg_Operand *) src2;
        if (is_last_use_region(inst, src2) && sc->fpstk.find_operand(rsrc2) == -1)
            return src2;
    }
    return NULL;
}

// Loads "opnd" onto the FP stack unless it is the memory-designated operand
// "mdo", is not a register operand, or is already on the mimic stack.
// src1/src2 are forwarded to Fp_Mimic_Stack::push().
static void bring_operand_onto_fp_stack(Inst *inst, Spill_Closure *sc,
                                        Operand *opnd, Operand *mdo,
                                        Operand *src1, Operand *src2)
{
    if (opnd == mdo || !opnd->is_reg())
        return;

    Reg_Operand *reg = (Reg_Operand *) opnd;
    const bool already_on_stack = (sc->fpstk.find_operand(reg) != -1);
    if (already_on_stack)
        return;

    Fp_Stk *st0 = sc->fpstk0;
    // It's OK to do the push before the assign because the operand is
    // guaranteed to be in memory.
    sc->fpstk.push(reg, src1, src2, inst, sc->exprs, sc->mm);
    Assign_Inst *load = new(sc->mm) Assign_Inst(st0, opnd, inst->exp, inst);
    SET_BC(load, inst, true);
}

// Makes sure each of the (at most two) source operands of "inst" that needs
// to be on the FP stack is there, loading whatever is missing.
static void bring_operands_onto_fp_stack(Inst *inst, Spill_Closure *sc, Operand *mdo,
                                         Operand *src1, Operand *src2)
{
    Operand *first = inst->src(0);
    bring_operand_onto_fp_stack(inst, sc, first, mdo, src1, src2);

    if (inst->n_srcs < 2)
        return;

    Operand *second = inst->src(1);
    bring_operand_onto_fp_stack(inst, sc, second, mdo, src1, src2);
}

// Determines which register source operands are consumed by "inst", i.e.
// have their last use in the region here and are not the memory-designated
// operand "mdo".
//   Returns           - the operand consumed directly by the instruction, or NULL.
//   other_to_consume  - (out) a second operand that must be consumed as well,
//                       or NULL.
// For compare and fprem, a lone consumable second source is reported through
// other_to_consume instead of the return value.
// XXX- this breaks if the result is dirty and !is_last_use().
static Operand *operand_to_consume(Inst *inst, Spill_Closure *sc, Operand *&other_to_consume,
                                   Operand *mdo)
{
    Operand *src1 = inst->src(0);
    // Initialized so that the fprem test at the end never reads an
    // indeterminate pointer when the instruction has a single source.
    Operand *src2 = NULL;
    Operand *result = NULL;
    other_to_consume = NULL;
    if (src1->is_reg())
    {
        if (src1 != mdo && is_last_use_region(inst, src1))
            result = src1;
    }
    if (inst->n_srcs > 1)
    {
        src2 = inst->src(1);
        if (src2->is_reg())
        {
            if (src2 != mdo && is_last_use_region(inst, src2))
            {
                if (result == NULL)
                    result = src2;
                else if (src2 != result)  // src1 and src2 can be identical
                    other_to_consume = src2;
            }
            // Only the second source is consumable: compare and fprem cannot
            // consume it directly, so hand it over as "other".
            if ((inst->is_compare() || inst->is_fprem()) && result == src2 &&
                other_to_consume == NULL && src1 != src2)
            {
                other_to_consume = result;
                result = NULL;
            }
        }
    }
    // fprem with identical sources: the operand counts as both.
    if (inst->is_fprem() && src1 == src2)
        other_to_consume = result;
    return result;
}

// Duplicates a source operand on the FP stack when the instruction would
// otherwise have nothing it is allowed to overwrite.
//   consume - (in/out) the operand chosen for consumption; when a copy is
//             made here it is set to the duplicated operand.
//   mdo     - the memory-designated operand, which is never duplicated.
//   old_dst - the destination of "inst" before any rewriting.
// Special case: an fprem whose two sources are identical always gets a
// second copy of that operand, and "consume" is left untouched.
static void duplicate_operands(Inst *inst, Spill_Closure *sc, Operand *&consume,
                               Operand *mdo, Operand *old_dst)
{
    if (inst->is_fprem() && inst->src(0) == inst->src(1))
    {
        Operand *both = inst->src(0);
        assert(both->is_reg());
        Reg_Operand *reg = (Reg_Operand *) both;
        unsigned where = sc->fpstk.find_operand(reg);
        assert(where != -1);
        Fp_Stk *st0 = sc->fpstk0;
        Fp_Stk *stn = sc->exprs.fp_stk_opnd(where);
        sc->fpstk.push(reg, both, both, inst, sc->exprs, sc->mm);
        Assign_Inst *copy = new(sc->mm) Assign_Inst(st0, stn, inst->exp, inst);
        SET_BC(copy, inst, true);
        return;
    }

    // Something is already going to be consumed; no copy is needed.
    if (consume != NULL)
        return;

    Operand *first = inst->src(0);
    // For a reg=reg assignment statement, we need to duplicate src0 only if
    // it's not the last regional use of dst and it's not the last regional
    // use of src0.  Every other kind of assignment needs no duplication.
    if (inst->is_assignment())
    {
        const bool reg_to_reg = old_dst->is_reg() && first->is_reg();
        if (!reg_to_reg ||
            inst->region_dst_live_range_ends ||
            is_last_use_region(inst, first))
            return;
    }

    Operand *second = NULL;
    if (inst->n_srcs > 1)
        second = inst->src(1);

    // Prefer duplicating the first source.
    if (first != mdo && first->is_reg() && !is_last_use_region(inst, first))
    {
        Reg_Operand *reg1 = (Reg_Operand *) first;
        Fp_Stk *st0 = sc->fpstk0;
        // We first push reg1 and then pop it again, just to generate the
        // necessary spill code in case the FP stack is full.
        sc->fpstk.push(reg1, first, second, inst, sc->exprs, sc->mm);
        sc->fpstk.pop();
        // Then we find where reg1 is on the FP stack, and generate the assignment.
        Fp_Stk *stn = sc->exprs.fp_stk_opnd(sc->fpstk.find_operand(reg1));
        sc->fpstk.push(reg1, first, second, inst, sc->exprs, sc->mm);
        Assign_Inst *copy = new(sc->mm) Assign_Inst(st0, stn, inst->exp, inst);
        SET_BC(copy, inst, true);
        consume = first;
        return;
    }

    // Otherwise try the second source, using the same push/pop/push sequence.
    if (inst->n_srcs > 1 &&
        second != consume && second != mdo && second->is_reg() &&
        !is_last_use_region(inst, second))
    {
        Reg_Operand *reg2 = (Reg_Operand *) second;
        Fp_Stk *st0 = sc->fpstk0;
        sc->fpstk.push(reg2, first, second, inst, sc->exprs, sc->mm);
        sc->fpstk.pop();
        Fp_Stk *stn = sc->exprs.fp_stk_opnd(sc->fpstk.find_operand(reg2));
        sc->fpstk.push(reg2, first, second, inst, sc->exprs, sc->mm);
        Assign_Inst *copy = new(sc->mm) Assign_Inst(st0, stn, inst->exp, inst);
        SET_BC(copy, inst, true);
        if (consume == NULL)
            consume = second;
        return;
    }
}

// All the operands that need to be on the FP stack are currently there, but possibly
// not in the right positions.  The goal here is to swap them into the right positions.
// For example, fprem requires src1 to be in stk0 and src2 to be in stk1.
// For other instructions, one of the source operands must be in stk0.
//
//   consume - the operand the instruction is going to overwrite, or NULL.
//   mdo     - the memory-designated operand, or NULL (must be NULL for fprem).
//
// Every fxch/assignment created here is registered as code prepended to "inst",
// and each exchange is mirrored in the mimic stack (sc->fpstk).
// Returns a flag that O3_do_fp_spilling() passes to execute_instruction() as
// "dst_in_stk0": false is returned only on the two paths below that deliberately
// skip moving the consumed operand into stk(0).
static bool prepare_operands(Inst *inst, Spill_Closure *sc, Operand *consume, Operand *mdo)
{
    if (inst->is_fprem())
    {
        assert(mdo == NULL);
        Operand *src1 = inst->src(0);
        Operand *src2 = inst->src(1);
        // fprem can only consume its first source operand directly.
        assert(consume == NULL || consume == src1);
        assert(src1->is_reg());
        assert(src2->is_reg());
        Reg_Operand *rsrc1 = (Reg_Operand *) src1;
        Reg_Operand *rsrc2 = (Reg_Operand *) src2;
        unsigned src1loc = sc->fpstk.find_operand(rsrc1);
        unsigned src2loc = sc->fpstk.find_operand(rsrc2);
        assert(src1loc != -1);
        assert(src2loc != -1);
        if (src1loc == 0 && src2loc == 1 && consume == src1)  // everything is in the right place
            return true;
        Fp_Stk *fpstk0 = sc->fpstk0;
        Fp_Stk *fpstkn;
        if (consume == NULL)
        {
            // If neither operand is consumed, swap src2 into stk0 and then load src1 into stk0.
            // First get src2 into stk0, then load src1.
            if (src2loc != 0)
            {
                fpstkn = sc->exprs.fp_stk_opnd(src2loc);
                // Swap stk(0) and stk(src2loc).
                sc->fpstk.swap(src2loc);
                SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
                // The swap may have moved src1, so look it up again.
                src1loc = sc->fpstk.find_operand(rsrc1);
            }
            // NOTE(review): only the load instruction is created here; no matching
            // push on the mimic stack is visible in this function -- confirm that
            // the bookkeeping is done elsewhere.
            fpstkn = sc->exprs.fp_stk_opnd(src1loc);
            SET_BC(new(sc->mm) Assign_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
            return true;
        }
        if (src2loc != 1)
        {
            // If src2 isn't already in stk1, swap it into stk0 and then swap with stk1.
            if (src2loc != 0)
            {
                fpstkn = sc->exprs.fp_stk_opnd(src2loc);
                // Swap stk(0) and stk(src2loc).
                sc->fpstk.swap(src2loc);
                SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
                src2loc = 0;
            }
            // Now swap stk(0) and stk(1)
            fpstkn = sc->exprs.fp_stk_opnd(1);
            sc->fpstk.swap(1);
            SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
            // The swaps may have moved src1, so look it up again.
            src1loc = sc->fpstk.find_operand(rsrc1);
            src2loc = 1;
        }
        // Finally, swap stk(0) and src1.
        if (src1loc != 0)
        {
            fpstkn = sc->exprs.fp_stk_opnd(src1loc);
            sc->fpstk.swap(src1loc);
            SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
        }
        assert(sc->fpstk.stack_loc(0) == rsrc1);
        assert(sc->fpstk.stack_loc(1) == rsrc2);
#if 0
        assert(sc->fpstk.find_operand(rsrc1) == 0);
        assert(sc->fpstk.find_operand(rsrc2) == 1);
#endif // 0
        return true;
    }
    // If it's "a=b" where b's live range ends and "a" is used later, don't bother to
    // do a useless fxch.
    if (inst->is_assignment() && inst->dst()->is_reg() && inst->src(0)->is_reg() &&
        !inst->region_dst_live_range_ends && is_last_use_region(inst, inst->src(0)))
        return false;
    // If one of the operands is to be consumed, we can just swap it into stk(0)
    // and use the other operand directly where it is.
    // And if the other operand happens to already be in stk(0), we can use the form
    // of the FP operation where the second operand is overwritten.
    if (consume != NULL)
    {
        unsigned cloc = sc->fpstk.find_operand((Reg_Operand *)consume);
        assert(cloc != -1);
        if (cloc != 0)
        {
            // Binary, non-compare instruction whose result stays live in the region:
            // if the other source already occupies stk(0), leave everything in place
            // and report that the destination is not stk(0).
            if (inst->n_srcs > 1 && !inst->dst()->is_status_flags() &&
                !inst->region_dst_live_range_ends)
            {
                Operand *src = inst->src(0);
                if (src != consume && src->is_reg() && sc->fpstk.find_operand((Reg_Operand *)src) == 0)
                    return false;
                src = inst->src(1);
                if (src != consume && src->is_reg() && sc->fpstk.find_operand((Reg_Operand *)src) == 0)
                    return false;
            }
            Fp_Stk *fpstk0 = sc->fpstk0;
            Fp_Stk *fpstkn = sc->exprs.fp_stk_opnd(cloc);
            // Swap stk(0) and stk(cloc).
            sc->fpstk.swap(cloc);
            SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
        }
        return true;
    }
    // None of the operands is supposed to be consumed, so we pick a non-mdo operand
    // and swap it into stk(0).
    Operand *opnd = inst->src(0);
    if (opnd == mdo)
    {
        // The only source is the mdo: nothing has to be positioned.
        if (inst->n_srcs < 2)
            return true;
        opnd = inst->src(1);
    }
    assert(opnd != mdo);
    assert(opnd->is_reg());
    Reg_Operand *ropnd = (Reg_Operand *) opnd;
    unsigned loc = sc->fpstk.find_operand(ropnd);
    assert(loc != -1);
    // The proper number of each operand should already be on the stack.  Therefore,
    // we only need to swap one of the operands into stk0 if it's not already there.
    if (loc != 0)
    {
        Fp_Stk *fpstk0 = sc->fpstk0;
        Fp_Stk *fpstkn = sc->exprs.fp_stk_opnd(loc);
        SET_BC(new(sc->mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst), inst, true);
        // Mirror the exchange in the mimic stack.
        sc->fpstk.swap(loc);
    }
    return true;
}

// Gets rid of the second consumed operand "opnd" (the "other_to_consume" of
// the caller) after "inst" has been rewritten.  Does nothing if opnd is NULL.
//   - Arithmetic instruction: switched to its popping form.
//   - Compare with the operand in st(0) (non-PPro only): switched to the
//     double-pop form.
//   - Otherwise: an explicit popping store is created in front of "nxt".
static void consume_remaining(Inst *inst, Spill_Closure *sc, Operand *opnd, Inst *nxt)
{
    if (opnd == NULL)
        return;
    assert(opnd->is_reg());
    unsigned victim_loc = sc->fpstk.find_operand((Reg_Operand *) opnd);
    assert(victim_loc != -1);
    Fp_Stk *st0 = sc->fpstk0;
    Fp_Stk *st_victim = sc->exprs.fp_stk_opnd(victim_loc);

    if (inst->is_compare() || inst->is_fprem())
    {
        // Try to generate the "fcompp" instruction.
        if (!O3_is_PPro && inst->is_compare() && victim_loc == 0)
        {
            ((Compare_Inst *)inst)->set_double_pop();
            sc->fpstk.pop();
            return;
        }

        // Fall back to an explicit pop placed after the instruction.
        Assign_Inst *pop_store = new(sc->mm) Assign_Inst(st_victim, st0, inst->exp, nxt);
        pop_store->set_is_fp_pop();
        pop_store->set_fp_spilled();
        SET_BC(pop_store, inst, false);
        sc->fpstk.store_and_pop(victim_loc);
        return;
    }

    // It should be the case that dst, src1, and src2 are all fpstk operands,
    // and that dst is the same as either src1 or src2.  Also, either src1 or
    // src2 should be stk(0).
    // If dst is NOT stk(0), then we just set the "pop" flag of the instruction.
    // If dst IS stk(0), then we change dst to be the other fpstk location, and
    // also set the "pop" flag of the instruction.
    Operand *lhs = inst->src(0);
    Operand *rhs = inst->src(1);
    assert(lhs->is_fp_stk());
    assert(rhs->is_fp_stk());
    Operand *dst = inst->dst();
    assert(dst->is_fp_stk());
    Fp_Stk *stk_dst = (Fp_Stk *)dst;
    Fp_Stk *stk_lhs = (Fp_Stk *)lhs;
    Fp_Stk *stk_rhs = (Fp_Stk *)rhs;
    assert(dst == lhs || dst == rhs);
    assert(stk_lhs->stk == 0 || stk_rhs->stk == 0);
    unsigned other_loc = (stk_lhs->stk == 0 ? stk_rhs->stk : stk_lhs->stk);
    assert(other_loc != 0);
    if (stk_dst->stk == 0)
        inst->set_dst(sc->exprs.fp_stk_opnd(other_loc));
    inst->set_fp_pop_after_compute();
    sc->fpstk.store_and_pop(victim_loc);
}

// Works out ahead of time how many FP stack slots the rewriting of "inst"
// is going to take, and evicts entries through Fp_Mimic_Stack::spill_one()
// until that many slots are free.
// "other_to_consume" is accepted for symmetry with the caller but not used.
static void spill_in_advance(Inst *inst, Spill_Closure *sc, Operand *mdo,
                             Operand *consume, Operand *other_to_consume)
{
    unsigned slots_needed = 0;
    unsigned k;
    for (k = 0; k < inst->n_srcs; k++)
    {
        Operand *src = inst->src(k);
        const bool stack_reg = (src != mdo && src->is_reg());
        // One slot for each operand bring_operand_onto_fp_stack() will load.
        if (stack_reg && sc->fpstk.find_operand((Reg_Operand *)src) == -1)
            slots_needed++;
        // One slot for each operand duplicate_operands() might copy.
        if (consume == NULL && src != consume && stack_reg &&
            !is_last_use_region(inst, src))
            slots_needed++;
    }
    // A single-source instruction working from memory still produces a result.
    if (inst->n_srcs == 1 && mdo != NULL)
        slots_needed++;

    if (sc->fpstk.num_free() < slots_needed)
    {
        unsigned evictions_left = slots_needed - sc->fpstk.num_free();
        Operand *first = inst->src(0);
        Operand *second = (inst->n_srcs > 1 ? inst->src(1) : NULL);
        while (evictions_left > 0)
        {
            sc->fpstk.spill_one(first, second, inst, sc->exprs, sc->mm);
            evictions_left--;
        }
    }
}

// Replace the operands in the instruction with FP stack operands,
// wherever necessary.  If the result needs to be stored back to
// memory, generate that additional assignment.
//
//   dst_in_stk0 - value returned by prepare_operands(); when false the result
//                 is written to the stack location of "consume" instead of st(0).
//   src1, src2  - the original source operands of "inst" (src2 may be NULL).
//   consume     - the operand overwritten by the instruction, or NULL.
//   mdo         - the memory-designated operand, or NULL.
//   nxt         - the instruction that followed "inst" before rewriting; stores
//                 of the result are created with it as their position argument.
static void execute_instruction(Inst *inst, Spill_Closure *sc, bool dst_in_stk0,
                                Operand *src1, Operand *src2,
                                Operand *consume, Operand *mdo, Inst *nxt)
{
    Fp_Stk *fpstk0 = sc->fpstk0;
    Operand *old_dst = inst->dst();

    // Assignments are handled completely in this block; every path returns.
    if (inst->is_assignment())
    {
        assert(src2 == NULL);
        if (!src1->is_reg()) // a = []
        {
            assert(old_dst->is_reg());
            assert(sc->fpstk.find_operand((Reg_Operand *)old_dst) == -1);
            // Change to   st(0) = []
            inst->set_dst(fpstk0);
            // Push "a" onto mimic stack
            sc->fpstk.push((Reg_Operand *)old_dst, src1, src2, inst, sc->exprs, sc->mm);
            // Store back to memory if necessary
            if (inst->region_dst_live_range_ends)
            {
                Assign_Inst *ainst = new(sc->mm) Assign_Inst(old_dst, fpstk0, inst->exp, nxt);
                // The new instruction pops st(0)  (by default).
                ainst->set_fp_spilled();
                SET_BC(ainst, inst, false);
                sc->fpstk.pop();
            }
            else
                // The value now lives only on the FP stack.
                sc->fpstk.set_dirty(true);
            return;
        }
        if (!old_dst->is_reg()) // [] = b
        {
            assert(src1->is_reg());
            assert(sc->fpstk.find_operand((Reg_Operand *)src1) == 0);
            bool pop = is_last_use_region(inst, src1);
            inst->replace_src(0, fpstk0);
            if (pop) // the old instruction already pops st(0) by default
                sc->fpstk.pop();
            else
                ((Assign_Inst *)inst)->set_no_pop_on_fp_store();
            return;
        }
        // The instruction is "a = b".
        assert(old_dst->is_reg());
        assert(src1->is_reg());
        Reg_Operand *rdst = (Reg_Operand *) old_dst;
        Reg_Operand *rsrc1 = (Reg_Operand *) src1;
        assert(sc->fpstk.find_operand(rdst) == -1);
        unsigned src1loc = sc->fpstk.find_operand(rsrc1);
        bool last_use = is_last_use_region(inst, src1);
        Operand *new_dst = fpstk0;
        // "a" simply takes over the stack slot of "b" when b dies here and a lives on.
        if (!inst->region_dst_live_range_ends && last_use)
            new_dst = sc->exprs.fp_stk_opnd(src1loc);
        // The effect of these two statements is that the instruction will
        // be dead code eliminated.
        inst->set_dst(new_dst);
        inst->replace_src(0, new_dst);
        if (inst->region_dst_live_range_ends)
        {
            Assign_Inst *ainst = new(sc->mm) Assign_Inst(old_dst, fpstk0, inst->exp, nxt);
            ainst->set_fp_spilled();
            SET_BC(ainst, inst, false);
            // The new instruction pops st(0)  (by default).
            if (last_use)
                sc->fpstk.pop();
            else
                ainst->set_no_pop_on_fp_store();
        }
        else
        {
            // Replace st(n) with "a"
            sc->fpstk.overwrite(src1loc, rdst);
        }
        return;
    }

    // XXX- All the code below needs to be revised and possibly rewritten.
    // NOTE(review): "dst" duplicates "old_dst", and src1_orig/src2_orig are
    // never read in this function.
    Operand *dst = inst->dst();
    Operand *src1_orig = src1;
    Operand *src2_orig = src2;
    unsigned src1loc, src2loc;
    // NOTE(review): the casts are applied before it is known whether the
    // sources are registers; presumably find_operand() returns -1 for
    // anything that is not on the mimic stack -- confirm.
    Reg_Operand *rsrc1 = (Reg_Operand *) src1;
    Reg_Operand *rsrc2 = (Reg_Operand *) src2;
    src1loc = sc->fpstk.find_operand(rsrc1);
    if (src2 == NULL)
        src2loc = -1;
    else
        src2loc = sc->fpstk.find_operand(rsrc2);
    // fprem with identical sources: duplicate_operands() pushed a second copy,
    // so the second source is taken from st(1).
    if (src1 == src2 && inst->is_fprem())
    {
        assert(sc->fpstk.stack_loc(1) == src2);
        src2loc = 1;
    }
    if (src1loc == -1 && src2loc == -1)
    {
        // Single source that is not on the stack: the result gets a new entry.
        assert(src2 == NULL);
        sc->fpstk.push(fpstk0, src1, src2, inst, sc->exprs, sc->mm);
    }
    else
    {
        assert(src1loc == 0 || src2loc == 0);
    }
    
    unsigned dstloc = 0;
    if (!dst_in_stk0)
    {
        // dst should be the location of "consume"
        assert(consume != NULL);
        assert(consume->is_reg());
        dstloc = sc->fpstk.find_operand((Reg_Operand *)consume);
        assert(dstloc != -1);
    }
    // Rewrite the sources that live on the FP stack to their st(n) operands.
    if (src1loc != -1)
        inst->replace_src(0, sc->exprs.fp_stk_opnd(src1loc));
    if (src2loc != -1)
        inst->replace_src(1, sc->exprs.fp_stk_opnd(src2loc));
    if (dst->is_status_flags())
    {
        // The destination is the status flags (a compare): one entry is
        // popped from the mimic stack.
        sc->fpstk.pop();
    }
    else
    {
        inst->set_dst(sc->exprs.fp_stk_opnd(dstloc));
        Reg_Operand *rdst = (Reg_Operand *) dst;
        // The stack slot now holds the destination register.
        if (old_dst->is_reg() && !inst->region_dst_live_range_ends)
            sc->fpstk.overwrite(dstloc, rdst);
    }
    
    // It's possible that dst has no more uses in the region.
    // It wasn't dead-code eliminated because it has more global uses.
    // If so, we should store it back.
    if (inst->region_dst_live_range_ends)
    {
        assert(dst_in_stk0);
        Assign_Inst *ainst = new(sc->mm) Assign_Inst(dst, fpstk0, inst->exp, nxt);
        ainst->set_fp_spilled();
        SET_BC(ainst, inst, false);
        // With nothing consumed and no mdo the store must not pop; in every
        // other case the store pops and the mimic stack follows suit.
        if (consume == NULL)
        {
            if (mdo == NULL)
                ainst->set_no_pop_on_fp_store();
            else
                sc->fpstk.pop();
        }
        else
            sc->fpstk.pop();
    }
    
    if (old_dst->is_reg() || old_dst->is_status_flags())
    {}
    else
    {
        // NOTE(review): all assignment paths above return before this point,
        // so this assert appears to fire whenever the branch is reached
        // (i.e. a non-assignment with a non-register, non-status destination)
        // -- confirm whether the branch is meant to be unreachable.
        assert(inst->is_assignment());
        Inst *ainst = new(sc->mm) Assign_Inst(old_dst, fpstk0, inst->exp, nxt);
        ainst->set_fp_spilled();
        SET_BC(ainst, inst, false);
        if (consume == NULL)
        {
            if (mdo == NULL)
                ((Assign_Inst *)ainst)->set_no_pop_on_fp_store();
            if (mdo != NULL)
                sc->fpstk.pop();
        }
        else
            sc->fpstk.pop();
    }
}

// Entry point of the FP spiller for a single instruction.
//   inst                  - the instruction to rewrite.
//   sc                    - spill state: the mimic FP stack (sc->fpstk), the st(0)
//                           operand (sc->fpstk0), the operand factory (sc->exprs),
//                           the memory manager (sc->mm) and the epilog (sc->epilog).
//   should_discard_fpstk0 - when true, an "fstp st(0)" is created in front of the
//                           instruction following "inst" and the mimic stack is
//                           popped before anything else is done.
// Returns "inst", except for a push of a double, where the newly created
// Push_Inst for the high half of the operand is returned instead.
Inst *O3_do_fp_spilling(Inst *inst, Spill_Closure *sc, bool should_discard_fpstk0)
{
    Fp_Stk *fpstk0 = sc->fpstk0;

    // If the current instruction is a call instruction, but it returns a
    // floating-point value that is not used, we need to explicitly pop
    // the result from the FP stack.
    if (should_discard_fpstk0)
    {
        // Create the instruction "fstp  st(0)"
        Assign_Inst *ainst = new(sc->mm) Assign_Inst(fpstk0, fpstk0, inst->exp, inst->next());
        ainst->set_fp_spilled();
        ainst->set_is_fp_pop();
        SET_BC(ainst, inst, false);
        sc->fpstk.pop();
        assert(sc->fpstk.is_empty());
    }

    // Decide whether the instruction needs any spilling.
    if (!needs_spilling(inst))
        return inst;

    // Mark the instruction as processed, and capture its successor and
    // destination before any rewriting takes place.
    inst->set_fp_spilled();
    Inst *nxt = inst->next();
    Operand *old_dst = inst->dst();
    // A globally allocated FP destination is never treated as ending in the region.
    if (old_dst != NULL && old_dst->is_reg() &&
        ((Reg_Operand *)old_dst)->is_globally_allocated_fp())
        inst->region_dst_live_range_ends = 0;

    Operand *src1 = inst->src(0);
    Operand *src2 = NULL;
    if (inst->n_srcs > 1)
        src2 = inst->src(1);

    // If it is a push instruction, the operand's type must be double (otherwise
    // needs_spilling() would have returned false).  The FP stack should be empty
    // before any push sequence.  We have to generate code that pushes both
    // halves of the double operand.
    if (inst->is_push())
    {
        assert(sc->fpstk.is_empty());
        assert(src1->hi_opnd() != NULL);
        // Additional push for the high half of the operand.
        Push_Inst *pinst = new(sc->mm) Push_Inst(src1->hi_opnd(), inst->exp, inst);
        pinst->set_fp_spilled();
        return pinst;
    }

    // After a call instruction, the result is explicitly in the st(0)
    // operand.  The instruction that uses the return value should be
    // storing it in either a Mem_Operand or a Reg_Operand.  If the
    // destination is a Mem_Operand, we can just leave the instruction
    // unchanged; by default, the emitted instruction will be fstp, rather
    // than fst.  If the destination is a Reg_Operand that has no more
    // uses in the region, then we also leave the instruction unchanged,
    // and st(0) will be automatically stored back to the stack.  Otherwise,
    // we replace the top of stack with the destination operand.
    //
    // There's another situation where the src operand is an Fp_Stk operand.
    // In the epilog of a synchronized method that returns an FP result
    // (such as java/lang/Math/random()D), the Fp_Stk operand needs to be
    // spilled to the stack before calling monitor_exit.  We need to recognize
    // this situation and make sure the value on the stack gets written back
    // to memory.
    //
    // How do we tell if this instruction is a special one like that?  As a
    // major hack for now, I'll pattern match on the first instruction of the
    // epilog.
    if (src1->is_fp_stk())
    {
        Reg_Operand *tmp;
        if (inst->region_dst_live_range_ends)
        {
            // The instruction stays unchanged; only the mimic stack is popped.
            tmp = sc->fpstk.pop();
            assert(tmp->is_fp_stk());
            assert(sc->fpstk.is_empty());
            return inst;
        }

        if (sc->epilog != NULL &&
            sc->epilog->IR_instruction_list()->next() == inst)
        {
            // By default, the instruction will pop the stack when storing.
            sc->fpstk.pop();
        }
        else if (old_dst->is_reg())
        {
            // Keep the value on the FP stack and relabel the entry as old_dst.
            assert(sc->fpstk.num_pushed() == 1);
            assert(sc->fpstk.stack_loc(0)->is_fp_stk());
            sc->fpstk.overwrite(0, (Reg_Operand *)old_dst);
            assert(inst->is_assignment());
            ((Assign_Inst *)inst)->set_no_pop_on_fp_store();
        }
        else
        {
            // Non-register destination: the instruction stays unchanged and
            // the mimic stack is popped.
            tmp = sc->fpstk.pop();
            assert(tmp->is_fp_stk());
            assert(sc->fpstk.is_empty());
        }
        
        return inst;
    }

    // The model:
    //  1. spill_dirty_operands
    //     If either of the source operands needs to be written back to
    //     memory, do it now without popping the FP stack.  An operand
    //     needs to be written back if it is dirty, and this is its last
    //     use in the region, but not its last use globally.
    //  2. memory_designated_operand
    //     Find out whether we can operate on one of the operands
    //     directly from memory, rather than loading it onto the FP stack
    //     first.  This would save one instruction, and might keep us from
    //     "overflowing" the FP stack.
    //  3. operand_to_consume
    //     Figure out which Reg_Operands should be consumed in this instruction,
    //     because it is their last use in the region.  It might be the case
    //     that both operands are to be consumed.  That's fine, except that we
    //     have to be careful with fprem and fcomp/fcompp.  fprem can only directly
    //     consume its first source operand, and fcomp can only consume its second
    //     source operand if it consumes the first source operand as well.
    //  4. spill_in_advance
    //     Figure out how much space we'll need on the FP stack to get everything
    //     done.  If there's not currently enough space, spill some operands.
    //     The policy on what to spill is in Fp_Mimic_Stack::spill_one().
    //  5. bring_operands_onto_fp_stack
    //     Load all necessary Reg_Operands onto the stack, if they aren't
    //     already there.  Don't load the mdo.
    //  6. duplicate_operands
    //     If none of the operands is supposed to be consumed, then one of the
    //     operands should be duplicated on the FP stack, and the new copy will be
    //     overwritten by the instruction.  The mdo should never be duplicated.
    //     An operand never needs duplication if this is its last use in the region.
    //     After duplicating the operand, we mark that that operand is supposed
    //     to be consumed.  Note that a compare instruction should never need any
    //     duplication, since it should be the last uses of both operands.
    //     Special case: If it's an fprem instruction, and the source operands
    //     are identical, the source operand needs to be duplicated.
    //  7. prepare_operands
    //     At this point, all required operands are somewhere on the FP stack,
    //     but possibly not in the right positions.  Most instructions require
    //     one of the operands to be in st(0).  Some, like fprem and fcompp, are
    //     more restrictive.  Here we generate the right set of swaps to get the
    //     operands into suitable positions.
    //  8. execute_instruction
    //     Update the existing instruction to reflect the actual operand locations.
    //     This may involve creating a new instruction to store the result back
    //     to memory.
    //  9. consume_remaining
    //     If "other_to_consume" is not NULL, then we can generally modify the
    //     instruction to use the reverse popping mode.  For a compare instruction,
    //     we can change it to the fcompp version.  For fprem, though, we have to
    //     explicitly pop the other operand.
    spill_dirty_operands(inst, sc);
    Operand *mdo = memory_designated_operand(inst, sc);
    Operand *other_to_consume;
    Operand *consume = operand_to_consume(inst, sc, other_to_consume, mdo);
    spill_in_advance(inst, sc, mdo, consume, other_to_consume);
    // src1/src2 were captured above, before any operand of inst is replaced.
    bring_operands_onto_fp_stack(inst, sc, mdo, src1, src2);
    // duplicate_operands() may update "consume" (it is passed by reference).
    duplicate_operands(inst, sc, consume, mdo, old_dst);
    bool dst_in_stk0 = prepare_operands(inst, sc, consume, mdo);
    execute_instruction(inst, sc, dst_in_stk0, src1, src2, consume, mdo, nxt);
    consume_remaining(inst, sc, other_to_consume, nxt);

    return inst;
}

// Records "opnd" as the new top entry of the mimic stack.  The new entry
// starts out clean.  This function creates no instruction for the push
// itself; code is generated only if spill_one() has to make room.
//   opnd             operand to record; must not be NULL.
//   src1, src2       forwarded to spill_one(), which never evicts them.
//   inst, exprs, mm  forwarded to spill_one() for code generation.
void Fp_Mimic_Stack::push(Reg_Operand *opnd, Operand *src1, Operand *src2, Inst *inst,
                          Expressions &exprs, Mem_Manager &mm)
{
    assert(opnd != NULL);

    if (_top >= size)
    {
        // A full stack is not expected at this point, so builds with
        // asserts enabled stop here.  Builds without asserts fall back
        // to evicting one entry.
        assert(0);
        spill_one(src1, src2, inst, exprs, mm);
    }

    // Fill in the slot at the current top, then grow the stack by one.
    _array[_top] = opnd;
    _dirty[_top] = false;
#ifdef PRINTABLE_O3
    _ids[_top] = (opnd->is_fp_stk() ? -1 : opnd->id);
#endif // PRINTABLE_O3
    ++_top;
}

// Creates a popping assignment out of st(0) and removes the top entry from
// the mimic stack.  A dirty top entry is stored into its own operand; a
// clean one is "stored" into st(0) itself.  The stack must not be empty.
// The new instruction is created with "inst" as its anchor (SET_BC is told
// that it was prepended).
void Fp_Mimic_Stack::spill_top(Inst *inst, Expressions &exprs, Mem_Manager &mm)
{
    assert(_top > 0);
    Fp_Stk *fpstk0 = exprs.fp_stk_opnd(0);
    const unsigned top_index = _top - 1;

    // Where to store it: memory or st0.
    Operand *dst = fpstk0;
    if (_dirty[top_index])
        dst = _array[top_index];

    Assign_Inst *pop_store = new(mm) Assign_Inst(dst, fpstk0, inst->exp, inst);
    pop_store->set_is_fp_pop();
    SET_BC(pop_store, inst, true);

    // Keep the mimic stack in step with the generated pop.
    pop();
}

// Evicts one entry from the mimic stack and generates the code to do so.
//
// Choosing the victim:
//   - Never an entry that is src1 or src2, or that is_fp_stk().
//   - Never an entry that is globally allocated.
//   - Among the rest, the lowest-indexed clean (not dirty) entry wins.
//   - If there is no clean candidate, the highest-indexed dirty candidate
//     is used.
// At least one candidate must exist; this is only checked by an assert.
//
// A clean victim is removed in place: a popping assignment from st(0) into
// the victim's stack location is created.  A dirty victim is first
// exchanged to st(0) (if it is not already there) and then handed to
// spill_top().
void Fp_Mimic_Stack::spill_one(Operand *src1, Operand *src2, Inst *inst,
                               Expressions &exprs, Mem_Manager &mm)
{
    const unsigned none = (unsigned)-1;
    unsigned clean_slot = none;
    unsigned dirty_slot = none;
    unsigned i;
    // The scan ends as soon as a clean candidate has been found.
    for (i=0; i<_top && clean_slot == none; i++)
    {
        const bool pinned = (_array[i] == src1 || _array[i] == src2 ||
                             _array[i]->is_fp_stk() ||
                             _array[i]->is_globally_allocated_fp());
        if (pinned)
            continue;
        if (_dirty[i])
            dirty_slot = i;
        else
            clean_slot = i;
    }
    const unsigned victim = (clean_slot != none ? clean_slot : dirty_slot);
    assert(victim != none);

    Fp_Stk *fpstk0 = exprs.fp_stk_opnd(0);
    // Position of the victim counted from the top of the stack (0 == st(0)).
    const unsigned depth = _top - victim - 1;

    if (_dirty[victim])
    {
        // Swap it to stk(0) if it's not already there, then spill the top.
        if (depth != 0)
        {
            Fp_Stk *fpstkn = exprs.fp_stk_opnd(depth);
            Fxch_Inst *xchg = new(mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst);
            SET_BC(xchg, inst, true);
            swap(depth);
        }
        spill_top(inst, exprs, mm);
        return;
    }

    // Clean victim: update the mimic stack first, then create the popping
    // assignment from st(0) into the victim's stack location.
    store_and_pop(depth);
    Fp_Stk *fpstkn = exprs.fp_stk_opnd(depth);
    Assign_Inst *pop_store = new(mm) Assign_Inst(fpstkn, fpstk0, inst->exp, inst);
    pop_store->set_is_fp_pop();
    SET_BC(pop_store, inst, true);
}

// Rebuilds this stack so that it has the same configuration as "target".
//
// Precondition: this stack is empty or holds exactly one operand.  That
// single operand is normally part of the target stack; when it is not, it
// is spilled first and the stack is treated as empty.
//
// Empty stack: the target entries are loaded bottom to top.
//
// One surviving operand that belongs at target index k:
//   1. Load target entries 1 .. k-1 (everything below the survivor's final
//      position except the very bottom).
//   2. Load the target's bottom entry; it lands on top.
//   3. Exchange top and bottom, which puts the bottom entry and the
//      survivor in their final positions.
//   4. Load target entries k+1 and up.
// Steps 1-3 are skipped when k is 0 (the survivor is already the bottom).
void Fp_Mimic_Stack::convert_to(Fp_Mimic_Stack *target, Inst *inst,
                                Expressions &exprs, Mem_Manager &mm)
{
    const unsigned not_found = (unsigned)-1;
    unsigned i;

    assert(_top <= 1);
    unsigned split = not_found;
    if (_top == 1)
    {
        split = target->find_operand(stack_loc(0));
        // Asserting that the operand is always found fails sometimes, so
        // spill the top of stack if it isn't needed in the target stack.
        if (split == not_found)
            spill_top(inst, exprs, mm);
    }

    Fp_Stk *fpstk0 = exprs.fp_stk_opnd(0);
    Inst *prev = inst->prev();
    Reg_Operand *src;
    Assign_Inst *load;

    // Index of the first target entry handled by the final sequential pass.
    // It stays 0 when the stack is (now) empty.
    unsigned resume = 0;
    if (split != not_found)
    {
        // Array index in the target of the operand we already hold.
        const unsigned realsplit = target->_top - split - 1;
        if (realsplit > 0)
        {
            // Load everything below the split point, except the very bottom.
            for (i=1; i<realsplit; i++)
            {
                src = target->_array[i];
                // XXX- inst->exp is probably the wrong thing to use.
                load = new(mm) Assign_Inst(fpstk0, src, inst->exp, inst);
                SET_BC(load, prev, true);
                push(src, NULL, NULL, inst, exprs, mm);
            }
            // Load the bottom of the target stack.
            src = target->_array[0];
            load = new(mm) Assign_Inst(fpstk0, src, inst->exp, inst);
            SET_BC(load, prev, true);
            push(src, NULL, NULL, inst, exprs, mm);
            // Swap the current top and bottom.
            const unsigned bottom_depth = _top - 1;
            Fp_Stk *fpstkn = exprs.fp_stk_opnd(bottom_depth);
            Fxch_Inst *xchg = new(mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst);
            SET_BC(xchg, prev, true);
            swap(bottom_depth);
        }
        resume = realsplit + 1;
    }

    // Load whatever is still missing, in target order.
    for (i=resume; i<target->_top; i++)
    {
        src = target->_array[i];
        // XXX- inst->exp is probably the wrong thing to use.
        load = new(mm) Assign_Inst(fpstk0, src, inst->exp, inst);
        SET_BC(load, prev, true);
        push(src, NULL, NULL, inst, exprs, mm);
    }

#ifdef _DEBUG
    // Both stacks must now agree entry for entry.
    assert(_top == target->_top);
    for (i=0; i<_top; i++)
    {
        assert(_array[i] == target->_array[i]);
    }
#endif // _DEBUG
}

void Fp_Mimic_Stack::spill_specific(unsigned loc, Inst *inst,
                                    Expressions &exprs, Mem_Manager &mm)
{
    assert(loc < _top);
    store_and_pop(loc);
    Fp_Stk *fpstk0 = exprs.fp_stk_opnd(0);
    Fp_Stk *fpstkn = exprs.fp_stk_opnd(loc);
    Assign_Inst *ainst = new(mm) Assign_Inst(fpstkn, fpstk0, inst->exp, inst);
    ainst->set_is_fp_pop();
    SET_BC(ainst, inst, true);
}

// Converts the FP stack into the target configuration.  All entries on the
// stack should be global.  Entries that the target does not contain are
// removed; nothing should ever need to be added.  The remaining entries are
// then reordered with exchanges until the stack matches the target.
void Fp_Mimic_Stack::convert_to_global(Fp_Mimic_Stack *target, Inst *inst,
                                       Expressions &exprs, Mem_Manager &mm)
{
    Fp_Stk *fpstk0 = exprs.fp_stk_opnd(0);
    unsigned i;

    // Gather every entry that is absent from the target, then remove them.
    Reg_Operand *opnds[size];
    unsigned num = 0;
    for (i=0; i<_top; i++)
    {
        if (target->find_operand(_array[i]) != (unsigned)-1)
            continue;
        opnds[num] = _array[i];
        num++;
    }
    remove_operands(opnds, num, inst, exprs, mm);
    assert(_top == target->_top);

    // Now do the reordering.  The algorithm is the following.
    // If the stack is currently in the correct order, stop.
    // Otherwise, look at the top of stack.  If it currently
    // has the correct element, swap it with some location that
    // currently has the wrong element.  Now, swap the top of
    // stack into its correct location, and repeat until the
    // top of stack has the correct value (this should require
    // at most Fp_Mimic_Stack::size swaps).  Then go back to the
    // very first step (this should also require at most
    // Fp_Mimic_Stack::size swaps).
    //
    // The theoretical model on which this algorithm is based is
    // that the reordering can be expressed as a composition of
    // cyclic permutations, and we can just reorder one cyclic
    // permutation at a time until all are ordered.
    //
    // Both loops are limited to "size" iterations so that we don't
    // end up in an infinite loop in the compiler if something goes
    // horribly wrong.
    //
    // NOTE(review): the exchanges created here are not passed through
    // SET_BC, unlike the ones created in spill_one() and convert_to().
    // Confirm whether that is intentional.
    unsigned outer = 0;
    while (outer < size && !equals(target))
    {
        // Make sure the top does not contain the right value.
        if (_array[_top-1] == target->_array[_top-1])
        {
            // Find the lowest slot that still holds the wrong element.
            i = 0;
            while (i < _top && _array[i] == target->_array[i])
                i++;
            assert(i < _top);
            // Exchange i and _top-1.
            const unsigned depth = _top - i - 1;
            swap(depth);
            Fp_Stk *fpstkn = exprs.fp_stk_opnd(depth);
            // XXX- inst->exp is the wrong thing to use.
            new(mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst);
        }

        // Walk one cycle: keep sending the top of the stack to the place
        // where the target wants it until the top itself is correct.
        unsigned inner = 0;
        while (inner < size && _array[_top-1] != target->_array[_top-1])
        {
            // Swap the top of the stack into its correct location.
            unsigned loc = target->find_operand(_array[_top-1]);
            assert(loc != 0);
            assert(loc != (unsigned)-1);
            swap(loc);
            Fp_Stk *fpstkn = exprs.fp_stk_opnd(loc);
            // XXX- inst->exp is the wrong thing to use.
            new(mm) Fxch_Inst(fpstk0, fpstkn, inst->exp, inst);
            inner++;
        }
        assert(inner < size);

        outer++;
    }
    assert(outer < size);

#ifdef _DEBUG
    // The stack must now match the target entry for entry.
    for (i=0; i<_top; i++)
        assert(_array[i] == target->_array[i]);
#endif // _DEBUG
}

// Removes each of the "num" operands listed in opnds[] from the FP stack,
// generating code for every removal through spill_specific().  Every listed
// operand must currently be present on the FP stack.
void Fp_Mimic_Stack::remove_operands(Reg_Operand **opnds, unsigned num, Inst *inst,
                                     Expressions &exprs, Mem_Manager &mm)
{
    Reg_Operand **const end = opnds + num;
    for (Reg_Operand **cur = opnds; cur != end; cur++)
    {
        // Look the location up afresh each time: earlier removals change
        // the stack.
        unsigned loc = find_operand(*cur);
        assert(loc != (unsigned)-1);
        spill_specific(loc, inst, exprs, mm);
    }
}

// Removes each of the "num" operands listed in opnds[] from the mimic stack
// without generating any code: only store_and_pop() is applied.  Every
// listed operand must currently be present on the stack.
void Fp_Mimic_Stack::remove_operands(Reg_Operand **opnds, unsigned num)
{
    Reg_Operand **const end = opnds + num;
    for (Reg_Operand **cur = opnds; cur != end; cur++)
    {
        // Look the location up afresh each time: earlier removals change
        // the stack.
        unsigned loc = find_operand(*cur);
        assert(loc != (unsigned)-1);
        store_and_pop(loc);
    }
}


