// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/flow_graph.cpp,v 1.4 2001/10/15 09:57:10 xli18 Exp $
//



#include "defines.h"
#include <stdio.h>
#include <iostream.h>
#include <assert.h>
#include <string.h>  // for memset()
#include "ir.h"
#include "stack.h"
#include "flow_graph.h"
#include "Mem_Manager.h"
#include "local_cse.h"
#include "handles.h"
#include "jit_common.h"

// Update the return address stack for a single instruction.
static void process_bytecode(Return_Address_Tracking *stk, const unsigned char *bc, unsigned idx,
                             Class_Handle class_handle, Compile_Handle cmpl_handle)
{
    // 99: something special
    // positive: # to push
    // negative: # to pop
    static int operation[256] =
    {
      //0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
        0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2,  // 00-0f
        1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2,  // 10-1f
        2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1,-1, 0,  // 20-2f
       -1, 0,-1,-1,-1,-1,99,99,99,99,99,99,99,99,99,99,  // 30-3f
       99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,-3,  // 40-4f
       -4,-3,-4,-3,-3,-3,-3,-1,-2,99,99,99,99,99,99,99,  // 50-5f
       -1,-2,-1,-2,-1,-2,-1,-2,-1,-2,-1,-2,-1,-2,-1,-2,  // 60-6f
       -1,-2,-1,-2, 0, 0, 0, 0,-1,-1,-1,-1,-1,-1,-1,-2,  // 70-7f
       -1,-2,-1,-2, 0, 1, 0, 1,-1,-1, 0, 0, 1, 1,-1, 0,  // 80-8f
       -1, 0, 0, 0,-3,-1,-1,-3,-3,-1,-1,-1,-1,-1,-1,-2,  // 90-9f
       -2,-2,-2,-2,-2,-2,-2, 0,99, 0,-1,-1,-1,-2,-1,-2,  // a0-af
       -1, 0,99,99,99,99,99,99,99,99, 0, 1, 0, 0, 0, 0,  // b0-bf
        0, 0,-1,-1,99,99,-1,-1, 0,99, 0, 0, 0, 0, 0, 0,  // c0-cf
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // d0-df
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // e0-ef
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // f0-ff
    };
        
    unsigned tmp1, tmp2, tmp3, tmp4, i;
    unsigned n_words;
    Loader_Exception exc;
	Field_Handle fh;
    Method_Handle mh;
	Java_Type type;
    int j;

    int change = operation[bc[idx]];
    
    if (change < 0)
    {
        for (j=0; j<-change; j++)
            stk->pop();
        return;
    }
    else if (change != 99)
    {
        for (j=0; j<change; j++)
            stk->push();
        return;
    }

    switch (bc[idx])
	{
	case 0x36: // istore, p.275
	case 0x38: // fstore, p.223
	case 0x3a: // astore, p.165
        stk->store((unsigned)(bc[idx+1]));
        break;
	case 0x37: // lstore, p.307
	case 0x39: // dstore, p.195
        stk->store((unsigned)(bc[idx+1]));
        stk->store((unsigned)(bc[idx+1])+1);
        break;
	case 0x4b:
	case 0x4c:
	case 0x4d:
	case 0x4e: // astore_{0,1,2,3}, p.166
        stk->store((unsigned)(bc[idx]-0x4b));
        break;
	case 0x3b:
	case 0x3c:
	case 0x3d:
	case 0x3e: // istore_{0,1,2,3}, p.276
        stk->store((unsigned)(bc[idx]-0x3b));
        break;
	case 0x43:
	case 0x44:
	case 0x45:
	case 0x46: // fstore_{0,1,2,3}, p.224
        stk->store((unsigned)(bc[idx]-0x43));
        break;
	case 0x3f:
	case 0x40:
	case 0x41:
	case 0x42: // lstore_{0,1,2,3}, p.308
        stk->store((unsigned)(bc[idx]-0x3f));
        stk->store((unsigned)(bc[idx]-0x3f)+1);
        break;
	case 0x47:
	case 0x48:
	case 0x49:
	case 0x4a: // dstore_{0,1,2,3}, p.196
        stk->store((unsigned)(bc[idx]-0x47));
        stk->store((unsigned)(bc[idx]-0x47)+1);
        break;
	case 0x59: // dup, p.198
        tmp1 = stk->pop();
        stk->push(tmp1);
        stk->push(tmp1);
        break;
	case 0x5a: // dup_x1, p.199
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        stk->push(tmp1);
        stk->push(tmp2);
        stk->push(tmp1);
        break;
	case 0x5b: // dup_x2, p.200
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        tmp3 = stk->pop();
        stk->push(tmp1);
        stk->push(tmp3);
        stk->push(tmp2);
        stk->push(tmp1);
        break;
	case 0x5c: // dup2, p.201
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        stk->push(tmp2);
        stk->push(tmp1);
        stk->push(tmp2);
        stk->push(tmp1);
        break;
	case 0x5d: // dup2_x1, p.202
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        tmp3 = stk->pop();
        stk->push(tmp2);
        stk->push(tmp1);
        stk->push(tmp3);
        stk->push(tmp2);
        stk->push(tmp1);
        break;
	case 0x5e: // dup2_x2, p.203
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        tmp3 = stk->pop();
        tmp4 = stk->pop();
        stk->push(tmp2);
        stk->push(tmp1);
        stk->push(tmp4);
        stk->push(tmp3);
        stk->push(tmp2);
        stk->push(tmp1);
        break;
	case 0x5f: // swap, p.334
        tmp1 = stk->pop();
        tmp2 = stk->pop();
        stk->push(tmp1);
        stk->push(tmp2);
        break;
	case 0xa8: // jsr, p.280
        stk->push(idx+OFFSET2(bc,idx+1));
        break;
	case 0xb2: // getstatic, p.228
        fh = resolve_static_field(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = field_get_type(fh);
        n_words = num_words_of_type(type);
        for (i=0; i<n_words; i++)
            stk->push();
		break;
	case 0xb4: // getfield, p.226
        fh = resolve_nonstatic_field(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = field_get_type(fh);
        n_words = num_words_of_type(type) - 1;
        for (i=0; i<n_words; i++)
            stk->push();
		break;
	case 0xb3: // putstatic, p.327
        fh = resolve_static_field(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = field_get_type(fh);
        n_words = num_words_of_type(type);
        for (i=0; i<n_words; i++)
            stk->pop();
        break;
	case 0xb5: // putfield, p.325
        fh = resolve_nonstatic_field(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = field_get_type(fh);
        n_words = num_words_of_type(type) + 1;
        for (i=0; i<n_words; i++)
            stk->pop();
		break;
	case 0xb6: // invokevirtual, p.267
	case 0xb7: // invokespecial, p.261
        if (bc[idx] == 0xb6) // invokevirtual
            mh = resolve_virtual_method(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        else // invokespecial
            mh = resolve_special_method(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = method_get_return_type(mh);
        n_words = n_words_of_method_arg_type(mh) + 1;
        for (i=0; i<n_words; i++)
            stk->pop();
        n_words = num_words_of_type(type);
        for (i=0; i<n_words; i++)
            stk->push();
        break;
	case 0xb8: // invokestatic, p.265
        mh = resolve_static_method(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = method_get_return_type(mh);
        n_words = n_words_of_method_arg_type(mh);
        for (i=0; i<n_words; i++)
            stk->pop();
        n_words = num_words_of_type(type);
        for (i=0; i<n_words; i++)
            stk->push();
		break;
	case 0xb9: // invokeinterface, p.258
        mh = resolve_interface_method(cmpl_handle,class_handle,OFFSET2(bc,idx+1),&exc);
        type = method_get_return_type(mh);
        n_words = bc[idx+3];
        for (i=0; i<n_words; i++)
            stk->pop();
        n_words = num_words_of_type(type);
        for (i=0; i<n_words; i++)
            stk->push();
		break;
	case 0xc4: // wide, p.337
        {
			int indexword = OFFSET2(bc,idx+2);
			switch (bc[idx+1])
            {
            case 0x84: // iinc, p.251
            case 0xa9: // ret, p.329
				break;
            case 0x15: // iload, p.252
            case 0x17: // fload, p.215
            case 0x19: // aload, p.160
				stk->push();
				break;
            case 0x16: // lload, p.296
            case 0x18: // dload, p.187
				stk->push();
				stk->push();
				break;
            case 0x36: // istore, p.275
            case 0x38: // fstore, p.223
            case 0x3a: // astore, p.165
				stk->store(indexword);
				break;
            case 0x37: // lstore, p.307
            case 0x39: // dstore, p.195
				stk->store(indexword);
				stk->store(indexword+1);
				break;
            default:
				break;
            }
        }
		break;
	case 0xc5: // multianewarray, p.316
        for (i=0; i<(unsigned)(bc[idx+3])-1; i++)
            stk->pop();
        break;
	case 0xc9: // jsr_w, p.281
        stk->push(idx+OFFSET4(bc,idx+1));
        break;
    default:
        assert(0);
        break;
      }
}

// This is a recursive routine (similar to CG_Prepass) that creates
// the flow graph structure.
//
//   bc_start - bytecode index of the first instruction of the basic block
//   stk      - return-address tracker describing the state on entry to the
//              block; it is updated in place while the block is scanned
//
// Returns the Cfg_Node for the block starting at bc_start.  If that node was
// already initialized by an earlier call, it is returned immediately and stk
// is not touched.
//
// The node objects themselves are not allocated here: the node is fetched
// from _prepass->bytecode_info[bc_start].fgnode and this routine fills in its
// bytecode range and out-edges.
Cfg_Node *Flow_Graph::create_flow_graph(unsigned bc_start, Return_Address_Tracking *stk)
{
    assert(bc_start < _bc_length);
    // If we already created a node, return that node.
    assert(bc_start < _prepass->code_length);
    if (_prepass->bytecode_info[bc_start].fgnode_inited)
        return _prepass->bytecode_info[bc_start].fgnode;
    assert(stk != NULL);
    
    // Otherwise, create the node, find its range of bytecodes, and
    // create the necessary out-edges based on the last instruction in
    // the block.
    
    // find range of bytecodes
    unsigned cur_bc = bc_start;
    unsigned last_stmt;
    do
    {
        // Apply this instruction's effect to the tracker, then step over it.
        process_bytecode(stk, _bytecodes, cur_bc, _c_handle, _cmpl_handle);
        last_stmt = cur_bc;
        cur_bc += instruction_length(_bytecodes,cur_bc);
        assert(cur_bc >= _bc_length || cur_bc < _prepass->code_length);
    }
    while (cur_bc < _bc_length && !_prepass->bytecode_info[cur_bc].is_block_entry);
    // At this point, bc_start is the first bytecode of the basic block,
    // cur_bc is the first bytecode of the next basic block, and
    // last_stmt points to the last bytecode statement of the current basic block.
    
    assert(bc_start < _prepass->code_length);
    Cfg_Node *result = _prepass->bytecode_info[bc_start].fgnode;
    // Mark the node as initialized before recursing so that cycles in the
    // graph terminate at the early return above.
    _prepass->bytecode_info[bc_start].fgnode_inited = 1;
    result->set_bytecodes(bc_start, cur_bc-bc_start);
    //_prepass->bytecode_info[bc_start].fgnode = result;
    
    // Create the out-edges according to the instruction that ends the block.
    switch (_bytecodes[last_stmt])
    {
    case 0x99:
    case 0x9a:
    case 0x9b:
    case 0x9c:
    case 0x9d:
    case 0x9e: // if{eq,ne,lt,ge,gt,le}, p.247
    case 0x9f:
    case 0xa0:
    case 0xa1:
    case 0xa2:
    case 0xa3:
    case 0xa4: // if_icmp{eq,ne,lt,ge,gt,le}, p.245
    case 0xa5:
    case 0xa6: // if_acmp{eq,ne}, p.244
    case 0xc6: // ifnull, p.250
    case 0xc7: // ifnonnull, p.249
        // Edge 0 is the fall-through successor (given a clone of the
        // tracker); edge 1 is the branch target (given the tracker itself).
        result->add_edge(mem_manager, create_flow_graph(cur_bc, stk->clone()));
        result->add_edge(mem_manager,
            create_flow_graph(last_stmt+OFFSET2(_bytecodes, last_stmt+1), stk));
        break;
    case 0xa8: // jsr, p.280
        {
            unsigned target = last_stmt+OFFSET2(_bytecodes, last_stmt+1);
            result->add_edge(mem_manager, create_flow_graph(target, stk));
            // If the target subroutine has a "ret", add an edge and process
            // the successor of the jsr instruction.
            // Be careful that the stack is correct for the successor.
            assert(target < _prepass->code_length);
            Cfg_Node *target_ret = _prepass->bytecode_info[target].subr_ret;
            if (target_ret != NULL)
            {
                // Continue after the jsr with a copy of the tracker state
                // that was recorded at the subroutine's "ret".
                stk = _prepass->bytecode_info[target].stk->clone();
                Cfg_Node *jsr_succ = create_flow_graph(cur_bc, stk);
                result->set_jsr_succ(jsr_succ);
                jsr_succ->set_jsr_pred(result);
                target_ret->add_edge(mem_manager, jsr_succ);
            }
        }
        break;
    case 0xa7: // goto, p.230
        result->add_edge(mem_manager,
            create_flow_graph(last_stmt+OFFSET2(_bytecodes, last_stmt+1), stk));
        break;
    case 0xaa: // tableswitch, p.335
        {
            // Operands start at the first 4-byte-aligned index after the opcode.
            int new_idx = ((last_stmt + 4) & (~3));
            int default_offset = OFFSET4(_bytecodes,new_idx);
            int low = OFFSET4(_bytecodes,new_idx+4);
            int high = OFFSET4(_bytecodes,new_idx+8);
            int i;
            tableswitch_info *extra = new(mem_manager) tableswitch_info;
            extra->low = low;
            extra->high = high;
            result->extra_info = extra;
            new_idx += 12;
            int n_entries = high - low + 1;
            // Edge 0 is the default target; edges 1..n_entries follow the
            // jump table order.  Every target gets its own tracker clone.
            result->add_edge(mem_manager, create_flow_graph(last_stmt+default_offset, stk->clone()));
            for (i=0; i<n_entries; i++)
            {
                int offset = OFFSET4(_bytecodes,new_idx);
                result->add_edge(mem_manager, create_flow_graph(last_stmt+offset, stk->clone()));
                new_idx += 4;
            }
        }
        break;
    case 0xab: // lookupswitch, p.300
        {
            // Operands start at the first 4-byte-aligned index after the opcode.
            int new_idx = ((last_stmt + 4) & (~3));
            int default_offset = OFFSET4(_bytecodes,new_idx);
            int npairs = OFFSET4(_bytecodes,new_idx+4);
            int i;
            lookupswitch_info *extra = new(mem_manager) lookupswitch_info;
            extra->size = npairs;
            extra->node = result;
            extra->matches = (int *)mem_manager.alloc(extra->size * sizeof(*extra->matches));
            result->extra_info = extra;
            new_idx += 8;
			Cfg_Node* default_node = create_flow_graph(last_stmt+default_offset, stk->clone()) ;
            result->add_edge(mem_manager, default_node);
			//::Remember the targets of switch , for future optimization.
			// offsets[0] describes the default target and offsets[i+1] the
			// i-th match pair; each entry is an index into result's out-edges.
			extra->offsets = (unsigned *)mem_manager.alloc((extra->size+1) * sizeof(*extra->offsets));
			extra->offsets[0] = 0 ;//default == 0
			assert(result->out_edge_size()==1) ;
			// Index that the next newly added out-edge will receive.
			int real_edge = 1 ;
			//::
            for (i=0; i<npairs; i++)
            {
                int match = OFFSET4(_bytecodes,new_idx);
                int offset = OFFSET4(_bytecodes,new_idx+4);
                extra->matches[i] = match;
				//::Only add edges for different nodes. But remember all the targets in lookupswitch_info!
				Cfg_Node* node = create_flow_graph(last_stmt+offset, stk->clone()) ;
				int j;
				// Search the entries recorded so far (offsets[0..i]) for an
				// out-edge that already leads to this node.
				for(j = i ; j>=0 ; j--){
					if(node == result->out_edges(extra->offsets[j])){
						break ;
					}
				}
				if(j>=0)
					// Already present: reuse the existing edge index.
					extra->offsets[i+1] = extra->offsets[j] ;
				else{
					extra->offsets[i+1] = real_edge++ ;
					result->add_edge(mem_manager,node);
				}
				//::
                new_idx += 8;
            }
        }
        break;
    case 0xa9: // ret, p.329
        // Record that this is the basic block containing the "ret" for the
        // appropriate subroutine.
        // The tracked value of the ret's local variable is used as the index
        // of the subroutine's prepass entry (process_bytecode pushes the jsr
        // target for jsr/jsr_w).  No out-edge is added here; the jsr cases
        // add the edge from this node to the jsr successor.
        assert(last_stmt+1 < _bc_length);
        assert(stk->getvar(_bytecodes[last_stmt+1]) < _prepass->code_length);
        _prepass->bytecode_info[stk->getvar(_bytecodes[last_stmt+1])].subr_ret = result;
        _prepass->bytecode_info[stk->getvar(_bytecodes[last_stmt+1])].stk = stk->clone();
        break;
    case 0xac: // ireturn, p.272
    case 0xb0: // areturn, p.163
    case 0xae: // freturn, p.222
    case 0xad: // lreturn, p.304
    case 0xaf: // dreturn, p.194
    case 0xb1: // return, p.330
    case 0xbf: // athrow, p.167
        // No out-edges are created here for returns and throws.
        break;
    case 0xc8: // goto_w, p.231
        result->add_edge(mem_manager, create_flow_graph(last_stmt+OFFSET4(_bytecodes, last_stmt+1), stk));
        break;
    case 0xc9: // jsr_w, p.281
        {
            // Identical to the jsr case except for the 4-byte branch offset.
            unsigned target = last_stmt+OFFSET4(_bytecodes, last_stmt+1);
            result->add_edge(mem_manager, create_flow_graph(target, stk));
            // If the target subroutine has a "ret", add an edge and process
            // the successor of the jsr instruction.
            // Be careful that the stack is correct for the successor.
            assert(target < _prepass->code_length);
            Cfg_Node *target_ret = _prepass->bytecode_info[target].subr_ret;
            if (target_ret != NULL)
            {
                stk = _prepass->bytecode_info[target].stk->clone();
                Cfg_Node *jsr_succ = create_flow_graph(cur_bc, stk);
                result->set_jsr_succ(jsr_succ);
                jsr_succ->set_jsr_pred(result);
                target_ret->add_edge(mem_manager, jsr_succ);
            }
        }
        break;
    case 0xc4: // wide, p.337
        if (_bytecodes[last_stmt+1] == 0xa9) // wide ret
        {
            // Record that this is the basic block containing the "ret" for the
            // appropriate subroutine.
            assert(last_stmt+3 < _bc_length);
            // 16-bit unsigned local variable index, high byte first.
            unsigned varno = (_bytecodes[last_stmt+2] << 8) + _bytecodes[last_stmt+3];
            assert(stk->getvar(varno) < _prepass->code_length);
            _prepass->bytecode_info[stk->getvar(varno)].subr_ret = result;
            _prepass->bytecode_info[stk->getvar(varno)].stk = stk->clone();
        }
        else
        {
            // Any other wide-prefixed instruction simply falls through.
            result->add_edge(mem_manager,create_flow_graph(cur_bc, stk));
        }
        break;
    default:
        // Ordinary instruction: the block ended because the next bytecode
        // is a block entry, so add the fall-through edge.
        result->add_edge(mem_manager,create_flow_graph(cur_bc, stk));
        break;
    }
    
    return result;
}

// Allocates the epilog node of this flow graph and fills in its IR.
//
//   exprs          - expression table handed to ir_make_epilog()
//   pred           - node whose linearization_node() is passed to the new
//                    node's constructor
//   enclosing_subr - value recorded as the epilog's enclosing subroutine
//
// Side effect: return_inst is set to the result of ir_make_epilog().
// Returns the newly allocated node; it is not stored in _epilog here.
Cfg_Node *Flow_Graph::create_epilog(Expressions &exprs, Cfg_Node *pred,
                                    Cfg_Node *enclosing_subr)
{
    Cfg_Node *const epilog_node =
        new(mem_manager) Cfg_Node(mem_manager, 0, this, pred->linearization_node());
    epilog_node->set_enclosing_subr(enclosing_subr);

    return_inst = ir_make_epilog(exprs, epilog_node, mem_manager);
    return epilog_node;
}

#ifdef PRINTABLE_O3
static void make_prefix_string(char *str, Flow_Graph *fg)
{
    char tmp[1000];
    if (fg->calling_fg == NULL)
    {
        str[0] = '\0';
        return;
    }
    make_prefix_string(str,fg->calling_fg);
    sprintf(tmp,"%u.",fg->calling_bc_idx);
    strcat(str,tmp);
}
#endif // PRINTABLE_O3

// Recursively records, for every node reachable from `node`, which
// subroutine (jsr target) encloses it.
//
//   node - node to process
//   subr - enclosing subroutine inherited from the predecessor (may be NULL)
//
// A node starts a new subroutine when the prepass has a subr_ret entry for
// its first bytecode and the node has more than one incoming edge; in that
// case the node itself becomes the enclosing subroutine from there on.
// Nodes of another flow graph, nodes already visited in this traversal and
// nodes whose subroutine is already set are left alone.
void Flow_Graph::mark_subroutines_node(Cfg_Node *node, Cfg_Node *subr)
{
    if (node->flowgraph != this || node->latest_traversal >= traversal_number)
        return;
    node->latest_traversal = traversal_number;
    if (node->already_set_subr())
        return;

    assert(node->first_bc_idx() == NO_BC_IDX || node->first_bc_idx() < _prepass->code_length);
    // Have we just entered a new subroutine?
    const bool enters_subroutine =
        node->first_bc_idx() != NO_BC_IDX &&
        _prepass->bytecode_info[node->first_bc_idx()].subr_ret != NULL &&
        node->in_edge_size() > 1;
    if (enters_subroutine)
        subr = node;

    // Record the result on the node.
    node->set_enclosing_subr(subr);
    node->set_already_set_subr();

    // A node ends in a "ret" when the prepass subr_ret entry of the current
    // subroutine points at it.  If such a node has more than one out edge,
    // those edges are not followed (its exception handlers still are).
    assert(subr == NULL || node->flowgraph != subr->flowgraph || subr->first_bc_idx() < _prepass->code_length);
    const bool stops_at_shared_ret =
        subr != NULL &&
        node->flowgraph == subr->flowgraph &&
        _prepass->bytecode_info[subr->first_bc_idx()].subr_ret == node &&
        node->out_edge_size() > 1;
    if (!stops_at_shared_ret)
    {
        Cfg_Int succ_idx;
        for (succ_idx = 0; succ_idx < node->out_edge_size(); succ_idx++)
        {
            Cfg_Node *succ = node->out_edges(succ_idx);
            mark_subroutines_node(succ, subr);

            // When the successor is a subroutine entry, also continue from
            // the out edges of that subroutine's "ret" node, still using the
            // current enclosing subroutine.
            assert(succ->first_bc_idx() < _prepass->code_length);
            Cfg_Node *succ_ret = _prepass->bytecode_info[succ->first_bc_idx()].subr_ret;
            if (succ_ret == NULL)
                continue;
            Cfg_Int ret_idx;
            for (ret_idx = 0; ret_idx < succ_ret->out_edge_size(); ret_idx++)
                mark_subroutines_node(succ_ret->out_edges(ret_idx), subr);
        }
    }
#if 1
    Eh_Node *handlers = node->eh_out_edge();
    if (handlers != NULL)
    {
        // XXX- problem.  Handlers are marked here as being outside every
        // subroutine (NULL), which is not necessarily true: e.g. a catch
        // clause nested inside a finally clause is still within the outer
        // subroutine.  The theory is that the proper nesting of a handler is
        // the least upper bound of all subroutines with edges into or out of
        // it.  The next higher nesting level for "node" is
        // node->enclosing_subr->jsr_succ->enclosing_subr, unless
        // node->enclosing_subr is NULL, in which case node is already at the
        // top.
        Cfg_Int handler_idx;
        for (handler_idx = 0; handler_idx < handlers->out_edge_size(); handler_idx++)
            mark_subroutines_node(handlers->out_edges(handler_idx)->handler, NULL);
    }
#endif
}

// Intended algorithm (NOT implemented yet -- the body below is a stub):
//
// To mark the enclosing subroutines of an exception handler, we start by
// taking the least common ancestor of the enclosing subroutines of all the
// in edges.  Then we traverse the out edges of the exception node, visiting
// only nodes whose enclosing subroutine hasn't been set.  When we encounter
// one whose enclosing subroutine has been set, we take the least common
// ancestor of that node's enclosing subroutine and our current result.  When
// this traversal completes, our result is the enclosing subroutine of the
// beginning of the exception handler.  One other thing: we must be sure not
// to traverse past a basic block ending in a "ret" instruction.
//
// Then, we call mark_subroutines_node() on the out edges of the handler.
//
// Current behavior: walks the in edges of `eh` and does nothing with them.
// The original sketch accumulated the least upper bound through
// update_lub(result, result_set); the accumulator variables were unused and
// have been removed until that helper exists.
void Flow_Graph::mark_subroutines_eh(Eh_Node *eh)
{
    Cfg_Int i;
    for (i=0; i<eh->in_edge_size(); i++)
    {
        // TODO: fold the enclosing subroutine of in edge i into the result.
        //update_lub(result, result_set);
    }
}

// Assigns enclosing subroutines for the whole flow graph.
//
//   caller_subr - enclosing subroutine to start from at the prolog
//
// Runs two traversals, each under a fresh traversal number: first the nodes
// reachable from prolog(), then every entry of the _handlers list.
void Flow_Graph::mark_subroutines(Cfg_Node *caller_subr)
{
    // Pass 1: ordinary nodes, starting at the prolog.
    traversal_number++;
    mark_subroutines_node(prolog(), caller_subr);

    // Pass 2: exception handler nodes.  _handlers is a circular list whose
    // head is a sentinel, so stop when we are back at the head.
    traversal_number++;
    Eh_Node *cursor = _handlers.next();
    while (cursor != &_handlers)
    {
        mark_subroutines_eh(cursor);
        cursor = cursor->next();
    }
}

// Creates the initial flow graph based on the given byte codes.
//
//   bc, code_length - bytecode array of the method and its length
//   maxLocals       - passed to the Return_Address_Tracking constructor
//   prepass         - prepass results; its bytecode_info entries are read
//                     (is_block_entry) and written (fgnode) here
//   m               - memory manager used for every allocation made here
//   inline_depth    - stored in inlining_depth
//   maxStack        - stored in max_stack
//   caller_eh       - passed to add_exception_info()
//   caller_subr     - passed to mark_subroutines()
//   caller_fg       - stored in calling_fg
//   caller_bc_idx   - stored in calling_bc_idx (PRINTABLE_O3 builds only)
//   cmpl_handle, methodHandle, classHandle - handles of the method being
//                     compiled, stored for later resolution calls
Flow_Graph::Flow_Graph(const unsigned char *bc, unsigned code_length,
                       unsigned maxLocals, CG_Prepass *prepass,
                       Mem_Manager &m,
                       Cfg_Int inline_depth,
                       unsigned maxStack,
                       Eh_Node *caller_eh,
                       Cfg_Node *caller_subr,
                       Flow_Graph *caller_fg,
                       unsigned caller_bc_idx,
                       Compile_Handle cmpl_handle, Method_Handle methodHandle, Class_Handle classHandle):
mem_manager(m), _bytecodes(bc), _bc_length(code_length), _prepass(prepass),
inlining_depth(inline_depth),
_m_handle(methodHandle), _c_handle(classHandle), _cmpl_handle(cmpl_handle),
_next_cfg_label(0), _next_eh_label(0), _need_linearization(false),
liveness_changed(false), _callee_saved_registers_used(0),
_num_home_locations(0), code_block(NULL), _bounds_exception(NULL),
did_dead_elim(false), this_pointer_of_method(NULL),
max_stack(maxStack), _epilog(NULL),
o3_prof_rec(NULL), _prof_rec(NULL), has_fp(prepass->has_fp), has_virtual_inline(false),
num_jsr(prepass->num_jsr)
{
	//
	// O3 statistics
	//
	inner_counter = NULL ;
	inner_counter_num = 0 ;

    traversal_number = 0;
    calling_fg = caller_fg;
#ifdef PRINTABLE_O3
    calling_bc_idx = caller_bc_idx;
    // NOTE(review): 'search' is never read in the visible code.
    Flow_Graph *search = caller_fg;
    // Build the printable prefix (chain of calling bytecode indices) and
    // keep a copy allocated from mem_manager.
    char s[1000];
    make_prefix_string(s,this);
    prefix_str = (char *)mem_manager.alloc(1+strlen(s));
    strcpy(prefix_str,s);
    nodes_in_fg_size = nodes_in_fg_capacity = fgs_in_fg_size = fgs_in_fg_capacity = 0;
#endif // PRINTABLE_O3
#ifdef PLDI_OVERRIDDEN
    overridden_rec = NULL;
#endif

    // Pre-allocate one Cfg_Node per bytecode index that the prepass marked
    // as a block entry, labelling them consecutively from 0.
    // create_flow_graph() later fills these nodes in.
    unsigned label = 0;
    unsigned bc_idx;
    for (bc_idx=0; bc_idx<code_length; bc_idx++)
    {
        assert(bc_idx < prepass->code_length);
        if (prepass->bytecode_info[bc_idx].is_block_entry)
        {
            prepass->bytecode_info[bc_idx].fgnode = new(mem_manager)
                Cfg_Node(mem_manager, label++, this, linear_node_ordering.prev());
        }
    }

    // create the skeleton of the flow graph
    // could create a new throwaway mem_manager for retstack.
    retstack = new(mem_manager) Return_Address_Tracking(maxLocals, max_stack, mem_manager);
    Cfg_Node *tmp_nodes = create_flow_graph(0, retstack);
    // The prolog is a separate node whose single out-edge leads to the node
    // for bytecode 0; it becomes the root of the graph ('nodes').
    Cfg_Node *prolog = new(mem_manager) Cfg_Node(mem_manager, 0, this, &linear_node_ordering);
    prolog->add_edge(mem_manager,tmp_nodes);
    nodes = prolog;
    // Now create the flow graphs for all the exception handlers.
    // After this, everything unprocessed is dead code.
    create_flow_graph_handlers(retstack);
    add_exception_info(caller_eh);
    mark_subroutines(caller_subr);
    // remove_all_bounds_checks is set only when this method is one of the
    // listed cached java.lang.String methods (compareTo, equals, and four
    // indexOf entries).
    remove_all_bounds_checks = (
        c_handle() == cached_class_handles[ch_java_lang_String] &&
        (  m_handle() == cached_method_handles[mh_java_lang_String_compareTo]
        || m_handle() == cached_method_handles[mh_java_lang_String_equals]
        || m_handle() == cached_method_handles[mh_java_lang_String_indexOf__1]
        || m_handle() == cached_method_handles[mh_java_lang_String_indexOf__2]
        || m_handle() == cached_method_handles[mh_java_lang_String_indexOf__3]
        || m_handle() == cached_method_handles[mh_java_lang_String_indexOf__4]
         )
         );

}

////////////////////////////////////////////////////////////////
// Flow graph traversal routines below
////////////////////////////////////////////////////////////////

// Entry point for building the IR of the whole flow graph.
//
//   compilation_handle         - passed through to the per-node builder
//   expressions                - expression table used while building
//   stack                      - mimic stack shared by all nodes
//   gc_requires_write_barriers - passed through to the per-node builder
//
// Emits the incoming-argument assignments into the root node ('nodes'),
// then starts a new traversal at the root's first successor with an empty
// stack signature (depth 0).
void Flow_Graph::Build_IR(Compile_Handle compilation_handle,
                          Expressions &expressions,
                          Stack&        stack,  // mimic stack
                          bool gc_requires_write_barriers
                          )
{
    // GC_Map::get_address_of_this must be able to report the stack location
    // or register holding the "this" pointer of a nonstatic synchronized
    // method.  The operand is captured now, while the IR is being built:
    // later the "id" field of the GCTrack_Operand may be modified to
    // coalesce the home locations of the Arg_Operand and the Reg_Operand,
    // and after that it is too late to look the operand up in the hash table.
    const bool has_this_pointer = !method_is_static(_m_handle);
    if (has_this_pointer)
    {
        this_pointer_of_method = expressions.lookup_reg_exp(
            expressions.reg_map.virtual_reg_id(0,JIT_TYPE_CLASS), JIT_TYPE_CLASS, 1)->opnd;
    }

    build_incoming_arg_assignment(_c_handle, _m_handle, mem_manager, expressions,
        nodes->IR_instruction_list());

    // Scratch memory for the stack signatures; it lives only for this build.
    Mem_Manager build_mm(max_stack * 100 /* fill in later with #BBs */);
    traversal_number++;
    char *entry_sig = (char *)build_mm.alloc(max_stack);
    //_epilog = create_epilog(expressions);

    Cfg_Node *first_node = nodes->out_edges(0);
    Build_IR_node(first_node, compilation_handle, expressions, stack,
        entry_sig, 0, build_mm, NULL, 0, gc_requires_write_barriers);
}

// Detects whether prev_bb ends in a Beq/Bne branch on the result of an
// instanceof test.
//
//   prev_bb      - block whose last IR instruction is examined
//   true_node    - out: for Beq, prev_bb's fall-through successor; for Bne,
//                  its branch target
//   instance_exp - out: the instanceof expression that is being tested
//
// Both out parameters are left untouched when the pattern does not match.
static void true_of_instanceof(Cfg_Node *prev_bb,
                               Cfg_Node *&true_node,
                               Exp      *&instance_exp) {
    Inst *list_head = prev_bb->IR_instruction_list();
    assert(list_head != list_head->prev()); // must have insts
    Inst *tail = list_head->prev();
    if (!tail->is_branch())
        return;
    //
    // match exp pattern       br
    //                         /
    //                       test
    //                        /
    //                    instanceof
    //
    Exp *test = ((Inst_Exp*)tail->exp)->left_child();
    if (test->op != Exp::Test)
        return;
    if (((Inst_Exp*)test)->left_child()->op != Exp::Instanceof)
        return;

    const Exp::Kind branch_kind = (Exp::Kind)tail->exp->op;
    if (branch_kind == Exp::Beq)
    {
        instance_exp = ((Inst_Exp*)test)->left_child();
        true_node = prev_bb->get_fallthrough();
    }
    else if (branch_kind == Exp::Bne)
    {
        instance_exp = ((Inst_Exp*)test)->left_child();
        true_node = prev_bb->get_branch_target();
    }
}

// Builds the IR for `root` and then, recursively, for its successors and
// its exception handlers.
//
//   root               - node to build; skipped if already visited in the
//                        current traversal
//   compilation_handle, expressions, stack, gc_requires_write_barriers
//                      - passed through to build_IR() and the recursion
//   stack_sig, stack_depth
//                      - stack signature on entry to root and its length
//   build_mm           - scratch memory for the outgoing stack signature
//   prev_bb            - predecessor we came from; only dereferenced when
//                        is_extended_bb is nonzero
//   is_extended_bb     - nonzero when prev_bb and root form an extended
//                        basic block, in which case prev_bb's live CSE set
//                        is propagated into root's
//
// If build_IR() reports that the node ends in a return, an edge to the
// (lazily created) epilog is added after the successors were processed.
void Flow_Graph::Build_IR_node(Cfg_Node       *root,
                               Compile_Handle compilation_handle,
                               Expressions    &expressions,
                               Stack&         stack,  // mimic stack
                               char           *stack_sig,
                               int            stack_depth,
                               Mem_Manager    &build_mm,
                               Cfg_Node       *prev_bb,
                               int            is_extended_bb,
                               bool           gc_requires_write_barriers)
{
    if (root->latest_traversal >= traversal_number)
        return;
    root->latest_traversal = traversal_number;
    assert(root->live_lcse() == NULL); // IR is not being built

    // Every node gets its own live-CSE set.
    Live_LCSE *node_lcse = new (mem_manager) Live_LCSE(expressions.lcse_pool);
    root->set_live_lcse(node_lcse);

    if (is_extended_bb)
    {
        // Inherit the predecessor's live CSEs.  The instanceof expression is
        // passed along only when root is the successor reached when the
        // predecessor's instanceof test holds.
        Cfg_Node *instanceof_true_succ = NULL;
        Exp      *instanceof_exp = NULL;
        if (prev_bb->out_edge_size() == 2)
            true_of_instanceof(prev_bb, instanceof_true_succ, instanceof_exp);

        if (root == instanceof_true_succ)
            prev_bb->live_lcse()->propagate_to(*node_lcse, instanceof_exp);
        else
            prev_bb->live_lcse()->propagate_to(*node_lcse, NULL);
    }
    expressions.set_live_lcse(node_lcse);

    // Outputs of build_IR(): the stack signature on exit and whether the
    // node ends in a return.
    char *sig_on_exit = (char *)build_mm.alloc(max_stack);
    int depth_on_exit;
    int node_returns;

    // Build the IR of this node.
    build_IR(compilation_handle, _c_handle, _m_handle, gc_requires_write_barriers,
        mem_manager, expressions, stack, root->IR_instruction_list(),
        _bytecodes, root, stack_sig, stack_depth,
        sig_on_exit, depth_on_exit,
        node_returns,
        (root->eh_in_edge() != NULL),
        is_extended_bb);

    // Recurse into the successors.  A successor extends the current block
    // when root is its only predecessor and it has no incoming exception edge.
    Cfg_Int succ_idx;
    for (succ_idx = 0; succ_idx < root->out_edge_size(); succ_idx++)
    {
        Cfg_Node *succ = root->out_edges(succ_idx);
        int succ_is_extension = (succ->in_edge_size() == 1 &&
            succ->eh_in_edge() == NULL);
        Build_IR_node(succ, compilation_handle, expressions, stack,
            sig_on_exit, depth_on_exit, build_mm, root, succ_is_extension,
            gc_requires_write_barriers);
    }

    // Then the exception handlers attached to this node.
    Build_IR_eh(root->eh_out_edge(), compilation_handle, expressions, stack, build_mm,
        gc_requires_write_barriers);

    if (!node_returns)
        return;

    // Returning nodes all share one epilog, created on first use.
    if (_epilog == NULL)
        _epilog = create_epilog(expressions, root, root->get_enclosing_subr());
    root->add_edge(mem_manager, _epilog);
}

// Builds the IR for every handler reachable from the exception node `root`.
//
//   root - exception node; NULL or already visited in this traversal means
//          there is nothing to do
//   compilation_handle, expressions, stack, build_mm,
//   gc_requires_write_barriers - passed through to Build_IR_node()
//
// Each handler is entered with a one-entry stack signature whose single
// byte is 0, with no predecessor block and not as part of an extended block.
void Flow_Graph::Build_IR_eh(Eh_Node *root,
                             Compile_Handle compilation_handle,
                             Expressions &expressions,
                             Stack&        stack,  // mimic stack
                             Mem_Manager &build_mm,
                             bool         gc_requires_write_barriers)
{
    if (root == NULL || root->latest_traversal >= traversal_number)
        return;
    root->latest_traversal = traversal_number;

    char handler_entry_sig = 0;
    Cfg_Int handler_idx = 0;
    while (handler_idx < root->out_edge_size())
    {
        Build_IR_node(root->out_edges(handler_idx)->handler, compilation_handle,
            expressions, stack, &handler_entry_sig, 1, build_mm, NULL, 0,
            gc_requires_write_barriers);
        handler_idx++;
    }
}

// Create a basic block intended to throw the ArrayIndexOutOfBoundsException
// on behalf of orig_node.
//
// The new node shares orig_node's Eh_Node (if it has one) and is appended to
// the _bounds_nodes list.  It does NOT contain the actual sequence of IR
// instructions -- those must be filled in by the caller.  No out-edge is
// added by this function.
Cfg_Node *Flow_Graph::create_bounds_exception_block(Cfg_Node *orig_node)
{
    Cfg_Node *throw_block =
        new(mem_manager) Cfg_Node(mem_manager, 0, this, linear_node_ordering.prev());

    // Exceptions raised in the new block go to the same handlers as the
    // original block's.
    Eh_Node *orig_handlers = orig_node->eh_out_edge();
    if (orig_handlers != NULL)
        throw_block->add_eh_edge(mem_manager, orig_handlers);

    // Register the block in the list of bounds-exception nodes.
    Cfg_Node_List *list_entry = new(mem_manager) Cfg_Node_List(throw_block);
    list_entry->insert_before(&_bounds_nodes);
    return throw_block;
}

#if 1  // Non-recursive version of apply

// APPLY_INSERT_NODE(ws, wl, nd, tn): schedule node `nd` for a visit unless its
// latest_traversal already equals or exceeds `tn`.
//   ws - work set; a recycled WL_Node is requested via ws.get_free_node()
//   wl - list element after which the new work item is linked
//   nd - node to schedule; it is stamped with tn before being queued
//   tn - traversal number of the walk in progress
// NOTE: the expansion refers to an allocator object named `mm`, which is NOT
// a macro parameter and must be in scope wherever the macro is used.  The
// arguments are substituted without parentheses, so pass simple expressions.
#define APPLY_INSERT_NODE(ws, wl, nd, tn) { \
    if (nd->latest_traversal < tn) {  \
        nd->latest_traversal = tn;  \
        WL_Node *wn = (WL_Node*)ws.get_free_node(); \
        if (wn == NULL) \
            wn = new (mm) WL_Node(); \
        wn->node = nd;  \
        wn->insert_after(wl); \
    }  \
}

// Work-list (non-recursive) traversal of the flow graph.
//   fun                - callback invoked as fun(node, c) for each visited node
//   c                  - opaque closure handed through to fun
//   traverse_sub_graph - when true, a node whose flowgraph is not this graph
//                        is neither passed to fun nor expanded
// The walk starts from the entry node (`nodes`) and from every node on the
// _bounds_nodes list, and follows ordinary out-edges as well as the handlers
// of each node's exception-handler node.  traversal_number is bumped first;
// APPLY_INSERT_NODE stamps nodes with it so that each is queued only once.
void Flow_Graph::apply(Apply_Func fun, Closure *c, bool traverse_sub_graph)
{
    Mem_Manager mm(100*sizeof(void*));  // used by APPLY_INSERT_NODE for new WL_Nodes
    traversal_number ++;
    Work_Set ws;

    // Seed the work list: bounds nodes first, then the entry node.  Each
    // insertion goes directly after the list head.
    for (Cfg_Node_List *bounds_entry = _bounds_nodes.next();
         bounds_entry != &_bounds_nodes;
         bounds_entry = bounds_entry->next())
    {
        Cfg_Node *seed = bounds_entry->node();
        APPLY_INSERT_NODE(ws, &ws.work_list, seed, traversal_number);
    }
    APPLY_INSERT_NODE(ws, &ws.work_list, nodes, traversal_number);

    //
    // Drain the work list.
    //
    while (!ws.is_empty())
    {
        WL_Node *item = (WL_Node *)ws.work_list.get_next();
        Cfg_Node *node = (Cfg_Node*)item->node;

        // Nodes outside this graph are dropped when only the sub graph
        // is being traversed.
        if (!traverse_sub_graph || node->flowgraph == this)
        {
            fun(node,c);

            // Queue the handlers of this node's exception handler node,
            // expanding each Eh_Node only once per traversal.
            Eh_Node *eh = node->eh_out_edge();
            if (eh != NULL && eh->latest_traversal < traversal_number)
            {
                eh->latest_traversal = traversal_number;
                // Walk backwards: every insertion lands right after `item`,
                // so the queued handlers end up in forward edge order.
                for (Cfg_Int h = eh->out_edge_size(); h > 0; h--)
                {
                    Cfg_Node *handler = eh->out_edges(h-1)->handler;
                    APPLY_INSERT_NODE(ws, item, handler, traversal_number);
                }
            }

            // Queue the ordinary successors, again in reverse.
            for (Cfg_Int e = node->out_edge_size(); e > 0; e--)
            {
                Cfg_Node *succ = node->out_edges(e-1);
                APPLY_INSERT_NODE(ws, item, succ, traversal_number);
            }
        }

        // Done with this work item in either case.
        ws.free(item);
    }
}
#else
// Recursive variant of apply; compiled only when the "#if 1" selecting the
// work-list version is switched off.  After bumping traversal_number it
// delegates to the node-level apply, starting at the entry node and then at
// each node on the _bounds_nodes list.
void Flow_Graph::apply(Apply_Func fun, Closure *c, bool traverse_sub_graph)
{
    traversal_number ++;
    nodes->apply(traversal_number, fun, c, this, traverse_sub_graph);
    for (Cfg_Node_List *bounds_entry = _bounds_nodes.next();
         bounds_entry != &_bounds_nodes;
         bounds_entry = bounds_entry->next())
    {
        bounds_entry->node()->apply(traversal_number, fun, c, this, traverse_sub_graph);
    }
}
#endif

// Record the number of home locations exactly as given.
// A disabled alternative rounded the count up to the next even number:
//     _num_home_locations = ((n+1) & ~0x1);
void Flow_Graph::set_home_locations(int n)
{
    _num_home_locations = n;
}

// apply() callback: give the node the counter's current value as its label,
// then advance the counter by one.
// The closure is accessed as a short, so c must point at a counter object of
// that type.
static void assign_label(Cfg_Node *node, Closure *c) {
    short *counter = (short *)c;
    node->label = *counter;
    ++*counter;
}

//
// Make each node's label unique: the nodes visited by apply() are numbered
// consecutively starting at 0.  Returns the final counter value, i.e. the
// number of labels handed out.
//
// assign_label() reads and writes its closure through a short*, so the
// counter is declared as a short here.  Pointing that short* at an int is a
// type-punned access: it updates only part of the int, depends on byte order,
// and lets an optimizing compiler assume the int was never modified.
// The result is widened through unsigned short so that counts from 32768 to
// 65535 are still reported as positive values.
//
int Flow_Graph::reassign_label() {
    short counter = 0;
    apply(assign_label,(Closure*)&counter);
    return static_cast<unsigned short>(counter);
}

static void just_visiting(Cfg_Node *node, Closure *c) {}

//
// Called after apply() to removed nodes that weren't visited.
//
void Flow_Graph::prune_unreachable()
{
    apply(just_visiting, NULL);
    Cfg_Node_List *search, *next;
    for (search=linear_node_ordering.next(); search!=&linear_node_ordering; search=next)
    {
        next = search->next();
        Cfg_Node *node = search->node();
        if (node->latest_traversal < traversal_number)
        {
            while (node->out_edge_size() > 0)
                node->delete_edge(node->out_edges(0));
            if (node->eh_out_edge() != NULL)
                node->delete_eh_edge(node->eh_out_edge());
            search->unlink();
        }
    }
    if (_epilog != NULL && _epilog->latest_traversal < traversal_number)
        _epilog = NULL;

    // Remove the eh_nodes too.  If any are removed, we have to repeat the procedure
    // so that unreachable handlers are removed as well.
    bool must_repeat = false;
    Eh_Node *enext, *esearch;
    for (esearch=handlers()->next(); esearch != handlers(); esearch=enext)
    {
        enext = esearch->next();
        if (esearch->in_edge_size() == 0)
        {
            must_repeat = true;
            esearch->unlink();
            // Remove out-edges?
        }
    }
    if (must_repeat)
        prune_unreachable();
}

//
// Inlining introduces a lot of blocks.  After folding and dead code elimination,
// most of blocks become empty.  We eliminate those empty blocks.
//
void Flow_Graph::remove_empty_blocks() {
    Cfg_Node_List *search, *next;
    for (search=linear_node_ordering.next(); search!=&linear_node_ordering; search=next) {
        next = search->next();
        Cfg_Node *node = search->node();
        //
        // we may want to add code to prolog/epilog at the end
        //
        if (!node->is_empty_block() || node == prolog() || node == epilog() ||
            node->eh_in_edge() != NULL) 
            continue;

        assert(node->out_edge_size() == 1); // must have only one successor
        Cfg_Node *succ = node->out_edges(0);
        //
        // if succ's jsr_pred has been set, then we don't delete node otherwise
        // previous jsr_pred will be lost.
        //
        if (node == succ || succ == epilog() ||
            (node->get_jsr_pred() != NULL && succ->get_jsr_pred() != NULL))
            continue;

        node->delete_edge(succ);
        // update jsr_succ/jsr_pred
        if (node->get_jsr_pred() != NULL)
        {
            node->get_jsr_pred()->set_jsr_succ(succ);
            assert(succ->get_jsr_pred() == NULL);
            succ->set_jsr_pred(node->get_jsr_pred());
        }
        //
        // replace out edges of node's predecessors
        //
        while (node->in_edge_size() > 0)
            node->in_edges(0)->replace_edge(mem_manager,node,succ);
        if (node->eh_out_edge() != NULL)
            node->delete_eh_edge(node->eh_out_edge());

        // just for the dot files
        if (node->flowgraph->epilog() == node)
            node->flowgraph->_epilog = NULL;
        if (node->flowgraph->prolog() == node)
            node->flowgraph->nodes = succ;
        //
        // remove node from the linearization ordering list
        //
        search->unlink(); 
    }
}

// Depth-first walk that stores each node into nodearray only after its
// successors and exception handlers have been walked.
//   node             - node to walk; ignored if already stamped for this walk
//   traversal_number - stamp identifying the current walk
//   nodearray        - output array; the caller guarantees enough room
//   next_node        - next free index in nodearray, advanced on each store
// Successors reporting is_cold_non_inlined() are walked before the others.
static void fina_node(Cfg_Node *node, unsigned short traversal_number,
                      Cfg_Node **nodearray,
                      int &next_node)
{
    if (node->latest_traversal >= traversal_number)
        return;  // already seen during this walk
    node->latest_traversal = traversal_number;

    Cfg_Int e;

    // First pass over the out-edges: cold, non-inlined successors.
    for (e = 0; e < node->out_edge_size(); e++)
    {
        Cfg_Node *succ = node->out_edges(e);
        if (succ->is_cold_non_inlined())
            fina_node(succ, traversal_number, nodearray, next_node);
    }

    // Second pass: all remaining successors.
    for (e = 0; e < node->out_edge_size(); e++)
    {
        Cfg_Node *succ = node->out_edges(e);
        if (!succ->is_cold_non_inlined())
            fina_node(succ, traversal_number, nodearray, next_node);
    }

    // Exception handlers, with each Eh_Node expanded only once per walk.
    Eh_Node *eh = node->eh_out_edge();
    if (eh != NULL && eh->latest_traversal < traversal_number)
    {
        eh->latest_traversal = traversal_number;
        for (Cfg_Int h = 0; h < eh->out_edge_size(); h++)
        {
            Cfg_Node *handler = eh->out_edges(h)->handler;
            fina_node(handler, traversal_number, nodearray, next_node);
        }
    }

    // Everything reachable from here has been walked; store the node itself.
    nodearray[next_node++] = node;
}

// Produce an array of the graph's nodes in the order generated by fina_node.
//   mm        - allocator that provides the array's storage
//   nodearray - out: receives the newly allocated array
//   num_nodes - out: receives the node count returned by reassign_label()
// The walk starts at the prolog and then at each node on the _bounds_nodes
// list.  An assert checks that the walk stored exactly num_nodes entries.
void Flow_Graph::create_dataflow_ordering(Mem_Manager &mm, Cfg_Node **&nodearray,
                                          int &num_nodes)
{
    // Relabeling also yields the node count used to size the array.
    num_nodes = reassign_label();
    nodearray = (Cfg_Node **) mm.alloc(num_nodes * sizeof(Cfg_Node *));

    traversal_number ++;
    int filled = 0;
    fina_node(prolog(), traversal_number, nodearray, filled);
    for (Cfg_Node_List *bounds_entry = _bounds_nodes.next();
         bounds_entry != &_bounds_nodes;
         bounds_entry = bounds_entry->next())
    {
        fina_node(bounds_entry->node(), traversal_number, nodearray, filled);
    }
    assert(filled == num_nodes);
}
