src/cpu/x86/vm/c1_LinearScan_x86.cpp

changeset 0
f90c822e73f8
child 6876
710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,1242 @@
     1.4 +/*
     1.5 + * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "c1/c1_Instruction.hpp"
    1.30 +#include "c1/c1_LinearScan.hpp"
    1.31 +#include "utilities/bitMap.inline.hpp"
    1.32 +
    1.33 +
    1.34 +//----------------------------------------------------------------------
    1.35 +// Allocation of FPU stack slots (Intel x86 only)
    1.36 +//----------------------------------------------------------------------
    1.37 +
// Entry point for x87 FPU stack slot allocation. Optionally pre-computes the
// exact set of FPU registers live at the start of each merge block (used to
// minimize work when FPU stacks of predecessors must be merged), then runs a
// FpuStackAllocator over all blocks.
void LinearScan::allocate_fpu_stack() {
  // First compute which FPU registers are live at the start of each basic block
  // (To minimize the amount of work we have to do if we have to merge FPU stacks)
  if (ComputeExactFPURegisterUsage) {
    Interval* intervals_in_register, *intervals_in_memory;
    create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL);

    // ignore memory intervals by overwriting intervals_in_memory
    // the dummy interval is needed to enforce the walker to walk until the given id:
    // without it, the walker stops when the unhandled-list is empty -> live information
    // beyond this point would be incorrect.
    Interval* dummy_interval = new Interval(any_reg);
    dummy_interval->add_range(max_jint - 2, max_jint - 1);
    dummy_interval->set_next(Interval::end());
    intervals_in_memory = dummy_interval;

    IntervalWalker iw(this, intervals_in_register, intervals_in_memory);

    const int num_blocks = block_count();
    for (int i = 0; i < num_blocks; i++) {
      BlockBegin* b = block_at(i);

      // register usage is only needed for merging stacks -> compute only
      // when more than one predecessor.
      // the block must not have any spill moves at the beginning (checked by assertions)
      // spill moves would use intervals that are marked as handled and so the usage bit
      // would be set incorrectly

      // NOTE: the check for number_of_preds > 1 is necessary. A block with only one
      //       predecessor may have spill moves at the begin of the block.
      //       If an interval ends at the current instruction id, it is not possible
      //       to decide if the register is live or not at the block begin -> the
      //       register information would be incorrect.
      if (b->number_of_preds() > 1) {
        int id = b->first_lir_instruction_id();
        BitMap regs(FrameMap::nof_fpu_regs);
        regs.clear();

        iw.walk_to(id);   // walk after the first instruction (always a label) of the block
        assert(iw.current_position() == id, "did not walk completely to id");

        // Only consider FPU values in registers
        Interval* interval = iw.active_first(fixedKind);
        while (interval != Interval::end()) {
          int reg = interval->assigned_reg();
          assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register");
          assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)");
          assert(interval->from() <= id && id < interval->to(), "interval out of range");

#ifndef PRODUCT
          if (TraceFPURegisterUsage) {
            tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print();
          }
#endif

          regs.set_bit(reg - pd_first_fpu_reg);
          interval = interval->next();
        }

        // publish the per-block liveness bitmap for the stack-merge code
        b->set_fpu_register_usage(regs);

#ifndef PRODUCT
        if (TraceFPURegisterUsage) {
          tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr();
        }
#endif
      }
    }
  }

  // The allocator lives on this stack frame; the _fpu_stack_allocator pointer
  // is only valid for the duration of alloc.allocate().
  FpuStackAllocator alloc(ir()->compilation(), this);
  _fpu_stack_allocator = &alloc;
  alloc.allocate();
  _fpu_stack_allocator = NULL;
}
   1.113 +
   1.114 +
// Construct an FPU stack allocator for one compilation. _lir and _pos are
// invalid until allocate_block() selects a LIR list to process. Both stack
// simulators start empty (_temp_sim is presumably used for stack-merge
// computations — its use is not visible in this chunk).
FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator)
  : _compilation(compilation)
  , _lir(NULL)
  , _pos(-1)
  , _allocator(allocator)
  , _sim(compilation)
  , _temp_sim(compilation)
{}
   1.123 +
   1.124 +void FpuStackAllocator::allocate() {
   1.125 +  int num_blocks = allocator()->block_count();
   1.126 +  for (int i = 0; i < num_blocks; i++) {
   1.127 +    // Set up to process block
   1.128 +    BlockBegin* block = allocator()->block_at(i);
   1.129 +    intArray* fpu_stack_state = block->fpu_stack_state();
   1.130 +
   1.131 +#ifndef PRODUCT
   1.132 +    if (TraceFPUStack) {
   1.133 +      tty->cr();
   1.134 +      tty->print_cr("------- Begin of new Block %d -------", block->block_id());
   1.135 +    }
   1.136 +#endif
   1.137 +
   1.138 +    assert(fpu_stack_state != NULL ||
   1.139 +           block->end()->as_Base() != NULL ||
   1.140 +           block->is_set(BlockBegin::exception_entry_flag),
   1.141 +           "FPU stack state must be present due to linear-scan order for FPU stack allocation");
   1.142 +    // note: exception handler entries always start with an empty fpu stack
   1.143 +    //       because stack merging would be too complicated
   1.144 +
   1.145 +    if (fpu_stack_state != NULL) {
   1.146 +      sim()->read_state(fpu_stack_state);
   1.147 +    } else {
   1.148 +      sim()->clear();
   1.149 +    }
   1.150 +
   1.151 +#ifndef PRODUCT
   1.152 +    if (TraceFPUStack) {
   1.153 +      tty->print("Reading FPU state for block %d:", block->block_id());
   1.154 +      sim()->print();
   1.155 +      tty->cr();
   1.156 +    }
   1.157 +#endif
   1.158 +
   1.159 +    allocate_block(block);
   1.160 +    CHECK_BAILOUT();
   1.161 +  }
   1.162 +}
   1.163 +
// Process all LIR operations of a single block. While iterating, new ops
// (fxch/fld/fpop) may be inserted into the instruction list by the handlers,
// so insts->length() is re-read every iteration and insert_op() advances
// pos() past anything it inserts.
void FpuStackAllocator::allocate_block(BlockBegin* block) {
  bool processed_merge = false;
  LIR_OpList* insts = block->lir()->instructions_list();
  set_lir(block->lir());
  set_pos(0);


  // Note: insts->length() may change during loop
  while (pos() < insts->length()) {
    LIR_Op* op = insts->at(pos());
    // reset lazy flag; compute_debug_information() runs at most once per op
    _debug_information_computed = false;

#ifndef PRODUCT
    if (TraceFPUStack) {
      op->print();
    }
    check_invalid_lir_op(op);
#endif

    // classify the op; at most one of these is non-NULL and handled below
    LIR_OpBranch* branch = op->as_OpBranch();
    LIR_Op1* op1 = op->as_Op1();
    LIR_Op2* op2 = op->as_Op2();
    LIR_OpCall* opCall = op->as_OpCall();

    if (branch != NULL && branch->block() != NULL) {
      if (!processed_merge) {
        // propagate stack at first branch to a successor
        processed_merge = true;
        bool required_merge = merge_fpu_stack_with_successors(block);

        assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches");
      }

    } else if (op1 != NULL) {
      handle_op1(op1);
    } else if (op2 != NULL) {
      handle_op2(op2);
    } else if (opCall != NULL) {
      handle_opCall(opCall);
    }

    // debug info must be computed after operands were rewritten by the handlers
    compute_debug_information(op);

    set_pos(1 + pos());
  }

  // Propagate stack when block does not end with branch
  if (!processed_merge) {
    merge_fpu_stack_with_successors(block);
  }
}
   1.215 +
   1.216 +
// Compute debug information (oop maps etc.) for 'op' exactly once per op.
// _debug_information_computed is reset for each op in allocate_block() and
// set here unconditionally, so handlers (e.g. handle_op1 for moves) may call
// this early — before the FPU stack is modified — without duplicate work.
void FpuStackAllocator::compute_debug_information(LIR_Op* op) {
  if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) {
    visitor.visit(op);

    // exception handling: each handler reachable from this op needs entry
    // code that clears the FPU stack (handlers start with an empty stack)
    if (allocator()->compilation()->has_exception_handlers()) {
      XHandlers* xhandlers = visitor.all_xhandler();
      int n = xhandlers->length();
      for (int k = 0; k < n; k++) {
        allocate_exception_handler(xhandlers->handler_at(k));
      }
    } else {
      assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
    }

    // compute debug information
    int n = visitor.info_count();
    assert(n > 0, "should not visit operation otherwise");

    for (int j = 0; j < n; j++) {
      CodeEmitInfo* info = visitor.info_at(j);
      // Compute debug information
      allocator()->compute_debug_info(info, op->id());
    }
  }
  // mark done even when there was nothing to compute, so repeated calls for
  // the same op short-circuit
  _debug_information_computed = true;
}
   1.244 +
// Emit/patch entry code for an exception handler so that the FPU stack is
// empty on handler entry (handlers always start with an empty stack because
// stack merging would be too complicated). Saves and restores the current
// lir()/pos()/simulator state, since this is invoked from within the
// processing of another LIR list (via compute_debug_information()).
void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) {
  if (!sim()->is_empty()) {
    LIR_List* old_lir = lir();
    int old_pos = pos();
    intArray* old_state = sim()->write_state();

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->cr();
      tty->print_cr("------- begin of exception handler -------");
    }
#endif

    if (xhandler->entry_code() == NULL) {
      // need entry code to clear FPU stack
      LIR_List* entry_code = new LIR_List(_compilation);
      entry_code->jump(xhandler->entry_block());
      xhandler->set_entry_code(entry_code);
    }

    LIR_OpList* insts = xhandler->entry_code()->instructions_list();
    set_lir(xhandler->entry_code());
    set_pos(0);

    // Note: insts->length() may change during loop
    while (pos() < insts->length()) {
      LIR_Op* op = insts->at(pos());

#ifndef PRODUCT
      if (TraceFPUStack) {
        op->print();
      }
      check_invalid_lir_op(op);
#endif

      // entry code may contain only (phi-resolving) moves followed by a
      // single unconditional branch
      switch (op->code()) {
        case lir_move:
          assert(op->as_Op1() != NULL, "must be LIR_Op1");
          assert(pos() != insts->length() - 1, "must not be last operation");

          handle_op1((LIR_Op1*)op);
          break;

        case lir_branch:
          assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch");
          assert(pos() == insts->length() - 1, "must be last operation");

          // remove all remaining dead registers from FPU stack
          clear_fpu_stack(LIR_OprFact::illegalOpr);
          break;

        default:
          // other operations not allowed in exception entry code
          ShouldNotReachHere();
      }

      set_pos(pos() + 1);
    }

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->cr();
      tty->print_cr("------- end of exception handler -------");
    }
#endif

    // resume the interrupted processing of the surrounding LIR list
    set_lir(old_lir);
    set_pos(old_pos);
    sim()->read_state(old_state);
  }
}
   1.316 +
   1.317 +
   1.318 +int FpuStackAllocator::fpu_num(LIR_Opr opr) {
   1.319 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.320 +  return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo();
   1.321 +}
   1.322 +
   1.323 +int FpuStackAllocator::tos_offset(LIR_Opr opr) {
   1.324 +  return sim()->offset_from_tos(fpu_num(opr));
   1.325 +}
   1.326 +
   1.327 +
   1.328 +LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) {
   1.329 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.330 +
   1.331 +  int stack_offset = tos_offset(opr);
   1.332 +  if (opr->is_single_fpu()) {
   1.333 +    return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
   1.334 +  } else {
   1.335 +    assert(opr->is_double_fpu(), "shouldn't call this otherwise");
   1.336 +    return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
   1.337 +  }
   1.338 +}
   1.339 +
   1.340 +LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) {
   1.341 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.342 +  assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top");
   1.343 +
   1.344 +  int stack_offset = 0;
   1.345 +  if (opr->is_single_fpu()) {
   1.346 +    return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
   1.347 +  } else {
   1.348 +    assert(opr->is_double_fpu(), "shouldn't call this otherwise");
   1.349 +    return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
   1.350 +  }
   1.351 +}
   1.352 +
   1.353 +
   1.354 +
   1.355 +void FpuStackAllocator::insert_op(LIR_Op* op) {
   1.356 +  lir()->insert_before(pos(), op);
   1.357 +  set_pos(1 + pos());
   1.358 +}
   1.359 +
   1.360 +
   1.361 +void FpuStackAllocator::insert_exchange(int offset) {
   1.362 +  if (offset > 0) {
   1.363 +    LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
   1.364 +    insert_op(fxch_op);
   1.365 +    sim()->swap(offset);
   1.366 +
   1.367 +#ifndef PRODUCT
   1.368 +    if (TraceFPUStack) {
   1.369 +      tty->print("Exchanged register: %d         New state: ", sim()->get_slot(0)); sim()->print(); tty->cr();
   1.370 +    }
   1.371 +#endif
   1.372 +
   1.373 +  }
   1.374 +}
   1.375 +
   1.376 +void FpuStackAllocator::insert_exchange(LIR_Opr opr) {
   1.377 +  insert_exchange(tos_offset(opr));
   1.378 +}
   1.379 +
   1.380 +
   1.381 +void FpuStackAllocator::insert_free(int offset) {
   1.382 +  // move stack slot to the top of stack and then pop it
   1.383 +  insert_exchange(offset);
   1.384 +
   1.385 +  LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
   1.386 +  insert_op(fpop);
   1.387 +  sim()->pop();
   1.388 +
   1.389 +#ifndef PRODUCT
   1.390 +    if (TraceFPUStack) {
   1.391 +      tty->print("Inserted pop                   New state: "); sim()->print(); tty->cr();
   1.392 +    }
   1.393 +#endif
   1.394 +}
   1.395 +
   1.396 +
   1.397 +void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) {
   1.398 +  if (sim()->contains(fpu_num(opr))) {
   1.399 +    int res_slot = tos_offset(opr);
   1.400 +    insert_free(res_slot);
   1.401 +  }
   1.402 +}
   1.403 +
   1.404 +void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) {
   1.405 +  if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) {
   1.406 +    int res_slot = tos_offset(opr);
   1.407 +    insert_free(res_slot);
   1.408 +  }
   1.409 +}
   1.410 +
   1.411 +void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) {
   1.412 +  int offset = tos_offset(from);
   1.413 +  LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
   1.414 +  insert_op(fld);
   1.415 +
   1.416 +  sim()->push(fpu_num(to));
   1.417 +
   1.418 +#ifndef PRODUCT
   1.419 +  if (TraceFPUStack) {
   1.420 +    tty->print("Inserted copy (%d -> %d)         New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr();
   1.421 +  }
   1.422 +#endif
   1.423 +}
   1.424 +
   1.425 +void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) {
   1.426 +  sim()->rename(fpu_num(from), fpu_num(to));
   1.427 +}
   1.428 +
   1.429 +void FpuStackAllocator::do_push(LIR_Opr opr) {
   1.430 +  sim()->push(fpu_num(opr));
   1.431 +}
   1.432 +
   1.433 +void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) {
   1.434 +  assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set");
   1.435 +  assert(tos_offset(opr) == 0, "can only pop stack top");
   1.436 +
   1.437 +  if (opr->is_last_use()) {
   1.438 +    op->set_fpu_pop_count(1);
   1.439 +    sim()->pop();
   1.440 +  }
   1.441 +}
   1.442 +
   1.443 +void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) {
   1.444 +  assert(op->fpu_pop_count() == 0, "fpu_pop_count alredy set");
   1.445 +  assert(tos_offset(opr) == 0, "can only pop stack top");
   1.446 +
   1.447 +  op->set_fpu_pop_count(1);
   1.448 +  sim()->pop();
   1.449 +}
   1.450 +
   1.451 +void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) {
   1.452 +  int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0);
   1.453 +  while (sim()->stack_size() > result_stack_size) {
   1.454 +    assert(!sim()->slot_is_empty(0), "not allowed");
   1.455 +
   1.456 +    if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) {
   1.457 +      insert_free(0);
   1.458 +    } else {
   1.459 +      // move "preserve" to bottom of stack so that all other stack slots can be popped
   1.460 +      insert_exchange(sim()->stack_size() - 1);
   1.461 +    }
   1.462 +  }
   1.463 +}
   1.464 +
   1.465 +
// Process a single LIR_Op1: for operations with x87 FPU operands, insert
// fxch/fld/fpop operations so the operands are where the LIR-Assembler
// expects them, update the stack simulator, and rewrite the in/result
// operands into stack-relative form.
void FpuStackAllocator::handle_op1(LIR_Op1* op1) {
  LIR_Opr in  = op1->in_opr();
  LIR_Opr res = op1->result_opr();

  LIR_Opr new_in  = in;  // new operands relative to the actual fpu stack top
  LIR_Opr new_res = res;

  // Note: this switch is processed for all LIR_Op1, regardless if they have FPU-arguments,
  //       so checks for is_float_kind() are necessary inside the cases
  switch (op1->code()) {

    case lir_return: {
      // FPU-Stack must only contain the (optional) fpu return value.
      // All remaining dead values are popped from the stack
      // If the input operand is a fpu-register, it is exchanged to the bottom of the stack

      clear_fpu_stack(in);
      if (in->is_fpu_register() && !in->is_xmm_register()) {
        new_in = to_fpu_stack_top(in);
      }

      break;
    }

    case lir_move: {
      if (in->is_fpu_register() && !in->is_xmm_register()) {
        if (res->is_xmm_register()) {
          // move from fpu register to xmm register (necessary for operations that
          // are not available in the SSE instruction set)
          insert_exchange(in);
          new_in = to_fpu_stack_top(in);
          pop_always(op1, in);

        } else if (res->is_fpu_register() && !res->is_xmm_register()) {
          // move from fpu-register to fpu-register:
          // * input and result register equal:
          //   nothing to do
          // * input register is last use:
          //   rename the input register to result register -> input register
          //   not present on fpu-stack afterwards
          // * input register not last use:
          //   duplicate input register to result register to preserve input
          //
          // Note: The LIR-Assembler does not produce any code for fpu register moves,
          //       so input and result stack index must be equal

          if (fpu_num(in) == fpu_num(res)) {
            // nothing to do
          } else if (in->is_last_use()) {
            insert_free_if_dead(res);//, in);
            do_rename(in, res);
          } else {
            insert_free_if_dead(res);
            insert_copy(in, res);
          }
          new_in = to_fpu_stack(res);
          new_res = new_in;

        } else {
          // move from fpu-register to memory
          // input operand must be on top of stack

          insert_exchange(in);

          // create debug information here because afterwards the register may have been popped
          compute_debug_information(op1);

          new_in = to_fpu_stack_top(in);
          pop_if_last_use(op1, in);
        }

      } else if (res->is_fpu_register() && !res->is_xmm_register()) {
        // move from memory/constant to fpu register
        // result is pushed on the stack

        insert_free_if_dead(res);

        // create debug information before register is pushed
        compute_debug_information(op1);

        do_push(res);
        new_res = to_fpu_stack_top(res);
      }
      break;
    }

    case lir_neg: {
      if (in->is_fpu_register() && !in->is_xmm_register()) {
        assert(res->is_fpu_register() && !res->is_xmm_register(), "must be");
        assert(in->is_last_use(), "old value gets destroyed");

        insert_free_if_dead(res, in);
        insert_exchange(in);
        new_in = to_fpu_stack_top(in);

        // negation happens in place on the stack top; only the simulator
        // must learn that the top slot now represents the result
        do_rename(in, res);
        new_res = to_fpu_stack_top(res);
      }
      break;
    }

    case lir_convert: {
      Bytecodes::Code bc = op1->as_OpConvert()->bytecode();
      switch (bc) {
        case Bytecodes::_d2f:
        case Bytecodes::_f2d:
          assert(res->is_fpu_register(), "must be");
          assert(in->is_fpu_register(), "must be");

          if (!in->is_xmm_register() && !res->is_xmm_register()) {
            // this is quite the same as a move from fpu-register to fpu-register
            // Note: input and result operands must have different types
            if (fpu_num(in) == fpu_num(res)) {
              // nothing to do
              new_in = to_fpu_stack(in);
            } else if (in->is_last_use()) {
              insert_free_if_dead(res);//, in);
              new_in = to_fpu_stack(in);
              do_rename(in, res);
            } else {
              insert_free_if_dead(res);
              insert_copy(in, res);
              new_in = to_fpu_stack_top(in, true);
            }
            new_res = to_fpu_stack(res);
          }

          break;

        case Bytecodes::_i2f:
        case Bytecodes::_l2f:
        case Bytecodes::_i2d:
        case Bytecodes::_l2d:
          // integer -> float/double: result is pushed onto the stack
          assert(res->is_fpu_register(), "must be");
          if (!res->is_xmm_register()) {
            insert_free_if_dead(res);
            do_push(res);
            new_res = to_fpu_stack_top(res);
          }
          break;

        case Bytecodes::_f2i:
        case Bytecodes::_d2i:
          // float/double -> int: input must be on top of stack, stays there
          assert(in->is_fpu_register(), "must be");
          if (!in->is_xmm_register()) {
            insert_exchange(in);
            new_in = to_fpu_stack_top(in);

            // TODO: update registers of stub
          }
          break;

        case Bytecodes::_f2l:
        case Bytecodes::_d2l:
          // float/double -> long: input is on top of stack and gets popped
          assert(in->is_fpu_register(), "must be");
          if (!in->is_xmm_register()) {
            insert_exchange(in);
            new_in = to_fpu_stack_top(in);
            pop_always(op1, in);
          }
          break;

        case Bytecodes::_i2l:
        case Bytecodes::_l2i:
        case Bytecodes::_i2b:
        case Bytecodes::_i2c:
        case Bytecodes::_i2s:
          // no fpu operands
          break;

        default:
          ShouldNotReachHere();
      }
      break;
    }

    case lir_roundfp: {
      assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register");
      assert(res->is_stack(), "result must be on stack");

      insert_exchange(in);
      new_in = to_fpu_stack_top(in);
      pop_if_last_use(op1, in);
      break;
    }

    default: {
      assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation");
    }
  }

  // install the rewritten (stack-relative) operands
  op1->set_in_opr(new_in);
  op1->set_result_opr(new_res);
}
   1.660 +
   1.661 +void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
   1.662 +  LIR_Opr left  = op2->in_opr1();
   1.663 +  if (!left->is_float_kind()) {
   1.664 +    return;
   1.665 +  }
   1.666 +  if (left->is_xmm_register()) {
   1.667 +    return;
   1.668 +  }
   1.669 +
   1.670 +  LIR_Opr right = op2->in_opr2();
   1.671 +  LIR_Opr res   = op2->result_opr();
   1.672 +  LIR_Opr new_left  = left;  // new operands relative to the actual fpu stack top
   1.673 +  LIR_Opr new_right = right;
   1.674 +  LIR_Opr new_res   = res;
   1.675 +
   1.676 +  assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers");
   1.677 +
   1.678 +  switch (op2->code()) {
   1.679 +    case lir_cmp:
   1.680 +    case lir_cmp_fd2i:
   1.681 +    case lir_ucmp_fd2i:
   1.682 +    case lir_assert: {
   1.683 +      assert(left->is_fpu_register(), "invalid LIR");
   1.684 +      assert(right->is_fpu_register(), "invalid LIR");
   1.685 +
   1.686 +      // the left-hand side must be on top of stack.
   1.687 +      // the right-hand side is never popped, even if is_last_use is set
   1.688 +      insert_exchange(left);
   1.689 +      new_left = to_fpu_stack_top(left);
   1.690 +      new_right = to_fpu_stack(right);
   1.691 +      pop_if_last_use(op2, left);
   1.692 +      break;
   1.693 +    }
   1.694 +
   1.695 +    case lir_mul_strictfp:
   1.696 +    case lir_div_strictfp: {
   1.697 +      assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
   1.698 +      insert_free_if_dead(op2->tmp1_opr());
   1.699 +      assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
   1.700 +      // fall-through: continue with the normal handling of lir_mul and lir_div
   1.701 +    }
   1.702 +    case lir_add:
   1.703 +    case lir_sub:
   1.704 +    case lir_mul:
   1.705 +    case lir_div: {
   1.706 +      assert(left->is_fpu_register(), "must be");
   1.707 +      assert(res->is_fpu_register(), "must be");
   1.708 +      assert(left->is_equal(res), "must be");
   1.709 +
   1.710 +      // either the left-hand or the right-hand side must be on top of stack
   1.711 +      // (if right is not a register, left must be on top)
   1.712 +      if (!right->is_fpu_register()) {
   1.713 +        insert_exchange(left);
   1.714 +        new_left = to_fpu_stack_top(left);
   1.715 +      } else {
   1.716 +        // no exchange necessary if right is alredy on top of stack
   1.717 +        if (tos_offset(right) == 0) {
   1.718 +          new_left = to_fpu_stack(left);
   1.719 +          new_right = to_fpu_stack_top(right);
   1.720 +        } else {
   1.721 +          insert_exchange(left);
   1.722 +          new_left = to_fpu_stack_top(left);
   1.723 +          new_right = to_fpu_stack(right);
   1.724 +        }
   1.725 +
   1.726 +        if (right->is_last_use()) {
   1.727 +          op2->set_fpu_pop_count(1);
   1.728 +
   1.729 +          if (tos_offset(right) == 0) {
   1.730 +            sim()->pop();
   1.731 +          } else {
   1.732 +            // if left is on top of stack, the result is placed in the stack
   1.733 +            // slot of right, so a renaming from right to res is necessary
   1.734 +            assert(tos_offset(left) == 0, "must be");
   1.735 +            sim()->pop();
   1.736 +            do_rename(right, res);
   1.737 +          }
   1.738 +        }
   1.739 +      }
   1.740 +      new_res = to_fpu_stack(res);
   1.741 +
   1.742 +      break;
   1.743 +    }
   1.744 +
   1.745 +    case lir_rem: {
   1.746 +      assert(left->is_fpu_register(), "must be");
   1.747 +      assert(right->is_fpu_register(), "must be");
   1.748 +      assert(res->is_fpu_register(), "must be");
   1.749 +      assert(left->is_equal(res), "must be");
   1.750 +
   1.751 +      // Must bring both operands to top of stack with following operand ordering:
   1.752 +      // * fpu stack before rem: ... right left
   1.753 +      // * fpu stack after rem:  ... left
   1.754 +      if (tos_offset(right) != 1) {
   1.755 +        insert_exchange(right);
   1.756 +        insert_exchange(1);
   1.757 +      }
   1.758 +      insert_exchange(left);
   1.759 +      assert(tos_offset(right) == 1, "check");
   1.760 +      assert(tos_offset(left) == 0, "check");
   1.761 +
   1.762 +      new_left = to_fpu_stack_top(left);
   1.763 +      new_right = to_fpu_stack(right);
   1.764 +
   1.765 +      op2->set_fpu_pop_count(1);
   1.766 +      sim()->pop();
   1.767 +      do_rename(right, res);
   1.768 +
   1.769 +      new_res = to_fpu_stack_top(res);
   1.770 +      break;
   1.771 +    }
   1.772 +
   1.773 +    case lir_abs:
   1.774 +    case lir_sqrt: {
   1.775 +      // Right argument appears to be unused
   1.776 +      assert(right->is_illegal(), "must be");
   1.777 +      assert(left->is_fpu_register(), "must be");
   1.778 +      assert(res->is_fpu_register(), "must be");
   1.779 +      assert(left->is_last_use(), "old value gets destroyed");
   1.780 +
   1.781 +      insert_free_if_dead(res, left);
   1.782 +      insert_exchange(left);
   1.783 +      do_rename(left, res);
   1.784 +
   1.785 +      new_left = to_fpu_stack_top(res);
   1.786 +      new_res = new_left;
   1.787 +
   1.788 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.789 +      break;
   1.790 +    }
   1.791 +
   1.792 +    case lir_log:
   1.793 +    case lir_log10: {
   1.794 +      // log and log10 need one temporary fpu stack slot, so
   1.795 +      // there is one temporary registers stored in temp of the
   1.796 +      // operation. the stack allocator must guarantee that the stack
   1.797 +      // slots are really free, otherwise there might be a stack
   1.798 +      // overflow.
   1.799 +      assert(right->is_illegal(), "must be");
   1.800 +      assert(left->is_fpu_register(), "must be");
   1.801 +      assert(res->is_fpu_register(), "must be");
   1.802 +      assert(op2->tmp1_opr()->is_fpu_register(), "must be");
   1.803 +
   1.804 +      insert_free_if_dead(op2->tmp1_opr());
   1.805 +      insert_free_if_dead(res, left);
   1.806 +      insert_exchange(left);
   1.807 +      do_rename(left, res);
   1.808 +
   1.809 +      new_left = to_fpu_stack_top(res);
   1.810 +      new_res = new_left;
   1.811 +
   1.812 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.813 +      assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
   1.814 +      break;
   1.815 +    }
   1.816 +
   1.817 +
   1.818 +    case lir_tan:
   1.819 +    case lir_sin:
   1.820 +    case lir_cos:
   1.821 +    case lir_exp: {
   1.822 +      // sin, cos and exp need two temporary fpu stack slots, so there are two temporary
   1.823 +      // registers (stored in right and temp of the operation).
   1.824 +      // the stack allocator must guarantee that the stack slots are really free,
   1.825 +      // otherwise there might be a stack overflow.
   1.826 +      assert(left->is_fpu_register(), "must be");
   1.827 +      assert(res->is_fpu_register(), "must be");
   1.828 +      // assert(left->is_last_use(), "old value gets destroyed");
   1.829 +      assert(right->is_fpu_register(), "right is used as the first temporary register");
   1.830 +      assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
   1.831 +      assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
   1.832 +
   1.833 +      insert_free_if_dead(right);
   1.834 +      insert_free_if_dead(op2->tmp1_opr());
   1.835 +
   1.836 +      insert_free_if_dead(res, left);
   1.837 +      insert_exchange(left);
   1.838 +      do_rename(left, res);
   1.839 +
   1.840 +      new_left = to_fpu_stack_top(res);
   1.841 +      new_res = new_left;
   1.842 +
   1.843 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.844 +      assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
   1.845 +      break;
   1.846 +    }
   1.847 +
   1.848 +    case lir_pow: {
   1.849 +      // pow needs two temporary fpu stack slots, so there are two temporary
   1.850 +      // registers (stored in tmp1 and tmp2 of the operation).
   1.851 +      // the stack allocator must guarantee that the stack slots are really free,
   1.852 +      // otherwise there might be a stack overflow.
   1.853 +      assert(left->is_fpu_register(), "must be");
   1.854 +      assert(right->is_fpu_register(), "must be");
   1.855 +      assert(res->is_fpu_register(), "must be");
   1.856 +
   1.857 +      assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register");
   1.858 +      assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register");
   1.859 +      assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers");
   1.860 +      assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers");
   1.861 +      assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
   1.862 +      assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers");
   1.863 +
   1.864 +      insert_free_if_dead(op2->tmp1_opr());
   1.865 +      insert_free_if_dead(op2->tmp2_opr());
   1.866 +
   1.867 +      // Must bring both operands to top of stack with following operand ordering:
   1.868 +      // * fpu stack before pow: ... right left
   1.869 +      // * fpu stack after pow:  ... left
   1.870 +
   1.871 +      insert_free_if_dead(res, right);
   1.872 +
   1.873 +      if (tos_offset(right) != 1) {
   1.874 +        insert_exchange(right);
   1.875 +        insert_exchange(1);
   1.876 +      }
   1.877 +      insert_exchange(left);
   1.878 +      assert(tos_offset(right) == 1, "check");
   1.879 +      assert(tos_offset(left) == 0, "check");
   1.880 +
   1.881 +      new_left = to_fpu_stack_top(left);
   1.882 +      new_right = to_fpu_stack(right);
   1.883 +
   1.884 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.885 +      assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
   1.886 +
   1.887 +      sim()->pop();
   1.888 +
   1.889 +      do_rename(right, res);
   1.890 +
   1.891 +      new_res = to_fpu_stack_top(res);
   1.892 +      break;
   1.893 +    }
   1.894 +
   1.895 +    default: {
   1.896 +      assert(false, "missed a fpu-operation");
   1.897 +    }
   1.898 +  }
   1.899 +
   1.900 +  op2->set_in_opr1(new_left);
   1.901 +  op2->set_in_opr2(new_right);
   1.902 +  op2->set_result_opr(new_res);
   1.903 +}
   1.904 +
   1.905 +void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) {
   1.906 +  LIR_Opr res = opCall->result_opr();
   1.907 +
   1.908 +  // clear fpu-stack before call
   1.909 +  // it may contain dead values that could not have been remved by previous operations
   1.910 +  clear_fpu_stack(LIR_OprFact::illegalOpr);
   1.911 +  assert(sim()->is_empty(), "fpu stack must be empty now");
   1.912 +
   1.913 +  // compute debug information before (possible) fpu result is pushed
   1.914 +  compute_debug_information(opCall);
   1.915 +
   1.916 +  if (res->is_fpu_register() && !res->is_xmm_register()) {
   1.917 +    do_push(res);
   1.918 +    opCall->set_result_opr(to_fpu_stack_top(res));
   1.919 +  }
   1.920 +}
   1.921 +
   1.922 +#ifndef PRODUCT
   1.923 +void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) {
   1.924 +  switch (op->code()) {
   1.925 +    case lir_24bit_FPU:
   1.926 +    case lir_reset_FPU:
   1.927 +    case lir_ffree:
   1.928 +      assert(false, "operations not allowed in lir. If one of these operations is needed, check if they have fpu operands");
   1.929 +      break;
   1.930 +
   1.931 +    case lir_fpop_raw:
   1.932 +    case lir_fxch:
   1.933 +    case lir_fld:
   1.934 +      assert(false, "operations only inserted by FpuStackAllocator");
   1.935 +      break;
   1.936 +  }
   1.937 +}
   1.938 +#endif
   1.939 +
   1.940 +
   1.941 +void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) {
   1.942 +  LIR_Op1* move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset());
   1.943 +
   1.944 +  instrs->instructions_list()->push(move);
   1.945 +
   1.946 +  cur_sim->push(reg);
   1.947 +  move->set_result_opr(to_fpu_stack(move->result_opr()));
   1.948 +
   1.949 +  #ifndef PRODUCT
   1.950 +    if (TraceFPUStack) {
   1.951 +      tty->print("Added new register: %d         New state: ", reg); cur_sim->print(); tty->cr();
   1.952 +    }
   1.953 +  #endif
   1.954 +}
   1.955 +
   1.956 +void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) {
   1.957 +  assert(slot > 0, "no exchange necessary");
   1.958 +
   1.959 +  LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot));
   1.960 +  instrs->instructions_list()->push(fxch);
   1.961 +  cur_sim->swap(slot);
   1.962 +
   1.963 +  #ifndef PRODUCT
   1.964 +    if (TraceFPUStack) {
   1.965 +      tty->print("Exchanged register: %d         New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr();
   1.966 +    }
   1.967 +  #endif
   1.968 +}
   1.969 +
   1.970 +void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) {
   1.971 +  int reg = cur_sim->get_slot(0);
   1.972 +
   1.973 +  LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
   1.974 +  instrs->instructions_list()->push(fpop);
   1.975 +  cur_sim->pop(reg);
   1.976 +
   1.977 +  #ifndef PRODUCT
   1.978 +    if (TraceFPUStack) {
   1.979 +      tty->print("Removed register: %d           New state: ", reg); cur_sim->print(); tty->cr();
   1.980 +    }
   1.981 +  #endif
   1.982 +}
   1.983 +
   1.984 +bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) {
   1.985 +  int reg = cur_sim->get_slot(change_slot);
   1.986 +
   1.987 +  for (int slot = start_slot; slot >= 0; slot--) {
   1.988 +    int new_reg = sux_sim->get_slot(slot);
   1.989 +
   1.990 +    if (!cur_sim->contains(new_reg)) {
   1.991 +      cur_sim->set_slot(change_slot, new_reg);
   1.992 +
   1.993 +      #ifndef PRODUCT
   1.994 +        if (TraceFPUStack) {
   1.995 +          tty->print("Renamed register %d to %d       New state: ", reg, new_reg); cur_sim->print(); tty->cr();
   1.996 +        }
   1.997 +      #endif
   1.998 +
   1.999 +      return true;
  1.1000 +    }
  1.1001 +  }
  1.1002 +  return false;
  1.1003 +}
  1.1004 +
  1.1005 +
// Transforms the predecessor's simulated FPU stack (cur_sim) into the layout
// expected by the successor (sux_sim), emitting the required load/fxch/fpop
// instructions into instrs. Afterwards both stack states are identical.
void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) {
#ifndef PRODUCT
  if (TraceFPUStack) {
    tty->cr();
    tty->print("before merging: pred: "); cur_sim->print(); tty->cr();
    tty->print("                 sux: "); sux_sim->print(); tty->cr();
  }

  // the algorithm below assumes dense stacks: no empty slots in either state
  int slot;
  for (slot = 0; slot < cur_sim->stack_size(); slot++) {
    assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm");
  }
  for (slot = 0; slot < sux_sim->stack_size(); slot++) {
    assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm");
  }
#endif

  // size difference between cur and sux that must be resolved by adding or removing values from the stack
  int size_diff = cur_sim->stack_size() - sux_sim->stack_size();

  if (!ComputeExactFPURegisterUsage) {
    // add slots that are currently free, but used in successor
    // When the exact FPU register usage is computed, the stack does
    // not contain dead values at merging -> no values must be added
    // (walk the successor stack bottom-up and push dummy values for
    // registers the current stack lacks, until the sizes agree)

    int sux_slot = sux_sim->stack_size() - 1;
    while (size_diff < 0) {
      assert(sux_slot >= 0, "slot out of bounds -> error in algorithm");

      int reg = sux_sim->get_slot(sux_slot);
      if (!cur_sim->contains(reg)) {
        merge_insert_add(instrs, cur_sim, reg);
        size_diff++;

        if (sux_slot + size_diff != 0) {
          // move the freshly added value off the top towards its target slot
          merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);
        }
      }
     sux_slot--;
    }
  }

  assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now");
  assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be");

  // stack merge algorithm:
  // 1) as long as the current stack top is not in the right location (that means
  //    it should not be on the stack top), exchange it into the right location
  // 2) if the stack top is right, but the remaining stack is not ordered correctly,
  //    the stack top is exchanged away to get another value on top ->
  //    now step 1) can be continued
  // the stack can also contain unused items -> these items are removed from stack

  int finished_slot = sux_sim->stack_size() - 1;
  while (finished_slot >= 0 || size_diff > 0) {
    while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) {
      int reg = cur_sim->get_slot(0);
      if (sux_sim->contains(reg)) {
        // step 1): the top value is needed by the successor -> exchange it
        // into its target slot (offset shifted by the remaining size_diff)
        int sux_slot = sux_sim->offset_from_tos(reg);
        merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);

      } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) {
        // top value is not needed and could not be renamed -> it is dead;
        // pop it to shrink the size difference
        assert(size_diff > 0, "must be");

        merge_insert_pop(instrs, cur_sim);
        size_diff--;
      }
      assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed");
    }

    // skip over the slots (from the bottom upwards) that already match
    while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) {
      finished_slot--;
    }

    if (finished_slot >= 0) {
      // step 2): bring a not-yet-placed value to the top so step 1) can continue
      int reg = cur_sim->get_slot(finished_slot);

      if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) {
        assert(sux_sim->contains(reg) || size_diff > 0, "must be");
        merge_insert_xchg(instrs, cur_sim, finished_slot);
      }
      assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed");
    }
  }

#ifndef PRODUCT
  if (TraceFPUStack) {
    tty->print("after merging:  pred: "); cur_sim->print(); tty->cr();
    tty->print("                 sux: "); sux_sim->print(); tty->cr();
    tty->cr();
  }
#endif
  assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now");
}
  1.1100 +
  1.1101 +
  1.1102 +void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) {
  1.1103 +#ifndef PRODUCT
  1.1104 +  if (TraceFPUStack) {
  1.1105 +    tty->cr();
  1.1106 +    tty->print("before cleanup: state: "); cur_sim->print(); tty->cr();
  1.1107 +    tty->print("                live:  "); live_fpu_regs.print_on(tty); tty->cr();
  1.1108 +  }
  1.1109 +#endif
  1.1110 +
  1.1111 +  int slot = 0;
  1.1112 +  while (slot < cur_sim->stack_size()) {
  1.1113 +    int reg = cur_sim->get_slot(slot);
  1.1114 +    if (!live_fpu_regs.at(reg)) {
  1.1115 +      if (slot != 0) {
  1.1116 +        merge_insert_xchg(instrs, cur_sim, slot);
  1.1117 +      }
  1.1118 +      merge_insert_pop(instrs, cur_sim);
  1.1119 +    } else {
  1.1120 +      slot++;
  1.1121 +    }
  1.1122 +  }
  1.1123 +
  1.1124 +#ifndef PRODUCT
  1.1125 +  if (TraceFPUStack) {
  1.1126 +    tty->print("after cleanup:  state: "); cur_sim->print(); tty->cr();
  1.1127 +    tty->print("                live:  "); live_fpu_regs.print_on(tty); tty->cr();
  1.1128 +    tty->cr();
  1.1129 +  }
  1.1130 +
  1.1131 +  // check if fpu stack only contains live registers
  1.1132 +  for (unsigned int i = 0; i < live_fpu_regs.size(); i++) {
  1.1133 +    if (live_fpu_regs.at(i) != cur_sim->contains(i)) {
  1.1134 +      tty->print_cr("mismatch between required and actual stack content");
  1.1135 +      break;
  1.1136 +    }
  1.1137 +  }
  1.1138 +#endif
  1.1139 +}
  1.1140 +
  1.1141 +
  1.1142 +bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) {
  1.1143 +#ifndef PRODUCT
  1.1144 +  if (TraceFPUStack) {
  1.1145 +    tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:",
  1.1146 +                  block->block_id(), pos());
  1.1147 +    sim()->print();
  1.1148 +    tty->cr();
  1.1149 +  }
  1.1150 +#endif
  1.1151 +
  1.1152 +  bool changed = false;
  1.1153 +  int number_of_sux = block->number_of_sux();
  1.1154 +
  1.1155 +  if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) {
  1.1156 +    // The successor has at least two incoming edges, so a stack merge will be necessary
  1.1157 +    // If this block is the first predecessor, cleanup the current stack and propagate it
  1.1158 +    // If this block is not the first predecessor, a stack merge will be necessary
  1.1159 +
  1.1160 +    BlockBegin* sux = block->sux_at(0);
  1.1161 +    intArray* state = sux->fpu_stack_state();
  1.1162 +    LIR_List* instrs = new LIR_List(_compilation);
  1.1163 +
  1.1164 +    if (state != NULL) {
  1.1165 +      // Merge with a successors that already has a FPU stack state
  1.1166 +      // the block must only have one successor because critical edges must been split
  1.1167 +      FpuStackSim* cur_sim = sim();
  1.1168 +      FpuStackSim* sux_sim = temp_sim();
  1.1169 +      sux_sim->read_state(state);
  1.1170 +
  1.1171 +      merge_fpu_stack(instrs, cur_sim, sux_sim);
  1.1172 +
  1.1173 +    } else {
  1.1174 +      // propagate current FPU stack state to successor without state
  1.1175 +      // clean up stack first so that there are no dead values on the stack
  1.1176 +      if (ComputeExactFPURegisterUsage) {
  1.1177 +        FpuStackSim* cur_sim = sim();
  1.1178 +        BitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage();
  1.1179 +        assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage");
  1.1180 +
  1.1181 +        merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs);
  1.1182 +      }
  1.1183 +
  1.1184 +      intArray* state = sim()->write_state();
  1.1185 +      if (TraceFPUStack) {
  1.1186 +        tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id());
  1.1187 +        sim()->print(); tty->cr();
  1.1188 +      }
  1.1189 +      sux->set_fpu_stack_state(state);
  1.1190 +    }
  1.1191 +
  1.1192 +    if (instrs->instructions_list()->length() > 0) {
  1.1193 +      lir()->insert_before(pos(), instrs);
  1.1194 +      set_pos(instrs->instructions_list()->length() + pos());
  1.1195 +      changed = true;
  1.1196 +    }
  1.1197 +
  1.1198 +  } else {
  1.1199 +    // Propagate unmodified Stack to successors where a stack merge is not necessary
  1.1200 +    intArray* state = sim()->write_state();
  1.1201 +    for (int i = 0; i < number_of_sux; i++) {
  1.1202 +      BlockBegin* sux = block->sux_at(i);
  1.1203 +
  1.1204 +#ifdef ASSERT
  1.1205 +      for (int j = 0; j < sux->number_of_preds(); j++) {
  1.1206 +        assert(block == sux->pred_at(j), "all critical edges must be broken");
  1.1207 +      }
  1.1208 +
  1.1209 +      // check if new state is same
  1.1210 +      if (sux->fpu_stack_state() != NULL) {
  1.1211 +        intArray* sux_state = sux->fpu_stack_state();
  1.1212 +        assert(state->length() == sux_state->length(), "overwriting existing stack state");
  1.1213 +        for (int j = 0; j < state->length(); j++) {
  1.1214 +          assert(state->at(j) == sux_state->at(j), "overwriting existing stack state");
  1.1215 +        }
  1.1216 +      }
  1.1217 +#endif
  1.1218 +#ifndef PRODUCT
  1.1219 +      if (TraceFPUStack) {
  1.1220 +        tty->print_cr("Setting FPU stack state of B%d", sux->block_id());
  1.1221 +        sim()->print(); tty->cr();
  1.1222 +      }
  1.1223 +#endif
  1.1224 +
  1.1225 +      sux->set_fpu_stack_state(state);
  1.1226 +    }
  1.1227 +  }
  1.1228 +
  1.1229 +#ifndef PRODUCT
  1.1230 +  // assertions that FPU stack state conforms to all successors' states
  1.1231 +  intArray* cur_state = sim()->write_state();
  1.1232 +  for (int i = 0; i < number_of_sux; i++) {
  1.1233 +    BlockBegin* sux = block->sux_at(i);
  1.1234 +    intArray* sux_state = sux->fpu_stack_state();
  1.1235 +
  1.1236 +    assert(sux_state != NULL, "no fpu state");
  1.1237 +    assert(cur_state->length() == sux_state->length(), "incorrect length");
  1.1238 +    for (int i = 0; i < cur_state->length(); i++) {
  1.1239 +      assert(cur_state->at(i) == sux_state->at(i), "element not equal");
  1.1240 +    }
  1.1241 +  }
  1.1242 +#endif
  1.1243 +
  1.1244 +  return changed;
  1.1245 +}

mercurial