src/cpu/x86/vm/c1_LinearScan_x86.cpp

changeset 435:a61af66fc99e
child 1388:ff1a29907b6c
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,1168 @@
     1.4 +/*
     1.5 + * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_c1_LinearScan_x86.cpp.incl"
    1.30 +
    1.31 +
    1.32 +//----------------------------------------------------------------------
    1.33 +// Allocation of FPU stack slots (Intel x86 only)
    1.34 +//----------------------------------------------------------------------
    1.35 +
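          +// Overview: the IA-32 FPU keeps its eight registers organized as a stack that
          +// is addressed relative to the top-of-stack (TOS).  The linear scan allocator
          +// assigns virtual fpu register numbers to intervals; the pass below rewrites
          +// these numbers into stack positions, inserting fxch/fld/fpop operations where
          +// necessary, and ensures that all predecessors of a block leave the FPU stack
          +// in the same layout (stack merging).
          +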
    1.36 +void LinearScan::allocate_fpu_stack() {
    1.37 +  // First compute which FPU registers are live at the start of each basic block
    1.38 +  // (To minimize the amount of work we have to do if we have to merge FPU stacks)
    1.39 +  if (ComputeExactFPURegisterUsage) {
    1.40 +    Interval* intervals_in_register, *intervals_in_memory;
    1.41 +    create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL);
    1.42 +
    1.43 +    // ignore memory intervals by overwriting intervals_in_memory
     1.44 +    // the dummy interval is needed to force the walker to walk until the given id:
     1.45 +    // without it, the walker stops when the unhandled list is empty -> live information
     1.46 +    // beyond this point would be incorrect.
    1.47 +    Interval* dummy_interval = new Interval(any_reg);
    1.48 +    dummy_interval->add_range(max_jint - 2, max_jint - 1);
    1.49 +    dummy_interval->set_next(Interval::end());
    1.50 +    intervals_in_memory = dummy_interval;
    1.51 +
    1.52 +    IntervalWalker iw(this, intervals_in_register, intervals_in_memory);
    1.53 +
    1.54 +    const int num_blocks = block_count();
    1.55 +    for (int i = 0; i < num_blocks; i++) {
    1.56 +      BlockBegin* b = block_at(i);
    1.57 +
    1.58 +      // register usage is only needed for merging stacks -> compute only
    1.59 +      // when more than one predecessor.
     1.60 +      // the block must not have any spill moves at the beginning (checked by assertions);
     1.61 +      // spill moves would use intervals that are marked as handled, so the usage bit
     1.62 +      // would have been set incorrectly
    1.63 +
    1.64 +      // NOTE: the check for number_of_preds > 1 is necessary. A block with only one
     1.65 +      //       predecessor may have spill moves at the beginning of the block.
     1.66 +      //       If an interval ends at the current instruction id, it is not possible
     1.67 +      //       to decide if the register is live or not at the beginning of the block -> the
    1.68 +      //       register information would be incorrect.
    1.69 +      if (b->number_of_preds() > 1) {
    1.70 +        int id = b->first_lir_instruction_id();
    1.71 +        BitMap regs(FrameMap::nof_fpu_regs);
    1.72 +        regs.clear();
    1.73 +
    1.74 +        iw.walk_to(id);   // walk after the first instruction (always a label) of the block
    1.75 +        assert(iw.current_position() == id, "did not walk completely to id");
    1.76 +
    1.77 +        // Only consider FPU values in registers
    1.78 +        Interval* interval = iw.active_first(fixedKind);
    1.79 +        while (interval != Interval::end()) {
    1.80 +          int reg = interval->assigned_reg();
    1.81 +          assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register");
    1.82 +          assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)");
    1.83 +          assert(interval->from() <= id && id < interval->to(), "interval out of range");
    1.84 +
    1.85 +#ifndef PRODUCT
    1.86 +          if (TraceFPURegisterUsage) {
    1.87 +            tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print();
    1.88 +          }
    1.89 +#endif
    1.90 +
    1.91 +          regs.set_bit(reg - pd_first_fpu_reg);
    1.92 +          interval = interval->next();
    1.93 +        }
    1.94 +
    1.95 +        b->set_fpu_register_usage(regs);
    1.96 +
    1.97 +#ifndef PRODUCT
    1.98 +        if (TraceFPURegisterUsage) {
     1.99 +          tty->print("FPU regs for block %d (LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->print_cr("");
   1.100 +        }
   1.101 +#endif
   1.102 +      }
   1.103 +    }
   1.104 +  }
   1.105 +
   1.106 +  FpuStackAllocator alloc(ir()->compilation(), this);
   1.107 +  _fpu_stack_allocator = &alloc;
   1.108 +  alloc.allocate();
   1.109 +  _fpu_stack_allocator = NULL;
   1.110 +}
   1.111 +
   1.112 +
   1.113 +FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator)
   1.114 +  : _compilation(compilation)
   1.115 +  , _lir(NULL)
   1.116 +  , _pos(-1)
   1.117 +  , _allocator(allocator)
   1.118 +  , _sim(compilation)
   1.119 +  , _temp_sim(compilation)
   1.120 +{}
   1.121 +
   1.122 +void FpuStackAllocator::allocate() {
   1.123 +  int num_blocks = allocator()->block_count();
   1.124 +  for (int i = 0; i < num_blocks; i++) {
   1.125 +    // Set up to process block
   1.126 +    BlockBegin* block = allocator()->block_at(i);
   1.127 +    intArray* fpu_stack_state = block->fpu_stack_state();
   1.128 +
   1.129 +#ifndef PRODUCT
   1.130 +    if (TraceFPUStack) {
   1.131 +      tty->cr();
   1.132 +      tty->print_cr("------- Begin of new Block %d -------", block->block_id());
   1.133 +    }
   1.134 +#endif
   1.135 +
   1.136 +    assert(fpu_stack_state != NULL ||
   1.137 +           block->end()->as_Base() != NULL ||
   1.138 +           block->is_set(BlockBegin::exception_entry_flag),
   1.139 +           "FPU stack state must be present due to linear-scan order for FPU stack allocation");
   1.140 +    // note: exception handler entries always start with an empty fpu stack
   1.141 +    //       because stack merging would be too complicated
   1.142 +
   1.143 +    if (fpu_stack_state != NULL) {
   1.144 +      sim()->read_state(fpu_stack_state);
   1.145 +    } else {
   1.146 +      sim()->clear();
   1.147 +    }
   1.148 +
   1.149 +#ifndef PRODUCT
   1.150 +    if (TraceFPUStack) {
   1.151 +      tty->print("Reading FPU state for block %d:", block->block_id());
   1.152 +      sim()->print();
   1.153 +      tty->cr();
   1.154 +    }
   1.155 +#endif
   1.156 +
   1.157 +    allocate_block(block);
   1.158 +    CHECK_BAILOUT();
   1.159 +  }
   1.160 +}
   1.161 +
   1.162 +void FpuStackAllocator::allocate_block(BlockBegin* block) {
   1.163 +  bool processed_merge = false;
   1.164 +  LIR_OpList* insts = block->lir()->instructions_list();
   1.165 +  set_lir(block->lir());
   1.166 +  set_pos(0);
   1.167 +
   1.168 +
   1.169 +  // Note: insts->length() may change during loop
   1.170 +  while (pos() < insts->length()) {
   1.171 +    LIR_Op* op = insts->at(pos());
   1.172 +    _debug_information_computed = false;
   1.173 +
   1.174 +#ifndef PRODUCT
   1.175 +    if (TraceFPUStack) {
   1.176 +      op->print();
   1.177 +    }
   1.178 +    check_invalid_lir_op(op);
   1.179 +#endif
   1.180 +
   1.181 +    LIR_OpBranch* branch = op->as_OpBranch();
   1.182 +    LIR_Op1* op1 = op->as_Op1();
   1.183 +    LIR_Op2* op2 = op->as_Op2();
   1.184 +    LIR_OpCall* opCall = op->as_OpCall();
   1.185 +
   1.186 +    if (branch != NULL && branch->block() != NULL) {
   1.187 +      if (!processed_merge) {
   1.188 +        // propagate stack at first branch to a successor
   1.189 +        processed_merge = true;
   1.190 +        bool required_merge = merge_fpu_stack_with_successors(block);
   1.191 +
   1.192 +        assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches");
   1.193 +      }
   1.194 +
   1.195 +    } else if (op1 != NULL) {
   1.196 +      handle_op1(op1);
   1.197 +    } else if (op2 != NULL) {
   1.198 +      handle_op2(op2);
   1.199 +    } else if (opCall != NULL) {
   1.200 +      handle_opCall(opCall);
   1.201 +    }
   1.202 +
   1.203 +    compute_debug_information(op);
   1.204 +
   1.205 +    set_pos(1 + pos());
   1.206 +  }
   1.207 +
   1.208 +  // Propagate stack when block does not end with branch
   1.209 +  if (!processed_merge) {
   1.210 +    merge_fpu_stack_with_successors(block);
   1.211 +  }
   1.212 +}
   1.213 +
   1.214 +
   1.215 +void FpuStackAllocator::compute_debug_information(LIR_Op* op) {
   1.216 +  if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) {
   1.217 +    visitor.visit(op);
   1.218 +
   1.219 +    // exception handling
   1.220 +    if (allocator()->compilation()->has_exception_handlers()) {
   1.221 +      XHandlers* xhandlers = visitor.all_xhandler();
   1.222 +      int n = xhandlers->length();
   1.223 +      for (int k = 0; k < n; k++) {
   1.224 +        allocate_exception_handler(xhandlers->handler_at(k));
   1.225 +      }
   1.226 +    } else {
   1.227 +      assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
   1.228 +    }
   1.229 +
   1.230 +    // compute debug information
   1.231 +    int n = visitor.info_count();
   1.232 +    assert(n > 0, "should not visit operation otherwise");
   1.233 +
   1.234 +    for (int j = 0; j < n; j++) {
   1.235 +      CodeEmitInfo* info = visitor.info_at(j);
   1.236 +      // Compute debug information
   1.237 +      allocator()->compute_debug_info(info, op->id());
   1.238 +    }
   1.239 +  }
   1.240 +  _debug_information_computed = true;
   1.241 +}
   1.242 +
   1.243 +void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) {
   1.244 +  if (!sim()->is_empty()) {
   1.245 +    LIR_List* old_lir = lir();
   1.246 +    int old_pos = pos();
   1.247 +    intArray* old_state = sim()->write_state();
   1.248 +
   1.249 +#ifndef PRODUCT
   1.250 +    if (TraceFPUStack) {
   1.251 +      tty->cr();
   1.252 +      tty->print_cr("------- begin of exception handler -------");
   1.253 +    }
   1.254 +#endif
   1.255 +
   1.256 +    if (xhandler->entry_code() == NULL) {
   1.257 +      // need entry code to clear FPU stack
   1.258 +      LIR_List* entry_code = new LIR_List(_compilation);
   1.259 +      entry_code->jump(xhandler->entry_block());
   1.260 +      xhandler->set_entry_code(entry_code);
   1.261 +    }
   1.262 +
   1.263 +    LIR_OpList* insts = xhandler->entry_code()->instructions_list();
   1.264 +    set_lir(xhandler->entry_code());
   1.265 +    set_pos(0);
   1.266 +
   1.267 +    // Note: insts->length() may change during loop
   1.268 +    while (pos() < insts->length()) {
   1.269 +      LIR_Op* op = insts->at(pos());
   1.270 +
   1.271 +#ifndef PRODUCT
   1.272 +      if (TraceFPUStack) {
   1.273 +        op->print();
   1.274 +      }
   1.275 +      check_invalid_lir_op(op);
   1.276 +#endif
   1.277 +
   1.278 +      switch (op->code()) {
   1.279 +        case lir_move:
   1.280 +          assert(op->as_Op1() != NULL, "must be LIR_Op1");
   1.281 +          assert(pos() != insts->length() - 1, "must not be last operation");
   1.282 +
   1.283 +          handle_op1((LIR_Op1*)op);
   1.284 +          break;
   1.285 +
   1.286 +        case lir_branch:
   1.287 +          assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch");
   1.288 +          assert(pos() == insts->length() - 1, "must be last operation");
   1.289 +
   1.290 +          // remove all remaining dead registers from FPU stack
   1.291 +          clear_fpu_stack(LIR_OprFact::illegalOpr);
   1.292 +          break;
   1.293 +
   1.294 +        default:
   1.295 +          // other operations not allowed in exception entry code
   1.296 +          ShouldNotReachHere();
   1.297 +      }
   1.298 +
   1.299 +      set_pos(pos() + 1);
   1.300 +    }
   1.301 +
   1.302 +#ifndef PRODUCT
   1.303 +    if (TraceFPUStack) {
   1.304 +      tty->cr();
   1.305 +      tty->print_cr("------- end of exception handler -------");
   1.306 +    }
   1.307 +#endif
   1.308 +
   1.309 +    set_lir(old_lir);
   1.310 +    set_pos(old_pos);
   1.311 +    sim()->read_state(old_state);
   1.312 +  }
   1.313 +}
   1.314 +
   1.315 +
   1.316 +int FpuStackAllocator::fpu_num(LIR_Opr opr) {
   1.317 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.318 +  return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo();
   1.319 +}
   1.320 +
   1.321 +int FpuStackAllocator::tos_offset(LIR_Opr opr) {
   1.322 +  return sim()->offset_from_tos(fpu_num(opr));
   1.323 +}
   1.324 +
   1.325 +
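          +// to_fpu_stack/to_fpu_stack_top translate an operand that names a virtual fpu
          +// register into an operand that names its current position on the FPU stack
          +// (its distance from the top-of-stack); make_fpu_stack_offset() marks the
          +// operand as a stack offset rather than a register number.  to_fpu_stack_top
          +// additionally asserts that the operand is already at offset 0.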
   1.326 +LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) {
   1.327 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.328 +
   1.329 +  int stack_offset = tos_offset(opr);
   1.330 +  if (opr->is_single_fpu()) {
   1.331 +    return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
   1.332 +  } else {
   1.333 +    assert(opr->is_double_fpu(), "shouldn't call this otherwise");
   1.334 +    return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
   1.335 +  }
   1.336 +}
   1.337 +
   1.338 +LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) {
   1.339 +  assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise");
   1.340 +  assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top");
   1.341 +
   1.342 +  int stack_offset = 0;
   1.343 +  if (opr->is_single_fpu()) {
   1.344 +    return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset();
   1.345 +  } else {
   1.346 +    assert(opr->is_double_fpu(), "shouldn't call this otherwise");
   1.347 +    return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset();
   1.348 +  }
   1.349 +}
   1.350 +
   1.351 +
   1.352 +
   1.353 +void FpuStackAllocator::insert_op(LIR_Op* op) {
   1.354 +  lir()->insert_before(pos(), op);
   1.355 +  set_pos(1 + pos());
   1.356 +}
   1.357 +
   1.358 +
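          +// insert_exchange emits an fxch that swaps the value at the given stack slot
          +// with the top-of-stack and mirrors the swap in the simulator.  For example
          +// (register numbers are arbitrary): with a simulated stack [r3 r7 r2], TOS
          +// first, insert_exchange(2) emits an fxch for slot 2 and leaves the stack as
          +// [r2 r7 r3].  An offset of 0 means the value is already on top, so nothing
          +// is emitted.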
   1.359 +void FpuStackAllocator::insert_exchange(int offset) {
   1.360 +  if (offset > 0) {
   1.361 +    LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
   1.362 +    insert_op(fxch_op);
   1.363 +    sim()->swap(offset);
   1.364 +
   1.365 +#ifndef PRODUCT
   1.366 +    if (TraceFPUStack) {
   1.367 +      tty->print("Exchanged register: %d         New state: ", sim()->get_slot(0)); sim()->print(); tty->cr();
   1.368 +    }
   1.369 +#endif
   1.370 +
   1.371 +  }
   1.372 +}
   1.373 +
   1.374 +void FpuStackAllocator::insert_exchange(LIR_Opr opr) {
   1.375 +  insert_exchange(tos_offset(opr));
   1.376 +}
   1.377 +
   1.378 +
   1.379 +void FpuStackAllocator::insert_free(int offset) {
   1.380 +  // move stack slot to the top of stack and then pop it
   1.381 +  insert_exchange(offset);
   1.382 +
   1.383 +  LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
   1.384 +  insert_op(fpop);
   1.385 +  sim()->pop();
   1.386 +
   1.387 +#ifndef PRODUCT
   1.388 +    if (TraceFPUStack) {
   1.389 +      tty->print("Inserted pop                   New state: "); sim()->print(); tty->cr();
   1.390 +    }
   1.391 +#endif
   1.392 +}
   1.393 +
   1.394 +
   1.395 +void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) {
   1.396 +  if (sim()->contains(fpu_num(opr))) {
   1.397 +    int res_slot = tos_offset(opr);
   1.398 +    insert_free(res_slot);
   1.399 +  }
   1.400 +}
   1.401 +
   1.402 +void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) {
   1.403 +  if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) {
   1.404 +    int res_slot = tos_offset(opr);
   1.405 +    insert_free(res_slot);
   1.406 +  }
   1.407 +}
   1.408 +
   1.409 +void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) {
   1.410 +  int offset = tos_offset(from);
   1.411 +  LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr);
   1.412 +  insert_op(fld);
   1.413 +
   1.414 +  sim()->push(fpu_num(to));
   1.415 +
   1.416 +#ifndef PRODUCT
   1.417 +  if (TraceFPUStack) {
   1.418 +    tty->print("Inserted copy (%d -> %d)         New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr();
   1.419 +  }
   1.420 +#endif
   1.421 +}
   1.422 +
   1.423 +void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) {
   1.424 +  sim()->rename(fpu_num(from), fpu_num(to));
   1.425 +}
   1.426 +
   1.427 +void FpuStackAllocator::do_push(LIR_Opr opr) {
   1.428 +  sim()->push(fpu_num(opr));
   1.429 +}
   1.430 +
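          +// pop_if_last_use/pop_always mark the instruction itself as popping the
          +// top-of-stack (via fpu_pop_count, which the LIR assembler evaluates when
          +// emitting the instruction) and mirror that pop in the simulator.
          +// pop_if_last_use only does so when this is the last use of the operand,
          +// i.e. when its value is dead afterwards.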
   1.431 +void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) {
    1.432 +  assert(op->fpu_pop_count() == 0, "fpu_pop_count already set");
   1.433 +  assert(tos_offset(opr) == 0, "can only pop stack top");
   1.434 +
   1.435 +  if (opr->is_last_use()) {
   1.436 +    op->set_fpu_pop_count(1);
   1.437 +    sim()->pop();
   1.438 +  }
   1.439 +}
   1.440 +
   1.441 +void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) {
    1.442 +  assert(op->fpu_pop_count() == 0, "fpu_pop_count already set");
   1.443 +  assert(tos_offset(opr) == 0, "can only pop stack top");
   1.444 +
   1.445 +  op->set_fpu_pop_count(1);
   1.446 +  sim()->pop();
   1.447 +}
   1.448 +
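          +// clear_fpu_stack pops every value off the simulated stack except (optionally)
          +// the "preserve" operand: as long as the stack is larger than the desired
          +// result size, the top-of-stack is either freed (if it is not the preserved
          +// value) or exchanged to the bottom so that the remaining slots can be popped.
          +// Used before calls and returns and at exception handler entries, where dead
          +// values must not survive on the stack.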
   1.449 +void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) {
   1.450 +  int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0);
   1.451 +  while (sim()->stack_size() > result_stack_size) {
   1.452 +    assert(!sim()->slot_is_empty(0), "not allowed");
   1.453 +
   1.454 +    if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) {
   1.455 +      insert_free(0);
   1.456 +    } else {
   1.457 +      // move "preserve" to bottom of stack so that all other stack slots can be popped
   1.458 +      insert_exchange(sim()->stack_size() - 1);
   1.459 +    }
   1.460 +  }
   1.461 +}
   1.462 +
   1.463 +
   1.464 +void FpuStackAllocator::handle_op1(LIR_Op1* op1) {
   1.465 +  LIR_Opr in  = op1->in_opr();
   1.466 +  LIR_Opr res = op1->result_opr();
   1.467 +
   1.468 +  LIR_Opr new_in  = in;  // new operands relative to the actual fpu stack top
   1.469 +  LIR_Opr new_res = res;
   1.470 +
    1.471 +  // Note: this switch is processed for all LIR_Op1, regardless of whether they have FPU arguments,
    1.472 +  //       so checks for is_float_kind() are necessary inside the cases
   1.473 +  switch (op1->code()) {
   1.474 +
   1.475 +    case lir_return: {
    1.476 +      // The FPU stack must only contain the (optional) fpu return value.
    1.477 +      // All remaining dead values are popped from the stack.
    1.478 +      // If the input operand is an fpu register, it is exchanged to the bottom of the stack.
   1.479 +
   1.480 +      clear_fpu_stack(in);
   1.481 +      if (in->is_fpu_register() && !in->is_xmm_register()) {
   1.482 +        new_in = to_fpu_stack_top(in);
   1.483 +      }
   1.484 +
   1.485 +      break;
   1.486 +    }
   1.487 +
   1.488 +    case lir_move: {
   1.489 +      if (in->is_fpu_register() && !in->is_xmm_register()) {
   1.490 +        if (res->is_xmm_register()) {
   1.491 +          // move from fpu register to xmm register (necessary for operations that
   1.492 +          // are not available in the SSE instruction set)
   1.493 +          insert_exchange(in);
   1.494 +          new_in = to_fpu_stack_top(in);
   1.495 +          pop_always(op1, in);
   1.496 +
   1.497 +        } else if (res->is_fpu_register() && !res->is_xmm_register()) {
   1.498 +          // move from fpu-register to fpu-register:
   1.499 +          // * input and result register equal:
   1.500 +          //   nothing to do
   1.501 +          // * input register is last use:
   1.502 +          //   rename the input register to result register -> input register
   1.503 +          //   not present on fpu-stack afterwards
   1.504 +          // * input register not last use:
   1.505 +          //   duplicate input register to result register to preserve input
   1.506 +          //
   1.507 +          // Note: The LIR-Assembler does not produce any code for fpu register moves,
   1.508 +          //       so input and result stack index must be equal
   1.509 +
   1.510 +          if (fpu_num(in) == fpu_num(res)) {
   1.511 +            // nothing to do
   1.512 +          } else if (in->is_last_use()) {
   1.513 +            insert_free_if_dead(res);//, in);
   1.514 +            do_rename(in, res);
   1.515 +          } else {
   1.516 +            insert_free_if_dead(res);
   1.517 +            insert_copy(in, res);
   1.518 +          }
   1.519 +          new_in = to_fpu_stack(res);
   1.520 +          new_res = new_in;
   1.521 +
   1.522 +        } else {
   1.523 +          // move from fpu-register to memory
   1.524 +          // input operand must be on top of stack
   1.525 +
   1.526 +          insert_exchange(in);
   1.527 +
   1.528 +          // create debug information here because afterwards the register may have been popped
   1.529 +          compute_debug_information(op1);
   1.530 +
   1.531 +          new_in = to_fpu_stack_top(in);
   1.532 +          pop_if_last_use(op1, in);
   1.533 +        }
   1.534 +
   1.535 +      } else if (res->is_fpu_register() && !res->is_xmm_register()) {
   1.536 +        // move from memory/constant to fpu register
   1.537 +        // result is pushed on the stack
   1.538 +
   1.539 +        insert_free_if_dead(res);
   1.540 +
   1.541 +        // create debug information before register is pushed
   1.542 +        compute_debug_information(op1);
   1.543 +
   1.544 +        do_push(res);
   1.545 +        new_res = to_fpu_stack_top(res);
   1.546 +      }
   1.547 +      break;
   1.548 +    }
   1.549 +
   1.550 +    case lir_neg: {
   1.551 +      if (in->is_fpu_register() && !in->is_xmm_register()) {
   1.552 +        assert(res->is_fpu_register() && !res->is_xmm_register(), "must be");
   1.553 +        assert(in->is_last_use(), "old value gets destroyed");
   1.554 +
   1.555 +        insert_free_if_dead(res, in);
   1.556 +        insert_exchange(in);
   1.557 +        new_in = to_fpu_stack_top(in);
   1.558 +
   1.559 +        do_rename(in, res);
   1.560 +        new_res = to_fpu_stack_top(res);
   1.561 +      }
   1.562 +      break;
   1.563 +    }
   1.564 +
   1.565 +    case lir_convert: {
   1.566 +      Bytecodes::Code bc = op1->as_OpConvert()->bytecode();
   1.567 +      switch (bc) {
   1.568 +        case Bytecodes::_d2f:
   1.569 +        case Bytecodes::_f2d:
   1.570 +          assert(res->is_fpu_register(), "must be");
   1.571 +          assert(in->is_fpu_register(), "must be");
   1.572 +
   1.573 +          if (!in->is_xmm_register() && !res->is_xmm_register()) {
    1.574 +            // this is essentially the same as a move from fpu-register to fpu-register
   1.575 +            // Note: input and result operands must have different types
   1.576 +            if (fpu_num(in) == fpu_num(res)) {
   1.577 +              // nothing to do
   1.578 +              new_in = to_fpu_stack(in);
   1.579 +            } else if (in->is_last_use()) {
   1.580 +              insert_free_if_dead(res);//, in);
   1.581 +              new_in = to_fpu_stack(in);
   1.582 +              do_rename(in, res);
   1.583 +            } else {
   1.584 +              insert_free_if_dead(res);
   1.585 +              insert_copy(in, res);
   1.586 +              new_in = to_fpu_stack_top(in, true);
   1.587 +            }
   1.588 +            new_res = to_fpu_stack(res);
   1.589 +          }
   1.590 +
   1.591 +          break;
   1.592 +
   1.593 +        case Bytecodes::_i2f:
   1.594 +        case Bytecodes::_l2f:
   1.595 +        case Bytecodes::_i2d:
   1.596 +        case Bytecodes::_l2d:
   1.597 +          assert(res->is_fpu_register(), "must be");
   1.598 +          if (!res->is_xmm_register()) {
   1.599 +            insert_free_if_dead(res);
   1.600 +            do_push(res);
   1.601 +            new_res = to_fpu_stack_top(res);
   1.602 +          }
   1.603 +          break;
   1.604 +
   1.605 +        case Bytecodes::_f2i:
   1.606 +        case Bytecodes::_d2i:
   1.607 +          assert(in->is_fpu_register(), "must be");
   1.608 +          if (!in->is_xmm_register()) {
   1.609 +            insert_exchange(in);
   1.610 +            new_in = to_fpu_stack_top(in);
   1.611 +
    1.612 +            // TODO: update registers of stub
   1.613 +          }
   1.614 +          break;
   1.615 +
   1.616 +        case Bytecodes::_f2l:
   1.617 +        case Bytecodes::_d2l:
   1.618 +          assert(in->is_fpu_register(), "must be");
   1.619 +          if (!in->is_xmm_register()) {
   1.620 +            insert_exchange(in);
   1.621 +            new_in = to_fpu_stack_top(in);
   1.622 +            pop_always(op1, in);
   1.623 +          }
   1.624 +          break;
   1.625 +
   1.626 +        case Bytecodes::_i2l:
   1.627 +        case Bytecodes::_l2i:
   1.628 +        case Bytecodes::_i2b:
   1.629 +        case Bytecodes::_i2c:
   1.630 +        case Bytecodes::_i2s:
   1.631 +          // no fpu operands
   1.632 +          break;
   1.633 +
   1.634 +        default:
   1.635 +          ShouldNotReachHere();
   1.636 +      }
   1.637 +      break;
   1.638 +    }
   1.639 +
   1.640 +    case lir_roundfp: {
   1.641 +      assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register");
   1.642 +      assert(res->is_stack(), "result must be on stack");
   1.643 +
   1.644 +      insert_exchange(in);
   1.645 +      new_in = to_fpu_stack_top(in);
   1.646 +      pop_if_last_use(op1, in);
   1.647 +      break;
   1.648 +    }
   1.649 +
   1.650 +    default: {
   1.651 +      assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation");
   1.652 +    }
   1.653 +  }
   1.654 +
   1.655 +  op1->set_in_opr(new_in);
   1.656 +  op1->set_result_opr(new_res);
   1.657 +}
   1.658 +
   1.659 +void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
   1.660 +  LIR_Opr left  = op2->in_opr1();
   1.661 +  if (!left->is_float_kind()) {
   1.662 +    return;
   1.663 +  }
   1.664 +  if (left->is_xmm_register()) {
   1.665 +    return;
   1.666 +  }
   1.667 +
   1.668 +  LIR_Opr right = op2->in_opr2();
   1.669 +  LIR_Opr res   = op2->result_opr();
   1.670 +  LIR_Opr new_left  = left;  // new operands relative to the actual fpu stack top
   1.671 +  LIR_Opr new_right = right;
   1.672 +  LIR_Opr new_res   = res;
   1.673 +
   1.674 +  assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers");
   1.675 +
   1.676 +  switch (op2->code()) {
   1.677 +    case lir_cmp:
   1.678 +    case lir_cmp_fd2i:
   1.679 +    case lir_ucmp_fd2i: {
   1.680 +      assert(left->is_fpu_register(), "invalid LIR");
   1.681 +      assert(right->is_fpu_register(), "invalid LIR");
   1.682 +
   1.683 +      // the left-hand side must be on top of stack.
   1.684 +      // the right-hand side is never popped, even if is_last_use is set
   1.685 +      insert_exchange(left);
   1.686 +      new_left = to_fpu_stack_top(left);
   1.687 +      new_right = to_fpu_stack(right);
   1.688 +      pop_if_last_use(op2, left);
   1.689 +      break;
   1.690 +    }
   1.691 +
   1.692 +    case lir_mul_strictfp:
   1.693 +    case lir_div_strictfp: {
   1.694 +      assert(op2->tmp_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot");
   1.695 +      insert_free_if_dead(op2->tmp_opr());
   1.696 +      assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
   1.697 +      // fall-through: continue with the normal handling of lir_mul and lir_div
   1.698 +    }
   1.699 +    case lir_add:
   1.700 +    case lir_sub:
   1.701 +    case lir_mul:
   1.702 +    case lir_div: {
   1.703 +      assert(left->is_fpu_register(), "must be");
   1.704 +      assert(res->is_fpu_register(), "must be");
   1.705 +      assert(left->is_equal(res), "must be");
   1.706 +
   1.707 +      // either the left-hand or the right-hand side must be on top of stack
   1.708 +      // (if right is not a register, left must be on top)
   1.709 +      if (!right->is_fpu_register()) {
   1.710 +        insert_exchange(left);
   1.711 +        new_left = to_fpu_stack_top(left);
   1.712 +      } else {
    1.713 +        // no exchange necessary if right is already on top of stack
   1.714 +        if (tos_offset(right) == 0) {
   1.715 +          new_left = to_fpu_stack(left);
   1.716 +          new_right = to_fpu_stack_top(right);
   1.717 +        } else {
   1.718 +          insert_exchange(left);
   1.719 +          new_left = to_fpu_stack_top(left);
   1.720 +          new_right = to_fpu_stack(right);
   1.721 +        }
   1.722 +
   1.723 +        if (right->is_last_use()) {
   1.724 +          op2->set_fpu_pop_count(1);
   1.725 +
   1.726 +          if (tos_offset(right) == 0) {
   1.727 +            sim()->pop();
   1.728 +          } else {
   1.729 +            // if left is on top of stack, the result is placed in the stack
   1.730 +            // slot of right, so a renaming from right to res is necessary
   1.731 +            assert(tos_offset(left) == 0, "must be");
   1.732 +            sim()->pop();
   1.733 +            do_rename(right, res);
   1.734 +          }
   1.735 +        }
   1.736 +      }
   1.737 +      new_res = to_fpu_stack(res);
   1.738 +
   1.739 +      break;
   1.740 +    }
   1.741 +
   1.742 +    case lir_rem: {
   1.743 +      assert(left->is_fpu_register(), "must be");
   1.744 +      assert(right->is_fpu_register(), "must be");
   1.745 +      assert(res->is_fpu_register(), "must be");
   1.746 +      assert(left->is_equal(res), "must be");
   1.747 +
    1.748 +      // Must bring both operands to the top of the stack, with the following operand ordering:
   1.749 +      // * fpu stack before rem: ... right left
   1.750 +      // * fpu stack after rem:  ... left
   1.751 +      if (tos_offset(right) != 1) {
   1.752 +        insert_exchange(right);
   1.753 +        insert_exchange(1);
   1.754 +      }
   1.755 +      insert_exchange(left);
   1.756 +      assert(tos_offset(right) == 1, "check");
   1.757 +      assert(tos_offset(left) == 0, "check");
   1.758 +
   1.759 +      new_left = to_fpu_stack_top(left);
   1.760 +      new_right = to_fpu_stack(right);
   1.761 +
   1.762 +      op2->set_fpu_pop_count(1);
   1.763 +      sim()->pop();
   1.764 +      do_rename(right, res);
   1.765 +
   1.766 +      new_res = to_fpu_stack_top(res);
   1.767 +      break;
   1.768 +    }
   1.769 +
   1.770 +    case lir_log:
   1.771 +    case lir_log10:
   1.772 +    case lir_abs:
   1.773 +    case lir_sqrt: {
   1.774 +      // Right argument appears to be unused
   1.775 +      assert(right->is_illegal(), "must be");
   1.776 +      assert(left->is_fpu_register(), "must be");
   1.777 +      assert(res->is_fpu_register(), "must be");
   1.778 +      assert(left->is_last_use(), "old value gets destroyed");
   1.779 +
   1.780 +      insert_free_if_dead(res, left);
   1.781 +      insert_exchange(left);
   1.782 +      do_rename(left, res);
   1.783 +
   1.784 +      new_left = to_fpu_stack_top(res);
   1.785 +      new_res = new_left;
   1.786 +
   1.787 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.788 +      break;
   1.789 +    }
   1.790 +
   1.791 +
   1.792 +    case lir_tan:
   1.793 +    case lir_sin:
   1.794 +    case lir_cos: {
   1.795 +      // sin and cos need two temporary fpu stack slots, so there are two temporary
   1.796 +      // registers (stored in right and temp of the operation).
   1.797 +      // the stack allocator must guarantee that the stack slots are really free,
   1.798 +      // otherwise there might be a stack overflow.
   1.799 +      assert(left->is_fpu_register(), "must be");
   1.800 +      assert(res->is_fpu_register(), "must be");
   1.801 +      // assert(left->is_last_use(), "old value gets destroyed");
   1.802 +      assert(right->is_fpu_register(), "right is used as the first temporary register");
   1.803 +      assert(op2->tmp_opr()->is_fpu_register(), "temp is used as the second temporary register");
   1.804 +      assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp_opr()) && fpu_num(op2->tmp_opr()) != fpu_num(res), "need distinct temp registers");
   1.805 +
   1.806 +      insert_free_if_dead(right);
   1.807 +      insert_free_if_dead(op2->tmp_opr());
   1.808 +
   1.809 +      insert_free_if_dead(res, left);
   1.810 +      insert_exchange(left);
   1.811 +      do_rename(left, res);
   1.812 +
   1.813 +      new_left = to_fpu_stack_top(res);
   1.814 +      new_res = new_left;
   1.815 +
   1.816 +      op2->set_fpu_stack_size(sim()->stack_size());
   1.817 +      assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
   1.818 +      break;
   1.819 +    }
   1.820 +
   1.821 +    default: {
   1.822 +      assert(false, "missed a fpu-operation");
   1.823 +    }
   1.824 +  }
   1.825 +
   1.826 +  op2->set_in_opr1(new_left);
   1.827 +  op2->set_in_opr2(new_right);
   1.828 +  op2->set_result_opr(new_res);
   1.829 +}
   1.830 +
   1.831 +void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) {
   1.832 +  LIR_Opr res = opCall->result_opr();
   1.833 +
   1.834 +  // clear fpu-stack before call
    1.835 +  // it may contain dead values that could not have been removed by previous operations
   1.836 +  clear_fpu_stack(LIR_OprFact::illegalOpr);
   1.837 +  assert(sim()->is_empty(), "fpu stack must be empty now");
   1.838 +
   1.839 +  // compute debug information before (possible) fpu result is pushed
   1.840 +  compute_debug_information(opCall);
   1.841 +
   1.842 +  if (res->is_fpu_register() && !res->is_xmm_register()) {
   1.843 +    do_push(res);
   1.844 +    opCall->set_result_opr(to_fpu_stack_top(res));
   1.845 +  }
   1.846 +}
   1.847 +
   1.848 +#ifndef PRODUCT
   1.849 +void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) {
   1.850 +  switch (op->code()) {
   1.851 +    case lir_24bit_FPU:
   1.852 +    case lir_reset_FPU:
   1.853 +    case lir_ffree:
   1.854 +      assert(false, "operations not allowed in lir. If one of these operations is needed, check if they have fpu operands");
   1.855 +      break;
   1.856 +
   1.857 +    case lir_fpop_raw:
   1.858 +    case lir_fxch:
   1.859 +    case lir_fld:
   1.860 +      assert(false, "operations only inserted by FpuStackAllocator");
   1.861 +      break;
   1.862 +  }
   1.863 +}
   1.864 +#endif
   1.865 +
   1.866 +
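          +// The merge_insert_* helpers below append fixup code to a separate LIR_List
          +// (inserted into the predecessor's LIR at the merge point) and keep the given
          +// stack simulator in sync with it: merge_insert_add pushes a dummy value (a
          +// 0.0 constant load) for a register the successor expects but the predecessor
          +// does not have, merge_insert_xchg exchanges a slot with the top-of-stack,
          +// and merge_insert_pop removes the top-of-stack value.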
   1.867 +void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) {
   1.868 +  LIR_Op1* move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset());
   1.869 +
   1.870 +  instrs->instructions_list()->push(move);
   1.871 +
   1.872 +  cur_sim->push(reg);
   1.873 +  move->set_result_opr(to_fpu_stack(move->result_opr()));
   1.874 +
   1.875 +  #ifndef PRODUCT
   1.876 +    if (TraceFPUStack) {
   1.877 +      tty->print("Added new register: %d         New state: ", reg); cur_sim->print(); tty->cr();
   1.878 +    }
   1.879 +  #endif
   1.880 +}
   1.881 +
   1.882 +void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) {
   1.883 +  assert(slot > 0, "no exchange necessary");
   1.884 +
   1.885 +  LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot));
   1.886 +  instrs->instructions_list()->push(fxch);
   1.887 +  cur_sim->swap(slot);
   1.888 +
   1.889 +  #ifndef PRODUCT
   1.890 +    if (TraceFPUStack) {
   1.891 +      tty->print("Exchanged register: %d         New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr();
   1.892 +    }
   1.893 +  #endif
   1.894 +}
   1.895 +
   1.896 +void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) {
   1.897 +  int reg = cur_sim->get_slot(0);
   1.898 +
   1.899 +  LIR_Op* fpop = new LIR_Op0(lir_fpop_raw);
   1.900 +  instrs->instructions_list()->push(fpop);
   1.901 +  cur_sim->pop(reg);
   1.902 +
   1.903 +  #ifndef PRODUCT
   1.904 +    if (TraceFPUStack) {
   1.905 +      tty->print("Removed register: %d           New state: ", reg); cur_sim->print(); tty->cr();
   1.906 +    }
   1.907 +  #endif
   1.908 +}
   1.909 +
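          +// merge_rename handles the case where the register in change_slot of the
          +// current stack is not needed by the successor: instead of emitting any code,
          +// the slot is simply relabeled with a register that the successor expects
          +// (searching sux_sim from start_slot down to slot 0) and that is not yet
          +// present in the current stack.  Returns false if no such register exists.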
   1.910 +bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) {
   1.911 +  int reg = cur_sim->get_slot(change_slot);
   1.912 +
   1.913 +  for (int slot = start_slot; slot >= 0; slot--) {
   1.914 +    int new_reg = sux_sim->get_slot(slot);
   1.915 +
   1.916 +    if (!cur_sim->contains(new_reg)) {
   1.917 +      cur_sim->set_slot(change_slot, new_reg);
   1.918 +
   1.919 +      #ifndef PRODUCT
   1.920 +        if (TraceFPUStack) {
   1.921 +          tty->print("Renamed register %d to %d       New state: ", reg, new_reg); cur_sim->print(); tty->cr();
   1.922 +        }
   1.923 +      #endif
   1.924 +
   1.925 +      return true;
   1.926 +    }
   1.927 +  }
   1.928 +  return false;
   1.929 +}
   1.930 +
   1.931 +
   1.932 +void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) {
   1.933 +#ifndef PRODUCT
   1.934 +  if (TraceFPUStack) {
   1.935 +    tty->cr();
   1.936 +    tty->print("before merging: pred: "); cur_sim->print(); tty->cr();
   1.937 +    tty->print("                 sux: "); sux_sim->print(); tty->cr();
   1.938 +  }
   1.939 +
   1.940 +  int slot;
   1.941 +  for (slot = 0; slot < cur_sim->stack_size(); slot++) {
   1.942 +    assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm");
   1.943 +  }
   1.944 +  for (slot = 0; slot < sux_sim->stack_size(); slot++) {
   1.945 +    assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm");
   1.946 +  }
   1.947 +#endif
   1.948 +
    1.949 +  // size difference between cur and sux that must be resolved by adding or removing values from the stack
   1.950 +  int size_diff = cur_sim->stack_size() - sux_sim->stack_size();
   1.951 +
   1.952 +  if (!ComputeExactFPURegisterUsage) {
    1.953 +    // add slots that are currently free but used in the successor.
    1.954 +    // When the exact FPU register usage is computed, the stack does
    1.955 +    // not contain dead values at merging -> no values need to be added
   1.956 +
   1.957 +    int sux_slot = sux_sim->stack_size() - 1;
   1.958 +    while (size_diff < 0) {
   1.959 +      assert(sux_slot >= 0, "slot out of bounds -> error in algorithm");
   1.960 +
   1.961 +      int reg = sux_sim->get_slot(sux_slot);
   1.962 +      if (!cur_sim->contains(reg)) {
   1.963 +        merge_insert_add(instrs, cur_sim, reg);
   1.964 +        size_diff++;
   1.965 +
   1.966 +        if (sux_slot + size_diff != 0) {
   1.967 +          merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);
   1.968 +        }
   1.969 +      }
    1.970 +      sux_slot--;
   1.971 +    }
   1.972 +  }
   1.973 +
   1.974 +  assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now");
   1.975 +  assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be");
   1.976 +
   1.977 +  // stack merge algorithm:
    1.978 +  // 1) as long as the current stack top is not in the right location (that means
    1.979 +  //    it should not be on the stack top), exchange it into the right location
    1.980 +  // 2) if the stack top is right, but the remaining stack is not ordered correctly,
    1.981 +  //    the stack top is exchanged away to get another value on top ->
    1.982 +  //    now step 1) can be continued
    1.983 +  // the stack can also contain unused items -> these items are removed from the stack
   1.984 +
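          +  // worked example (hypothetical register numbers), TOS printed first:
          +  //   pred stack: [r1 r4 r6]    sux stack: [r6 r4]
          +  //   r1 is not used by sux and cannot be renamed -> pop it:      [r4 r6]
          +  //   r4 belongs at sux slot 1                    -> fxch slot 1: [r6 r4]
          +  //   both stacks are now identical, so the merge is complete
          +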
   1.985 +  int finished_slot = sux_sim->stack_size() - 1;
   1.986 +  while (finished_slot >= 0 || size_diff > 0) {
   1.987 +    while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) {
   1.988 +      int reg = cur_sim->get_slot(0);
   1.989 +      if (sux_sim->contains(reg)) {
   1.990 +        int sux_slot = sux_sim->offset_from_tos(reg);
   1.991 +        merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff);
   1.992 +
   1.993 +      } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) {
   1.994 +        assert(size_diff > 0, "must be");
   1.995 +
   1.996 +        merge_insert_pop(instrs, cur_sim);
   1.997 +        size_diff--;
   1.998 +      }
   1.999 +      assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed");
  1.1000 +    }
  1.1001 +
  1.1002 +    while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) {
  1.1003 +      finished_slot--;
  1.1004 +    }
  1.1005 +
  1.1006 +    if (finished_slot >= 0) {
  1.1007 +      int reg = cur_sim->get_slot(finished_slot);
  1.1008 +
  1.1009 +      if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) {
  1.1010 +        assert(sux_sim->contains(reg) || size_diff > 0, "must be");
  1.1011 +        merge_insert_xchg(instrs, cur_sim, finished_slot);
  1.1012 +      }
  1.1013 +      assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed");
  1.1014 +    }
  1.1015 +  }
  1.1016 +
  1.1017 +#ifndef PRODUCT
  1.1018 +  if (TraceFPUStack) {
  1.1019 +    tty->print("after merging:  pred: "); cur_sim->print(); tty->cr();
  1.1020 +    tty->print("                 sux: "); sux_sim->print(); tty->cr();
  1.1021 +    tty->cr();
  1.1022 +  }
  1.1023 +#endif
  1.1024 +  assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now");
  1.1025 +}
  1.1026 +
  1.1027 +
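          +// merge_cleanup_fpu_stack removes every value from the simulated stack whose
          +// register is not live at the successor's entry (according to the
          +// fpu_register_usage bitmap computed in allocate_fpu_stack): each dead slot is
          +// brought to the top-of-stack with an exchange (unless it is already there)
          +// and then popped.  Only used when ComputeExactFPURegisterUsage provides the
          +// bitmap.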
  1.1028 +void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) {
  1.1029 +#ifndef PRODUCT
  1.1030 +  if (TraceFPUStack) {
  1.1031 +    tty->cr();
  1.1032 +    tty->print("before cleanup: state: "); cur_sim->print(); tty->cr();
  1.1033 +    tty->print("                live:  "); live_fpu_regs.print_on(tty); tty->cr();
  1.1034 +  }
  1.1035 +#endif
  1.1036 +
  1.1037 +  int slot = 0;
  1.1038 +  while (slot < cur_sim->stack_size()) {
  1.1039 +    int reg = cur_sim->get_slot(slot);
  1.1040 +    if (!live_fpu_regs.at(reg)) {
  1.1041 +      if (slot != 0) {
  1.1042 +        merge_insert_xchg(instrs, cur_sim, slot);
  1.1043 +      }
  1.1044 +      merge_insert_pop(instrs, cur_sim);
  1.1045 +    } else {
  1.1046 +      slot++;
  1.1047 +    }
  1.1048 +  }
  1.1049 +
  1.1050 +#ifndef PRODUCT
  1.1051 +  if (TraceFPUStack) {
  1.1052 +    tty->print("after cleanup:  state: "); cur_sim->print(); tty->cr();
  1.1053 +    tty->print("                live:  "); live_fpu_regs.print_on(tty); tty->cr();
  1.1054 +    tty->cr();
  1.1055 +  }
  1.1056 +
  1.1057 +  // check if fpu stack only contains live registers
  1.1058 +  for (unsigned int i = 0; i < live_fpu_regs.size(); i++) {
  1.1059 +    if (live_fpu_regs.at(i) != cur_sim->contains(i)) {
  1.1060 +      tty->print_cr("mismatch between required and actual stack content");
  1.1061 +      break;
  1.1062 +    }
  1.1063 +  }
  1.1064 +#endif
  1.1065 +}
  1.1066 +
  1.1067 +
  1.1068 +bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) {
  1.1069 +#ifndef PRODUCT
  1.1070 +  if (TraceFPUStack) {
  1.1071 +    tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:",
  1.1072 +                  block->block_id(), pos());
  1.1073 +    sim()->print();
  1.1074 +    tty->cr();
  1.1075 +  }
  1.1076 +#endif
  1.1077 +
  1.1078 +  bool changed = false;
  1.1079 +  int number_of_sux = block->number_of_sux();
  1.1080 +
  1.1081 +  if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) {
   1.1082 +    // The successor has at least two incoming edges, so a stack merge may be necessary.
   1.1083 +    // If this block is the first predecessor, clean up the current stack and propagate it.
   1.1084 +    // If this block is not the first predecessor, a stack merge is necessary.
  1.1085 +
  1.1086 +    BlockBegin* sux = block->sux_at(0);
  1.1087 +    intArray* state = sux->fpu_stack_state();
  1.1088 +    LIR_List* instrs = new LIR_List(_compilation);
  1.1089 +
  1.1090 +    if (state != NULL) {
   1.1091 +      // Merge with a successor that already has an FPU stack state.
   1.1092 +      // The block must only have one successor because critical edges must have been split.
  1.1093 +      FpuStackSim* cur_sim = sim();
  1.1094 +      FpuStackSim* sux_sim = temp_sim();
  1.1095 +      sux_sim->read_state(state);
  1.1096 +
  1.1097 +      merge_fpu_stack(instrs, cur_sim, sux_sim);
  1.1098 +
  1.1099 +    } else {
  1.1100 +      // propagate current FPU stack state to successor without state
  1.1101 +      // clean up stack first so that there are no dead values on the stack
  1.1102 +      if (ComputeExactFPURegisterUsage) {
  1.1103 +        FpuStackSim* cur_sim = sim();
  1.1104 +        BitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage();
  1.1105 +        assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage");
  1.1106 +
  1.1107 +        merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs);
  1.1108 +      }
  1.1109 +
  1.1110 +      intArray* state = sim()->write_state();
  1.1111 +      if (TraceFPUStack) {
  1.1112 +        tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id());
  1.1113 +        sim()->print(); tty->cr();
  1.1114 +      }
  1.1115 +      sux->set_fpu_stack_state(state);
  1.1116 +    }
  1.1117 +
  1.1118 +    if (instrs->instructions_list()->length() > 0) {
  1.1119 +      lir()->insert_before(pos(), instrs);
  1.1120 +      set_pos(instrs->instructions_list()->length() + pos());
  1.1121 +      changed = true;
  1.1122 +    }
  1.1123 +
  1.1124 +  } else {
   1.1125 +    // Propagate the unmodified stack to successors where a stack merge is not necessary
  1.1126 +    intArray* state = sim()->write_state();
  1.1127 +    for (int i = 0; i < number_of_sux; i++) {
  1.1128 +      BlockBegin* sux = block->sux_at(i);
  1.1129 +
  1.1130 +#ifdef ASSERT
  1.1131 +      for (int j = 0; j < sux->number_of_preds(); j++) {
  1.1132 +        assert(block == sux->pred_at(j), "all critical edges must be broken");
  1.1133 +      }
  1.1134 +
  1.1135 +      // check if new state is same
  1.1136 +      if (sux->fpu_stack_state() != NULL) {
  1.1137 +        intArray* sux_state = sux->fpu_stack_state();
  1.1138 +        assert(state->length() == sux_state->length(), "overwriting existing stack state");
  1.1139 +        for (int j = 0; j < state->length(); j++) {
  1.1140 +          assert(state->at(j) == sux_state->at(j), "overwriting existing stack state");
  1.1141 +        }
  1.1142 +      }
  1.1143 +#endif
  1.1144 +#ifndef PRODUCT
  1.1145 +      if (TraceFPUStack) {
  1.1146 +        tty->print_cr("Setting FPU stack state of B%d", sux->block_id());
  1.1147 +        sim()->print(); tty->cr();
  1.1148 +      }
  1.1149 +#endif
  1.1150 +
  1.1151 +      sux->set_fpu_stack_state(state);
  1.1152 +    }
  1.1153 +  }
  1.1154 +
  1.1155 +#ifndef PRODUCT
  1.1156 +  // assertions that FPU stack state conforms to all successors' states
  1.1157 +  intArray* cur_state = sim()->write_state();
  1.1158 +  for (int i = 0; i < number_of_sux; i++) {
  1.1159 +    BlockBegin* sux = block->sux_at(i);
  1.1160 +    intArray* sux_state = sux->fpu_stack_state();
  1.1161 +
  1.1162 +    assert(sux_state != NULL, "no fpu state");
  1.1163 +    assert(cur_state->length() == sux_state->length(), "incorrect length");
   1.1164 +    for (int j = 0; j < cur_state->length(); j++) {
   1.1165 +      assert(cur_state->at(j) == sux_state->at(j), "element not equal");
  1.1166 +    }
  1.1167 +  }
  1.1168 +#endif
  1.1169 +
  1.1170 +  return changed;
  1.1171 +}
