src/share/vm/opto/matcher.cpp

changeset 0:f90c822e73f8
child     1:2d8a650513c2
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/opto/matcher.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,2631 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "memory/allocation.inline.hpp"
    1.30 +#include "opto/addnode.hpp"
    1.31 +#include "opto/callnode.hpp"
    1.32 +#include "opto/connode.hpp"
    1.33 +#include "opto/idealGraphPrinter.hpp"
    1.34 +#include "opto/matcher.hpp"
    1.35 +#include "opto/memnode.hpp"
    1.36 +#include "opto/opcodes.hpp"
    1.37 +#include "opto/regmask.hpp"
    1.38 +#include "opto/rootnode.hpp"
    1.39 +#include "opto/runtime.hpp"
    1.40 +#include "opto/type.hpp"
    1.41 +#include "opto/vectornode.hpp"
    1.42 +#include "runtime/atomic.hpp"
    1.43 +#include "runtime/os.hpp"
    1.44 +#ifdef TARGET_ARCH_MODEL_x86_32
    1.45 +# include "adfiles/ad_x86_32.hpp"
    1.46 +#endif
    1.47 +#ifdef TARGET_ARCH_MODEL_x86_64
    1.48 +# include "adfiles/ad_x86_64.hpp"
    1.49 +#endif
    1.50 +#ifdef TARGET_ARCH_MODEL_sparc
    1.51 +# include "adfiles/ad_sparc.hpp"
    1.52 +#endif
    1.53 +#ifdef TARGET_ARCH_MODEL_zero
    1.54 +# include "adfiles/ad_zero.hpp"
    1.55 +#endif
    1.56 +#ifdef TARGET_ARCH_MODEL_arm
    1.57 +# include "adfiles/ad_arm.hpp"
    1.58 +#endif
    1.59 +#ifdef TARGET_ARCH_MODEL_ppc_32
    1.60 +# include "adfiles/ad_ppc_32.hpp"
    1.61 +#endif
    1.62 +#ifdef TARGET_ARCH_MODEL_ppc_64
    1.63 +# include "adfiles/ad_ppc_64.hpp"
    1.64 +#endif
    1.65 +
    1.66 +OptoReg::Name OptoReg::c_frame_pointer;
    1.67 +
    1.68 +const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
    1.69 +RegMask Matcher::mreg2regmask[_last_Mach_Reg];
    1.70 +RegMask Matcher::STACK_ONLY_mask;
    1.71 +RegMask Matcher::c_frame_ptr_mask;
    1.72 +const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
    1.73 +const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;
    1.74 +
    1.75 +//---------------------------Matcher-------------------------------------------
    1.76 +Matcher::Matcher()
    1.77 +: PhaseTransform( Phase::Ins_Select ),
    1.78 +#ifdef ASSERT
    1.79 +  _old2new_map(C->comp_arena()),
    1.80 +  _new2old_map(C->comp_arena()),
    1.81 +#endif
    1.82 +  _shared_nodes(C->comp_arena()),
    1.83 +  _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
    1.84 +  _swallowed(swallowed),
    1.85 +  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
    1.86 +  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
    1.87 +  _must_clone(must_clone),
    1.88 +  _register_save_policy(register_save_policy),
    1.89 +  _c_reg_save_policy(c_reg_save_policy),
    1.90 +  _register_save_type(register_save_type),
    1.91 +  _ruleName(ruleName),
    1.92 +  _allocation_started(false),
    1.93 +  _states_arena(Chunk::medium_size),
    1.94 +  _visited(&_states_arena),
    1.95 +  _shared(&_states_arena),
    1.96 +  _dontcare(&_states_arena) {
    1.97 +  C->set_matcher(this);
    1.98 +
    1.99 +  idealreg2spillmask  [Op_RegI] = NULL;
   1.100 +  idealreg2spillmask  [Op_RegN] = NULL;
   1.101 +  idealreg2spillmask  [Op_RegL] = NULL;
   1.102 +  idealreg2spillmask  [Op_RegF] = NULL;
   1.103 +  idealreg2spillmask  [Op_RegD] = NULL;
   1.104 +  idealreg2spillmask  [Op_RegP] = NULL;
   1.105 +  idealreg2spillmask  [Op_VecS] = NULL;
   1.106 +  idealreg2spillmask  [Op_VecD] = NULL;
   1.107 +  idealreg2spillmask  [Op_VecX] = NULL;
   1.108 +  idealreg2spillmask  [Op_VecY] = NULL;
   1.109 +
   1.110 +  idealreg2debugmask  [Op_RegI] = NULL;
   1.111 +  idealreg2debugmask  [Op_RegN] = NULL;
   1.112 +  idealreg2debugmask  [Op_RegL] = NULL;
   1.113 +  idealreg2debugmask  [Op_RegF] = NULL;
   1.114 +  idealreg2debugmask  [Op_RegD] = NULL;
   1.115 +  idealreg2debugmask  [Op_RegP] = NULL;
   1.116 +  idealreg2debugmask  [Op_VecS] = NULL;
   1.117 +  idealreg2debugmask  [Op_VecD] = NULL;
   1.118 +  idealreg2debugmask  [Op_VecX] = NULL;
   1.119 +  idealreg2debugmask  [Op_VecY] = NULL;
   1.120 +
   1.121 +  idealreg2mhdebugmask[Op_RegI] = NULL;
   1.122 +  idealreg2mhdebugmask[Op_RegN] = NULL;
   1.123 +  idealreg2mhdebugmask[Op_RegL] = NULL;
   1.124 +  idealreg2mhdebugmask[Op_RegF] = NULL;
   1.125 +  idealreg2mhdebugmask[Op_RegD] = NULL;
   1.126 +  idealreg2mhdebugmask[Op_RegP] = NULL;
   1.127 +  idealreg2mhdebugmask[Op_VecS] = NULL;
   1.128 +  idealreg2mhdebugmask[Op_VecD] = NULL;
   1.129 +  idealreg2mhdebugmask[Op_VecX] = NULL;
   1.130 +  idealreg2mhdebugmask[Op_VecY] = NULL;
   1.131 +
   1.132 +  debug_only(_mem_node = NULL;)   // Ideal memory node consumed by mach node
   1.133 +}
   1.134 +
   1.135 +//------------------------------warp_incoming_stk_arg------------------------
   1.136 +// This warps a VMReg into an OptoReg::Name
   1.137 +OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
   1.138 +  OptoReg::Name warped;
   1.139 +  if( reg->is_stack() ) {  // Stack slot argument?
   1.140 +    warped = OptoReg::add(_old_SP, reg->reg2stack() );
   1.141 +    warped = OptoReg::add(warped, C->out_preserve_stack_slots());
   1.142 +    if( warped >= _in_arg_limit )
   1.143 +      _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
   1.144 +    if (!RegMask::can_represent_arg(warped)) {
   1.145 +      // the compiler cannot represent this method's calling sequence
   1.146 +      C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
   1.147 +      return OptoReg::Bad;
   1.148 +    }
   1.149 +    return warped;
   1.150 +  }
   1.151 +  return OptoReg::as_OptoReg(reg);
   1.152 +}
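          +
          +// For illustration (hypothetical values): a stack-passed argument whose VMReg
          +// is stack slot 3, with 2 out-preserve stack slots, warps to
          +// _old_SP + 3 + 2 = _old_SP + 5, i.e. it is renamed into the slots just above
          +// _old_SP, and _in_arg_limit is bumped past it if this is the highest slot
          +// seen so far.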
   1.153 +
   1.154 +//---------------------------compute_old_SP------------------------------------
   1.155 +OptoReg::Name Compile::compute_old_SP() {
   1.156 +  int fixed    = fixed_slots();
   1.157 +  int preserve = in_preserve_stack_slots();
   1.158 +  return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
   1.159 +}
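          +
          +// For illustration (hypothetical values): with fixed_slots() == 3,
          +// in_preserve_stack_slots() == 2 and a stack alignment of 2 slots,
          +// round_to(3 + 2, 2) == 6, so the old SP would be OptoReg::stack2reg(6).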
   1.160 +
   1.161 +
   1.162 +
   1.163 +#ifdef ASSERT
   1.164 +void Matcher::verify_new_nodes_only(Node* xroot) {
   1.165 +  // Make sure that the new graph only references new nodes
   1.166 +  ResourceMark rm;
   1.167 +  Unique_Node_List worklist;
   1.168 +  VectorSet visited(Thread::current()->resource_area());
   1.169 +  worklist.push(xroot);
   1.170 +  while (worklist.size() > 0) {
   1.171 +    Node* n = worklist.pop();
   1.172 +    visited <<= n->_idx;
   1.173 +    assert(C->node_arena()->contains(n), "dead node");
   1.174 +    for (uint j = 0; j < n->req(); j++) {
   1.175 +      Node* in = n->in(j);
   1.176 +      if (in != NULL) {
   1.177 +        assert(C->node_arena()->contains(in), "dead node");
   1.178 +        if (!visited.test(in->_idx)) {
   1.179 +          worklist.push(in);
   1.180 +        }
   1.181 +      }
   1.182 +    }
   1.183 +  }
   1.184 +}
   1.185 +#endif
   1.186 +
   1.187 +
   1.188 +//---------------------------match---------------------------------------------
   1.189 +void Matcher::match( ) {
   1.190 +  if( MaxLabelRootDepth < 100 ) { // Too small?
   1.191 +    assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
   1.192 +    MaxLabelRootDepth = 100;
   1.193 +  }
   1.194 +  // One-time initialization of some register masks.
   1.195 +  init_spill_mask( C->root()->in(1) );
   1.196 +  _return_addr_mask = return_addr();
   1.197 +#ifdef _LP64
   1.198 +  // Pointers take 2 slots in 64-bit land
   1.199 +  _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
   1.200 +#endif
   1.201 +
    1.202 +  // Map a Java-signature return type into the machine registers used for
    1.203 +  // return values, for 0, 1 and 2 returned values.
   1.204 +  const TypeTuple *range = C->tf()->range();
   1.205 +  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
   1.206 +    // Get ideal-register return type
   1.207 +    int ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
   1.208 +    // Get machine return register
   1.209 +    uint sop = C->start()->Opcode();
   1.210 +    OptoRegPair regs = return_value(ireg, false);
   1.211 +
   1.212 +    // And mask for same
   1.213 +    _return_value_mask = RegMask(regs.first());
   1.214 +    if( OptoReg::is_valid(regs.second()) )
   1.215 +      _return_value_mask.Insert(regs.second());
   1.216 +  }
   1.217 +
   1.218 +  // ---------------
   1.219 +  // Frame Layout
   1.220 +
   1.221 +  // Need the method signature to determine the incoming argument types,
   1.222 +  // because the types determine which registers the incoming arguments are
   1.223 +  // in, and this affects the matched code.
   1.224 +  const TypeTuple *domain = C->tf()->domain();
   1.225 +  uint             argcnt = domain->cnt() - TypeFunc::Parms;
   1.226 +  BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
   1.227 +  VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
   1.228 +  _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
   1.229 +  _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
   1.230 +  uint i;
   1.231 +  for( i = 0; i<argcnt; i++ ) {
   1.232 +    sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
   1.233 +  }
   1.234 +
   1.235 +  // Pass array of ideal registers and length to USER code (from the AD file)
   1.236 +  // that will convert this to an array of register numbers.
   1.237 +  const StartNode *start = C->start();
   1.238 +  start->calling_convention( sig_bt, vm_parm_regs, argcnt );
   1.239 +#ifdef ASSERT
   1.240 +  // Sanity check users' calling convention.  Real handy while trying to
   1.241 +  // get the initial port correct.
   1.242 +  { for (uint i = 0; i<argcnt; i++) {
   1.243 +      if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
    1.244 +        assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
   1.245 +        _parm_regs[i].set_bad();
   1.246 +        continue;
   1.247 +      }
   1.248 +      VMReg parm_reg = vm_parm_regs[i].first();
   1.249 +      assert(parm_reg->is_valid(), "invalid arg?");
   1.250 +      if (parm_reg->is_reg()) {
   1.251 +        OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
   1.252 +        assert(can_be_java_arg(opto_parm_reg) ||
   1.253 +               C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
   1.254 +               opto_parm_reg == inline_cache_reg(),
   1.255 +               "parameters in register must be preserved by runtime stubs");
   1.256 +      }
   1.257 +      for (uint j = 0; j < i; j++) {
   1.258 +        assert(parm_reg != vm_parm_regs[j].first(),
   1.259 +               "calling conv. must produce distinct regs");
   1.260 +      }
   1.261 +    }
   1.262 +  }
   1.263 +#endif
   1.264 +
   1.265 +  // Do some initial frame layout.
   1.266 +
   1.267 +  // Compute the old incoming SP (may be called FP) as
   1.268 +  //   OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
   1.269 +  _old_SP = C->compute_old_SP();
   1.270 +  assert( is_even(_old_SP), "must be even" );
   1.271 +
   1.272 +  // Compute highest incoming stack argument as
   1.273 +  //   _old_SP + out_preserve_stack_slots + incoming argument size.
   1.274 +  _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
   1.275 +  assert( is_even(_in_arg_limit), "out_preserve must be even" );
   1.276 +  for( i = 0; i < argcnt; i++ ) {
   1.277 +    // Permit args to have no register
   1.278 +    _calling_convention_mask[i].Clear();
   1.279 +    if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
   1.280 +      continue;
   1.281 +    }
   1.282 +    // calling_convention returns stack arguments as a count of
   1.283 +    // slots beyond OptoReg::stack0()/VMRegImpl::stack0.  We need to convert this to
    1.284 +    // the allocator's point of view, taking into account all the
   1.285 +    // preserve area, locks & pad2.
   1.286 +
   1.287 +    OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
   1.288 +    if( OptoReg::is_valid(reg1))
   1.289 +      _calling_convention_mask[i].Insert(reg1);
   1.290 +
   1.291 +    OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
   1.292 +    if( OptoReg::is_valid(reg2))
   1.293 +      _calling_convention_mask[i].Insert(reg2);
   1.294 +
    1.295 +    // Save the biased stack-slot register numbers
   1.296 +    _parm_regs[i].set_pair(reg2, reg1);
   1.297 +  }
   1.298 +
   1.299 +  // Finally, make sure the incoming arguments take up an even number of
   1.300 +  // words, in case the arguments or locals need to contain doubleword stack
   1.301 +  // slots.  The rest of the system assumes that stack slot pairs (in
   1.302 +  // particular, in the spill area) which look aligned will in fact be
   1.303 +  // aligned relative to the stack pointer in the target machine.  Double
   1.304 +  // stack slots will always be allocated aligned.
   1.305 +  _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));
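          +  // For illustration (hypothetical values): if _in_arg_limit came out at
          +  // stack slot 13 and RegMask::SlotsPerLong is 2, then round_to(13, 2) == 14,
          +  // so _new_SP lands on an even, pair-aligned slot.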
   1.306 +
   1.307 +  // Compute highest outgoing stack argument as
   1.308 +  //   _new_SP + out_preserve_stack_slots + max(outgoing argument size).
   1.309 +  _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
   1.310 +  assert( is_even(_out_arg_limit), "out_preserve must be even" );
   1.311 +
   1.312 +  if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
   1.313 +    // the compiler cannot represent this method's calling sequence
   1.314 +    C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
   1.315 +  }
   1.316 +
   1.317 +  if (C->failing())  return;  // bailed out on incoming arg failure
   1.318 +
   1.319 +  // ---------------
   1.320 +  // Collect roots of matcher trees.  Every node for which
   1.321 +  // _shared[_idx] is cleared is guaranteed to not be shared, and thus
   1.322 +  // can be a valid interior of some tree.
   1.323 +  find_shared( C->root() );
   1.324 +  find_shared( C->top() );
   1.325 +
   1.326 +  C->print_method(PHASE_BEFORE_MATCHING);
   1.327 +
    1.328 +  // Create a new ideal node ConP #NULL even if one already exists in old space
   1.329 +  // to avoid false sharing if the corresponding mach node is not used.
   1.330 +  // The corresponding mach node is only used in rare cases for derived
   1.331 +  // pointers.
   1.332 +  Node* new_ideal_null = ConNode::make(C, TypePtr::NULL_PTR);
   1.333 +
   1.334 +  // Swap out to old-space; emptying new-space
   1.335 +  Arena *old = C->node_arena()->move_contents(C->old_arena());
   1.336 +
   1.337 +  // Save debug and profile information for nodes in old space:
   1.338 +  _old_node_note_array = C->node_note_array();
   1.339 +  if (_old_node_note_array != NULL) {
   1.340 +    C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
   1.341 +                           (C->comp_arena(), _old_node_note_array->length(),
   1.342 +                            0, NULL));
   1.343 +  }
   1.344 +
   1.345 +  // Pre-size the new_node table to avoid the need for range checks.
   1.346 +  grow_new_node_array(C->unique());
   1.347 +
   1.348 +  // Reset node counter so MachNodes start with _idx at 0
   1.349 +  int nodes = C->unique(); // save value
   1.350 +  C->set_unique(0);
   1.351 +  C->reset_dead_node_list();
   1.352 +
   1.353 +  // Recursively match trees from old space into new space.
   1.354 +  // Correct leaves of new-space Nodes; they point to old-space.
   1.355 +  _visited.Clear();             // Clear visit bits for xform call
   1.356 +  C->set_cached_top_node(xform( C->top(), nodes ));
   1.357 +  if (!C->failing()) {
   1.358 +    Node* xroot =        xform( C->root(), 1 );
   1.359 +    if (xroot == NULL) {
   1.360 +      Matcher::soft_match_failure();  // recursive matching process failed
   1.361 +      C->record_method_not_compilable("instruction match failed");
   1.362 +    } else {
   1.363 +      // During matching shared constants were attached to C->root()
   1.364 +      // because xroot wasn't available yet, so transfer the uses to
   1.365 +      // the xroot.
   1.366 +      for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
   1.367 +        Node* n = C->root()->fast_out(j);
   1.368 +        if (C->node_arena()->contains(n)) {
   1.369 +          assert(n->in(0) == C->root(), "should be control user");
   1.370 +          n->set_req(0, xroot);
   1.371 +          --j;
   1.372 +          --jmax;
   1.373 +        }
   1.374 +      }
   1.375 +
   1.376 +      // Generate new mach node for ConP #NULL
   1.377 +      assert(new_ideal_null != NULL, "sanity");
   1.378 +      _mach_null = match_tree(new_ideal_null);
   1.379 +      // Don't set control, it will confuse GCM since there are no uses.
   1.380 +      // The control will be set when this node is used first time
   1.381 +      // in find_base_for_derived().
   1.382 +      assert(_mach_null != NULL, "");
   1.383 +
   1.384 +      C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);
   1.385 +
   1.386 +#ifdef ASSERT
   1.387 +      verify_new_nodes_only(xroot);
   1.388 +#endif
   1.389 +    }
   1.390 +  }
   1.391 +  if (C->top() == NULL || C->root() == NULL) {
   1.392 +    C->record_method_not_compilable("graph lost"); // %%% cannot happen?
   1.393 +  }
   1.394 +  if (C->failing()) {
   1.395 +    // delete old;
   1.396 +    old->destruct_contents();
   1.397 +    return;
   1.398 +  }
   1.399 +  assert( C->top(), "" );
   1.400 +  assert( C->root(), "" );
   1.401 +  validate_null_checks();
   1.402 +
   1.403 +  // Now smoke old-space
   1.404 +  NOT_DEBUG( old->destruct_contents() );
   1.405 +
   1.406 +  // ------------------------
   1.407 +  // Set up save-on-entry registers
   1.408 +  Fixup_Save_On_Entry( );
   1.409 +}
   1.410 +
   1.411 +
   1.412 +//------------------------------Fixup_Save_On_Entry----------------------------
   1.413 +// The stated purpose of this routine is to take care of save-on-entry
    1.414 +// registers.  However, the overall goal of the Match phase is to convert the
    1.415 +// ideal graph into machine-specific instructions, which have RegMasks to guide allocation.
   1.416 +// So what this procedure really does is put a valid RegMask on each input
   1.417 +// to the machine-specific variations of all Return, TailCall and Halt
    1.418 +// instructions.  It also adds edges to define the save-on-entry values (and of
   1.419 +// course gives them a mask).
   1.420 +
   1.421 +static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
   1.422 +  RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
   1.423 +  // Do all the pre-defined register masks
   1.424 +  rms[TypeFunc::Control  ] = RegMask::Empty;
   1.425 +  rms[TypeFunc::I_O      ] = RegMask::Empty;
   1.426 +  rms[TypeFunc::Memory   ] = RegMask::Empty;
   1.427 +  rms[TypeFunc::ReturnAdr] = ret_adr;
   1.428 +  rms[TypeFunc::FramePtr ] = fp;
   1.429 +  return rms;
   1.430 +}
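          +
          +// Sketch of how the array returned above is laid out for a Return node (this
          +// follows from the code in Fixup_Save_On_Entry below): slots
          +// [0 .. TypeFunc::Parms-1] hold the fixed masks set here, slot TypeFunc::Parms
          +// (when the method returns a value) holds _return_value_mask, and one further
          +// slot is appended per save-on-entry register.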
   1.431 +
   1.432 +//---------------------------init_first_stack_mask-----------------------------
   1.433 +// Create the initial stack mask used by values spilling to the stack.
   1.434 +// Disallow any debug info in outgoing argument areas by setting the
   1.435 +// initial mask accordingly.
   1.436 +void Matcher::init_first_stack_mask() {
   1.437 +
   1.438 +  // Allocate storage for spill masks as masks for the appropriate load type.
   1.439 +  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
   1.440 +
   1.441 +  idealreg2spillmask  [Op_RegN] = &rms[0];
   1.442 +  idealreg2spillmask  [Op_RegI] = &rms[1];
   1.443 +  idealreg2spillmask  [Op_RegL] = &rms[2];
   1.444 +  idealreg2spillmask  [Op_RegF] = &rms[3];
   1.445 +  idealreg2spillmask  [Op_RegD] = &rms[4];
   1.446 +  idealreg2spillmask  [Op_RegP] = &rms[5];
   1.447 +
   1.448 +  idealreg2debugmask  [Op_RegN] = &rms[6];
   1.449 +  idealreg2debugmask  [Op_RegI] = &rms[7];
   1.450 +  idealreg2debugmask  [Op_RegL] = &rms[8];
   1.451 +  idealreg2debugmask  [Op_RegF] = &rms[9];
   1.452 +  idealreg2debugmask  [Op_RegD] = &rms[10];
   1.453 +  idealreg2debugmask  [Op_RegP] = &rms[11];
   1.454 +
   1.455 +  idealreg2mhdebugmask[Op_RegN] = &rms[12];
   1.456 +  idealreg2mhdebugmask[Op_RegI] = &rms[13];
   1.457 +  idealreg2mhdebugmask[Op_RegL] = &rms[14];
   1.458 +  idealreg2mhdebugmask[Op_RegF] = &rms[15];
   1.459 +  idealreg2mhdebugmask[Op_RegD] = &rms[16];
   1.460 +  idealreg2mhdebugmask[Op_RegP] = &rms[17];
   1.461 +
   1.462 +  idealreg2spillmask  [Op_VecS] = &rms[18];
   1.463 +  idealreg2spillmask  [Op_VecD] = &rms[19];
   1.464 +  idealreg2spillmask  [Op_VecX] = &rms[20];
   1.465 +  idealreg2spillmask  [Op_VecY] = &rms[21];
   1.466 +
   1.467 +  OptoReg::Name i;
   1.468 +
   1.469 +  // At first, start with the empty mask
   1.470 +  C->FIRST_STACK_mask().Clear();
   1.471 +
   1.472 +  // Add in the incoming argument area
   1.473 +  OptoReg::Name init_in = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
   1.474 +  for (i = init_in; i < _in_arg_limit; i = OptoReg::add(i,1)) {
   1.475 +    C->FIRST_STACK_mask().Insert(i);
   1.476 +  }
   1.477 +  // Add in all bits past the outgoing argument area
   1.478 +  guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
   1.479 +            "must be able to represent all call arguments in reg mask");
   1.480 +  OptoReg::Name init = _out_arg_limit;
   1.481 +  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) {
   1.482 +    C->FIRST_STACK_mask().Insert(i);
   1.483 +  }
   1.484 +  // Finally, set the "infinite stack" bit.
   1.485 +  C->FIRST_STACK_mask().set_AllStack();
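          +  // At this point FIRST_STACK_mask covers the incoming-argument slots
          +  // [init_in, _in_arg_limit) and every representable slot at or above
          +  // _out_arg_limit (the AllStack bit stands in for the unbounded tail); the
          +  // outgoing-argument area is deliberately left out, so neither spill slots
          +  // nor debug info are assigned there.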
   1.486 +
   1.487 +  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
   1.488 +  RegMask aligned_stack_mask = C->FIRST_STACK_mask();
   1.489 +  // Keep spill masks aligned.
   1.490 +  aligned_stack_mask.clear_to_pairs();
   1.491 +  assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
   1.492 +
   1.493 +  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
   1.494 +#ifdef _LP64
   1.495 +  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
   1.496 +   idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
   1.497 +   idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
   1.498 +#else
   1.499 +   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
   1.500 +#endif
   1.501 +  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
   1.502 +   idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
   1.503 +  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
   1.504 +   idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
   1.505 +  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
   1.506 +   idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
   1.507 +  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
   1.508 +   idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
   1.509 +
   1.510 +  if (Matcher::vector_size_supported(T_BYTE,4)) {
   1.511 +    *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
   1.512 +     idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
   1.513 +  }
   1.514 +  if (Matcher::vector_size_supported(T_FLOAT,2)) {
   1.515 +    // For VecD we need dual alignment and 8 bytes (2 slots) for spills.
   1.516 +    // RA guarantees such alignment since it is needed for Double and Long values.
   1.517 +    *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
   1.518 +     idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
   1.519 +  }
   1.520 +  if (Matcher::vector_size_supported(T_FLOAT,4)) {
    1.521 +    // For VecX we need 4-slot alignment and 16 bytes (4 slots) for spills.
    1.522 +    //
    1.523 +    // RA can use input argument stack slots for spills, but until RA runs
    1.524 +    // we don't know the frame size or the offset of the input arg stack slots.
    1.525 +    //
    1.526 +    // Exclude the last input arg stack slots to avoid spilling vectors there,
    1.527 +    // otherwise vector spills could stomp on stack slots in the caller's frame.
   1.528 +    OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
   1.529 +    for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecX); k++) {
   1.530 +      aligned_stack_mask.Remove(in);
   1.531 +      in = OptoReg::add(in, -1);
   1.532 +    }
   1.533 +     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
   1.534 +     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
   1.535 +    *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
   1.536 +     idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
   1.537 +  }
   1.538 +  if (Matcher::vector_size_supported(T_FLOAT,8)) {
    1.539 +    // For VecY we need 8-slot alignment and 32 bytes (8 slots) for spills.
   1.540 +    OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
   1.541 +    for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecY); k++) {
   1.542 +      aligned_stack_mask.Remove(in);
   1.543 +      in = OptoReg::add(in, -1);
   1.544 +    }
   1.545 +     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
   1.546 +     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
   1.547 +    *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
   1.548 +     idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
   1.549 +  }
   1.550 +   if (UseFPUForSpilling) {
   1.551 +     // This mask logic assumes that the spill operations are
   1.552 +     // symmetric and that the registers involved are the same size.
    1.553 +     // On sparc, for instance, we may have to use 64-bit moves, which will
    1.554 +     // kill 2 registers when used with F0-F31.
   1.555 +     idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
   1.556 +     idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
   1.557 +#ifdef _LP64
   1.558 +     idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
   1.559 +     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
   1.560 +     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
   1.561 +     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
   1.562 +#else
   1.563 +     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
   1.564 +#ifdef ARM
    1.565 +     // ARM has support for moving 64-bit values between a pair of
   1.566 +     // integer registers and a double register
   1.567 +     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
   1.568 +     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
   1.569 +#endif
   1.570 +#endif
   1.571 +   }
   1.572 +
   1.573 +  // Make up debug masks.  Any spill slot plus callee-save registers.
   1.574 +  // Caller-save registers are assumed to be trashable by the various
   1.575 +  // inline-cache fixup routines.
   1.576 +  *idealreg2debugmask  [Op_RegN]= *idealreg2spillmask[Op_RegN];
   1.577 +  *idealreg2debugmask  [Op_RegI]= *idealreg2spillmask[Op_RegI];
   1.578 +  *idealreg2debugmask  [Op_RegL]= *idealreg2spillmask[Op_RegL];
   1.579 +  *idealreg2debugmask  [Op_RegF]= *idealreg2spillmask[Op_RegF];
   1.580 +  *idealreg2debugmask  [Op_RegD]= *idealreg2spillmask[Op_RegD];
   1.581 +  *idealreg2debugmask  [Op_RegP]= *idealreg2spillmask[Op_RegP];
   1.582 +
   1.583 +  *idealreg2mhdebugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
   1.584 +  *idealreg2mhdebugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
   1.585 +  *idealreg2mhdebugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
   1.586 +  *idealreg2mhdebugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
   1.587 +  *idealreg2mhdebugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
   1.588 +  *idealreg2mhdebugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];
   1.589 +
   1.590 +  // Prevent stub compilations from attempting to reference
   1.591 +  // callee-saved registers from debug info
   1.592 +  bool exclude_soe = !Compile::current()->is_method_compilation();
   1.593 +
   1.594 +  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
   1.595 +    // registers the caller has to save do not work
   1.596 +    if( _register_save_policy[i] == 'C' ||
   1.597 +        _register_save_policy[i] == 'A' ||
   1.598 +        (_register_save_policy[i] == 'E' && exclude_soe) ) {
   1.599 +      idealreg2debugmask  [Op_RegN]->Remove(i);
   1.600 +      idealreg2debugmask  [Op_RegI]->Remove(i); // Exclude save-on-call
   1.601 +      idealreg2debugmask  [Op_RegL]->Remove(i); // registers from debug
   1.602 +      idealreg2debugmask  [Op_RegF]->Remove(i); // masks
   1.603 +      idealreg2debugmask  [Op_RegD]->Remove(i);
   1.604 +      idealreg2debugmask  [Op_RegP]->Remove(i);
   1.605 +
   1.606 +      idealreg2mhdebugmask[Op_RegN]->Remove(i);
   1.607 +      idealreg2mhdebugmask[Op_RegI]->Remove(i);
   1.608 +      idealreg2mhdebugmask[Op_RegL]->Remove(i);
   1.609 +      idealreg2mhdebugmask[Op_RegF]->Remove(i);
   1.610 +      idealreg2mhdebugmask[Op_RegD]->Remove(i);
   1.611 +      idealreg2mhdebugmask[Op_RegP]->Remove(i);
   1.612 +    }
   1.613 +  }
   1.614 +
   1.615 +  // Subtract the register we use to save the SP for MethodHandle
    1.616 +  // invokes from the debug mask.
   1.617 +  const RegMask save_mask = method_handle_invoke_SP_save_mask();
   1.618 +  idealreg2mhdebugmask[Op_RegN]->SUBTRACT(save_mask);
   1.619 +  idealreg2mhdebugmask[Op_RegI]->SUBTRACT(save_mask);
   1.620 +  idealreg2mhdebugmask[Op_RegL]->SUBTRACT(save_mask);
   1.621 +  idealreg2mhdebugmask[Op_RegF]->SUBTRACT(save_mask);
   1.622 +  idealreg2mhdebugmask[Op_RegD]->SUBTRACT(save_mask);
   1.623 +  idealreg2mhdebugmask[Op_RegP]->SUBTRACT(save_mask);
   1.624 +}
   1.625 +
   1.626 +//---------------------------is_save_on_entry----------------------------------
   1.627 +bool Matcher::is_save_on_entry( int reg ) {
   1.628 +  return
   1.629 +    _register_save_policy[reg] == 'E' ||
   1.630 +    _register_save_policy[reg] == 'A' || // Save-on-entry register?
   1.631 +    // Also save argument registers in the trampolining stubs
   1.632 +    (C->save_argument_registers() && is_spillable_arg(reg));
   1.633 +}
   1.634 +
   1.635 +//---------------------------Fixup_Save_On_Entry-------------------------------
   1.636 +void Matcher::Fixup_Save_On_Entry( ) {
   1.637 +  init_first_stack_mask();
   1.638 +
   1.639 +  Node *root = C->root();       // Short name for root
   1.640 +  // Count number of save-on-entry registers.
   1.641 +  uint soe_cnt = number_of_saved_registers();
   1.642 +  uint i;
   1.643 +
   1.644 +  // Find the procedure Start Node
   1.645 +  StartNode *start = C->start();
   1.646 +  assert( start, "Expect a start node" );
   1.647 +
   1.648 +  // Save argument registers in the trampolining stubs
   1.649 +  if( C->save_argument_registers() )
   1.650 +    for( i = 0; i < _last_Mach_Reg; i++ )
   1.651 +      if( is_spillable_arg(i) )
   1.652 +        soe_cnt++;
   1.653 +
   1.654 +  // Input RegMask array shared by all Returns.
   1.655 +  // The type for doubles and longs has a count of 2, but
   1.656 +  // there is only 1 returned value
   1.657 +  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
   1.658 +  RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
   1.659 +  // Returns have 0 or 1 returned values depending on call signature.
   1.660 +  // Return register is specified by return_value in the AD file.
   1.661 +  if (ret_edge_cnt > TypeFunc::Parms)
   1.662 +    ret_rms[TypeFunc::Parms+0] = _return_value_mask;
   1.663 +
   1.664 +  // Input RegMask array shared by all Rethrows.
   1.665 +  uint reth_edge_cnt = TypeFunc::Parms+1;
   1.666 +  RegMask *reth_rms  = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
   1.667 +  // Rethrow takes exception oop only, but in the argument 0 slot.
   1.668 +  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
   1.669 +#ifdef _LP64
   1.670 +  // Need two slots for ptrs in 64-bit land
   1.671 +  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
   1.672 +#endif
   1.673 +
   1.674 +  // Input RegMask array shared by all TailCalls
   1.675 +  uint tail_call_edge_cnt = TypeFunc::Parms+2;
   1.676 +  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
   1.677 +
   1.678 +  // Input RegMask array shared by all TailJumps
   1.679 +  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
   1.680 +  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
   1.681 +
   1.682 +  // TailCalls have 2 returned values (target & moop), whose masks come
   1.683 +  // from the usual MachNode/MachOper mechanism.  Find a sample
   1.684 +  // TailCall to extract these masks and put the correct masks into
   1.685 +  // the tail_call_rms array.
   1.686 +  for( i=1; i < root->req(); i++ ) {
   1.687 +    MachReturnNode *m = root->in(i)->as_MachReturn();
   1.688 +    if( m->ideal_Opcode() == Op_TailCall ) {
   1.689 +      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
   1.690 +      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
   1.691 +      break;
   1.692 +    }
   1.693 +  }
   1.694 +
   1.695 +  // TailJumps have 2 returned values (target & ex_oop), whose masks come
   1.696 +  // from the usual MachNode/MachOper mechanism.  Find a sample
   1.697 +  // TailJump to extract these masks and put the correct masks into
   1.698 +  // the tail_jump_rms array.
   1.699 +  for( i=1; i < root->req(); i++ ) {
   1.700 +    MachReturnNode *m = root->in(i)->as_MachReturn();
   1.701 +    if( m->ideal_Opcode() == Op_TailJump ) {
   1.702 +      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
   1.703 +      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
   1.704 +      break;
   1.705 +    }
   1.706 +  }
   1.707 +
   1.708 +  // Input RegMask array shared by all Halts
   1.709 +  uint halt_edge_cnt = TypeFunc::Parms;
   1.710 +  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
   1.711 +
   1.712 +  // Capture the return input masks into each exit flavor
   1.713 +  for( i=1; i < root->req(); i++ ) {
   1.714 +    MachReturnNode *exit = root->in(i)->as_MachReturn();
   1.715 +    switch( exit->ideal_Opcode() ) {
   1.716 +      case Op_Return   : exit->_in_rms = ret_rms;  break;
   1.717 +      case Op_Rethrow  : exit->_in_rms = reth_rms; break;
   1.718 +      case Op_TailCall : exit->_in_rms = tail_call_rms; break;
   1.719 +      case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
   1.720 +      case Op_Halt     : exit->_in_rms = halt_rms; break;
   1.721 +      default          : ShouldNotReachHere();
   1.722 +    }
   1.723 +  }
   1.724 +
   1.725 +  // Next unused projection number from Start.
   1.726 +  int proj_cnt = C->tf()->domain()->cnt();
   1.727 +
   1.728 +  // Do all the save-on-entry registers.  Make projections from Start for
   1.729 +  // them, and give them a use at the exit points.  To the allocator, they
   1.730 +  // look like incoming register arguments.
   1.731 +  for( i = 0; i < _last_Mach_Reg; i++ ) {
   1.732 +    if( is_save_on_entry(i) ) {
   1.733 +
   1.734 +      // Add the save-on-entry to the mask array
   1.735 +      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
   1.736 +      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
   1.737 +      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
   1.738 +      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
   1.739 +      // Halts need the SOE registers, but only in the stack as debug info.
   1.740 +      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
   1.741 +      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];
   1.742 +
   1.743 +      Node *mproj;
   1.744 +
   1.745 +      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
   1.746 +      // into a single RegD.
   1.747 +      if( (i&1) == 0 &&
   1.748 +          _register_save_type[i  ] == Op_RegF &&
   1.749 +          _register_save_type[i+1] == Op_RegF &&
   1.750 +          is_save_on_entry(i+1) ) {
   1.751 +        // Add other bit for double
   1.752 +        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
   1.753 +        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
   1.754 +        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
   1.755 +        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
   1.756 +        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
   1.757 +        mproj = new (C) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
   1.758 +        proj_cnt += 2;          // Skip 2 for doubles
   1.759 +      }
   1.760 +      else if( (i&1) == 1 &&    // Else check for high half of double
   1.761 +               _register_save_type[i-1] == Op_RegF &&
   1.762 +               _register_save_type[i  ] == Op_RegF &&
   1.763 +               is_save_on_entry(i-1) ) {
   1.764 +        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
   1.765 +        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
   1.766 +        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
   1.767 +        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
   1.768 +        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
   1.769 +        mproj = C->top();
   1.770 +      }
   1.771 +      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
   1.772 +      // into a single RegL.
   1.773 +      else if( (i&1) == 0 &&
   1.774 +          _register_save_type[i  ] == Op_RegI &&
   1.775 +          _register_save_type[i+1] == Op_RegI &&
   1.776 +        is_save_on_entry(i+1) ) {
   1.777 +        // Add other bit for long
   1.778 +        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
   1.779 +        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
   1.780 +        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
   1.781 +        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
   1.782 +        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
   1.783 +        mproj = new (C) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
   1.784 +        proj_cnt += 2;          // Skip 2 for longs
   1.785 +      }
   1.786 +      else if( (i&1) == 1 &&    // Else check for high half of long
   1.787 +               _register_save_type[i-1] == Op_RegI &&
   1.788 +               _register_save_type[i  ] == Op_RegI &&
   1.789 +               is_save_on_entry(i-1) ) {
   1.790 +        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
   1.791 +        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
   1.792 +        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
   1.793 +        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
   1.794 +        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
   1.795 +        mproj = C->top();
   1.796 +      } else {
   1.797 +        // Make a projection for it off the Start
   1.798 +        mproj = new (C) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
   1.799 +      }
   1.800 +
   1.801 +      ret_edge_cnt ++;
   1.802 +      reth_edge_cnt ++;
   1.803 +      tail_call_edge_cnt ++;
   1.804 +      tail_jump_edge_cnt ++;
   1.805 +      halt_edge_cnt ++;
   1.806 +
   1.807 +      // Add a use of the SOE register to all exit paths
   1.808 +      for( uint j=1; j < root->req(); j++ )
   1.809 +        root->in(j)->add_req(mproj);
   1.810 +    } // End of if a save-on-entry register
   1.811 +  } // End of for all machine registers
   1.812 +}
   1.813 +
   1.814 +//------------------------------init_spill_mask--------------------------------
   1.815 +void Matcher::init_spill_mask( Node *ret ) {
   1.816 +  if( idealreg2regmask[Op_RegI] ) return; // One time only init
   1.817 +
   1.818 +  OptoReg::c_frame_pointer = c_frame_pointer();
   1.819 +  c_frame_ptr_mask = c_frame_pointer();
   1.820 +#ifdef _LP64
   1.821 +  // pointers are twice as big
   1.822 +  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
   1.823 +#endif
   1.824 +
   1.825 +  // Start at OptoReg::stack0()
   1.826 +  STACK_ONLY_mask.Clear();
   1.827 +  OptoReg::Name init = OptoReg::stack2reg(0);
   1.828 +  // STACK_ONLY_mask is all stack bits
   1.829 +  OptoReg::Name i;
   1.830 +  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
   1.831 +    STACK_ONLY_mask.Insert(i);
   1.832 +  // Also set the "infinite stack" bit.
   1.833 +  STACK_ONLY_mask.set_AllStack();
   1.834 +
   1.835 +  // Copy the register names over into the shared world
   1.836 +  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
   1.837 +    // SharedInfo::regName[i] = regName[i];
   1.838 +    // Handy RegMasks per machine register
   1.839 +    mreg2regmask[i].Insert(i);
   1.840 +  }
   1.841 +
   1.842 +  // Grab the Frame Pointer
   1.843 +  Node *fp  = ret->in(TypeFunc::FramePtr);
   1.844 +  Node *mem = ret->in(TypeFunc::Memory);
   1.845 +  const TypePtr* atp = TypePtr::BOTTOM;
   1.846 +  // Share frame pointer while making spill ops
   1.847 +  set_shared(fp);
   1.848 +
   1.849 +  // Compute generic short-offset Loads
   1.850 +#ifdef _LP64
   1.851 +  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
   1.852 +#endif
   1.853 +  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp,TypeInt::INT,MemNode::unordered));
   1.854 +  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,MemNode::unordered,false));
   1.855 +  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp,Type::FLOAT,MemNode::unordered));
   1.856 +  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp,Type::DOUBLE,MemNode::unordered));
   1.857 +  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
   1.858 +  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
   1.859 +         spillD != NULL && spillP != NULL, "");
   1.860 +  // Get the ADLC notion of the right regmask, for each basic type.
   1.861 +#ifdef _LP64
   1.862 +  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
   1.863 +#endif
   1.864 +  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
   1.865 +  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
   1.866 +  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
   1.867 +  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
   1.868 +  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
   1.869 +
   1.870 +  // Vector regmasks.
   1.871 +  if (Matcher::vector_size_supported(T_BYTE,4)) {
   1.872 +    TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
   1.873 +    MachNode *spillVectS = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
   1.874 +    idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
   1.875 +  }
   1.876 +  if (Matcher::vector_size_supported(T_FLOAT,2)) {
   1.877 +    MachNode *spillVectD = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
   1.878 +    idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
   1.879 +  }
   1.880 +  if (Matcher::vector_size_supported(T_FLOAT,4)) {
   1.881 +    MachNode *spillVectX = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
   1.882 +    idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
   1.883 +  }
   1.884 +  if (Matcher::vector_size_supported(T_FLOAT,8)) {
   1.885 +    MachNode *spillVectY = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
   1.886 +    idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
   1.887 +  }
   1.888 +}
   1.889 +
   1.890 +#ifdef ASSERT
   1.891 +static void match_alias_type(Compile* C, Node* n, Node* m) {
   1.892 +  if (!VerifyAliases)  return;  // do not go looking for trouble by default
   1.893 +  const TypePtr* nat = n->adr_type();
   1.894 +  const TypePtr* mat = m->adr_type();
   1.895 +  int nidx = C->get_alias_index(nat);
   1.896 +  int midx = C->get_alias_index(mat);
   1.897 +  // Detune the assert for cases like (AndI 0xFF (LoadB p)).
   1.898 +  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
   1.899 +    for (uint i = 1; i < n->req(); i++) {
   1.900 +      Node* n1 = n->in(i);
   1.901 +      const TypePtr* n1at = n1->adr_type();
   1.902 +      if (n1at != NULL) {
   1.903 +        nat = n1at;
   1.904 +        nidx = C->get_alias_index(n1at);
   1.905 +      }
   1.906 +    }
   1.907 +  }
   1.908 +  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
   1.909 +  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
   1.910 +    switch (n->Opcode()) {
   1.911 +    case Op_PrefetchRead:
   1.912 +    case Op_PrefetchWrite:
   1.913 +    case Op_PrefetchAllocation:
   1.914 +      nidx = Compile::AliasIdxRaw;
   1.915 +      nat = TypeRawPtr::BOTTOM;
   1.916 +      break;
   1.917 +    }
   1.918 +  }
   1.919 +  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
   1.920 +    switch (n->Opcode()) {
   1.921 +    case Op_ClearArray:
   1.922 +      midx = Compile::AliasIdxRaw;
   1.923 +      mat = TypeRawPtr::BOTTOM;
   1.924 +      break;
   1.925 +    }
   1.926 +  }
   1.927 +  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
   1.928 +    switch (n->Opcode()) {
   1.929 +    case Op_Return:
   1.930 +    case Op_Rethrow:
   1.931 +    case Op_Halt:
   1.932 +    case Op_TailCall:
   1.933 +    case Op_TailJump:
   1.934 +      nidx = Compile::AliasIdxBot;
   1.935 +      nat = TypePtr::BOTTOM;
   1.936 +      break;
   1.937 +    }
   1.938 +  }
   1.939 +  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
   1.940 +    switch (n->Opcode()) {
   1.941 +    case Op_StrComp:
   1.942 +    case Op_StrEquals:
   1.943 +    case Op_StrIndexOf:
   1.944 +    case Op_AryEq:
   1.945 +    case Op_MemBarVolatile:
   1.946 +    case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
   1.947 +    case Op_EncodeISOArray:
   1.948 +      nidx = Compile::AliasIdxTop;
   1.949 +      nat = NULL;
   1.950 +      break;
   1.951 +    }
   1.952 +  }
   1.953 +  if (nidx != midx) {
   1.954 +    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
   1.955 +      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
   1.956 +      n->dump();
   1.957 +      m->dump();
   1.958 +    }
   1.959 +    assert(C->subsume_loads() && C->must_alias(nat, midx),
   1.960 +           "must not lose alias info when matching");
   1.961 +  }
   1.962 +}
   1.963 +#endif
   1.964 +
   1.965 +
   1.966 +//------------------------------MStack-----------------------------------------
   1.967 +// State and MStack class used in xform() and find_shared() iterative methods.
   1.968 +enum Node_State { Pre_Visit,  // node has to be pre-visited
   1.969 +                      Visit,  // visit node
   1.970 +                 Post_Visit,  // post-visit node
   1.971 +             Alt_Post_Visit   // alternative post-visit path
   1.972 +                };
   1.973 +
   1.974 +class MStack: public Node_Stack {
   1.975 +  public:
   1.976 +    MStack(int size) : Node_Stack(size) { }
   1.977 +
   1.978 +    void push(Node *n, Node_State ns) {
   1.979 +      Node_Stack::push(n, (uint)ns);
   1.980 +    }
   1.981 +    void push(Node *n, Node_State ns, Node *parent, int indx) {
   1.982 +      ++_inode_top;
   1.983 +      if ((_inode_top + 1) >= _inode_max) grow();
   1.984 +      _inode_top->node = parent;
   1.985 +      _inode_top->indx = (uint)indx;
   1.986 +      ++_inode_top;
   1.987 +      _inode_top->node = n;
   1.988 +      _inode_top->indx = (uint)ns;
   1.989 +    }
   1.990 +    Node *parent() {
   1.991 +      pop();
   1.992 +      return node();
   1.993 +    }
   1.994 +    Node_State state() const {
   1.995 +      return (Node_State)index();
   1.996 +    }
   1.997 +    void set_state(Node_State ns) {
   1.998 +      set_index((uint)ns);
   1.999 +    }
  1.1000 +};
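          +
          +// Sketch of the stack discipline above: push(n, ns, parent, indx) stores two
          +// inode entries back to back,
          +//   [parent, indx]   -- consumed later via parent()/index() at Post_Visit
          +//   [n,      ns]     -- what node()/state() see while n is being processed
          +// so parent() simply pops the child entry, exposing the parent pair beneath it.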
  1.1001 +
  1.1002 +
  1.1003 +//------------------------------xform------------------------------------------
  1.1004 +// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
  1.1005 +// Node in new-space.  Given a new-space Node, recursively walk his children.
  1.1006 +Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
  1.1007 +Node *Matcher::xform( Node *n, int max_stack ) {
  1.1008 +  // Use one stack to keep both: child's node/state and parent's node/index
  1.1009 +  MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
  1.1010 +  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root
  1.1011 +
  1.1012 +  while (mstack.is_nonempty()) {
  1.1013 +    C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions");
  1.1014 +    if (C->failing()) return NULL;
  1.1015 +    n = mstack.node();          // Leave node on stack
  1.1016 +    Node_State nstate = mstack.state();
  1.1017 +    if (nstate == Visit) {
  1.1018 +      mstack.set_state(Post_Visit);
  1.1019 +      Node *oldn = n;
  1.1020 +      // Old-space or new-space check
  1.1021 +      if (!C->node_arena()->contains(n)) {
  1.1022 +        // Old space!
  1.1023 +        Node* m;
  1.1024 +        if (has_new_node(n)) {  // Not yet Label/Reduced
  1.1025 +          m = new_node(n);
  1.1026 +        } else {
  1.1027 +          if (!is_dontcare(n)) { // Matcher can match this guy
  1.1028 +            // Calls match special.  They match alone with no children.
  1.1029 +            // Their children, the incoming arguments, match normally.
  1.1030 +            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
  1.1031 +            if (C->failing())  return NULL;
  1.1032 +            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
  1.1033 +          } else {                  // Nothing the matcher cares about
  1.1034 +            if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
  1.1035 +              // Convert to machine-dependent projection
  1.1036 +              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
  1.1037 +#ifdef ASSERT
  1.1038 +              _new2old_map.map(m->_idx, n);
  1.1039 +#endif
  1.1040 +              if (m->in(0) != NULL) // m might be top
  1.1041 +                collect_null_checks(m, n);
  1.1042 +            } else {                // Else just a regular 'ol guy
  1.1043 +              m = n->clone();       // So just clone into new-space
  1.1044 +#ifdef ASSERT
  1.1045 +              _new2old_map.map(m->_idx, n);
  1.1046 +#endif
  1.1047 +              // Def-Use edges will be added incrementally as Uses
  1.1048 +              // of this node are matched.
  1.1049 +              assert(m->outcnt() == 0, "no Uses of this clone yet");
  1.1050 +            }
  1.1051 +          }
  1.1052 +
  1.1053 +          set_new_node(n, m);       // Map old to new
  1.1054 +          if (_old_node_note_array != NULL) {
  1.1055 +            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
  1.1056 +                                                  n->_idx);
  1.1057 +            C->set_node_notes_at(m->_idx, nn);
  1.1058 +          }
  1.1059 +          debug_only(match_alias_type(C, n, m));
  1.1060 +        }
  1.1061 +        n = m;    // n is now a new-space node
  1.1062 +        mstack.set_node(n);
  1.1063 +      }
  1.1064 +
  1.1065 +      // New space!
  1.1066 +      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
  1.1067 +
  1.1068 +      int i;
  1.1069 +      // Put precedence edges on stack first (match them last).
  1.1070 +      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
  1.1071 +        Node *m = oldn->in(i);
  1.1072 +        if (m == NULL) break;
  1.1073 +        // set -1 to call add_prec() instead of set_req() during Step1
  1.1074 +        mstack.push(m, Visit, n, -1);
  1.1075 +      }
  1.1076 +
  1.1077 +      // For constant debug info, I'd rather have unmatched constants.
  1.1078 +      int cnt = n->req();
  1.1079 +      JVMState* jvms = n->jvms();
  1.1080 +      int debug_cnt = jvms ? jvms->debug_start() : cnt;
  1.1081 +
  1.1082 +      // Now do only debug info.  Clone constants rather than matching.
  1.1083 +      // Constants are represented directly in the debug info without
  1.1084 +      // the need for executable machine instructions.
  1.1085 +      // Monitor boxes are also represented directly.
  1.1086 +      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
  1.1087 +        Node *m = n->in(i);          // Get input
  1.1088 +        int op = m->Opcode();
  1.1089 +        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
  1.1090 +        if( op == Op_ConI || op == Op_ConP || op == Op_ConN || op == Op_ConNKlass ||
  1.1091 +            op == Op_ConF || op == Op_ConD || op == Op_ConL
  1.1092 +            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
  1.1093 +            ) {
  1.1094 +          m = m->clone();
  1.1095 +#ifdef ASSERT
  1.1096 +          _new2old_map.map(m->_idx, n);
  1.1097 +#endif
  1.1098 +          mstack.push(m, Post_Visit, n, i); // Don't need to visit
  1.1099 +          mstack.push(m->in(0), Visit, m, 0);
  1.1100 +        } else {
  1.1101 +          mstack.push(m, Visit, n, i);
  1.1102 +        }
  1.1103 +      }
  1.1104 +
  1.1105 +      // And now walk his children, and convert his inputs to new-space.
  1.1106 +      for( ; i >= 0; --i ) { // For all normal inputs do
  1.1107 +        Node *m = n->in(i);  // Get input
  1.1108 +        if(m != NULL)
  1.1109 +          mstack.push(m, Visit, n, i);
  1.1110 +      }
  1.1111 +
  1.1112 +    }
  1.1113 +    else if (nstate == Post_Visit) {
  1.1114 +      // Set xformed input
  1.1115 +      Node *p = mstack.parent();
  1.1116 +      if (p != NULL) { // root doesn't have parent
  1.1117 +        int i = (int)mstack.index();
  1.1118 +        if (i >= 0)
  1.1119 +          p->set_req(i, n); // required input
  1.1120 +        else if (i == -1)
  1.1121 +          p->add_prec(n);   // precedence input
  1.1122 +        else
  1.1123 +          ShouldNotReachHere();
  1.1124 +      }
  1.1125 +      mstack.pop(); // remove processed node from stack
  1.1126 +    }
  1.1127 +    else {
  1.1128 +      ShouldNotReachHere();
  1.1129 +    }
  1.1130 +  } // while (mstack.is_nonempty())
  1.1131 +  return n; // Return new-space Node
  1.1132 +}
  1.1133 +
  1.1134 +//------------------------------warp_outgoing_stk_arg------------------------
  1.1135 +OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
  1.1136 +  // Convert outgoing argument location to a pre-biased stack offset
  1.1137 +  if (reg->is_stack()) {
  1.1138 +    OptoReg::Name warped = reg->reg2stack();
  1.1139 +    // Adjust the stack slot offset to be the register number used
  1.1140 +    // by the allocator.
  1.1141 +    warped = OptoReg::add(begin_out_arg_area, warped);
  1.1142 +    // Keep track of the largest numbered stack slot used for an arg.
  1.1143 +    // Largest used slot per call-site indicates the amount of stack
  1.1144 +    // that is killed by the call.
  1.1145 +    if( warped >= out_arg_limit_per_call )
  1.1146 +      out_arg_limit_per_call = OptoReg::add(warped,1);
  1.1147 +    if (!RegMask::can_represent_arg(warped)) {
  1.1148 +      C->record_method_not_compilable_all_tiers("unsupported calling sequence");
  1.1149 +      return OptoReg::Bad;
  1.1150 +    }
  1.1151 +    return warped;
  1.1152 +  }
  1.1153 +  return OptoReg::as_OptoReg(reg);
  1.1154 +}
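          +// Worked example (numbers are hypothetical): if the outgoing argument
          +// area begins at stack slot 8 and the calling convention reports
          +// reg2stack() == 3 for an argument, the allocator-visible register is
          +// OptoReg 8 + 3 = 11 and out_arg_limit_per_call is bumped to 12, so
          +// that slot counts as killed by the call.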
  1.1155 +
  1.1156 +
  1.1157 +//------------------------------match_sfpt-------------------------------------
   1.1158 +// Helper function to match call instructions.  Calls match specially.
  1.1159 +// They match alone with no children.  Their children, the incoming
  1.1160 +// arguments, match normally.
  1.1161 +MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
  1.1162 +  MachSafePointNode *msfpt = NULL;
  1.1163 +  MachCallNode      *mcall = NULL;
  1.1164 +  uint               cnt;
  1.1165 +  // Split out case for SafePoint vs Call
  1.1166 +  CallNode *call;
  1.1167 +  const TypeTuple *domain;
  1.1168 +  ciMethod*        method = NULL;
  1.1169 +  bool             is_method_handle_invoke = false;  // for special kill effects
  1.1170 +  if( sfpt->is_Call() ) {
  1.1171 +    call = sfpt->as_Call();
  1.1172 +    domain = call->tf()->domain();
  1.1173 +    cnt = domain->cnt();
  1.1174 +
  1.1175 +    // Match just the call, nothing else
  1.1176 +    MachNode *m = match_tree(call);
  1.1177 +    if (C->failing())  return NULL;
  1.1178 +    if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }
  1.1179 +
  1.1180 +    // Copy data from the Ideal SafePoint to the machine version
  1.1181 +    mcall = m->as_MachCall();
  1.1182 +
  1.1183 +    mcall->set_tf(         call->tf());
  1.1184 +    mcall->set_entry_point(call->entry_point());
  1.1185 +    mcall->set_cnt(        call->cnt());
  1.1186 +
  1.1187 +    if( mcall->is_MachCallJava() ) {
  1.1188 +      MachCallJavaNode *mcall_java  = mcall->as_MachCallJava();
  1.1189 +      const CallJavaNode *call_java =  call->as_CallJava();
  1.1190 +      method = call_java->method();
  1.1191 +      mcall_java->_method = method;
  1.1192 +      mcall_java->_bci = call_java->_bci;
  1.1193 +      mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
  1.1194 +      is_method_handle_invoke = call_java->is_method_handle_invoke();
  1.1195 +      mcall_java->_method_handle_invoke = is_method_handle_invoke;
  1.1196 +      if (is_method_handle_invoke) {
  1.1197 +        C->set_has_method_handle_invokes(true);
  1.1198 +      }
  1.1199 +      if( mcall_java->is_MachCallStaticJava() )
  1.1200 +        mcall_java->as_MachCallStaticJava()->_name =
  1.1201 +         call_java->as_CallStaticJava()->_name;
  1.1202 +      if( mcall_java->is_MachCallDynamicJava() )
  1.1203 +        mcall_java->as_MachCallDynamicJava()->_vtable_index =
  1.1204 +         call_java->as_CallDynamicJava()->_vtable_index;
  1.1205 +    }
  1.1206 +    else if( mcall->is_MachCallRuntime() ) {
  1.1207 +      mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
  1.1208 +    }
  1.1209 +    msfpt = mcall;
  1.1210 +  }
  1.1211 +  // This is a non-call safepoint
  1.1212 +  else {
  1.1213 +    call = NULL;
  1.1214 +    domain = NULL;
  1.1215 +    MachNode *mn = match_tree(sfpt);
  1.1216 +    if (C->failing())  return NULL;
  1.1217 +    msfpt = mn->as_MachSafePoint();
  1.1218 +    cnt = TypeFunc::Parms;
  1.1219 +  }
  1.1220 +
  1.1221 +  // Advertise the correct memory effects (for anti-dependence computation).
  1.1222 +  msfpt->set_adr_type(sfpt->adr_type());
  1.1223 +
  1.1224 +  // Allocate a private array of RegMasks.  These RegMasks are not shared.
  1.1225 +  msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
  1.1226 +  // Empty them all.
  1.1227 +  memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );
  1.1228 +
  1.1229 +  // Do all the pre-defined non-Empty register masks
  1.1230 +  msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
  1.1231 +  msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;
  1.1232 +
   1.1233 +  // Place where the first outgoing argument can possibly be put.
  1.1234 +  OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  1.1235 +  assert( is_even(begin_out_arg_area), "" );
  1.1236 +  // Compute max outgoing register number per call site.
  1.1237 +  OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
  1.1238 +  // Calls to C may hammer extra stack slots above and beyond any arguments.
  1.1239 +  // These are usually backing store for register arguments for varargs.
  1.1240 +  if( call != NULL && call->is_CallRuntime() )
  1.1241 +    out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
  1.1242 +
  1.1243 +
  1.1244 +  // Do the normal argument list (parameters) register masks
  1.1245 +  int argcnt = cnt - TypeFunc::Parms;
  1.1246 +  if( argcnt > 0 ) {          // Skip it all if we have no args
  1.1247 +    BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
  1.1248 +    VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
  1.1249 +    int i;
  1.1250 +    for( i = 0; i < argcnt; i++ ) {
  1.1251 +      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
  1.1252 +    }
  1.1253 +    // V-call to pick proper calling convention
  1.1254 +    call->calling_convention( sig_bt, parm_regs, argcnt );
  1.1255 +
  1.1256 +#ifdef ASSERT
  1.1257 +    // Sanity check users' calling convention.  Really handy during
  1.1258 +    // the initial porting effort.  Fairly expensive otherwise.
  1.1259 +    { for (int i = 0; i<argcnt; i++) {
  1.1260 +      if( !parm_regs[i].first()->is_valid() &&
  1.1261 +          !parm_regs[i].second()->is_valid() ) continue;
  1.1262 +      VMReg reg1 = parm_regs[i].first();
  1.1263 +      VMReg reg2 = parm_regs[i].second();
  1.1264 +      for (int j = 0; j < i; j++) {
  1.1265 +        if( !parm_regs[j].first()->is_valid() &&
  1.1266 +            !parm_regs[j].second()->is_valid() ) continue;
  1.1267 +        VMReg reg3 = parm_regs[j].first();
  1.1268 +        VMReg reg4 = parm_regs[j].second();
  1.1269 +        if( !reg1->is_valid() ) {
  1.1270 +          assert( !reg2->is_valid(), "valid halvsies" );
  1.1271 +        } else if( !reg3->is_valid() ) {
  1.1272 +          assert( !reg4->is_valid(), "valid halvsies" );
  1.1273 +        } else {
  1.1274 +          assert( reg1 != reg2, "calling conv. must produce distinct regs");
  1.1275 +          assert( reg1 != reg3, "calling conv. must produce distinct regs");
  1.1276 +          assert( reg1 != reg4, "calling conv. must produce distinct regs");
  1.1277 +          assert( reg2 != reg3, "calling conv. must produce distinct regs");
  1.1278 +          assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
  1.1279 +          assert( reg3 != reg4, "calling conv. must produce distinct regs");
  1.1280 +        }
  1.1281 +      }
  1.1282 +    }
  1.1283 +    }
  1.1284 +#endif
  1.1285 +
  1.1286 +    // Visit each argument.  Compute its outgoing register mask.
  1.1287 +    // Return results now can have 2 bits returned.
  1.1288 +    // Compute max over all outgoing arguments both per call-site
  1.1289 +    // and over the entire method.
  1.1290 +    for( i = 0; i < argcnt; i++ ) {
  1.1291 +      // Address of incoming argument mask to fill in
  1.1292 +      RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
  1.1293 +      if( !parm_regs[i].first()->is_valid() &&
  1.1294 +          !parm_regs[i].second()->is_valid() ) {
  1.1295 +        continue;               // Avoid Halves
  1.1296 +      }
  1.1297 +      // Grab first register, adjust stack slots and insert in mask.
  1.1298 +      OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
  1.1299 +      if (OptoReg::is_valid(reg1))
  1.1300 +        rm->Insert( reg1 );
  1.1301 +      // Grab second register (if any), adjust stack slots and insert in mask.
  1.1302 +      OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
  1.1303 +      if (OptoReg::is_valid(reg2))
  1.1304 +        rm->Insert( reg2 );
  1.1305 +    } // End of for all arguments
  1.1306 +
  1.1307 +    // Compute number of stack slots needed to restore stack in case of
  1.1308 +    // Pascal-style argument popping.
  1.1309 +    mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
  1.1310 +  }
  1.1311 +
  1.1312 +  // Compute the max stack slot killed by any call.  These will not be
  1.1313 +  // available for debug info, and will be used to adjust FIRST_STACK_mask
  1.1314 +  // after all call sites have been visited.
  1.1315 +  if( _out_arg_limit < out_arg_limit_per_call)
  1.1316 +    _out_arg_limit = out_arg_limit_per_call;
  1.1317 +
  1.1318 +  if (mcall) {
  1.1319 +    // Kill the outgoing argument area, including any non-argument holes and
  1.1320 +    // any legacy C-killed slots.  Use Fat-Projections to do the killing.
  1.1321 +    // Since the max-per-method covers the max-per-call-site and debug info
  1.1322 +    // is excluded on the max-per-method basis, debug info cannot land in
  1.1323 +    // this killed area.
  1.1324 +    uint r_cnt = mcall->tf()->range()->cnt();
  1.1325 +    MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
  1.1326 +    if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
  1.1327 +      C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
  1.1328 +    } else {
  1.1329 +      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
  1.1330 +        proj->_rout.Insert(OptoReg::Name(i));
  1.1331 +    }
  1.1332 +    if (proj->_rout.is_NotEmpty()) {
  1.1333 +      push_projection(proj);
  1.1334 +    }
  1.1335 +  }
  1.1336 +  // Transfer the safepoint information from the call to the mcall
  1.1337 +  // Move the JVMState list
  1.1338 +  msfpt->set_jvms(sfpt->jvms());
  1.1339 +  for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
  1.1340 +    jvms->set_map(sfpt);
  1.1341 +  }
  1.1342 +
  1.1343 +  // Debug inputs begin just after the last incoming parameter
  1.1344 +  assert((mcall == NULL) || (mcall->jvms() == NULL) ||
  1.1345 +         (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "");
  1.1346 +
  1.1347 +  // Move the OopMap
  1.1348 +  msfpt->_oop_map = sfpt->_oop_map;
  1.1349 +
  1.1350 +  // Add additional edges.
  1.1351 +  if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
  1.1352 +    // For these calls we can not add MachConstantBase in expand(), as the
  1.1353 +    // ins are not complete then.
  1.1354 +    msfpt->ins_req(msfpt->mach_constant_base_node_input(), C->mach_constant_base_node());
  1.1355 +    if (msfpt->jvms() &&
  1.1356 +        msfpt->mach_constant_base_node_input() <= msfpt->jvms()->debug_start() + msfpt->_jvmadj) {
  1.1357 +      // We added an edge before jvms, so we must adapt the position of the ins.
  1.1358 +      msfpt->jvms()->adapt_position(+1);
  1.1359 +    }
  1.1360 +  }
  1.1361 +
  1.1362 +  // Registers killed by the call are set in the local scheduling pass
  1.1363 +  // of Global Code Motion.
  1.1364 +  return msfpt;
  1.1365 +}
  1.1366 +
  1.1367 +//---------------------------match_tree----------------------------------------
   1.1368 +// Match an Ideal Node DAG - turn it into a tree; Label & Reduce.  Used as part
   1.1369 +// of the wholesale conversion from Ideal to Mach Nodes.  Also used for
  1.1370 +// making GotoNodes while building the CFG and in init_spill_mask() to identify
  1.1371 +// a Load's result RegMask for memoization in idealreg2regmask[]
  1.1372 +MachNode *Matcher::match_tree( const Node *n ) {
  1.1373 +  assert( n->Opcode() != Op_Phi, "cannot match" );
  1.1374 +  assert( !n->is_block_start(), "cannot match" );
  1.1375 +  // Set the mark for all locally allocated State objects.
   1.1376 +  // When this call returns, the _states_arena will be reset,
   1.1377 +  // freeing all State objects.
  1.1378 +  ResourceMark rm( &_states_arena );
  1.1379 +
  1.1380 +  LabelRootDepth = 0;
  1.1381 +
  1.1382 +  // StoreNodes require their Memory input to match any LoadNodes
  1.1383 +  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
  1.1384 +#ifdef ASSERT
  1.1385 +  Node* save_mem_node = _mem_node;
  1.1386 +  _mem_node = n->is_Store() ? (Node*)n : NULL;
  1.1387 +#endif
  1.1388 +  // State object for root node of match tree
  1.1389 +  // Allocate it on _states_arena - stack allocation can cause stack overflow.
  1.1390 +  State *s = new (&_states_arena) State;
  1.1391 +  s->_kids[0] = NULL;
  1.1392 +  s->_kids[1] = NULL;
  1.1393 +  s->_leaf = (Node*)n;
  1.1394 +  // Label the input tree, allocating labels from top-level arena
  1.1395 +  Label_Root( n, s, n->in(0), mem );
  1.1396 +  if (C->failing())  return NULL;
  1.1397 +
  1.1398 +  // The minimum cost match for the whole tree is found at the root State
  1.1399 +  uint mincost = max_juint;
  1.1400 +  uint cost = max_juint;
  1.1401 +  uint i;
  1.1402 +  for( i = 0; i < NUM_OPERANDS; i++ ) {
  1.1403 +    if( s->valid(i) &&                // valid entry and
  1.1404 +        s->_cost[i] < cost &&         // low cost and
  1.1405 +        s->_rule[i] >= NUM_OPERANDS ) // not an operand
  1.1406 +      cost = s->_cost[mincost=i];
  1.1407 +  }
  1.1408 +  if (mincost == max_juint) {
  1.1409 +#ifndef PRODUCT
  1.1410 +    tty->print("No matching rule for:");
  1.1411 +    s->dump();
  1.1412 +#endif
  1.1413 +    Matcher::soft_match_failure();
  1.1414 +    return NULL;
  1.1415 +  }
  1.1416 +  // Reduce input tree based upon the state labels to machine Nodes
  1.1417 +  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
  1.1418 +#ifdef ASSERT
  1.1419 +  _old2new_map.map(n->_idx, m);
  1.1420 +  _new2old_map.map(m->_idx, (Node*)n);
  1.1421 +#endif
  1.1422 +
  1.1423 +  // Add any Matcher-ignored edges
  1.1424 +  uint cnt = n->req();
  1.1425 +  uint start = 1;
  1.1426 +  if( mem != (Node*)1 ) start = MemNode::Memory+1;
  1.1427 +  if( n->is_AddP() ) {
  1.1428 +    assert( mem == (Node*)1, "" );
  1.1429 +    start = AddPNode::Base+1;
  1.1430 +  }
  1.1431 +  for( i = start; i < cnt; i++ ) {
  1.1432 +    if( !n->match_edge(i) ) {
  1.1433 +      if( i < m->req() )
  1.1434 +        m->ins_req( i, n->in(i) );
  1.1435 +      else
  1.1436 +        m->add_req( n->in(i) );
  1.1437 +    }
  1.1438 +  }
  1.1439 +
  1.1440 +  debug_only( _mem_node = save_mem_node; )
  1.1441 +  return m;
  1.1442 +}
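          +// Note on the cost selection above: the DFA records, for each kind of
          +// result this node can produce, the cheapest rule (_rule[]) and its cost
          +// (_cost[]).  Only entries whose rule is a real instruction or chain rule
          +// (rule >= NUM_OPERANDS) are eligible at the root of a match tree; the
          +// cheapest such rule is handed to ReduceInst() to build the MachNode.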
  1.1443 +
  1.1444 +
  1.1445 +//------------------------------match_into_reg---------------------------------
  1.1446 +// Choose to either match this Node in a register or part of the current
  1.1447 +// match tree.  Return true for requiring a register and false for matching
  1.1448 +// as part of the current match tree.
  1.1449 +static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
  1.1450 +
  1.1451 +  const Type *t = m->bottom_type();
  1.1452 +
  1.1453 +  if (t->singleton()) {
  1.1454 +    // Never force constants into registers.  Allow them to match as
  1.1455 +    // constants or registers.  Copies of the same value will share
  1.1456 +    // the same register.  See find_shared_node.
  1.1457 +    return false;
  1.1458 +  } else {                      // Not a constant
  1.1459 +    // Stop recursion if they have different Controls.
  1.1460 +    Node* m_control = m->in(0);
   1.1461 +    // Control of the load's memory can post-dominate the load's control.
  1.1462 +    // So use it since load can't float above its memory.
  1.1463 +    Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
  1.1464 +    if (control && m_control && control != m_control && control != mem_control) {
  1.1465 +
  1.1466 +      // Actually, we can live with the most conservative control we
  1.1467 +      // find, if it post-dominates the others.  This allows us to
  1.1468 +      // pick up load/op/store trees where the load can float a little
  1.1469 +      // above the store.
  1.1470 +      Node *x = control;
  1.1471 +      const uint max_scan = 6;  // Arbitrary scan cutoff
  1.1472 +      uint j;
  1.1473 +      for (j=0; j<max_scan; j++) {
  1.1474 +        if (x->is_Region())     // Bail out at merge points
  1.1475 +          return true;
  1.1476 +        x = x->in(0);
  1.1477 +        if (x == m_control)     // Does 'control' post-dominate
  1.1478 +          break;                // m->in(0)?  If so, we can use it
  1.1479 +        if (x == mem_control)   // Does 'control' post-dominate
  1.1480 +          break;                // mem_control?  If so, we can use it
  1.1481 +      }
  1.1482 +      if (j == max_scan)        // No post-domination before scan end?
  1.1483 +        return true;            // Then break the match tree up
  1.1484 +    }
  1.1485 +    if ((m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) ||
  1.1486 +        (m->is_DecodeNKlass() && Matcher::narrow_klass_use_complex_address())) {
  1.1487 +      // These are commonly used in address expressions and can
  1.1488 +      // efficiently fold into them on X64 in some cases.
  1.1489 +      return false;
  1.1490 +    }
  1.1491 +  }
  1.1492 +
  1.1493 +  // Not forceable cloning.  If shared, put it into a register.
  1.1494 +  return shared;
  1.1495 +}
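          +// Illustrative walk of the control scan above (node shapes are
          +// hypothetical): for (StoreI ctl mem adr (AddI (LoadI ...) con)) the
          +// Load's control may sit a few blocks above the Store's.  Starting from
          +// the Store's control we climb at most max_scan in(0) edges; if we reach
          +// the Load's control (or the control of the Load's memory) the Load may
          +// still be subsumed into the Store's match tree, otherwise this returns
          +// true and the Load is matched separately into a register.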
  1.1496 +
  1.1497 +
  1.1498 +//------------------------------Instruction Selection--------------------------
  1.1499 +// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
  1.1500 +// ideal nodes to machine instructions.  Trees are delimited by shared Nodes,
  1.1501 +// things the Matcher does not match (e.g., Memory), and things with different
  1.1502 +// Controls (hence forced into different blocks).  We pass in the Control
  1.1503 +// selected for this entire State tree.
  1.1504 +
  1.1505 +// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
  1.1506 +// Store and the Load must have identical Memories (as well as identical
  1.1507 +// pointers).  Since the Matcher does not have anything for Memory (and
  1.1508 +// does not handle DAGs), I have to match the Memory input myself.  If the
  1.1509 +// Tree root is a Store, I require all Loads to have the identical memory.
  1.1510 +Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
   1.1511 +  // Since Label_Root is a recursive function, it's possible that we might run
  1.1512 +  // out of stack space.  See bugs 6272980 & 6227033 for more info.
  1.1513 +  LabelRootDepth++;
  1.1514 +  if (LabelRootDepth > MaxLabelRootDepth) {
  1.1515 +    C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
  1.1516 +    return NULL;
  1.1517 +  }
  1.1518 +  uint care = 0;                // Edges matcher cares about
  1.1519 +  uint cnt = n->req();
  1.1520 +  uint i = 0;
  1.1521 +
  1.1522 +  // Examine children for memory state
  1.1523 +  // Can only subsume a child into your match-tree if that child's memory state
  1.1524 +  // is not modified along the path to another input.
  1.1525 +  // It is unsafe even if the other inputs are separate roots.
  1.1526 +  Node *input_mem = NULL;
  1.1527 +  for( i = 1; i < cnt; i++ ) {
  1.1528 +    if( !n->match_edge(i) ) continue;
  1.1529 +    Node *m = n->in(i);         // Get ith input
  1.1530 +    assert( m, "expect non-null children" );
  1.1531 +    if( m->is_Load() ) {
  1.1532 +      if( input_mem == NULL ) {
  1.1533 +        input_mem = m->in(MemNode::Memory);
  1.1534 +      } else if( input_mem != m->in(MemNode::Memory) ) {
  1.1535 +        input_mem = NodeSentinel;
  1.1536 +      }
  1.1537 +    }
  1.1538 +  }
  1.1539 +
  1.1540 +  for( i = 1; i < cnt; i++ ){// For my children
  1.1541 +    if( !n->match_edge(i) ) continue;
  1.1542 +    Node *m = n->in(i);         // Get ith input
  1.1543 +    // Allocate states out of a private arena
  1.1544 +    State *s = new (&_states_arena) State;
  1.1545 +    svec->_kids[care++] = s;
  1.1546 +    assert( care <= 2, "binary only for now" );
  1.1547 +
  1.1548 +    // Recursively label the State tree.
  1.1549 +    s->_kids[0] = NULL;
  1.1550 +    s->_kids[1] = NULL;
  1.1551 +    s->_leaf = m;
  1.1552 +
  1.1553 +    // Check for leaves of the State Tree; things that cannot be a part of
  1.1554 +    // the current tree.  If it finds any, that value is matched as a
  1.1555 +    // register operand.  If not, then the normal matching is used.
  1.1556 +    if( match_into_reg(n, m, control, i, is_shared(m)) ||
  1.1557 +        //
   1.1558 +        // Stop recursion if this is a LoadNode and the root of this tree is a
  1.1559 +        // StoreNode and the load & store have different memories.
  1.1560 +        ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
  1.1561 +        // Can NOT include the match of a subtree when its memory state
  1.1562 +        // is used by any of the other subtrees
  1.1563 +        (input_mem == NodeSentinel) ) {
  1.1564 +#ifndef PRODUCT
  1.1565 +      // Print when we exclude matching due to different memory states at input-loads
  1.1566 +      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
  1.1567 +        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
  1.1568 +        tty->print_cr("invalid input_mem");
  1.1569 +      }
  1.1570 +#endif
  1.1571 +      // Switch to a register-only opcode; this value must be in a register
  1.1572 +      // and cannot be subsumed as part of a larger instruction.
  1.1573 +      s->DFA( m->ideal_reg(), m );
  1.1574 +
  1.1575 +    } else {
  1.1576 +      // If match tree has no control and we do, adopt it for entire tree
  1.1577 +      if( control == NULL && m->in(0) != NULL && m->req() > 1 )
  1.1578 +        control = m->in(0);         // Pick up control
  1.1579 +      // Else match as a normal part of the match tree.
  1.1580 +      control = Label_Root(m,s,control,mem);
  1.1581 +      if (C->failing()) return NULL;
  1.1582 +    }
  1.1583 +  }
  1.1584 +
  1.1585 +
  1.1586 +  // Call DFA to match this node, and return
  1.1587 +  svec->DFA( n->Opcode(), n );
  1.1588 +
  1.1589 +#ifdef ASSERT
  1.1590 +  uint x;
  1.1591 +  for( x = 0; x < _LAST_MACH_OPER; x++ )
  1.1592 +    if( svec->valid(x) )
  1.1593 +      break;
  1.1594 +
  1.1595 +  if (x >= _LAST_MACH_OPER) {
  1.1596 +    n->dump();
  1.1597 +    svec->dump();
  1.1598 +    assert( false, "bad AD file" );
  1.1599 +  }
  1.1600 +#endif
  1.1601 +  return control;
  1.1602 +}
  1.1603 +
  1.1604 +
  1.1605 +// Con nodes reduced using the same rule can share their MachNode
  1.1606 +// which reduces the number of copies of a constant in the final
  1.1607 +// program.  The register allocator is free to split uses later to
  1.1608 +// split live ranges.
  1.1609 +MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
  1.1610 +  if (!leaf->is_Con() && !leaf->is_DecodeNarrowPtr()) return NULL;
  1.1611 +
  1.1612 +  // See if this Con has already been reduced using this rule.
  1.1613 +  if (_shared_nodes.Size() <= leaf->_idx) return NULL;
  1.1614 +  MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
  1.1615 +  if (last != NULL && rule == last->rule()) {
  1.1616 +    // Don't expect control change for DecodeN
  1.1617 +    if (leaf->is_DecodeNarrowPtr())
  1.1618 +      return last;
  1.1619 +    // Get the new space root.
  1.1620 +    Node* xroot = new_node(C->root());
  1.1621 +    if (xroot == NULL) {
   1.1622 +      // This shouldn't happen given the order of matching.
  1.1623 +      return NULL;
  1.1624 +    }
  1.1625 +
  1.1626 +    // Shared constants need to have their control be root so they
  1.1627 +    // can be scheduled properly.
  1.1628 +    Node* control = last->in(0);
  1.1629 +    if (control != xroot) {
  1.1630 +      if (control == NULL || control == C->root()) {
  1.1631 +        last->set_req(0, xroot);
  1.1632 +      } else {
  1.1633 +        assert(false, "unexpected control");
  1.1634 +        return NULL;
  1.1635 +      }
  1.1636 +    }
  1.1637 +    return last;
  1.1638 +  }
  1.1639 +  return NULL;
  1.1640 +}
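          +// Example of the effect (illustrative): two occurrences of the same ConI
          +// reduced by the same rule both end up using one constant-loading MachNode
          +// whose control is the (new-space) root; if keeping that value live turns
          +// out to be expensive, the register allocator can still split the live
          +// range later, as noted above.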
  1.1641 +
  1.1642 +
  1.1643 +//------------------------------ReduceInst-------------------------------------
  1.1644 +// Reduce a State tree (with given Control) into a tree of MachNodes.
   1.1645 +// This routine (and its cohort ReduceOper) converts Ideal Nodes into
  1.1646 +// complicated machine Nodes.  Each MachNode covers some tree of Ideal Nodes.
  1.1647 +// Each MachNode has a number of complicated MachOper operands; each
  1.1648 +// MachOper also covers a further tree of Ideal Nodes.
  1.1649 +
  1.1650 +// The root of the Ideal match tree is always an instruction, so we enter
  1.1651 +// the recursion here.  After building the MachNode, we need to recurse
  1.1652 +// the tree checking for these cases:
  1.1653 +// (1) Child is an instruction -
  1.1654 +//     Build the instruction (recursively), add it as an edge.
  1.1655 +//     Build a simple operand (register) to hold the result of the instruction.
  1.1656 +// (2) Child is an interior part of an instruction -
  1.1657 +//     Skip over it (do nothing)
   1.1658 +// (3) Child is the start of an operand -
  1.1659 +//     Build the operand, place it inside the instruction
  1.1660 +//     Call ReduceOper.
  1.1661 +MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  1.1662 +  assert( rule >= NUM_OPERANDS, "called with operand rule" );
  1.1663 +
  1.1664 +  MachNode* shared_node = find_shared_node(s->_leaf, rule);
  1.1665 +  if (shared_node != NULL) {
  1.1666 +    return shared_node;
  1.1667 +  }
  1.1668 +
  1.1669 +  // Build the object to represent this state & prepare for recursive calls
  1.1670 +  MachNode *mach = s->MachNodeGenerator( rule, C );
  1.1671 +  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
  1.1672 +  assert( mach->_opnds[0] != NULL, "Missing result operand" );
  1.1673 +  Node *leaf = s->_leaf;
  1.1674 +  // Check for instruction or instruction chain rule
  1.1675 +  if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
  1.1676 +    assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf),
  1.1677 +           "duplicating node that's already been matched");
  1.1678 +    // Instruction
  1.1679 +    mach->add_req( leaf->in(0) ); // Set initial control
  1.1680 +    // Reduce interior of complex instruction
  1.1681 +    ReduceInst_Interior( s, rule, mem, mach, 1 );
  1.1682 +  } else {
  1.1683 +    // Instruction chain rules are data-dependent on their inputs
  1.1684 +    mach->add_req(0);             // Set initial control to none
  1.1685 +    ReduceInst_Chain_Rule( s, rule, mem, mach );
  1.1686 +  }
  1.1687 +
  1.1688 +  // If a Memory was used, insert a Memory edge
  1.1689 +  if( mem != (Node*)1 ) {
  1.1690 +    mach->ins_req(MemNode::Memory,mem);
  1.1691 +#ifdef ASSERT
  1.1692 +    // Verify adr type after matching memory operation
  1.1693 +    const MachOper* oper = mach->memory_operand();
  1.1694 +    if (oper != NULL && oper != (MachOper*)-1) {
  1.1695 +      // It has a unique memory operand.  Find corresponding ideal mem node.
  1.1696 +      Node* m = NULL;
  1.1697 +      if (leaf->is_Mem()) {
  1.1698 +        m = leaf;
  1.1699 +      } else {
  1.1700 +        m = _mem_node;
  1.1701 +        assert(m != NULL && m->is_Mem(), "expecting memory node");
  1.1702 +      }
  1.1703 +      const Type* mach_at = mach->adr_type();
   1.1704 +      // A DecodeN node consumed by an address may have a different type
   1.1705 +      // than its input. Don't compare types in that case.
  1.1706 +      if (m->adr_type() != mach_at &&
  1.1707 +          (m->in(MemNode::Address)->is_DecodeNarrowPtr() ||
  1.1708 +           m->in(MemNode::Address)->is_AddP() &&
  1.1709 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr() ||
  1.1710 +           m->in(MemNode::Address)->is_AddP() &&
  1.1711 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
  1.1712 +           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr())) {
  1.1713 +        mach_at = m->adr_type();
  1.1714 +      }
  1.1715 +      if (m->adr_type() != mach_at) {
  1.1716 +        m->dump();
  1.1717 +        tty->print_cr("mach:");
  1.1718 +        mach->dump(1);
  1.1719 +      }
  1.1720 +      assert(m->adr_type() == mach_at, "matcher should not change adr type");
  1.1721 +    }
  1.1722 +#endif
  1.1723 +  }
  1.1724 +
  1.1725 +  // If the _leaf is an AddP, insert the base edge
  1.1726 +  if (leaf->is_AddP()) {
  1.1727 +    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
  1.1728 +  }
  1.1729 +
  1.1730 +  uint number_of_projections_prior = number_of_projections();
  1.1731 +
  1.1732 +  // Perform any 1-to-many expansions required
  1.1733 +  MachNode *ex = mach->Expand(s, _projection_list, mem);
  1.1734 +  if (ex != mach) {
  1.1735 +    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
  1.1736 +    if( ex->in(1)->is_Con() )
  1.1737 +      ex->in(1)->set_req(0, C->root());
  1.1738 +    // Remove old node from the graph
  1.1739 +    for( uint i=0; i<mach->req(); i++ ) {
  1.1740 +      mach->set_req(i,NULL);
  1.1741 +    }
  1.1742 +#ifdef ASSERT
  1.1743 +    _new2old_map.map(ex->_idx, s->_leaf);
  1.1744 +#endif
  1.1745 +  }
  1.1746 +
  1.1747 +  // PhaseChaitin::fixup_spills will sometimes generate spill code
   1.1748 +  // via the matcher.  By that time, nodes have been wired into the CFG,
  1.1749 +  // and any further nodes generated by expand rules will be left hanging
  1.1750 +  // in space, and will not get emitted as output code.  Catch this.
  1.1751 +  // Also, catch any new register allocation constraints ("projections")
  1.1752 +  // generated belatedly during spill code generation.
  1.1753 +  if (_allocation_started) {
  1.1754 +    guarantee(ex == mach, "no expand rules during spill generation");
  1.1755 +    guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
  1.1756 +  }
  1.1757 +
  1.1758 +  if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
  1.1759 +    // Record the con for sharing
  1.1760 +    _shared_nodes.map(leaf->_idx, ex);
  1.1761 +  }
  1.1762 +
  1.1763 +  return ex;
  1.1764 +}
  1.1765 +
  1.1766 +void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
  1.1767 +  // 'op' is what I am expecting to receive
  1.1768 +  int op = _leftOp[rule];
   1.1769 +  // Operand type to catch child's result
  1.1770 +  // This is what my child will give me.
  1.1771 +  int opnd_class_instance = s->_rule[op];
  1.1772 +  // Choose between operand class or not.
  1.1773 +  // This is what I will receive.
  1.1774 +  int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
  1.1775 +  // New rule for child.  Chase operand classes to get the actual rule.
  1.1776 +  int newrule = s->_rule[catch_op];
  1.1777 +
  1.1778 +  if( newrule < NUM_OPERANDS ) {
  1.1779 +    // Chain from operand or operand class, may be output of shared node
  1.1780 +    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
  1.1781 +            "Bad AD file: Instruction chain rule must chain from operand");
  1.1782 +    // Insert operand into array of operands for this instruction
  1.1783 +    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );
  1.1784 +
  1.1785 +    ReduceOper( s, newrule, mem, mach );
  1.1786 +  } else {
  1.1787 +    // Chain from the result of an instruction
  1.1788 +    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
  1.1789 +    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
  1.1790 +    Node *mem1 = (Node*)1;
  1.1791 +    debug_only(Node *save_mem_node = _mem_node;)
  1.1792 +    mach->add_req( ReduceInst(s, newrule, mem1) );
  1.1793 +    debug_only(_mem_node = save_mem_node;)
  1.1794 +  }
  1.1795 +  return;
  1.1796 +}
  1.1797 +
  1.1798 +
  1.1799 +uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
  1.1800 +  if( s->_leaf->is_Load() ) {
  1.1801 +    Node *mem2 = s->_leaf->in(MemNode::Memory);
  1.1802 +    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
  1.1803 +    debug_only( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
  1.1804 +    mem = mem2;
  1.1805 +  }
  1.1806 +  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
  1.1807 +    if( mach->in(0) == NULL )
  1.1808 +      mach->set_req(0, s->_leaf->in(0));
  1.1809 +  }
  1.1810 +
  1.1811 +  // Now recursively walk the state tree & add operand list.
  1.1812 +  for( uint i=0; i<2; i++ ) {   // binary tree
  1.1813 +    State *newstate = s->_kids[i];
  1.1814 +    if( newstate == NULL ) break;      // Might only have 1 child
  1.1815 +    // 'op' is what I am expecting to receive
  1.1816 +    int op;
  1.1817 +    if( i == 0 ) {
  1.1818 +      op = _leftOp[rule];
  1.1819 +    } else {
  1.1820 +      op = _rightOp[rule];
  1.1821 +    }
   1.1822 +    // Operand type to catch child's result
  1.1823 +    // This is what my child will give me.
  1.1824 +    int opnd_class_instance = newstate->_rule[op];
  1.1825 +    // Choose between operand class or not.
  1.1826 +    // This is what I will receive.
  1.1827 +    int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
  1.1828 +    // New rule for child.  Chase operand classes to get the actual rule.
  1.1829 +    int newrule = newstate->_rule[catch_op];
  1.1830 +
  1.1831 +    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
  1.1832 +      // Operand/operandClass
  1.1833 +      // Insert operand into array of operands for this instruction
  1.1834 +      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
  1.1835 +      ReduceOper( newstate, newrule, mem, mach );
  1.1836 +
  1.1837 +    } else {                    // Child is internal operand or new instruction
  1.1838 +      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
  1.1839 +        // internal operand --> call ReduceInst_Interior
  1.1840 +        // Interior of complex instruction.  Do nothing but recurse.
  1.1841 +        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
  1.1842 +      } else {
  1.1843 +        // instruction --> call build operand(  ) to catch result
  1.1844 +        //             --> ReduceInst( newrule )
  1.1845 +        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
  1.1846 +        Node *mem1 = (Node*)1;
  1.1847 +        debug_only(Node *save_mem_node = _mem_node;)
  1.1848 +        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
  1.1849 +        debug_only(_mem_node = save_mem_node;)
  1.1850 +      }
  1.1851 +    }
  1.1852 +    assert( mach->_opnds[num_opnds-1], "" );
  1.1853 +  }
  1.1854 +  return num_opnds;
  1.1855 +}
  1.1856 +
  1.1857 +// This routine walks the interior of possible complex operands.
  1.1858 +// At each point we check our children in the match tree:
  1.1859 +// (1) No children -
  1.1860 +//     We are a leaf; add _leaf field as an input to the MachNode
  1.1861 +// (2) Child is an internal operand -
  1.1862 +//     Skip over it ( do nothing )
  1.1863 +// (3) Child is an instruction -
   1.1864 +//     Call ReduceInst recursively and add the resulting
   1.1865 +//     instruction as an input to the MachNode
  1.1866 +void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
  1.1867 +  assert( rule < _LAST_MACH_OPER, "called with operand rule" );
  1.1868 +  State *kid = s->_kids[0];
  1.1869 +  assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );
  1.1870 +
  1.1871 +  // Leaf?  And not subsumed?
  1.1872 +  if( kid == NULL && !_swallowed[rule] ) {
  1.1873 +    mach->add_req( s->_leaf );  // Add leaf pointer
  1.1874 +    return;                     // Bail out
  1.1875 +  }
  1.1876 +
  1.1877 +  if( s->_leaf->is_Load() ) {
  1.1878 +    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
  1.1879 +    mem = s->_leaf->in(MemNode::Memory);
  1.1880 +    debug_only(_mem_node = s->_leaf;)
  1.1881 +  }
  1.1882 +  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
  1.1883 +    if( !mach->in(0) )
  1.1884 +      mach->set_req(0,s->_leaf->in(0));
  1.1885 +    else {
  1.1886 +      assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
  1.1887 +    }
  1.1888 +  }
  1.1889 +
  1.1890 +  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) {   // binary tree
  1.1891 +    int newrule;
  1.1892 +    if( i == 0)
  1.1893 +      newrule = kid->_rule[_leftOp[rule]];
  1.1894 +    else
  1.1895 +      newrule = kid->_rule[_rightOp[rule]];
  1.1896 +
  1.1897 +    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
  1.1898 +      // Internal operand; recurse but do nothing else
  1.1899 +      ReduceOper( kid, newrule, mem, mach );
  1.1900 +
  1.1901 +    } else {                    // Child is a new instruction
  1.1902 +      // Reduce the instruction, and add a direct pointer from this
  1.1903 +      // machine instruction to the newly reduced one.
  1.1904 +      Node *mem1 = (Node*)1;
  1.1905 +      debug_only(Node *save_mem_node = _mem_node;)
  1.1906 +      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
  1.1907 +      debug_only(_mem_node = save_mem_node;)
  1.1908 +    }
  1.1909 +  }
  1.1910 +}
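          +// Putting the Reduce* routines together (shapes are illustrative): a
          +// labelled tree such as (StoreI (AddP base off) val) typically reduces to
          +// a single store MachNode whose _opnds[] hold the result operand and a
          +// memory operand covering the AddP subtree, while edges for the base, the
          +// memory state and the stored value are attached with add_req()/ins_req()
          +// as the recursion unwinds.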
  1.1911 +
  1.1912 +
  1.1913 +// -------------------------------------------------------------------------
  1.1914 +// Java-Java calling convention
  1.1915 +// (what you use when Java calls Java)
  1.1916 +
  1.1917 +//------------------------------find_receiver----------------------------------
  1.1918 +// For a given signature, return the OptoReg for parameter 0.
  1.1919 +OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
  1.1920 +  VMRegPair regs;
  1.1921 +  BasicType sig_bt = T_OBJECT;
  1.1922 +  calling_convention(&sig_bt, &regs, 1, is_outgoing);
  1.1923 +  // Return argument 0 register.  In the LP64 build pointers
  1.1924 +  // take 2 registers, but the VM wants only the 'main' name.
  1.1925 +  return OptoReg::as_OptoReg(regs.first());
  1.1926 +}
  1.1927 +
   1.1928 +// This function identifies sub-graphs in which a 'load' node is
   1.1929 +// input to two different nodes, such that the sub-graph can be matched
   1.1930 +// with BMI instructions like blsi, blsr, etc.
   1.1931 +// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
  1.1932 +// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
  1.1933 +// refers to the same node.
  1.1934 +#ifdef X86
  1.1935 +// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
  1.1936 +// This is a temporary solution until we make DAGs expressible in ADL.
  1.1937 +template<typename ConType>
  1.1938 +class FusedPatternMatcher {
  1.1939 +  Node* _op1_node;
  1.1940 +  Node* _mop_node;
  1.1941 +  int _con_op;
  1.1942 +
  1.1943 +  static int match_next(Node* n, int next_op, int next_op_idx) {
  1.1944 +    if (n->in(1) == NULL || n->in(2) == NULL) {
  1.1945 +      return -1;
  1.1946 +    }
  1.1947 +
  1.1948 +    if (next_op_idx == -1) { // n is commutative, try rotations
  1.1949 +      if (n->in(1)->Opcode() == next_op) {
  1.1950 +        return 1;
  1.1951 +      } else if (n->in(2)->Opcode() == next_op) {
  1.1952 +        return 2;
  1.1953 +      }
  1.1954 +    } else {
  1.1955 +      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
  1.1956 +      if (n->in(next_op_idx)->Opcode() == next_op) {
  1.1957 +        return next_op_idx;
  1.1958 +      }
  1.1959 +    }
  1.1960 +    return -1;
  1.1961 +  }
  1.1962 +public:
  1.1963 +  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
  1.1964 +    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
  1.1965 +
  1.1966 +  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
  1.1967 +             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
  1.1968 +             typename ConType::NativeType con_value) {
  1.1969 +    if (_op1_node->Opcode() != op1) {
  1.1970 +      return false;
  1.1971 +    }
  1.1972 +    if (_mop_node->outcnt() > 2) {
  1.1973 +      return false;
  1.1974 +    }
  1.1975 +    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
  1.1976 +    if (op1_op2_idx == -1) {
  1.1977 +      return false;
  1.1978 +    }
  1.1979 +    // Memory operation must be the other edge
  1.1980 +    int op1_mop_idx = (op1_op2_idx & 1) + 1;
  1.1981 +
  1.1982 +    // Check that the mop node is really what we want
  1.1983 +    if (_op1_node->in(op1_mop_idx) == _mop_node) {
  1.1984 +      Node *op2_node = _op1_node->in(op1_op2_idx);
  1.1985 +      if (op2_node->outcnt() > 1) {
  1.1986 +        return false;
  1.1987 +      }
  1.1988 +      assert(op2_node->Opcode() == op2, "Should be");
  1.1989 +      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
  1.1990 +      if (op2_con_idx == -1) {
  1.1991 +        return false;
  1.1992 +      }
  1.1993 +      // Memory operation must be the other edge
  1.1994 +      int op2_mop_idx = (op2_con_idx & 1) + 1;
  1.1995 +      // Check that the memory operation is the same node
  1.1996 +      if (op2_node->in(op2_mop_idx) == _mop_node) {
  1.1997 +        // Now check the constant
  1.1998 +        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
  1.1999 +        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
  1.2000 +          return true;
  1.2001 +        }
  1.2002 +      }
  1.2003 +    }
  1.2004 +    return false;
  1.2005 +  }
  1.2006 +};
  1.2007 +
  1.2008 +
  1.2009 +bool Matcher::is_bmi_pattern(Node *n, Node *m) {
  1.2010 +  if (n != NULL && m != NULL) {
  1.2011 +    if (m->Opcode() == Op_LoadI) {
  1.2012 +      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
  1.2013 +      return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
  1.2014 +             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
  1.2015 +             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
  1.2016 +    } else if (m->Opcode() == Op_LoadL) {
  1.2017 +      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
  1.2018 +      return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
  1.2019 +             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
  1.2020 +             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
  1.2021 +    }
  1.2022 +  }
  1.2023 +  return false;
  1.2024 +}
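          +// Used from find_shared() below: when 'n' and 'm' form one of the shapes
          +// above, the load 'm' is pushed with state Visit instead of Pre_Visit, so
          +// it is not flagged as shared and the whole DAG remains eligible to be
          +// matched as a single BMI instruction (blsi, blsr, etc.).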
  1.2025 +#endif // X86
  1.2026 +
  1.2027 +// A method-klass-holder may be passed in the inline_cache_reg
  1.2028 +// and then expanded into the inline_cache_reg and a method_oop register
  1.2029 +//   defined in ad_<arch>.cpp
  1.2030 +
  1.2031 +
  1.2032 +//------------------------------find_shared------------------------------------
  1.2033 +// Set bits if Node is shared or otherwise a root
  1.2034 +void Matcher::find_shared( Node *n ) {
  1.2035 +  // Allocate stack of size C->unique() * 2 to avoid frequent realloc
  1.2036 +  MStack mstack(C->unique() * 2);
  1.2037 +  // Mark nodes as address_visited if they are inputs to an address expression
  1.2038 +  VectorSet address_visited(Thread::current()->resource_area());
  1.2039 +  mstack.push(n, Visit);     // Don't need to pre-visit root node
  1.2040 +  while (mstack.is_nonempty()) {
  1.2041 +    n = mstack.node();       // Leave node on stack
  1.2042 +    Node_State nstate = mstack.state();
  1.2043 +    uint nop = n->Opcode();
  1.2044 +    if (nstate == Pre_Visit) {
  1.2045 +      if (address_visited.test(n->_idx)) { // Visited in address already?
  1.2046 +        // Flag as visited and shared now.
  1.2047 +        set_visited(n);
  1.2048 +      }
  1.2049 +      if (is_visited(n)) {   // Visited already?
  1.2050 +        // Node is shared and has no reason to clone.  Flag it as shared.
  1.2051 +        // This causes it to match into a register for the sharing.
  1.2052 +        set_shared(n);       // Flag as shared and
  1.2053 +        mstack.pop();        // remove node from stack
  1.2054 +        continue;
  1.2055 +      }
  1.2056 +      nstate = Visit; // Not already visited; so visit now
  1.2057 +    }
  1.2058 +    if (nstate == Visit) {
  1.2059 +      mstack.set_state(Post_Visit);
  1.2060 +      set_visited(n);   // Flag as visited now
  1.2061 +      bool mem_op = false;
  1.2062 +
  1.2063 +      switch( nop ) {  // Handle some opcodes special
  1.2064 +      case Op_Phi:             // Treat Phis as shared roots
  1.2065 +      case Op_Parm:
  1.2066 +      case Op_Proj:            // All handled specially during matching
  1.2067 +      case Op_SafePointScalarObject:
  1.2068 +        set_shared(n);
  1.2069 +        set_dontcare(n);
  1.2070 +        break;
  1.2071 +      case Op_If:
  1.2072 +      case Op_CountedLoopEnd:
  1.2073 +        mstack.set_state(Alt_Post_Visit); // Alternative way
  1.2074 +        // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)).  Helps
  1.2075 +        // with matching cmp/branch in 1 instruction.  The Matcher needs the
  1.2076 +        // Bool and CmpX side-by-side, because it can only get at constants
  1.2077 +        // that are at the leaves of Match trees, and the Bool's condition acts
  1.2078 +        // as a constant here.
  1.2079 +        mstack.push(n->in(1), Visit);         // Clone the Bool
  1.2080 +        mstack.push(n->in(0), Pre_Visit);     // Visit control input
  1.2081 +        continue; // while (mstack.is_nonempty())
  1.2082 +      case Op_ConvI2D:         // These forms efficiently match with a prior
  1.2083 +      case Op_ConvI2F:         //   Load but not a following Store
  1.2084 +        if( n->in(1)->is_Load() &&        // Prior load
  1.2085 +            n->outcnt() == 1 &&           // Not already shared
  1.2086 +            n->unique_out()->is_Store() ) // Following store
  1.2087 +          set_shared(n);       // Force it to be a root
  1.2088 +        break;
  1.2089 +      case Op_ReverseBytesI:
  1.2090 +      case Op_ReverseBytesL:
  1.2091 +        if( n->in(1)->is_Load() &&        // Prior load
  1.2092 +            n->outcnt() == 1 )            // Not already shared
  1.2093 +          set_shared(n);                  // Force it to be a root
  1.2094 +        break;
   1.2095 +      case Op_BoxLock:         // Can't match until we get stack-regs in ADLC
  1.2096 +      case Op_IfFalse:
  1.2097 +      case Op_IfTrue:
  1.2098 +      case Op_MachProj:
  1.2099 +      case Op_MergeMem:
  1.2100 +      case Op_Catch:
  1.2101 +      case Op_CatchProj:
  1.2102 +      case Op_CProj:
  1.2103 +      case Op_JumpProj:
  1.2104 +      case Op_JProj:
  1.2105 +      case Op_NeverBranch:
  1.2106 +        set_dontcare(n);
  1.2107 +        break;
  1.2108 +      case Op_Jump:
  1.2109 +        mstack.push(n->in(1), Pre_Visit);     // Switch Value (could be shared)
  1.2110 +        mstack.push(n->in(0), Pre_Visit);     // Visit Control input
  1.2111 +        continue;                             // while (mstack.is_nonempty())
  1.2112 +      case Op_StrComp:
  1.2113 +      case Op_StrEquals:
  1.2114 +      case Op_StrIndexOf:
  1.2115 +      case Op_AryEq:
  1.2116 +      case Op_EncodeISOArray:
  1.2117 +        set_shared(n); // Force result into register (it will be anyways)
  1.2118 +        break;
   1.2119 +      case Op_ConP: {  // Convert pointers above the centerline to NULL
  1.2120 +        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
  1.2121 +        const TypePtr* tp = tn->type()->is_ptr();
  1.2122 +        if (tp->_ptr == TypePtr::AnyNull) {
  1.2123 +          tn->set_type(TypePtr::NULL_PTR);
  1.2124 +        }
  1.2125 +        break;
  1.2126 +      }
   1.2127 +      case Op_ConN: {  // Convert narrow pointers above the centerline to NULL
  1.2128 +        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
  1.2129 +        const TypePtr* tp = tn->type()->make_ptr();
  1.2130 +        if (tp && tp->_ptr == TypePtr::AnyNull) {
  1.2131 +          tn->set_type(TypeNarrowOop::NULL_PTR);
  1.2132 +        }
  1.2133 +        break;
  1.2134 +      }
  1.2135 +      case Op_Binary:         // These are introduced in the Post_Visit state.
  1.2136 +        ShouldNotReachHere();
  1.2137 +        break;
  1.2138 +      case Op_ClearArray:
  1.2139 +      case Op_SafePoint:
  1.2140 +        mem_op = true;
  1.2141 +        break;
  1.2142 +      default:
  1.2143 +        if( n->is_Store() ) {
  1.2144 +          // Do match stores, despite no ideal reg
  1.2145 +          mem_op = true;
  1.2146 +          break;
  1.2147 +        }
  1.2148 +        if( n->is_Mem() ) { // Loads and LoadStores
  1.2149 +          mem_op = true;
  1.2150 +          // Loads must be root of match tree due to prior load conflict
  1.2151 +          if( C->subsume_loads() == false )
  1.2152 +            set_shared(n);
  1.2153 +        }
  1.2154 +        // Fall into default case
  1.2155 +        if( !n->ideal_reg() )
  1.2156 +          set_dontcare(n);  // Unmatchable Nodes
  1.2157 +      } // end_switch
  1.2158 +
  1.2159 +      for(int i = n->req() - 1; i >= 0; --i) { // For my children
  1.2160 +        Node *m = n->in(i); // Get ith input
  1.2161 +        if (m == NULL) continue;  // Ignore NULLs
  1.2162 +        uint mop = m->Opcode();
  1.2163 +
  1.2164 +        // Must clone all producers of flags, or we will not match correctly.
  1.2165 +        // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
  1.2166 +        // then it will match into an ideal Op_RegFlags.  Alas, the fp-flags
  1.2167 +        // are also there, so we may match a float-branch to int-flags and
  1.2168 +        // expect the allocator to haul the flags from the int-side to the
  1.2169 +        // fp-side.  No can do.
  1.2170 +        if( _must_clone[mop] ) {
  1.2171 +          mstack.push(m, Visit);
  1.2172 +          continue; // for(int i = ...)
  1.2173 +        }
  1.2174 +
  1.2175 +        if( mop == Op_AddP && m->in(AddPNode::Base)->is_DecodeNarrowPtr()) {
   1.2176 +          // Bases used in addresses must be shared, but since
   1.2177 +          // they are shared through a DecodeN they may appear
   1.2178 +          // to have a single use, so force sharing here.
  1.2179 +          set_shared(m->in(AddPNode::Base)->in(1));
  1.2180 +        }
  1.2181 +
  1.2182 +        // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
  1.2183 +#ifdef X86
  1.2184 +        if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
  1.2185 +          mstack.push(m, Visit);
  1.2186 +          continue;
  1.2187 +        }
  1.2188 +#endif
  1.2189 +
  1.2190 +        // Clone addressing expressions as they are "free" in memory access instructions
  1.2191 +        if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
   1.2192 +          // Some inputs of the address expression are not put on the stack,
   1.2193 +          // to avoid marking them as shared and forcing them into a register
  1.2194 +          // if they are used only in address expressions.
  1.2195 +          // But they should be marked as shared if there are other uses
  1.2196 +          // besides address expressions.
  1.2197 +
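          +          // Typical address shape handled here (illustrative):
          +          //   (AddP base (AddP base (LShiftX (ConvI2L idx) con)) con_off)
          +          // Each piece is flagged as address_visited and pushed so it can
          +          // fold into the memory instruction's addressing mode instead of
          +          // being forced into a register as a shared node.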
  1.2198 +          Node *off = m->in(AddPNode::Offset);
  1.2199 +          if( off->is_Con() &&
  1.2200 +              // When there are other uses besides address expressions
  1.2201 +              // put it on stack and mark as shared.
  1.2202 +              !is_visited(m) ) {
  1.2203 +            address_visited.test_set(m->_idx); // Flag as address_visited
  1.2204 +            Node *adr = m->in(AddPNode::Address);
  1.2205 +
  1.2206 +            // Intel, ARM and friends can handle 2 adds in addressing mode
  1.2207 +            if( clone_shift_expressions && adr->is_AddP() &&
  1.2208 +                // AtomicAdd is not an addressing expression.
  1.2209 +                // Cheap to find it by looking for screwy base.
  1.2210 +                !adr->in(AddPNode::Base)->is_top() &&
  1.2211 +                // Are there other uses besides address expressions?
  1.2212 +                !is_visited(adr) ) {
  1.2213 +              address_visited.set(adr->_idx); // Flag as address_visited
  1.2214 +              Node *shift = adr->in(AddPNode::Offset);
  1.2215 +              // Check for shift by small constant as well
  1.2216 +              if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
  1.2217 +                  shift->in(2)->get_int() <= 3 &&
  1.2218 +                  // Are there other uses besides address expressions?
  1.2219 +                  !is_visited(shift) ) {
  1.2220 +                address_visited.set(shift->_idx); // Flag as address_visited
  1.2221 +                mstack.push(shift->in(2), Visit);
  1.2222 +                Node *conv = shift->in(1);
  1.2223 +#ifdef _LP64
   1.2224 +                // Allow Matcher to match the rule which bypasses
   1.2225 +                // the ConvI2L operation for an array index on LP64
  1.2226 +                // if the index value is positive.
  1.2227 +                if( conv->Opcode() == Op_ConvI2L &&
  1.2228 +                    conv->as_Type()->type()->is_long()->_lo >= 0 &&
  1.2229 +                    // Are there other uses besides address expressions?
  1.2230 +                    !is_visited(conv) ) {
  1.2231 +                  address_visited.set(conv->_idx); // Flag as address_visited
  1.2232 +                  mstack.push(conv->in(1), Pre_Visit);
  1.2233 +                } else
  1.2234 +#endif
  1.2235 +                mstack.push(conv, Pre_Visit);
  1.2236 +              } else {
  1.2237 +                mstack.push(shift, Pre_Visit);
  1.2238 +              }
  1.2239 +              mstack.push(adr->in(AddPNode::Address), Pre_Visit);
  1.2240 +              mstack.push(adr->in(AddPNode::Base), Pre_Visit);
  1.2241 +            } else {  // Sparc, Alpha, PPC and friends
  1.2242 +              mstack.push(adr, Pre_Visit);
  1.2243 +            }
  1.2244 +
  1.2245 +            // Clone X+offset as it also folds into most addressing expressions
  1.2246 +            mstack.push(off, Visit);
  1.2247 +            mstack.push(m->in(AddPNode::Base), Pre_Visit);
  1.2248 +            continue; // for(int i = ...)
  1.2249 +          } // if( off->is_Con() )
  1.2250 +        }   // if( mem_op &&
  1.2251 +        mstack.push(m, Pre_Visit);
  1.2252 +      }     // for(int i = ...)
  1.2253 +    }
  1.2254 +    else if (nstate == Alt_Post_Visit) {
  1.2255 +      mstack.pop(); // Remove node from stack
  1.2256 +      // We cannot remove the Cmp input from the Bool here, as the Bool may be
  1.2257 +      // shared and all users of the Bool need to move the Cmp in parallel.
  1.2258 +      // This leaves both the Bool and the If pointing at the Cmp.  To
   1.2259 +      // prevent the Matcher from trying to match the Cmp along both paths,
   1.2260 +      // BoolNode::match_edge always returns zero.
  1.2261 +
  1.2262 +      // We reorder the Op_If in a pre-order manner, so we can visit without
  1.2263 +      // accidentally sharing the Cmp (the Bool and the If make 2 users).
  1.2264 +      n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
  1.2265 +    }
  1.2266 +    else if (nstate == Post_Visit) {
  1.2267 +      mstack.pop(); // Remove node from stack
  1.2268 +
  1.2269 +      // Now hack a few special opcodes
  1.2270 +      switch( n->Opcode() ) {       // Handle some opcodes special
  1.2271 +      case Op_StorePConditional:
  1.2272 +      case Op_StoreIConditional:
  1.2273 +      case Op_StoreLConditional:
  1.2274 +      case Op_CompareAndSwapI:
  1.2275 +      case Op_CompareAndSwapL:
  1.2276 +      case Op_CompareAndSwapP:
  1.2277 +      case Op_CompareAndSwapN: {   // Convert trinary to binary-tree
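          +        // The ideal node carries oldval and newval as separate inputs; fold
          +        // them into a single Binary(oldval, newval) input so the match tree
          +        // stays binary.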
  1.2278 +        Node *newval = n->in(MemNode::ValueIn );
  1.2279 +        Node *oldval  = n->in(LoadStoreConditionalNode::ExpectedIn);
  1.2280 +        Node *pair = new (C) BinaryNode( oldval, newval );
  1.2281 +        n->set_req(MemNode::ValueIn,pair);
  1.2282 +        n->del_req(LoadStoreConditionalNode::ExpectedIn);
  1.2283 +        break;
  1.2284 +      }
  1.2285 +      case Op_CMoveD:              // Convert trinary to binary-tree
  1.2286 +      case Op_CMoveF:
  1.2287 +      case Op_CMoveI:
  1.2288 +      case Op_CMoveL:
  1.2289 +      case Op_CMoveN:
  1.2290 +      case Op_CMoveP: {
  1.2291 +        // Restructure into a binary tree for Matching.  It's possible that
  1.2292 +        // we could move this code up next to the graph reshaping for IfNodes
  1.2293 +        // or vice-versa, but I do not want to debug this for Ladybird.
  1.2294 +        // 10/2/2000 CNC.
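          +        // Pair the Bool with its Cmp, and the two value inputs with each
          +        // other, so CMove(Bool, val1, val2) becomes
          +        // CMove(Binary(Bool, Cmp), Binary(val1, val2)).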
  1.2295 +        Node *pair1 = new (C) BinaryNode(n->in(1),n->in(1)->in(1));
  1.2296 +        n->set_req(1,pair1);
  1.2297 +        Node *pair2 = new (C) BinaryNode(n->in(2),n->in(3));
  1.2298 +        n->set_req(2,pair2);
  1.2299 +        n->del_req(3);
  1.2300 +        break;
  1.2301 +      }
  1.2302 +      case Op_LoopLimit: {
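          +        // LoopLimit(init, limit, stride) becomes
          +        // LoopLimit(Binary(init, limit), stride).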
  1.2303 +        Node *pair1 = new (C) BinaryNode(n->in(1),n->in(2));
  1.2304 +        n->set_req(1,pair1);
  1.2305 +        n->set_req(2,n->in(3));
  1.2306 +        n->del_req(3);
  1.2307 +        break;
  1.2308 +      }
  1.2309 +      case Op_StrEquals: {
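          +        // Fold the first two value inputs into a BinaryNode; the remaining
          +        // count input moves up into slot 3.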
  1.2310 +        Node *pair1 = new (C) BinaryNode(n->in(2),n->in(3));
  1.2311 +        n->set_req(2,pair1);
  1.2312 +        n->set_req(3,n->in(4));
  1.2313 +        n->del_req(4);
  1.2314 +        break;
  1.2315 +      }
  1.2316 +      case Op_StrComp:
  1.2317 +      case Op_StrIndexOf: {
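          +        // Pack the four value inputs into two BinaryNode pairs so the
          +        // match tree stays binary.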
  1.2318 +        Node *pair1 = new (C) BinaryNode(n->in(2),n->in(3));
  1.2319 +        n->set_req(2,pair1);
  1.2320 +        Node *pair2 = new (C) BinaryNode(n->in(4),n->in(5));
  1.2321 +        n->set_req(3,pair2);
  1.2322 +        n->del_req(5);
  1.2323 +        n->del_req(4);
  1.2324 +        break;
  1.2325 +      }
  1.2326 +      case Op_EncodeISOArray: {
  1.2327 +        // Restructure into a binary tree for Matching.
  1.2328 +        Node* pair = new (C) BinaryNode(n->in(3), n->in(4));
  1.2329 +        n->set_req(3, pair);
  1.2330 +        n->del_req(4);
  1.2331 +        break;
  1.2332 +      }
  1.2333 +      default:
  1.2334 +        break;
  1.2335 +      }
  1.2336 +    }
  1.2337 +    else {
  1.2338 +      ShouldNotReachHere();
  1.2339 +    }
  1.2340 +  } // end of while (mstack.is_nonempty())
  1.2341 +}
  1.2342 +
  1.2343 +#ifdef ASSERT
  1.2344 +// machine-independent root to machine-dependent root
  1.2345 +void Matcher::dump_old2new_map() {
  1.2346 +  _old2new_map.dump();
  1.2347 +}
  1.2348 +#endif
  1.2349 +
  1.2350 +//---------------------------collect_null_checks-------------------------------
   1.2351 +// Find null checks in the ideal graph; write a machine-specific node for
   1.2352 +// each one.  Used by later implicit-null-check handling.  Actually collects
   1.2353 +// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
   1.2354 +// value being tested.
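          +// The projection and the tested value are pushed as a pair onto
          +// _null_check_tests; validate_null_checks() later walks that list two
          +// entries at a time.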
  1.2355 +void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
  1.2356 +  Node *iff = proj->in(0);
  1.2357 +  if( iff->Opcode() == Op_If ) {
  1.2358 +    // During matching If's have Bool & Cmp side-by-side
  1.2359 +    BoolNode *b = iff->in(1)->as_Bool();
  1.2360 +    Node *cmp = iff->in(2);
  1.2361 +    int opc = cmp->Opcode();
  1.2362 +    if (opc != Op_CmpP && opc != Op_CmpN) return;
  1.2363 +
  1.2364 +    const Type* ct = cmp->in(2)->bottom_type();
  1.2365 +    if (ct == TypePtr::NULL_PTR ||
  1.2366 +        (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
  1.2367 +
  1.2368 +      bool push_it = false;
  1.2369 +      if( proj->Opcode() == Op_IfTrue ) {
  1.2370 +        extern int all_null_checks_found;
  1.2371 +        all_null_checks_found++;
  1.2372 +        if( b->_test._test == BoolTest::ne ) {
  1.2373 +          push_it = true;
  1.2374 +        }
  1.2375 +      } else {
  1.2376 +        assert( proj->Opcode() == Op_IfFalse, "" );
  1.2377 +        if( b->_test._test == BoolTest::eq ) {
  1.2378 +          push_it = true;
  1.2379 +        }
  1.2380 +      }
  1.2381 +      if( push_it ) {
  1.2382 +        _null_check_tests.push(proj);
  1.2383 +        Node* val = cmp->in(1);
  1.2384 +#ifdef _LP64
  1.2385 +        if (val->bottom_type()->isa_narrowoop() &&
  1.2386 +            !Matcher::narrow_oop_use_complex_address()) {
  1.2387 +          //
   1.2388 +          // Look for the DecodeN node which should be pinned to orig_proj.
   1.2389 +          // On platforms (such as SPARC) which cannot handle two adds
   1.2390 +          // in an addressing mode, we have to keep the DecodeN node and
   1.2391 +          // use it to do the implicit NULL check in the address.
   1.2392 +          //
   1.2393 +          // The DecodeN node was pinned to the non-null path (orig_proj)
   1.2394 +          // during the CastPP transformation in final_graph_reshaping_impl().
  1.2395 +          //
  1.2396 +          uint cnt = orig_proj->outcnt();
  1.2397 +          for (uint i = 0; i < orig_proj->outcnt(); i++) {
  1.2398 +            Node* d = orig_proj->raw_out(i);
  1.2399 +            if (d->is_DecodeN() && d->in(1) == val) {
  1.2400 +              val = d;
  1.2401 +              val->set_req(0, NULL); // Unpin now.
   1.2402 +              // Mark this as a special case to distinguish it from
   1.2403 +              // the regular case: CmpP(DecodeN, NULL).
  1.2404 +              val = (Node*)(((intptr_t)val) | 1);
  1.2405 +              break;
  1.2406 +            }
  1.2407 +          }
  1.2408 +        }
  1.2409 +#endif
  1.2410 +        _null_check_tests.push(val);
  1.2411 +      }
  1.2412 +    }
  1.2413 +  }
  1.2414 +}
  1.2415 +
  1.2416 +//---------------------------validate_null_checks------------------------------
   1.2417 +// It's possible that the value being NULL checked is not the root of a match
  1.2418 +// tree.  If so, I cannot use the value in an implicit null check.
  1.2419 +void Matcher::validate_null_checks( ) {
  1.2420 +  uint cnt = _null_check_tests.size();
  1.2421 +  for( uint i=0; i < cnt; i+=2 ) {
  1.2422 +    Node *test = _null_check_tests[i];
  1.2423 +    Node *val = _null_check_tests[i+1];
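          +    // collect_null_checks() tags a DecodeN value by setting the low bit of
          +    // the pointer; strip the tag before looking up the matched node.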
  1.2424 +    bool is_decoden = ((intptr_t)val) & 1;
  1.2425 +    val = (Node*)(((intptr_t)val) & ~1);
  1.2426 +    if (has_new_node(val)) {
  1.2427 +      Node* new_val = new_node(val);
  1.2428 +      if (is_decoden) {
  1.2429 +        assert(val->is_DecodeNarrowPtr() && val->in(0) == NULL, "sanity");
  1.2430 +        // Note: new_val may have a control edge if
   1.2431 +        // the original ideal DecodeN node was matched before
  1.2432 +        // it was unpinned in Matcher::collect_null_checks().
  1.2433 +        // Unpin the mach node and mark it.
  1.2434 +        new_val->set_req(0, NULL);
  1.2435 +        new_val = (Node*)(((intptr_t)new_val) | 1);
  1.2436 +      }
  1.2437 +      // Is a match-tree root, so replace with the matched value
  1.2438 +      _null_check_tests.map(i+1, new_val);
  1.2439 +    } else {
  1.2440 +      // Yank from candidate list
  1.2441 +      _null_check_tests.map(i+1,_null_check_tests[--cnt]);
  1.2442 +      _null_check_tests.map(i,_null_check_tests[--cnt]);
  1.2443 +      _null_check_tests.pop();
  1.2444 +      _null_check_tests.pop();
  1.2445 +      i-=2;
  1.2446 +    }
  1.2447 +  }
  1.2448 +}
  1.2449 +
   1.2450 +// Used by the DFA in dfa_xxx.cpp.  Check for a following barrier or
   1.2451 +// atomic instruction acting as a store-load barrier with no intervening
   1.2452 +// volatile load; if one is found, we don't need a barrier here.
   1.2453 +// We retain the Node to act as a compiler ordering barrier.
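          +// The scan walks forward from the membar's control projection: another
          +// store-load barrier or a compare-and-swap makes this one redundant, while
          +// an upcoming acquire (volatile load), call, or safepoint means it must stay.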
  1.2454 +bool Matcher::post_store_load_barrier(const Node* vmb) {
  1.2455 +  Compile* C = Compile::current();
  1.2456 +  assert(vmb->is_MemBar(), "");
  1.2457 +  assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, "");
  1.2458 +  const MemBarNode* membar = vmb->as_MemBar();
  1.2459 +
  1.2460 +  // Get the Ideal Proj node, ctrl, that can be used to iterate forward
  1.2461 +  Node* ctrl = NULL;
  1.2462 +  for (DUIterator_Fast imax, i = membar->fast_outs(imax); i < imax; i++) {
  1.2463 +    Node* p = membar->fast_out(i);
  1.2464 +    assert(p->is_Proj(), "only projections here");
  1.2465 +    if ((p->as_Proj()->_con == TypeFunc::Control) &&
  1.2466 +        !C->node_arena()->contains(p)) { // Unmatched old-space only
  1.2467 +      ctrl = p;
  1.2468 +      break;
  1.2469 +    }
  1.2470 +  }
  1.2471 +  assert((ctrl != NULL), "missing control projection");
  1.2472 +
  1.2473 +  for (DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++) {
  1.2474 +    Node *x = ctrl->fast_out(j);
  1.2475 +    int xop = x->Opcode();
  1.2476 +
   1.2477 +    // We don't need the current barrier if we see another barrier or a lock
   1.2478 +    // before seeing a volatile load.
  1.2479 +    //
   1.2480 +    // Op_FastUnlock previously appeared in the Op_* list below.
  1.2481 +    // With the advent of 1-0 lock operations we're no longer guaranteed
  1.2482 +    // that a monitor exit operation contains a serializing instruction.
  1.2483 +
  1.2484 +    if (xop == Op_MemBarVolatile ||
  1.2485 +        xop == Op_CompareAndSwapL ||
  1.2486 +        xop == Op_CompareAndSwapP ||
  1.2487 +        xop == Op_CompareAndSwapN ||
  1.2488 +        xop == Op_CompareAndSwapI) {
  1.2489 +      return true;
  1.2490 +    }
  1.2491 +
  1.2492 +    // Op_FastLock previously appeared in the Op_* list above.
  1.2493 +    // With biased locking we're no longer guaranteed that a monitor
  1.2494 +    // enter operation contains a serializing instruction.
  1.2495 +    if ((xop == Op_FastLock) && !UseBiasedLocking) {
  1.2496 +      return true;
  1.2497 +    }
  1.2498 +
  1.2499 +    if (x->is_MemBar()) {
   1.2500 +      // We must retain this membar if there is an upcoming volatile
   1.2501 +      // load, which will be followed by an acquire membar.
  1.2502 +      if (xop == Op_MemBarAcquire || xop == Op_LoadFence) {
  1.2503 +        return false;
  1.2504 +      } else {
   1.2505 +        // For other kinds of barriers, check by pretending we are
   1.2506 +        // that barrier and seeing whether it can be removed.
  1.2507 +        return post_store_load_barrier(x->as_MemBar());
  1.2508 +      }
  1.2509 +    }
  1.2510 +
  1.2511 +    // probably not necessary to check for these
  1.2512 +    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj()) {
  1.2513 +      return false;
  1.2514 +    }
  1.2515 +  }
  1.2516 +  return false;
  1.2517 +}
  1.2518 +
   1.2519 +// Check whether node n is a branch to an uncommon trap that we could
   1.2520 +// optimize as a test with very high branch costs in case the branch to
   1.2521 +// the uncommon trap is taken. The code must be able to be recompiled
   1.2522 +// to use a cheaper test.
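          +// The check follows the IfFalse projection through a bounded number of
          +// Region nodes, looking for a call into the uncommon trap blob whose trap
          +// request is sure to trigger recompilation.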
  1.2523 +bool Matcher::branches_to_uncommon_trap(const Node *n) {
  1.2524 +  // Don't do it for natives, adapters, or runtime stubs
  1.2525 +  Compile *C = Compile::current();
  1.2526 +  if (!C->is_method_compilation()) return false;
  1.2527 +
  1.2528 +  assert(n->is_If(), "You should only call this on if nodes.");
  1.2529 +  IfNode *ifn = n->as_If();
  1.2530 +
  1.2531 +  Node *ifFalse = NULL;
  1.2532 +  for (DUIterator_Fast imax, i = ifn->fast_outs(imax); i < imax; i++) {
  1.2533 +    if (ifn->fast_out(i)->is_IfFalse()) {
  1.2534 +      ifFalse = ifn->fast_out(i);
  1.2535 +      break;
  1.2536 +    }
  1.2537 +  }
  1.2538 +  assert(ifFalse, "An If should have an ifFalse. Graph is broken.");
  1.2539 +
  1.2540 +  Node *reg = ifFalse;
  1.2541 +  int cnt = 4; // We must protect against cycles.  Limit to 4 iterations.
  1.2542 +               // Alternatively use visited set?  Seems too expensive.
  1.2543 +  while (reg != NULL && cnt > 0) {
  1.2544 +    CallNode *call = NULL;
  1.2545 +    RegionNode *nxt_reg = NULL;
  1.2546 +    for (DUIterator_Fast imax, i = reg->fast_outs(imax); i < imax; i++) {
  1.2547 +      Node *o = reg->fast_out(i);
  1.2548 +      if (o->is_Call()) {
  1.2549 +        call = o->as_Call();
  1.2550 +      }
  1.2551 +      if (o->is_Region()) {
  1.2552 +        nxt_reg = o->as_Region();
  1.2553 +      }
  1.2554 +    }
  1.2555 +
  1.2556 +    if (call &&
  1.2557 +        call->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
  1.2558 +      const Type* trtype = call->in(TypeFunc::Parms)->bottom_type();
  1.2559 +      if (trtype->isa_int() && trtype->is_int()->is_con()) {
  1.2560 +        jint tr_con = trtype->is_int()->get_con();
  1.2561 +        Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
  1.2562 +        Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
  1.2563 +        assert((int)reason < (int)BitsPerInt, "recode bit map");
  1.2564 +
  1.2565 +        if (is_set_nth_bit(C->allowed_deopt_reasons(), (int)reason)
  1.2566 +            && action != Deoptimization::Action_none) {
  1.2567 +          // This uncommon trap is sure to recompile, eventually.
  1.2568 +          // When that happens, C->too_many_traps will prevent
  1.2569 +          // this transformation from happening again.
  1.2570 +          return true;
  1.2571 +        }
  1.2572 +      }
  1.2573 +    }
  1.2574 +
  1.2575 +    reg = nxt_reg;
  1.2576 +    cnt--;
  1.2577 +  }
  1.2578 +
  1.2579 +  return false;
  1.2580 +}
  1.2581 +
  1.2582 +//=============================================================================
  1.2583 +//---------------------------State---------------------------------------------
  1.2584 +State::State(void) {
  1.2585 +#ifdef ASSERT
  1.2586 +  _id = 0;
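          +  // Poison the pointer fields with recognizable debug patterns so that a
          +  // State used before initialization is easy to spot.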
  1.2587 +  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  1.2588 +  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  1.2589 +  //memset(_cost, -1, sizeof(_cost));
  1.2590 +  //memset(_rule, -1, sizeof(_rule));
  1.2591 +#endif
  1.2592 +  memset(_valid, 0, sizeof(_valid));
  1.2593 +}
  1.2594 +
  1.2595 +#ifdef ASSERT
  1.2596 +State::~State() {
  1.2597 +  _id = 99;
  1.2598 +  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  1.2599 +  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  1.2600 +  memset(_cost, -3, sizeof(_cost));
  1.2601 +  memset(_rule, -3, sizeof(_rule));
  1.2602 +}
  1.2603 +#endif
  1.2604 +
  1.2605 +#ifndef PRODUCT
  1.2606 +//---------------------------dump----------------------------------------------
  1.2607 +void State::dump() {
  1.2608 +  tty->print("\n");
  1.2609 +  dump(0);
  1.2610 +}
  1.2611 +
  1.2612 +void State::dump(int depth) {
  1.2613 +  for( int j = 0; j < depth; j++ )
  1.2614 +    tty->print("   ");
  1.2615 +  tty->print("--N: ");
  1.2616 +  _leaf->dump();
  1.2617 +  uint i;
   1.2618 +  for( i = 0; i < _LAST_MACH_OPER; i++ )
   1.2619 +    // Check for valid entry
   1.2620 +    if( valid(i) ) {
   1.2621 +      for( int j = 0; j < depth; j++ )
   1.2622 +        tty->print("   ");
   1.2623 +      assert(_cost[i] != max_juint, "cost must be a valid value");
   1.2624 +      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
   1.2625 +      tty->print_cr("%s  %d  %s",
   1.2626 +                    ruleName[i], _cost[i], ruleName[_rule[i]] );
   1.2627 +    }
  1.2628 +  tty->cr();
  1.2629 +
  1.2630 +  for( i=0; i<2; i++ )
  1.2631 +    if( _kids[i] )
  1.2632 +      _kids[i]->dump(depth+1);
  1.2633 +}
  1.2634 +#endif
