src/cpu/ppc/vm/ppc.ad

changeset 6660
63c5920a038d
parent 6522
3a55cf1e3c9f
child 6723
0bf37f737702
     1.1 --- a/src/cpu/ppc/vm/ppc.ad	Thu May 08 11:05:02 2014 +0200
     1.2 +++ b/src/cpu/ppc/vm/ppc.ad	Fri May 02 14:53:06 2014 +0200
     1.3 @@ -898,7 +898,7 @@
     1.4    // To keep related declarations/definitions/uses close together,
     1.5    // we switch between source %{ }% and source_hpp %{ }% freely as needed.
     1.6  
     1.7 -  // Returns true if Node n is followed by a MemBar node that 
     1.8 +  // Returns true if Node n is followed by a MemBar node that
     1.9    // will do an acquire. If so, this node must not do the acquire
    1.10    // operation.
    1.11    bool followed_by_acquire(const Node *n);
    1.12 @@ -908,7 +908,7 @@
    1.13  
    1.14  // Optimize load-acquire.
    1.15  //
    1.16 -// Check if acquire is unnecessary due to following operation that does 
    1.17 +// Check if acquire is unnecessary due to following operation that does
    1.18  // acquire anyways.
    1.19  // Walk the pattern:
    1.20  //
    1.21 @@ -919,12 +919,12 @@
    1.22  //  Proj(ctrl)  Proj(mem)
    1.23  //       |         |
    1.24  //   MemBarRelease/Volatile
    1.25 -// 
    1.26 +//
    1.27  bool followed_by_acquire(const Node *load) {
    1.28    assert(load->is_Load(), "So far implemented only for loads.");
    1.29  
    1.30    // Find MemBarAcquire.
    1.31 -  const Node *mba = NULL;         
    1.32 +  const Node *mba = NULL;
    1.33    for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    1.34      const Node *out = load->fast_out(i);
    1.35      if (out->Opcode() == Op_MemBarAcquire) {
    1.36 @@ -937,7 +937,7 @@
    1.37  
    1.38    // Find following MemBar node.
    1.39    //
    1.40 -  // The following node must be reachable by control AND memory 
    1.41 +  // The following node must be reachable by control AND memory
    1.42    // edge to assure no other operations are in between the two nodes.
    1.43    //
    1.44    // So first get the Proj node, mem_proj, to use it to iterate forward.
    1.45 @@ -1135,6 +1135,7 @@
    1.46  
    1.47   public:
    1.48  
    1.49 +  // Emit call stub, compiled java to interpreter.
    1.50    static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
    1.51  
    1.52    // Size of call trampoline stub.
    1.53 @@ -2752,7 +2753,7 @@
    1.54        // inputs for new nodes
    1.55        m1->add_req(NULL, n_toc);
    1.56        m2->add_req(NULL, m1);
    1.57 -      
    1.58 +
    1.59        // operands for new nodes
    1.60        m1->_opnds[0] = new (C) iRegPdstOper(); // dst
    1.61        m1->_opnds[1] = op_src;                 // src
    1.62 @@ -2760,29 +2761,29 @@
    1.63        m2->_opnds[0] = new (C) iRegPdstOper(); // dst
    1.64        m2->_opnds[1] = op_src;                 // src
    1.65        m2->_opnds[2] = new (C) iRegLdstOper(); // base
    1.66 -      
    1.67 +
    1.68        // Initialize ins_attrib TOC fields.
    1.69        m1->_const_toc_offset = -1;
    1.70        m2->_const_toc_offset_hi_node = m1;
    1.71 -      
    1.72 +
    1.73        // Register allocation for new nodes.
    1.74        ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    1.75        ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    1.76 -      
    1.77 +
    1.78        nodes->push(m1);
    1.79        nodes->push(m2);
    1.80        assert(m2->bottom_type()->isa_ptr(), "must be ptr");
    1.81      } else {
    1.82        loadConPNode *m2 = new (C) loadConPNode();
    1.83 -      
    1.84 +
    1.85        // inputs for new nodes
    1.86        m2->add_req(NULL, n_toc);
    1.87 -      
    1.88 +
    1.89        // operands for new nodes
    1.90        m2->_opnds[0] = new (C) iRegPdstOper(); // dst
    1.91        m2->_opnds[1] = op_src;                 // src
    1.92        m2->_opnds[2] = new (C) iRegPdstOper(); // toc
    1.93 -      
    1.94 +
    1.95        // Register allocation for new nodes.
    1.96        ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
    1.97  
    1.98 @@ -2974,17 +2975,17 @@
    1.99        n_sub_base->_opnds[1] = op_crx;
   1.100        n_sub_base->_opnds[2] = op_src;
   1.101        n_sub_base->_bottom_type = _bottom_type;
   1.102 -   
   1.103 +
   1.104        n_shift->add_req(n_region, n_sub_base);
   1.105        n_shift->_opnds[0] = op_dst;
   1.106        n_shift->_opnds[1] = op_dst;
   1.107        n_shift->_bottom_type = _bottom_type;
   1.108 -   
   1.109 +
   1.110        ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
   1.111        ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
   1.112        ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
   1.113        ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
   1.114 -   
   1.115 +
   1.116        nodes->push(n_move);
   1.117        nodes->push(n_compare);
   1.118        nodes->push(n_sub_base);
   1.119 @@ -3061,20 +3062,20 @@
   1.120      } else {
   1.121        // before Power 7
   1.122        cond_add_baseNode *n_add_base = new (C) cond_add_baseNode();
   1.123 -     
   1.124 +
   1.125        n_add_base->add_req(n_region, n_compare, n_shift);
   1.126        n_add_base->_opnds[0] = op_dst;
   1.127        n_add_base->_opnds[1] = op_crx;
   1.128        n_add_base->_opnds[2] = op_dst;
   1.129        n_add_base->_bottom_type = _bottom_type;
   1.130 -     
   1.131 +
   1.132        assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
   1.133        ra_->set_oop(n_add_base, true);
   1.134 -     
   1.135 +
   1.136        ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
   1.137        ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
   1.138        ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
   1.139 -     
   1.140 +
   1.141        nodes->push(n_compare);
   1.142        nodes->push(n_shift);
   1.143        nodes->push(n_add_base);
   1.144 @@ -3631,11 +3632,11 @@
   1.145      // Req...
   1.146      for (uint i = 0; i < req(); ++i) {
   1.147        // The expanded node does not need toc any more.
   1.148 -      // Add the inline cache constant here instead.  This expresses the 
   1.149 +      // Add the inline cache constant here instead. This expresses the
   1.150        // register of the inline cache must be live at the call.
   1.151        // Else we would have to adapt JVMState by -1.
   1.152        if (i == mach_constant_base_node_input()) {
   1.153 -        call->add_req(loadConLNodes_IC._last);        
   1.154 +        call->add_req(loadConLNodes_IC._last);
   1.155        } else {
   1.156          call->add_req(in(i));
   1.157        }
   1.158 @@ -3663,6 +3664,8 @@
   1.159    %}
   1.160  
   1.161    // Compound version of call dynamic
   1.162 +  // The toc operand is only passed so that the ins_encode statement can refer to it.
   1.163 +  // The emitted code itself has to use $constanttablebase instead.
   1.164    enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
   1.165      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.166      MacroAssembler _masm(&cbuf);
   1.167 @@ -3670,14 +3673,17 @@
   1.168  
   1.169      Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
   1.170  #if 0
   1.171 +    int vtable_index = this->_vtable_index;
   1.172      if (_vtable_index < 0) {
   1.173        // Must be invalid_vtable_index, not nonvirtual_vtable_index.
   1.174        assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value");
   1.175        Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
   1.176 -      AddressLiteral meta = __ allocate_metadata_address((Metadata *)Universe::non_oop_word());
   1.177 -
   1.178 +
   1.179 +      // Virtual call relocation will point to ic load.
   1.180        address virtual_call_meta_addr = __ pc();
   1.181 -      __ load_const_from_method_toc(ic_reg, meta, Rtoc);
   1.182 +      // Load a clear inline cache.
   1.183 +      AddressLiteral empty_ic((address) Universe::non_oop_word());
   1.184 +      __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
   1.185        // CALL to fixup routine.  Fixup routine uses ScopeDesc info
   1.186        // to determine who we intended to call.
   1.187        __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
   1.188 @@ -3710,7 +3716,6 @@
   1.189               "Fix constant in ret_addr_offset()");
   1.190      }
   1.191  #endif
   1.192 -    guarantee(0, "Fix handling of toc edge: messes up derived/base pairs.");
   1.193      Unimplemented();  // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!).
   1.194    %}
   1.195  
   1.196 @@ -5436,7 +5441,7 @@
   1.197    ins_pipe(pipe_class_memory);
   1.198  %}
   1.199  
   1.200 -// Match loading integer and casting it to unsigned int in 
   1.201 +// Match loading integer and casting it to unsigned int in
   1.202  // long register.
   1.203  // LoadI + ConvI2L + AndL 0xffffffff.
   1.204  instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
   1.205 @@ -6078,7 +6083,7 @@
   1.206    ins_pipe(pipe_class_default);
   1.207  %}
   1.208  
   1.209 -// This needs a match rule so that build_oop_map knows this is 
   1.210 +// This needs a match rule so that build_oop_map knows this is
   1.211  // not a narrow oop.
   1.212  instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
   1.213    match(Set dst src1);
   1.214 @@ -6702,7 +6707,7 @@
   1.215    size(4);
   1.216    ins_encode %{
   1.217      // This is a Power7 instruction for which no machine description exists.
   1.218 -    // TODO: PPC port $archOpcode(ppc64Opcode_compound); 
   1.219 +    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.220      __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
   1.221    %}
   1.222    ins_pipe(pipe_class_default);
   1.223 @@ -6847,7 +6852,7 @@
   1.224    size(4);
   1.225    ins_encode %{
   1.226      // This is a Power7 instruction for which no machine description exists.
   1.227 -    // TODO: PPC port $archOpcode(ppc64Opcode_compound); 
   1.228 +    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.229      __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
   1.230    %}
   1.231    ins_pipe(pipe_class_default);
   1.232 @@ -7064,7 +7069,7 @@
   1.233      n1->_bottom_type = _bottom_type;
   1.234  
   1.235      decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode();
   1.236 -    n2->add_req(n_region, n2);
   1.237 +    n2->add_req(n_region, n1);
   1.238      n2->_opnds[0] = op_dst;
   1.239      n2->_opnds[1] = op_dst;
   1.240      n2->_bottom_type = _bottom_type;
   1.241 @@ -7199,7 +7204,7 @@
   1.242  //  inline_unsafe_load_store).
   1.243  //
   1.244  // Add this node again if we found a good solution for inline_unsafe_load_store().
   1.245 -// Don't forget to look at the implementation of post_store_load_barrier again, 
   1.246 +// Don't forget to look at the implementation of post_store_load_barrier again,
   1.247  // we did other fixes in that method.
   1.248  //instruct unnecessary_membar_volatile() %{
   1.249  //  match(MemBarVolatile);
   1.250 @@ -7237,7 +7242,7 @@
   1.251      // exists. Anyways, the scheduler should be off on Power7.
   1.252      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.253      int cc        = $cmp$$cmpcode;
   1.254 -    __ isel($dst$$Register, $crx$$CondRegister, 
   1.255 +    __ isel($dst$$Register, $crx$$CondRegister,
   1.256              (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
   1.257    %}
   1.258    ins_pipe(pipe_class_default);
   1.259 @@ -7283,7 +7288,7 @@
   1.260      // exists. Anyways, the scheduler should be off on Power7.
   1.261      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.262      int cc        = $cmp$$cmpcode;
   1.263 -    __ isel($dst$$Register, $crx$$CondRegister, 
   1.264 +    __ isel($dst$$Register, $crx$$CondRegister,
   1.265              (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
   1.266    %}
   1.267    ins_pipe(pipe_class_default);
   1.268 @@ -7329,7 +7334,7 @@
   1.269      // exists. Anyways, the scheduler should be off on Power7.
   1.270      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.271      int cc        = $cmp$$cmpcode;
   1.272 -    __ isel($dst$$Register, $crx$$CondRegister, 
   1.273 +    __ isel($dst$$Register, $crx$$CondRegister,
   1.274              (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
   1.275    %}
   1.276    ins_pipe(pipe_class_default);
   1.277 @@ -7376,7 +7381,7 @@
   1.278      // exists. Anyways, the scheduler should be off on Power7.
   1.279      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.280      int cc        = $cmp$$cmpcode;
   1.281 -    __ isel($dst$$Register, $crx$$CondRegister, 
   1.282 +    __ isel($dst$$Register, $crx$$CondRegister,
   1.283              (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
   1.284    %}
   1.285    ins_pipe(pipe_class_default);
   1.286 @@ -7522,8 +7527,8 @@
   1.287    ins_encode %{
   1.288      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.289      // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
   1.290 -    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, 
   1.291 -                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(), 
   1.292 +    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
   1.293 +                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
   1.294                  $res$$Register, true);
   1.295    %}
   1.296    ins_pipe(pipe_class_default);
   1.297 @@ -7929,7 +7934,23 @@
   1.298  
   1.299  // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
   1.300  // positive longs and 0xF...F for negative ones.
   1.301 -instruct signmask64I_regI(iRegIdst dst, iRegIsrc src) %{
   1.302 +instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
   1.303 +  // no match-rule, false predicate
   1.304 +  effect(DEF dst, USE src);
   1.305 +  predicate(false);
   1.306 +
   1.307 +  format %{ "SRADI   $dst, $src, #63" %}
   1.308 +  size(4);
   1.309 +  ins_encode %{
   1.310 +    // TODO: PPC port $archOpcode(ppc64Opcode_sradi);
   1.311 +    __ sradi($dst$$Register, $src$$Register, 0x3f);
   1.312 +  %}
   1.313 +  ins_pipe(pipe_class_default);
   1.314 +%}
   1.315 +
   1.316 +// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
   1.317 +// non-negative longs and 0xF...F for negative ones.
   1.318 +instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
   1.319    // no match-rule, false predicate
   1.320    effect(DEF dst, USE src);
   1.321    predicate(false);
   1.322 @@ -8893,7 +8914,7 @@
   1.323    size(4);
   1.324    ins_encode %{
   1.325      // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm);
   1.326 -    __ rlwinm($dst$$Register, $src1$$Register, 0, 
   1.327 +    __ rlwinm($dst$$Register, $src1$$Register, 0,
   1.328                (31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f);
   1.329    %}
   1.330    ins_pipe(pipe_class_default);
   1.331 @@ -9619,14 +9640,14 @@
   1.332    ins_cost(DEFAULT_COST*4);
   1.333  
   1.334    expand %{
   1.335 -    iRegIdst src1s;
   1.336 -    iRegIdst src2s;
   1.337 -    iRegIdst diff;
   1.338 -    sxtI_reg(src1s, src1); // ensure proper sign extention
   1.339 -    sxtI_reg(src2s, src2); // ensure proper sign extention
   1.340 -    subI_reg_reg(diff, src1s, src2s);
   1.341 +    iRegLdst src1s;
   1.342 +    iRegLdst src2s;
   1.343 +    iRegLdst diff;
   1.344 +    convI2L_reg(src1s, src1); // Ensure proper sign extension.
   1.345 +    convI2L_reg(src2s, src2); // Ensure proper sign extension.
   1.346 +    subL_reg_reg(diff, src1s, src2s);
   1.347      // Need to consider >=33 bit result, therefore we need signmaskL.
   1.348 -    signmask64I_regI(dst, diff);
   1.349 +    signmask64I_regL(dst, diff);
   1.350    %}
   1.351  %}
   1.352  
   1.353 @@ -10863,7 +10884,7 @@
   1.354    format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
   1.355    ins_encode %{
   1.356      // TODO: PPC port $archOpcode(ppc64Opcode_compound);
   1.357 -    __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register, 
   1.358 +    __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
   1.359                                       $tmp_klass$$Register, NULL, $result$$Register);
   1.360    %}
   1.361    ins_pipe(pipe_class_default);
   1.362 @@ -11178,18 +11199,18 @@
   1.363    ins_cost(DEFAULT_COST*6);
   1.364  
   1.365    expand %{
   1.366 -    iRegIdst src1s;
   1.367 -    iRegIdst src2s;
   1.368 -    iRegIdst diff;
   1.369 -    iRegIdst sm;
   1.370 -    iRegIdst doz; // difference or zero
   1.371 -    sxtI_reg(src1s, src1); // Ensure proper sign extention.
   1.372 -    sxtI_reg(src2s, src2); // Ensure proper sign extention.
   1.373 -    subI_reg_reg(diff, src2s, src1s);
   1.374 +    iRegLdst src1s;
   1.375 +    iRegLdst src2s;
   1.376 +    iRegLdst diff;
   1.377 +    iRegLdst sm;
   1.378 +    iRegLdst doz; // difference or zero
   1.379 +    convI2L_reg(src1s, src1); // Ensure proper sign extension.
   1.380 +    convI2L_reg(src2s, src2); // Ensure proper sign extension.
   1.381 +    subL_reg_reg(diff, src2s, src1s);
   1.382      // Need to consider >=33 bit result, therefore we need signmaskL.
   1.383 -    signmask64I_regI(sm, diff);
   1.384 -    andI_reg_reg(doz, diff, sm); // <=0
   1.385 -    addI_reg_reg(dst, doz, src1s);
   1.386 +    signmask64L_regL(sm, diff);
   1.387 +    andL_reg_reg(doz, diff, sm); // <=0
   1.388 +    addI_regL_regL(dst, doz, src1s);
   1.389    %}
   1.390  %}
   1.391  
   1.392 @@ -11198,19 +11219,18 @@
   1.393    ins_cost(DEFAULT_COST*6);
   1.394  
   1.395    expand %{
   1.396 -    immI_minus1 m1 %{ -1 %}
   1.397 -    iRegIdst src1s;
   1.398 -    iRegIdst src2s;
   1.399 -    iRegIdst diff;
   1.400 -    iRegIdst sm;
   1.401 -    iRegIdst doz; // difference or zero
   1.402 -    sxtI_reg(src1s, src1); // Ensure proper sign extention.
   1.403 -    sxtI_reg(src2s, src2); // Ensure proper sign extention.
   1.404 -    subI_reg_reg(diff, src2s, src1s);
   1.405 +    iRegLdst src1s;
   1.406 +    iRegLdst src2s;
   1.407 +    iRegLdst diff;
   1.408 +    iRegLdst sm;
   1.409 +    iRegLdst doz; // difference or zero
   1.410 +    convI2L_reg(src1s, src1); // Ensure proper sign extension.
   1.411 +    convI2L_reg(src2s, src2); // Ensure proper sign extension.
   1.412 +    subL_reg_reg(diff, src2s, src1s);
   1.413      // Need to consider >=33 bit result, therefore we need signmaskL.
   1.414 -    signmask64I_regI(sm, diff);
   1.415 -    andcI_reg_reg(doz, sm, m1, diff); // >=0
   1.416 -    addI_reg_reg(dst, doz, src1s);
   1.417 +    signmask64L_regL(sm, diff);
   1.418 +    andcL_reg_reg(doz, diff, sm); // >=0
   1.419 +    addI_regL_regL(dst, doz, src1s);
   1.420    %}
   1.421  %}
   1.422  

mercurial