1.1 --- a/src/cpu/ppc/vm/ppc.ad Thu May 08 11:05:02 2014 +0200 1.2 +++ b/src/cpu/ppc/vm/ppc.ad Fri May 02 14:53:06 2014 +0200 1.3 @@ -898,7 +898,7 @@ 1.4 // To keep related declarations/definitions/uses close together, 1.5 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1.6 1.7 - // Returns true if Node n is followed by a MemBar node that 1.8 + // Returns true if Node n is followed by a MemBar node that 1.9 // will do an acquire. If so, this node must not do the acquire 1.10 // operation. 1.11 bool followed_by_acquire(const Node *n); 1.12 @@ -908,7 +908,7 @@ 1.13 1.14 // Optimize load-acquire. 1.15 // 1.16 -// Check if acquire is unnecessary due to following operation that does 1.17 +// Check if acquire is unnecessary due to following operation that does 1.18 // acquire anyways. 1.19 // Walk the pattern: 1.20 // 1.21 @@ -919,12 +919,12 @@ 1.22 // Proj(ctrl) Proj(mem) 1.23 // | | 1.24 // MemBarRelease/Volatile 1.25 -// 1.26 +// 1.27 bool followed_by_acquire(const Node *load) { 1.28 assert(load->is_Load(), "So far implemented only for loads."); 1.29 1.30 // Find MemBarAcquire. 1.31 - const Node *mba = NULL; 1.32 + const Node *mba = NULL; 1.33 for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) { 1.34 const Node *out = load->fast_out(i); 1.35 if (out->Opcode() == Op_MemBarAcquire) { 1.36 @@ -937,7 +937,7 @@ 1.37 1.38 // Find following MemBar node. 1.39 // 1.40 - // The following node must be reachable by control AND memory 1.41 + // The following node must be reachable by control AND memory 1.42 // edge to assure no other operations are in between the two nodes. 1.43 // 1.44 // So first get the Proj node, mem_proj, to use it to iterate forward. 1.45 @@ -1135,6 +1135,7 @@ 1.46 1.47 public: 1.48 1.49 + // Emit call stub, compiled java to interpreter. 1.50 static void emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset); 1.51 1.52 // Size of call trampoline stub. 1.53 @@ -2752,7 +2753,7 @@ 1.54 // inputs for new nodes 1.55 m1->add_req(NULL, n_toc); 1.56 m2->add_req(NULL, m1); 1.57 - 1.58 + 1.59 // operands for new nodes 1.60 m1->_opnds[0] = new (C) iRegPdstOper(); // dst 1.61 m1->_opnds[1] = op_src; // src 1.62 @@ -2760,29 +2761,29 @@ 1.63 m2->_opnds[0] = new (C) iRegPdstOper(); // dst 1.64 m2->_opnds[1] = op_src; // src 1.65 m2->_opnds[2] = new (C) iRegLdstOper(); // base 1.66 - 1.67 + 1.68 // Initialize ins_attrib TOC fields. 1.69 m1->_const_toc_offset = -1; 1.70 m2->_const_toc_offset_hi_node = m1; 1.71 - 1.72 + 1.73 // Register allocation for new nodes. 1.74 ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.75 ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.76 - 1.77 + 1.78 nodes->push(m1); 1.79 nodes->push(m2); 1.80 assert(m2->bottom_type()->isa_ptr(), "must be ptr"); 1.81 } else { 1.82 loadConPNode *m2 = new (C) loadConPNode(); 1.83 - 1.84 + 1.85 // inputs for new nodes 1.86 m2->add_req(NULL, n_toc); 1.87 - 1.88 + 1.89 // operands for new nodes 1.90 m2->_opnds[0] = new (C) iRegPdstOper(); // dst 1.91 m2->_opnds[1] = op_src; // src 1.92 m2->_opnds[2] = new (C) iRegPdstOper(); // toc 1.93 - 1.94 + 1.95 // Register allocation for new nodes. 1.96 ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.97 1.98 @@ -2974,17 +2975,17 @@ 1.99 n_sub_base->_opnds[1] = op_crx; 1.100 n_sub_base->_opnds[2] = op_src; 1.101 n_sub_base->_bottom_type = _bottom_type; 1.102 - 1.103 + 1.104 n_shift->add_req(n_region, n_sub_base); 1.105 n_shift->_opnds[0] = op_dst; 1.106 n_shift->_opnds[1] = op_dst; 1.107 n_shift->_bottom_type = _bottom_type; 1.108 - 1.109 + 1.110 ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.111 ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); 1.112 ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.113 ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.114 - 1.115 + 1.116 nodes->push(n_move); 1.117 nodes->push(n_compare); 1.118 nodes->push(n_sub_base); 1.119 @@ -3061,20 +3062,20 @@ 1.120 } else { 1.121 // before Power 7 1.122 cond_add_baseNode *n_add_base = new (C) cond_add_baseNode(); 1.123 - 1.124 + 1.125 n_add_base->add_req(n_region, n_compare, n_shift); 1.126 n_add_base->_opnds[0] = op_dst; 1.127 n_add_base->_opnds[1] = op_crx; 1.128 n_add_base->_opnds[2] = op_dst; 1.129 n_add_base->_bottom_type = _bottom_type; 1.130 - 1.131 + 1.132 assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); 1.133 ra_->set_oop(n_add_base, true); 1.134 - 1.135 + 1.136 ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.137 ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); 1.138 ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); 1.139 - 1.140 + 1.141 nodes->push(n_compare); 1.142 nodes->push(n_shift); 1.143 nodes->push(n_add_base); 1.144 @@ -3631,11 +3632,11 @@ 1.145 // Req... 1.146 for (uint i = 0; i < req(); ++i) { 1.147 // The expanded node does not need toc any more. 1.148 - // Add the inline cache constant here instead. This expresses the 1.149 + // Add the inline cache constant here instead. This expresses the 1.150 // register of the inline cache must be live at the call. 1.151 // Else we would have to adapt JVMState by -1. 1.152 if (i == mach_constant_base_node_input()) { 1.153 - call->add_req(loadConLNodes_IC._last); 1.154 + call->add_req(loadConLNodes_IC._last); 1.155 } else { 1.156 call->add_req(in(i)); 1.157 } 1.158 @@ -3663,6 +3664,8 @@ 1.159 %} 1.160 1.161 // Compound version of call dynamic 1.162 + // Toc is only passed so that it can be used in ins_encode statement. 1.163 + // In the code we have to use $constanttablebase. 1.164 enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{ 1.165 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.166 MacroAssembler _masm(&cbuf); 1.167 @@ -3670,14 +3673,17 @@ 1.168 1.169 Register Rtoc = (ra_) ? $constanttablebase : R2_TOC; 1.170 #if 0 1.171 + int vtable_index = this->_vtable_index; 1.172 if (_vtable_index < 0) { 1.173 // Must be invalid_vtable_index, not nonvirtual_vtable_index. 1.174 assert(_vtable_index == Method::invalid_vtable_index, "correct sentinel value"); 1.175 Register ic_reg = as_Register(Matcher::inline_cache_reg_encode()); 1.176 - AddressLiteral meta = __ allocate_metadata_address((Metadata *)Universe::non_oop_word()); 1.177 - 1.178 + 1.179 + // Virtual call relocation will point to ic load. 1.180 address virtual_call_meta_addr = __ pc(); 1.181 - __ load_const_from_method_toc(ic_reg, meta, Rtoc); 1.182 + // Load a clear inline cache. 1.183 + AddressLiteral empty_ic((address) Universe::non_oop_word()); 1.184 + __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc); 1.185 // CALL to fixup routine. Fixup routine uses ScopeDesc info 1.186 // to determine who we intended to call. 1.187 __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr)); 1.188 @@ -3710,7 +3716,6 @@ 1.189 "Fix constant in ret_addr_offset()"); 1.190 } 1.191 #endif 1.192 - guarantee(0, "Fix handling of toc edge: messes up derived/base pairs."); 1.193 Unimplemented(); // ret_addr_offset not yet fixed. Depends on compressed oops (load klass!). 1.194 %} 1.195 1.196 @@ -5436,7 +5441,7 @@ 1.197 ins_pipe(pipe_class_memory); 1.198 %} 1.199 1.200 -// Match loading integer and casting it to unsigned int in 1.201 +// Match loading integer and casting it to unsigned int in 1.202 // long register. 1.203 // LoadI + ConvI2L + AndL 0xffffffff. 1.204 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{ 1.205 @@ -6078,7 +6083,7 @@ 1.206 ins_pipe(pipe_class_default); 1.207 %} 1.208 1.209 -// This needs a match rule so that build_oop_map knows this is 1.210 +// This needs a match rule so that build_oop_map knows this is 1.211 // not a narrow oop. 1.212 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{ 1.213 match(Set dst src1); 1.214 @@ -6702,7 +6707,7 @@ 1.215 size(4); 1.216 ins_encode %{ 1.217 // This is a Power7 instruction for which no machine description exists. 1.218 - // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.219 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.220 __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register); 1.221 %} 1.222 ins_pipe(pipe_class_default); 1.223 @@ -6847,7 +6852,7 @@ 1.224 size(4); 1.225 ins_encode %{ 1.226 // This is a Power7 instruction for which no machine description exists. 1.227 - // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.228 + // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.229 __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register); 1.230 %} 1.231 ins_pipe(pipe_class_default); 1.232 @@ -7064,7 +7069,7 @@ 1.233 n1->_bottom_type = _bottom_type; 1.234 1.235 decodeNKlass_shiftNode *n2 = new (C) decodeNKlass_shiftNode(); 1.236 - n2->add_req(n_region, n2); 1.237 + n2->add_req(n_region, n1); 1.238 n2->_opnds[0] = op_dst; 1.239 n2->_opnds[1] = op_dst; 1.240 n2->_bottom_type = _bottom_type; 1.241 @@ -7199,7 +7204,7 @@ 1.242 // inline_unsafe_load_store). 1.243 // 1.244 // Add this node again if we found a good solution for inline_unsafe_load_store(). 1.245 -// Don't forget to look at the implementation of post_store_load_barrier again, 1.246 +// Don't forget to look at the implementation of post_store_load_barrier again, 1.247 // we did other fixes in that method. 1.248 //instruct unnecessary_membar_volatile() %{ 1.249 // match(MemBarVolatile); 1.250 @@ -7237,7 +7242,7 @@ 1.251 // exists. Anyways, the scheduler should be off on Power7. 1.252 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.253 int cc = $cmp$$cmpcode; 1.254 - __ isel($dst$$Register, $crx$$CondRegister, 1.255 + __ isel($dst$$Register, $crx$$CondRegister, 1.256 (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register); 1.257 %} 1.258 ins_pipe(pipe_class_default); 1.259 @@ -7283,7 +7288,7 @@ 1.260 // exists. Anyways, the scheduler should be off on Power7. 1.261 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.262 int cc = $cmp$$cmpcode; 1.263 - __ isel($dst$$Register, $crx$$CondRegister, 1.264 + __ isel($dst$$Register, $crx$$CondRegister, 1.265 (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register); 1.266 %} 1.267 ins_pipe(pipe_class_default); 1.268 @@ -7329,7 +7334,7 @@ 1.269 // exists. Anyways, the scheduler should be off on Power7. 1.270 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.271 int cc = $cmp$$cmpcode; 1.272 - __ isel($dst$$Register, $crx$$CondRegister, 1.273 + __ isel($dst$$Register, $crx$$CondRegister, 1.274 (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register); 1.275 %} 1.276 ins_pipe(pipe_class_default); 1.277 @@ -7376,7 +7381,7 @@ 1.278 // exists. Anyways, the scheduler should be off on Power7. 1.279 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.280 int cc = $cmp$$cmpcode; 1.281 - __ isel($dst$$Register, $crx$$CondRegister, 1.282 + __ isel($dst$$Register, $crx$$CondRegister, 1.283 (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register); 1.284 %} 1.285 ins_pipe(pipe_class_default); 1.286 @@ -7522,8 +7527,8 @@ 1.287 ins_encode %{ 1.288 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.289 // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 1.290 - __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, 1.291 - MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(), 1.292 + __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, 1.293 + MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(), 1.294 $res$$Register, true); 1.295 %} 1.296 ins_pipe(pipe_class_default); 1.297 @@ -7929,7 +7934,23 @@ 1.298 1.299 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for 1.300 // positive longs and 0xF...F for negative ones. 1.301 -instruct signmask64I_regI(iRegIdst dst, iRegIsrc src) %{ 1.302 +instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{ 1.303 + // no match-rule, false predicate 1.304 + effect(DEF dst, USE src); 1.305 + predicate(false); 1.306 + 1.307 + format %{ "SRADI $dst, $src, #63" %} 1.308 + size(4); 1.309 + ins_encode %{ 1.310 + // TODO: PPC port $archOpcode(ppc64Opcode_sradi); 1.311 + __ sradi($dst$$Register, $src$$Register, 0x3f); 1.312 + %} 1.313 + ins_pipe(pipe_class_default); 1.314 +%} 1.315 + 1.316 +// Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for 1.317 +// positive longs and 0xF...F for negative ones. 1.318 +instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{ 1.319 // no match-rule, false predicate 1.320 effect(DEF dst, USE src); 1.321 predicate(false); 1.322 @@ -8893,7 +8914,7 @@ 1.323 size(4); 1.324 ins_encode %{ 1.325 // TODO: PPC port $archOpcode(ppc64Opcode_rlwinm); 1.326 - __ rlwinm($dst$$Register, $src1$$Register, 0, 1.327 + __ rlwinm($dst$$Register, $src1$$Register, 0, 1.328 (31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f); 1.329 %} 1.330 ins_pipe(pipe_class_default); 1.331 @@ -9619,14 +9640,14 @@ 1.332 ins_cost(DEFAULT_COST*4); 1.333 1.334 expand %{ 1.335 - iRegIdst src1s; 1.336 - iRegIdst src2s; 1.337 - iRegIdst diff; 1.338 - sxtI_reg(src1s, src1); // ensure proper sign extention 1.339 - sxtI_reg(src2s, src2); // ensure proper sign extention 1.340 - subI_reg_reg(diff, src1s, src2s); 1.341 + iRegLdst src1s; 1.342 + iRegLdst src2s; 1.343 + iRegLdst diff; 1.344 + convI2L_reg(src1s, src1); // Ensure proper sign extension. 1.345 + convI2L_reg(src2s, src2); // Ensure proper sign extension. 1.346 + subL_reg_reg(diff, src1s, src2s); 1.347 // Need to consider >=33 bit result, therefore we need signmaskL. 1.348 - signmask64I_regI(dst, diff); 1.349 + signmask64I_regL(dst, diff); 1.350 %} 1.351 %} 1.352 1.353 @@ -10863,7 +10884,7 @@ 1.354 format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %} 1.355 ins_encode %{ 1.356 // TODO: PPC port $archOpcode(ppc64Opcode_compound); 1.357 - __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register, 1.358 + __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register, 1.359 $tmp_klass$$Register, NULL, $result$$Register); 1.360 %} 1.361 ins_pipe(pipe_class_default); 1.362 @@ -11178,18 +11199,18 @@ 1.363 ins_cost(DEFAULT_COST*6); 1.364 1.365 expand %{ 1.366 - iRegIdst src1s; 1.367 - iRegIdst src2s; 1.368 - iRegIdst diff; 1.369 - iRegIdst sm; 1.370 - iRegIdst doz; // difference or zero 1.371 - sxtI_reg(src1s, src1); // Ensure proper sign extention. 1.372 - sxtI_reg(src2s, src2); // Ensure proper sign extention. 1.373 - subI_reg_reg(diff, src2s, src1s); 1.374 + iRegLdst src1s; 1.375 + iRegLdst src2s; 1.376 + iRegLdst diff; 1.377 + iRegLdst sm; 1.378 + iRegLdst doz; // difference or zero 1.379 + convI2L_reg(src1s, src1); // Ensure proper sign extension. 1.380 + convI2L_reg(src2s, src2); // Ensure proper sign extension. 1.381 + subL_reg_reg(diff, src2s, src1s); 1.382 // Need to consider >=33 bit result, therefore we need signmaskL. 1.383 - signmask64I_regI(sm, diff); 1.384 - andI_reg_reg(doz, diff, sm); // <=0 1.385 - addI_reg_reg(dst, doz, src1s); 1.386 + signmask64L_regL(sm, diff); 1.387 + andL_reg_reg(doz, diff, sm); // <=0 1.388 + addI_regL_regL(dst, doz, src1s); 1.389 %} 1.390 %} 1.391 1.392 @@ -11198,19 +11219,18 @@ 1.393 ins_cost(DEFAULT_COST*6); 1.394 1.395 expand %{ 1.396 - immI_minus1 m1 %{ -1 %} 1.397 - iRegIdst src1s; 1.398 - iRegIdst src2s; 1.399 - iRegIdst diff; 1.400 - iRegIdst sm; 1.401 - iRegIdst doz; // difference or zero 1.402 - sxtI_reg(src1s, src1); // Ensure proper sign extention. 1.403 - sxtI_reg(src2s, src2); // Ensure proper sign extention. 1.404 - subI_reg_reg(diff, src2s, src1s); 1.405 + iRegLdst src1s; 1.406 + iRegLdst src2s; 1.407 + iRegLdst diff; 1.408 + iRegLdst sm; 1.409 + iRegLdst doz; // difference or zero 1.410 + convI2L_reg(src1s, src1); // Ensure proper sign extension. 1.411 + convI2L_reg(src2s, src2); // Ensure proper sign extension. 1.412 + subL_reg_reg(diff, src2s, src1s); 1.413 // Need to consider >=33 bit result, therefore we need signmaskL. 1.414 - signmask64I_regI(sm, diff); 1.415 - andcI_reg_reg(doz, sm, m1, diff); // >=0 1.416 - addI_reg_reg(dst, doz, src1s); 1.417 + signmask64L_regL(sm, diff); 1.418 + andcL_reg_reg(doz, diff, sm); // >=0 1.419 + addI_regL_regL(dst, doz, src1s); 1.420 %} 1.421 %} 1.422