1.1 --- a/src/cpu/x86/vm/x86_64.ad Wed Mar 18 11:37:48 2009 -0400 1.2 +++ b/src/cpu/x86/vm/x86_64.ad Thu Mar 19 09:13:24 2009 -0700 1.3 @@ -326,7 +326,6 @@ 1.4 R9, R9_H, 1.5 R10, R10_H, 1.6 R11, R11_H, 1.7 - R12, R12_H, 1.8 R13, R13_H, 1.9 R14, R14_H); 1.10 1.11 @@ -340,7 +339,6 @@ 1.12 R9, R9_H, 1.13 R10, R10_H, 1.14 R11, R11_H, 1.15 - R12, R12_H, 1.16 R13, R13_H, 1.17 R14, R14_H); 1.18 1.19 @@ -354,7 +352,6 @@ 1.20 R9, R9_H, 1.21 R10, R10_H, 1.22 R11, R11_H, 1.23 - R12, R12_H, 1.24 R13, R13_H, 1.25 R14, R14_H); 1.26 1.27 @@ -444,9 +441,6 @@ 1.28 // Singleton class for RDX long register 1.29 reg_class long_rdx_reg(RDX, RDX_H); 1.30 1.31 -// Singleton class for R12 long register 1.32 -reg_class long_r12_reg(R12, R12_H); 1.33 - 1.34 // Class for all int registers (except RSP) 1.35 reg_class int_reg(RAX, 1.36 RDX, 1.37 @@ -1842,7 +1836,9 @@ 1.38 { 1.39 if (UseCompressedOops) { 1.40 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes()); 1.41 - st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); 1.42 + if (Universe::narrow_oop_shift() != 0) { 1.43 + st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); 1.44 + } 1.45 st->print_cr("cmpq rax, rscratch1\t # Inline cache check"); 1.46 } else { 1.47 st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t" 1.48 @@ -1891,7 +1887,11 @@ 1.49 uint MachUEPNode::size(PhaseRegAlloc* ra_) const 1.50 { 1.51 if (UseCompressedOops) { 1.52 - return OptoBreakpoint ? 19 : 20; 1.53 + if (Universe::narrow_oop_shift() == 0) { 1.54 + return OptoBreakpoint ? 15 : 16; 1.55 + } else { 1.56 + return OptoBreakpoint ? 19 : 20; 1.57 + } 1.58 } else { 1.59 return OptoBreakpoint ? 11 : 12; 1.60 } 1.61 @@ -2575,45 +2575,13 @@ 1.62 Register Rrax = as_Register(RAX_enc); // super class 1.63 Register Rrcx = as_Register(RCX_enc); // killed 1.64 Register Rrsi = as_Register(RSI_enc); // sub class 1.65 - Label hit, miss, cmiss; 1.66 + Label miss; 1.67 + const bool set_cond_codes = true; 1.68 1.69 MacroAssembler _masm(&cbuf); 1.70 - // Compare super with sub directly, since super is not in its own SSA. 1.71 - // The compiler used to emit this test, but we fold it in here, 1.72 - // to allow platform-specific tweaking on sparc. 1.73 - __ cmpptr(Rrax, Rrsi); 1.74 - __ jcc(Assembler::equal, hit); 1.75 -#ifndef PRODUCT 1.76 - __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 1.77 - __ incrementl(Address(Rrcx, 0)); 1.78 -#endif //PRODUCT 1.79 - __ movptr(Rrdi, Address(Rrsi, 1.80 - sizeof(oopDesc) + 1.81 - Klass::secondary_supers_offset_in_bytes())); 1.82 - __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes())); 1.83 - __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 1.84 - if (UseCompressedOops) { 1.85 - __ encode_heap_oop(Rrax); 1.86 - __ repne_scanl(); 1.87 - __ jcc(Assembler::notEqual, cmiss); 1.88 - __ decode_heap_oop(Rrax); 1.89 - __ movptr(Address(Rrsi, 1.90 - sizeof(oopDesc) + 1.91 - Klass::secondary_super_cache_offset_in_bytes()), 1.92 - Rrax); 1.93 - __ jmp(hit); 1.94 - __ bind(cmiss); 1.95 - __ decode_heap_oop(Rrax); 1.96 - __ jmp(miss); 1.97 - } else { 1.98 - __ repne_scan(); 1.99 - __ jcc(Assembler::notEqual, miss); 1.100 - __ movptr(Address(Rrsi, 1.101 - sizeof(oopDesc) + 1.102 - Klass::secondary_super_cache_offset_in_bytes()), 1.103 - Rrax); 1.104 - } 1.105 - __ bind(hit); 1.106 + __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi, 1.107 + NULL, &miss, 1.108 + /*set_cond_codes:*/ true); 1.109 if ($primary) { 1.110 __ xorptr(Rrdi, Rrdi); 1.111 } 1.112 @@ -4906,15 +4874,6 @@ 1.113 interface(REG_INTER); 1.114 %} 1.115 1.116 - 1.117 -operand r12RegL() %{ 1.118 - constraint(ALLOC_IN_RC(long_r12_reg)); 1.119 - match(RegL); 1.120 - 1.121 - format %{ %} 1.122 - interface(REG_INTER); 1.123 -%} 1.124 - 1.125 operand rRegN() %{ 1.126 constraint(ALLOC_IN_RC(int_reg)); 1.127 match(RegN); 1.128 @@ -5289,21 +5248,6 @@ 1.129 %} 1.130 %} 1.131 1.132 -// Indirect Narrow Oop Plus Offset Operand 1.133 -operand indNarrowOopOffset(rRegN src, immL32 off) %{ 1.134 - constraint(ALLOC_IN_RC(ptr_reg)); 1.135 - match(AddP (DecodeN src) off); 1.136 - 1.137 - op_cost(10); 1.138 - format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %} 1.139 - interface(MEMORY_INTER) %{ 1.140 - base(0xc); // R12 1.141 - index($src); 1.142 - scale(0x3); 1.143 - disp($off); 1.144 - %} 1.145 -%} 1.146 - 1.147 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand 1.148 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) 1.149 %{ 1.150 @@ -5321,6 +5265,158 @@ 1.151 %} 1.152 %} 1.153 1.154 +// Indirect Narrow Oop Plus Offset Operand 1.155 +// Note: x86 architecture doesn't support "scale * index + offset" without a base 1.156 +// we can't free r12 even with Universe::narrow_oop_base() == NULL. 1.157 +operand indCompressedOopOffset(rRegN reg, immL32 off) %{ 1.158 + predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); 1.159 + constraint(ALLOC_IN_RC(ptr_reg)); 1.160 + match(AddP (DecodeN reg) off); 1.161 + 1.162 + op_cost(10); 1.163 + format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %} 1.164 + interface(MEMORY_INTER) %{ 1.165 + base(0xc); // R12 1.166 + index($reg); 1.167 + scale(0x3); 1.168 + disp($off); 1.169 + %} 1.170 +%} 1.171 + 1.172 +// Indirect Memory Operand 1.173 +operand indirectNarrow(rRegN reg) 1.174 +%{ 1.175 + predicate(Universe::narrow_oop_shift() == 0); 1.176 + constraint(ALLOC_IN_RC(ptr_reg)); 1.177 + match(DecodeN reg); 1.178 + 1.179 + format %{ "[$reg]" %} 1.180 + interface(MEMORY_INTER) %{ 1.181 + base($reg); 1.182 + index(0x4); 1.183 + scale(0x0); 1.184 + disp(0x0); 1.185 + %} 1.186 +%} 1.187 + 1.188 +// Indirect Memory Plus Short Offset Operand 1.189 +operand indOffset8Narrow(rRegN reg, immL8 off) 1.190 +%{ 1.191 + predicate(Universe::narrow_oop_shift() == 0); 1.192 + constraint(ALLOC_IN_RC(ptr_reg)); 1.193 + match(AddP (DecodeN reg) off); 1.194 + 1.195 + format %{ "[$reg + $off (8-bit)]" %} 1.196 + interface(MEMORY_INTER) %{ 1.197 + base($reg); 1.198 + index(0x4); 1.199 + scale(0x0); 1.200 + disp($off); 1.201 + %} 1.202 +%} 1.203 + 1.204 +// Indirect Memory Plus Long Offset Operand 1.205 +operand indOffset32Narrow(rRegN reg, immL32 off) 1.206 +%{ 1.207 + predicate(Universe::narrow_oop_shift() == 0); 1.208 + constraint(ALLOC_IN_RC(ptr_reg)); 1.209 + match(AddP (DecodeN reg) off); 1.210 + 1.211 + format %{ "[$reg + $off (32-bit)]" %} 1.212 + interface(MEMORY_INTER) %{ 1.213 + base($reg); 1.214 + index(0x4); 1.215 + scale(0x0); 1.216 + disp($off); 1.217 + %} 1.218 +%} 1.219 + 1.220 +// Indirect Memory Plus Index Register Plus Offset Operand 1.221 +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off) 1.222 +%{ 1.223 + predicate(Universe::narrow_oop_shift() == 0); 1.224 + constraint(ALLOC_IN_RC(ptr_reg)); 1.225 + match(AddP (AddP (DecodeN reg) lreg) off); 1.226 + 1.227 + op_cost(10); 1.228 + format %{"[$reg + $off + $lreg]" %} 1.229 + interface(MEMORY_INTER) %{ 1.230 + base($reg); 1.231 + index($lreg); 1.232 + scale(0x0); 1.233 + disp($off); 1.234 + %} 1.235 +%} 1.236 + 1.237 +// Indirect Memory Plus Index Register Plus Offset Operand 1.238 +operand indIndexNarrow(rRegN reg, rRegL lreg) 1.239 +%{ 1.240 + predicate(Universe::narrow_oop_shift() == 0); 1.241 + constraint(ALLOC_IN_RC(ptr_reg)); 1.242 + match(AddP (DecodeN reg) lreg); 1.243 + 1.244 + op_cost(10); 1.245 + format %{"[$reg + $lreg]" %} 1.246 + interface(MEMORY_INTER) %{ 1.247 + base($reg); 1.248 + index($lreg); 1.249 + scale(0x0); 1.250 + disp(0x0); 1.251 + %} 1.252 +%} 1.253 + 1.254 +// Indirect Memory Times Scale Plus Index Register 1.255 +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) 1.256 +%{ 1.257 + predicate(Universe::narrow_oop_shift() == 0); 1.258 + constraint(ALLOC_IN_RC(ptr_reg)); 1.259 + match(AddP (DecodeN reg) (LShiftL lreg scale)); 1.260 + 1.261 + op_cost(10); 1.262 + format %{"[$reg + $lreg << $scale]" %} 1.263 + interface(MEMORY_INTER) %{ 1.264 + base($reg); 1.265 + index($lreg); 1.266 + scale($scale); 1.267 + disp(0x0); 1.268 + %} 1.269 +%} 1.270 + 1.271 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 1.272 +operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale) 1.273 +%{ 1.274 + predicate(Universe::narrow_oop_shift() == 0); 1.275 + constraint(ALLOC_IN_RC(ptr_reg)); 1.276 + match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); 1.277 + 1.278 + op_cost(10); 1.279 + format %{"[$reg + $off + $lreg << $scale]" %} 1.280 + interface(MEMORY_INTER) %{ 1.281 + base($reg); 1.282 + index($lreg); 1.283 + scale($scale); 1.284 + disp($off); 1.285 + %} 1.286 +%} 1.287 + 1.288 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand 1.289 +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) 1.290 +%{ 1.291 + constraint(ALLOC_IN_RC(ptr_reg)); 1.292 + predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); 1.293 + match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off); 1.294 + 1.295 + op_cost(10); 1.296 + format %{"[$reg + $off + $idx << $scale]" %} 1.297 + interface(MEMORY_INTER) %{ 1.298 + base($reg); 1.299 + index($idx); 1.300 + scale($scale); 1.301 + disp($off); 1.302 + %} 1.303 +%} 1.304 + 1.305 + 1.306 //----------Special Memory Operands-------------------------------------------- 1.307 // Stack Slot Operand - This operand is used for loading and storing temporary 1.308 // values on the stack where a match requires a value to 1.309 @@ -5488,7 +5584,10 @@ 1.310 1.311 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, 1.312 indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset, 1.313 - indNarrowOopOffset); 1.314 + indCompressedOopOffset, 1.315 + indirectNarrow, indOffset8Narrow, indOffset32Narrow, 1.316 + indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, 1.317 + indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow); 1.318 1.319 //----------PIPELINE----------------------------------------------------------- 1.320 // Rules which define the behavior of the target architectures pipeline. 1.321 @@ -6234,9 +6333,7 @@ 1.322 ins_cost(125); // XXX 1.323 format %{ "movl $dst, $mem\t# compressed ptr" %} 1.324 ins_encode %{ 1.325 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.326 - Register dst = as_Register($dst$$reg); 1.327 - __ movl(dst, addr); 1.328 + __ movl($dst$$Register, $mem$$Address); 1.329 %} 1.330 ins_pipe(ialu_reg_mem); // XXX 1.331 %} 1.332 @@ -6262,9 +6359,7 @@ 1.333 ins_cost(125); // XXX 1.334 format %{ "movl $dst, $mem\t# compressed klass ptr" %} 1.335 ins_encode %{ 1.336 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.337 - Register dst = as_Register($dst$$reg); 1.338 - __ movl(dst, addr); 1.339 + __ movl($dst$$Register, $mem$$Address); 1.340 %} 1.341 ins_pipe(ialu_reg_mem); // XXX 1.342 %} 1.343 @@ -6418,6 +6513,102 @@ 1.344 ins_pipe(ialu_reg_reg_fat); 1.345 %} 1.346 1.347 +instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) 1.348 +%{ 1.349 + match(Set dst mem); 1.350 + 1.351 + ins_cost(110); 1.352 + format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %} 1.353 + opcode(0x8D); 1.354 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.355 + ins_pipe(ialu_reg_reg_fat); 1.356 +%} 1.357 + 1.358 +// Load Effective Address which uses Narrow (32-bits) oop 1.359 +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) 1.360 +%{ 1.361 + predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); 1.362 + match(Set dst mem); 1.363 + 1.364 + ins_cost(110); 1.365 + format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %} 1.366 + opcode(0x8D); 1.367 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.368 + ins_pipe(ialu_reg_reg_fat); 1.369 +%} 1.370 + 1.371 +instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) 1.372 +%{ 1.373 + predicate(Universe::narrow_oop_shift() == 0); 1.374 + match(Set dst mem); 1.375 + 1.376 + ins_cost(110); // XXX 1.377 + format %{ "leaq $dst, $mem\t# ptr off8narrow" %} 1.378 + opcode(0x8D); 1.379 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.380 + ins_pipe(ialu_reg_reg_fat); 1.381 +%} 1.382 + 1.383 +instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) 1.384 +%{ 1.385 + predicate(Universe::narrow_oop_shift() == 0); 1.386 + match(Set dst mem); 1.387 + 1.388 + ins_cost(110); 1.389 + format %{ "leaq $dst, $mem\t# ptr off32narrow" %} 1.390 + opcode(0x8D); 1.391 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.392 + ins_pipe(ialu_reg_reg_fat); 1.393 +%} 1.394 + 1.395 +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) 1.396 +%{ 1.397 + predicate(Universe::narrow_oop_shift() == 0); 1.398 + match(Set dst mem); 1.399 + 1.400 + ins_cost(110); 1.401 + format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %} 1.402 + opcode(0x8D); 1.403 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.404 + ins_pipe(ialu_reg_reg_fat); 1.405 +%} 1.406 + 1.407 +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) 1.408 +%{ 1.409 + predicate(Universe::narrow_oop_shift() == 0); 1.410 + match(Set dst mem); 1.411 + 1.412 + ins_cost(110); 1.413 + format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %} 1.414 + opcode(0x8D); 1.415 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.416 + ins_pipe(ialu_reg_reg_fat); 1.417 +%} 1.418 + 1.419 +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) 1.420 +%{ 1.421 + predicate(Universe::narrow_oop_shift() == 0); 1.422 + match(Set dst mem); 1.423 + 1.424 + ins_cost(110); 1.425 + format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %} 1.426 + opcode(0x8D); 1.427 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.428 + ins_pipe(ialu_reg_reg_fat); 1.429 +%} 1.430 + 1.431 +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) 1.432 +%{ 1.433 + predicate(Universe::narrow_oop_shift() == 0); 1.434 + match(Set dst mem); 1.435 + 1.436 + ins_cost(110); 1.437 + format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %} 1.438 + opcode(0x8D); 1.439 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 1.440 + ins_pipe(ialu_reg_reg_fat); 1.441 +%} 1.442 + 1.443 instruct loadConI(rRegI dst, immI src) 1.444 %{ 1.445 match(Set dst src); 1.446 @@ -6528,8 +6719,7 @@ 1.447 effect(KILL cr); 1.448 format %{ "xorq $dst, $src\t# compressed NULL ptr" %} 1.449 ins_encode %{ 1.450 - Register dst = $dst$$Register; 1.451 - __ xorq(dst, dst); 1.452 + __ xorq($dst$$Register, $dst$$Register); 1.453 %} 1.454 ins_pipe(ialu_reg); 1.455 %} 1.456 @@ -6541,11 +6731,10 @@ 1.457 format %{ "movl $dst, $src\t# compressed ptr" %} 1.458 ins_encode %{ 1.459 address con = (address)$src$$constant; 1.460 - Register dst = $dst$$Register; 1.461 if (con == NULL) { 1.462 ShouldNotReachHere(); 1.463 } else { 1.464 - __ set_narrow_oop(dst, (jobject)$src$$constant); 1.465 + __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); 1.466 } 1.467 %} 1.468 ins_pipe(ialu_reg_fat); // XXX 1.469 @@ -6794,12 +6983,25 @@ 1.470 ins_pipe(ialu_mem_reg); 1.471 %} 1.472 1.473 +instruct storeImmP0(memory mem, immP0 zero) 1.474 +%{ 1.475 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.476 + match(Set mem (StoreP mem zero)); 1.477 + 1.478 + ins_cost(125); // XXX 1.479 + format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %} 1.480 + ins_encode %{ 1.481 + __ movq($mem$$Address, r12); 1.482 + %} 1.483 + ins_pipe(ialu_mem_reg); 1.484 +%} 1.485 + 1.486 // Store NULL Pointer, mark word, or other simple pointer constant. 1.487 instruct storeImmP(memory mem, immP31 src) 1.488 %{ 1.489 match(Set mem (StoreP mem src)); 1.490 1.491 - ins_cost(125); // XXX 1.492 + ins_cost(150); // XXX 1.493 format %{ "movq $mem, $src\t# ptr" %} 1.494 opcode(0xC7); /* C7 /0 */ 1.495 ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); 1.496 @@ -6814,14 +7016,55 @@ 1.497 ins_cost(125); // XXX 1.498 format %{ "movl $mem, $src\t# compressed ptr" %} 1.499 ins_encode %{ 1.500 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.501 - Register src = as_Register($src$$reg); 1.502 - __ movl(addr, src); 1.503 + __ movl($mem$$Address, $src$$Register); 1.504 %} 1.505 ins_pipe(ialu_mem_reg); 1.506 %} 1.507 1.508 +instruct storeImmN0(memory mem, immN0 zero) 1.509 +%{ 1.510 + predicate(Universe::narrow_oop_base() == NULL); 1.511 + match(Set mem (StoreN mem zero)); 1.512 + 1.513 + ins_cost(125); // XXX 1.514 + format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %} 1.515 + ins_encode %{ 1.516 + __ movl($mem$$Address, r12); 1.517 + %} 1.518 + ins_pipe(ialu_mem_reg); 1.519 +%} 1.520 + 1.521 +instruct storeImmN(memory mem, immN src) 1.522 +%{ 1.523 + match(Set mem (StoreN mem src)); 1.524 + 1.525 + ins_cost(150); // XXX 1.526 + format %{ "movl $mem, $src\t# compressed ptr" %} 1.527 + ins_encode %{ 1.528 + address con = (address)$src$$constant; 1.529 + if (con == NULL) { 1.530 + __ movl($mem$$Address, (int32_t)0); 1.531 + } else { 1.532 + __ set_narrow_oop($mem$$Address, (jobject)$src$$constant); 1.533 + } 1.534 + %} 1.535 + ins_pipe(ialu_mem_imm); 1.536 +%} 1.537 + 1.538 // Store Integer Immediate 1.539 +instruct storeImmI0(memory mem, immI0 zero) 1.540 +%{ 1.541 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.542 + match(Set mem (StoreI mem zero)); 1.543 + 1.544 + ins_cost(125); // XXX 1.545 + format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %} 1.546 + ins_encode %{ 1.547 + __ movl($mem$$Address, r12); 1.548 + %} 1.549 + ins_pipe(ialu_mem_reg); 1.550 +%} 1.551 + 1.552 instruct storeImmI(memory mem, immI src) 1.553 %{ 1.554 match(Set mem (StoreI mem src)); 1.555 @@ -6834,6 +7077,19 @@ 1.556 %} 1.557 1.558 // Store Long Immediate 1.559 +instruct storeImmL0(memory mem, immL0 zero) 1.560 +%{ 1.561 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.562 + match(Set mem (StoreL mem zero)); 1.563 + 1.564 + ins_cost(125); // XXX 1.565 + format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %} 1.566 + ins_encode %{ 1.567 + __ movq($mem$$Address, r12); 1.568 + %} 1.569 + ins_pipe(ialu_mem_reg); 1.570 +%} 1.571 + 1.572 instruct storeImmL(memory mem, immL32 src) 1.573 %{ 1.574 match(Set mem (StoreL mem src)); 1.575 @@ -6846,6 +7102,19 @@ 1.576 %} 1.577 1.578 // Store Short/Char Immediate 1.579 +instruct storeImmC0(memory mem, immI0 zero) 1.580 +%{ 1.581 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.582 + match(Set mem (StoreC mem zero)); 1.583 + 1.584 + ins_cost(125); // XXX 1.585 + format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %} 1.586 + ins_encode %{ 1.587 + __ movw($mem$$Address, r12); 1.588 + %} 1.589 + ins_pipe(ialu_mem_reg); 1.590 +%} 1.591 + 1.592 instruct storeImmI16(memory mem, immI16 src) 1.593 %{ 1.594 predicate(UseStoreImmI16); 1.595 @@ -6859,6 +7128,19 @@ 1.596 %} 1.597 1.598 // Store Byte Immediate 1.599 +instruct storeImmB0(memory mem, immI0 zero) 1.600 +%{ 1.601 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.602 + match(Set mem (StoreB mem zero)); 1.603 + 1.604 + ins_cost(125); // XXX 1.605 + format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %} 1.606 + ins_encode %{ 1.607 + __ movb($mem$$Address, r12); 1.608 + %} 1.609 + ins_pipe(ialu_mem_reg); 1.610 +%} 1.611 + 1.612 instruct storeImmB(memory mem, immI8 src) 1.613 %{ 1.614 match(Set mem (StoreB mem src)); 1.615 @@ -6898,6 +7180,19 @@ 1.616 %} 1.617 1.618 // Store CMS card-mark Immediate 1.619 +instruct storeImmCM0_reg(memory mem, immI0 zero) 1.620 +%{ 1.621 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.622 + match(Set mem (StoreCM mem zero)); 1.623 + 1.624 + ins_cost(125); // XXX 1.625 + format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %} 1.626 + ins_encode %{ 1.627 + __ movb($mem$$Address, r12); 1.628 + %} 1.629 + ins_pipe(ialu_mem_reg); 1.630 +%} 1.631 + 1.632 instruct storeImmCM0(memory mem, immI0 src) 1.633 %{ 1.634 match(Set mem (StoreCM mem src)); 1.635 @@ -6931,6 +7226,19 @@ 1.636 %} 1.637 1.638 // Store immediate Float value (it is faster than store from XMM register) 1.639 +instruct storeF0(memory mem, immF0 zero) 1.640 +%{ 1.641 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.642 + match(Set mem (StoreF mem zero)); 1.643 + 1.644 + ins_cost(25); // XXX 1.645 + format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %} 1.646 + ins_encode %{ 1.647 + __ movl($mem$$Address, r12); 1.648 + %} 1.649 + ins_pipe(ialu_mem_reg); 1.650 +%} 1.651 + 1.652 instruct storeF_imm(memory mem, immF src) 1.653 %{ 1.654 match(Set mem (StoreF mem src)); 1.655 @@ -6957,6 +7265,7 @@ 1.656 // Store immediate double 0.0 (it is faster than store from XMM register) 1.657 instruct storeD0_imm(memory mem, immD0 src) 1.658 %{ 1.659 + predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); 1.660 match(Set mem (StoreD mem src)); 1.661 1.662 ins_cost(50); 1.663 @@ -6966,6 +7275,19 @@ 1.664 ins_pipe(ialu_mem_imm); 1.665 %} 1.666 1.667 +instruct storeD0(memory mem, immD0 zero) 1.668 +%{ 1.669 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.670 + match(Set mem (StoreD mem zero)); 1.671 + 1.672 + ins_cost(25); // XXX 1.673 + format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %} 1.674 + ins_encode %{ 1.675 + __ movq($mem$$Address, r12); 1.676 + %} 1.677 + ins_pipe(ialu_mem_reg); 1.678 +%} 1.679 + 1.680 instruct storeSSI(stackSlotI dst, rRegI src) 1.681 %{ 1.682 match(Set dst src); 1.683 @@ -7077,6 +7399,56 @@ 1.684 ins_pipe( ialu_mem_reg ); 1.685 %} 1.686 1.687 + 1.688 +//---------- Population Count Instructions ------------------------------------- 1.689 + 1.690 +instruct popCountI(rRegI dst, rRegI src) %{ 1.691 + predicate(UsePopCountInstruction); 1.692 + match(Set dst (PopCountI src)); 1.693 + 1.694 + format %{ "popcnt $dst, $src" %} 1.695 + ins_encode %{ 1.696 + __ popcntl($dst$$Register, $src$$Register); 1.697 + %} 1.698 + ins_pipe(ialu_reg); 1.699 +%} 1.700 + 1.701 +instruct popCountI_mem(rRegI dst, memory mem) %{ 1.702 + predicate(UsePopCountInstruction); 1.703 + match(Set dst (PopCountI (LoadI mem))); 1.704 + 1.705 + format %{ "popcnt $dst, $mem" %} 1.706 + ins_encode %{ 1.707 + __ popcntl($dst$$Register, $mem$$Address); 1.708 + %} 1.709 + ins_pipe(ialu_reg); 1.710 +%} 1.711 + 1.712 +// Note: Long.bitCount(long) returns an int. 1.713 +instruct popCountL(rRegI dst, rRegL src) %{ 1.714 + predicate(UsePopCountInstruction); 1.715 + match(Set dst (PopCountL src)); 1.716 + 1.717 + format %{ "popcnt $dst, $src" %} 1.718 + ins_encode %{ 1.719 + __ popcntq($dst$$Register, $src$$Register); 1.720 + %} 1.721 + ins_pipe(ialu_reg); 1.722 +%} 1.723 + 1.724 +// Note: Long.bitCount(long) returns an int. 1.725 +instruct popCountL_mem(rRegI dst, memory mem) %{ 1.726 + predicate(UsePopCountInstruction); 1.727 + match(Set dst (PopCountL (LoadL mem))); 1.728 + 1.729 + format %{ "popcnt $dst, $mem" %} 1.730 + ins_encode %{ 1.731 + __ popcntq($dst$$Register, $mem$$Address); 1.732 + %} 1.733 + ins_pipe(ialu_reg); 1.734 +%} 1.735 + 1.736 + 1.737 //----------MemBar Instructions----------------------------------------------- 1.738 // Memory barrier flavors 1.739 1.740 @@ -7192,9 +7564,7 @@ 1.741 effect(KILL cr); 1.742 format %{ "encode_heap_oop_not_null $dst,$src" %} 1.743 ins_encode %{ 1.744 - Register s = $src$$Register; 1.745 - Register d = $dst$$Register; 1.746 - __ encode_heap_oop_not_null(d, s); 1.747 + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); 1.748 %} 1.749 ins_pipe(ialu_reg_long); 1.750 %} 1.751 @@ -7224,7 +7594,11 @@ 1.752 ins_encode %{ 1.753 Register s = $src$$Register; 1.754 Register d = $dst$$Register; 1.755 - __ decode_heap_oop_not_null(d, s); 1.756 + if (s != d) { 1.757 + __ decode_heap_oop_not_null(d, s); 1.758 + } else { 1.759 + __ decode_heap_oop_not_null(d); 1.760 + } 1.761 %} 1.762 ins_pipe(ialu_reg_long); 1.763 %} 1.764 @@ -11389,8 +11763,9 @@ 1.765 1.766 // This will generate a signed flags result. This should be OK since 1.767 // any compare to a zero should be eq/neq. 1.768 -instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero) 1.769 -%{ 1.770 +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) 1.771 +%{ 1.772 + predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); 1.773 match(Set cr (CmpP (LoadP op) zero)); 1.774 1.775 ins_cost(500); // XXX 1.776 @@ -11401,13 +11776,24 @@ 1.777 ins_pipe(ialu_cr_reg_imm); 1.778 %} 1.779 1.780 +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) 1.781 +%{ 1.782 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 1.783 + match(Set cr (CmpP (LoadP mem) zero)); 1.784 + 1.785 + format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %} 1.786 + ins_encode %{ 1.787 + __ cmpq(r12, $mem$$Address); 1.788 + %} 1.789 + ins_pipe(ialu_cr_reg_mem); 1.790 +%} 1.791 1.792 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) 1.793 %{ 1.794 match(Set cr (CmpN op1 op2)); 1.795 1.796 format %{ "cmpl $op1, $op2\t# compressed ptr" %} 1.797 - ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %} 1.798 + ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %} 1.799 ins_pipe(ialu_cr_reg_reg); 1.800 %} 1.801 1.802 @@ -11415,11 +11801,30 @@ 1.803 %{ 1.804 match(Set cr (CmpN src (LoadN mem))); 1.805 1.806 - ins_cost(500); // XXX 1.807 - format %{ "cmpl $src, mem\t# compressed ptr" %} 1.808 + format %{ "cmpl $src, $mem\t# compressed ptr" %} 1.809 ins_encode %{ 1.810 - Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.811 - __ cmpl(as_Register($src$$reg), adr); 1.812 + __ cmpl($src$$Register, $mem$$Address); 1.813 + %} 1.814 + ins_pipe(ialu_cr_reg_mem); 1.815 +%} 1.816 + 1.817 +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ 1.818 + match(Set cr (CmpN op1 op2)); 1.819 + 1.820 + format %{ "cmpl $op1, $op2\t# compressed ptr" %} 1.821 + ins_encode %{ 1.822 + __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); 1.823 + %} 1.824 + ins_pipe(ialu_cr_reg_imm); 1.825 +%} 1.826 + 1.827 +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) 1.828 +%{ 1.829 + match(Set cr (CmpN src (LoadN mem))); 1.830 + 1.831 + format %{ "cmpl $mem, $src\t# compressed ptr" %} 1.832 + ins_encode %{ 1.833 + __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); 1.834 %} 1.835 ins_pipe(ialu_cr_reg_mem); 1.836 %} 1.837 @@ -11432,15 +11837,27 @@ 1.838 ins_pipe(ialu_cr_reg_imm); 1.839 %} 1.840 1.841 -instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero) 1.842 -%{ 1.843 +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) 1.844 +%{ 1.845 + predicate(Universe::narrow_oop_base() != NULL); 1.846 match(Set cr (CmpN (LoadN mem) zero)); 1.847 1.848 ins_cost(500); // XXX 1.849 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %} 1.850 ins_encode %{ 1.851 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.852 - __ cmpl(addr, (int)0xFFFFFFFF); 1.853 + __ cmpl($mem$$Address, (int)0xFFFFFFFF); 1.854 + %} 1.855 + ins_pipe(ialu_cr_reg_mem); 1.856 +%} 1.857 + 1.858 +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) 1.859 +%{ 1.860 + predicate(Universe::narrow_oop_base() == NULL); 1.861 + match(Set cr (CmpN (LoadN mem) zero)); 1.862 + 1.863 + format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %} 1.864 + ins_encode %{ 1.865 + __ cmpl(r12, $mem$$Address); 1.866 %} 1.867 ins_pipe(ialu_cr_reg_mem); 1.868 %} 1.869 @@ -11472,7 +11889,6 @@ 1.870 %{ 1.871 match(Set cr (CmpL op1 (LoadL op2))); 1.872 1.873 - ins_cost(500); // XXX 1.874 format %{ "cmpq $op1, $op2" %} 1.875 opcode(0x3B); /* Opcode 3B /r */ 1.876 ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2)); 1.877 @@ -11733,15 +12149,12 @@ 1.878 effect(KILL rcx, KILL cr); 1.879 1.880 ins_cost(1100); // slightly larger than the next version 1.881 - format %{ "cmpq rax, rsi\n\t" 1.882 - "jeq,s hit\n\t" 1.883 - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 1.884 + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 1.885 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" 1.886 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 1.887 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" 1.888 "jne,s miss\t\t# Missed: rdi not-zero\n\t" 1.889 "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" 1.890 - "hit:\n\t" 1.891 "xorq $result, $result\t\t Hit: rdi zero\n\t" 1.892 "miss:\t" %} 1.893 1.894 @@ -11756,13 +12169,10 @@ 1.895 rdi_RegP result) 1.896 %{ 1.897 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 1.898 - predicate(!UseCompressedOops); // decoding oop kills condition codes 1.899 effect(KILL rcx, KILL result); 1.900 1.901 ins_cost(1000); 1.902 - format %{ "cmpq rax, rsi\n\t" 1.903 - "jeq,s miss\t# Actually a hit; we are done.\n\t" 1.904 - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 1.905 + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 1.906 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" 1.907 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 1.908 "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"