1.1 --- a/src/cpu/x86/vm/x86_64.ad Fri Apr 11 09:56:35 2008 -0400 1.2 +++ b/src/cpu/x86/vm/x86_64.ad Sun Apr 13 17:43:42 2008 -0400 1.3 @@ -312,7 +312,6 @@ 1.4 R9, R9_H, 1.5 R10, R10_H, 1.6 R11, R11_H, 1.7 - R12, R12_H, 1.8 R13, R13_H, 1.9 R14, R14_H); 1.10 1.11 @@ -392,7 +391,6 @@ 1.12 R9, R9_H, 1.13 R10, R10_H, 1.14 R11, R11_H, 1.15 - R12, R12_H, 1.16 R13, R13_H, 1.17 R14, R14_H); 1.18 1.19 @@ -406,7 +404,6 @@ 1.20 R9, R9_H, 1.21 R10, R10_H, 1.22 R11, R11_H, 1.23 - R12, R12_H, 1.24 R13, R13_H, 1.25 R14, R14_H); 1.26 1.27 @@ -421,7 +418,6 @@ 1.28 R9, R9_H, 1.29 R10, R10_H, 1.30 R11, R11_H, 1.31 - R12, R12_H, 1.32 R13, R13_H, 1.33 R14, R14_H); 1.34 1.35 @@ -436,7 +432,6 @@ 1.36 R9, R9_H, 1.37 R10, R10_H, 1.38 R11, R11_H, 1.39 - R12, R12_H, 1.40 R13, R13_H, 1.41 R14, R14_H); 1.42 1.43 @@ -449,6 +444,9 @@ 1.44 // Singleton class for RDX long register 1.45 reg_class long_rdx_reg(RDX, RDX_H); 1.46 1.47 +// Singleton class for R12 long register 1.48 +reg_class long_r12_reg(R12, R12_H); 1.49 + 1.50 // Class for all int registers (except RSP) 1.51 reg_class int_reg(RAX, 1.52 RDX, 1.53 @@ -461,7 +459,6 @@ 1.54 R9, 1.55 R10, 1.56 R11, 1.57 - R12, 1.58 R13, 1.59 R14); 1.60 1.61 @@ -476,7 +473,6 @@ 1.62 R9, 1.63 R10, 1.64 R11, 1.65 - R12, 1.66 R13, 1.67 R14); 1.68 1.69 @@ -490,7 +486,6 @@ 1.70 R9, 1.71 R10, 1.72 R11, 1.73 - R12, 1.74 R13, 1.75 R14); 1.76 1.77 @@ -1844,8 +1839,14 @@ 1.78 #ifndef PRODUCT 1.79 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const 1.80 { 1.81 - st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t" 1.82 - "# Inline cache check", oopDesc::klass_offset_in_bytes()); 1.83 + if (UseCompressedOops) { 1.84 + st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes()); 1.85 + st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); 1.86 + st->print_cr("cmpq rax, rscratch1\t # Inline cache check"); 1.87 + } else { 1.88 + st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t" 1.89 + "# Inline cache check", oopDesc::klass_offset_in_bytes()); 1.90 + } 1.91 st->print_cr("\tjne SharedRuntime::_ic_miss_stub"); 1.92 st->print_cr("\tnop"); 1.93 if (!OptoBreakpoint) { 1.94 @@ -1860,7 +1861,12 @@ 1.95 #ifdef ASSERT 1.96 uint code_size = cbuf.code_size(); 1.97 #endif 1.98 - masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes())); 1.99 + if (UseCompressedOops) { 1.100 + masm.load_klass(rscratch1, j_rarg0); 1.101 + masm.cmpq(rax, rscratch1); 1.102 + } else { 1.103 + masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes())); 1.104 + } 1.105 1.106 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1.107 1.108 @@ -1871,6 +1877,10 @@ 1.109 // Leave space for int3 1.110 nops_cnt += 1; 1.111 } 1.112 + if (UseCompressedOops) { 1.113 + // ??? divisible by 4 is aligned? 1.114 + nops_cnt += 1; 1.115 + } 1.116 masm.nop(nops_cnt); 1.117 1.118 assert(cbuf.code_size() - code_size == size(ra_), 1.119 @@ -1879,7 +1889,11 @@ 1.120 1.121 uint MachUEPNode::size(PhaseRegAlloc* ra_) const 1.122 { 1.123 - return OptoBreakpoint ? 11 : 12; 1.124 + if (UseCompressedOops) { 1.125 + return OptoBreakpoint ? 19 : 20; 1.126 + } else { 1.127 + return OptoBreakpoint ? 11 : 12; 1.128 + } 1.129 } 1.130 1.131 1.132 @@ -2052,6 +2066,7 @@ 1.133 reg == RCX_num || reg == RCX_H_num || 1.134 reg == R8_num || reg == R8_H_num || 1.135 reg == R9_num || reg == R9_H_num || 1.136 + reg == R12_num || reg == R12_H_num || 1.137 reg == XMM0_num || reg == XMM0_H_num || 1.138 reg == XMM1_num || reg == XMM1_H_num || 1.139 reg == XMM2_num || reg == XMM2_H_num || 1.140 @@ -2087,6 +2102,17 @@ 1.141 return LONG_RDX_REG_mask; 1.142 } 1.143 1.144 +static Address build_address(int b, int i, int s, int d) { 1.145 + Register index = as_Register(i); 1.146 + Address::ScaleFactor scale = (Address::ScaleFactor)s; 1.147 + if (index == rsp) { 1.148 + index = noreg; 1.149 + scale = Address::no_scale; 1.150 + } 1.151 + Address addr(as_Register(b), index, scale, d); 1.152 + return addr; 1.153 +} 1.154 + 1.155 %} 1.156 1.157 //----------ENCODING BLOCK----------------------------------------------------- 1.158 @@ -2545,7 +2571,7 @@ 1.159 Register Rrax = as_Register(RAX_enc); // super class 1.160 Register Rrcx = as_Register(RCX_enc); // killed 1.161 Register Rrsi = as_Register(RSI_enc); // sub class 1.162 - Label hit, miss; 1.163 + Label hit, miss, cmiss; 1.164 1.165 MacroAssembler _masm(&cbuf); 1.166 // Compare super with sub directly, since super is not in its own SSA. 1.167 @@ -2562,12 +2588,27 @@ 1.168 Klass::secondary_supers_offset_in_bytes())); 1.169 __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes())); 1.170 __ addq(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 1.171 - __ repne_scan(); 1.172 - __ jcc(Assembler::notEqual, miss); 1.173 - __ movq(Address(Rrsi, 1.174 - sizeof(oopDesc) + 1.175 - Klass::secondary_super_cache_offset_in_bytes()), 1.176 - Rrax); 1.177 + if (UseCompressedOops) { 1.178 + __ encode_heap_oop(Rrax); 1.179 + __ repne_scanl(); 1.180 + __ jcc(Assembler::notEqual, cmiss); 1.181 + __ decode_heap_oop(Rrax); 1.182 + __ movq(Address(Rrsi, 1.183 + sizeof(oopDesc) + 1.184 + Klass::secondary_super_cache_offset_in_bytes()), 1.185 + Rrax); 1.186 + __ jmp(hit); 1.187 + __ bind(cmiss); 1.188 + __ decode_heap_oop(Rrax); 1.189 + __ jmp(miss); 1.190 + } else { 1.191 + __ repne_scanq(); 1.192 + __ jcc(Assembler::notEqual, miss); 1.193 + __ movq(Address(Rrsi, 1.194 + sizeof(oopDesc) + 1.195 + Klass::secondary_super_cache_offset_in_bytes()), 1.196 + Rrax); 1.197 + } 1.198 __ bind(hit); 1.199 if ($primary) { 1.200 __ xorq(Rrdi, Rrdi); 1.201 @@ -3693,10 +3734,10 @@ 1.202 int count_offset = java_lang_String::count_offset_in_bytes(); 1.203 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.204 1.205 - masm.movq(rax, Address(rsi, value_offset)); 1.206 + masm.load_heap_oop(rax, Address(rsi, value_offset)); 1.207 masm.movl(rcx, Address(rsi, offset_offset)); 1.208 masm.leaq(rax, Address(rax, rcx, Address::times_2, base_offset)); 1.209 - masm.movq(rbx, Address(rdi, value_offset)); 1.210 + masm.load_heap_oop(rbx, Address(rdi, value_offset)); 1.211 masm.movl(rcx, Address(rdi, offset_offset)); 1.212 masm.leaq(rbx, Address(rbx, rcx, Address::times_2, base_offset)); 1.213 1.214 @@ -4120,6 +4161,7 @@ 1.215 %} 1.216 1.217 1.218 + 1.219 //----------FRAME-------------------------------------------------------------- 1.220 // Definition of frame structure and management information. 1.221 // 1.222 @@ -4255,6 +4297,7 @@ 1.223 static const int lo[Op_RegL + 1] = { 1.224 0, 1.225 0, 1.226 + RAX_num, // Op_RegN 1.227 RAX_num, // Op_RegI 1.228 RAX_num, // Op_RegP 1.229 XMM0_num, // Op_RegF 1.230 @@ -4264,13 +4307,14 @@ 1.231 static const int hi[Op_RegL + 1] = { 1.232 0, 1.233 0, 1.234 + OptoReg::Bad, // Op_RegN 1.235 OptoReg::Bad, // Op_RegI 1.236 RAX_H_num, // Op_RegP 1.237 OptoReg::Bad, // Op_RegF 1.238 XMM0_H_num, // Op_RegD 1.239 RAX_H_num // Op_RegL 1.240 }; 1.241 - 1.242 + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type"); 1.243 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); 1.244 %} 1.245 %} 1.246 @@ -4417,9 +4461,25 @@ 1.247 interface(CONST_INTER); 1.248 %} 1.249 1.250 -// Unsigned 31-bit Pointer Immediate 1.251 -// Can be used in both 32-bit signed and 32-bit unsigned insns. 1.252 -// Works for nulls and markOops; not for relocatable (oop) pointers. 1.253 +// Pointer Immediate 1.254 +operand immN() %{ 1.255 + match(ConN); 1.256 + 1.257 + op_cost(10); 1.258 + format %{ %} 1.259 + interface(CONST_INTER); 1.260 +%} 1.261 + 1.262 +// NULL Pointer Immediate 1.263 +operand immN0() %{ 1.264 + predicate(n->get_narrowcon() == 0); 1.265 + match(ConN); 1.266 + 1.267 + op_cost(5); 1.268 + format %{ %} 1.269 + interface(CONST_INTER); 1.270 +%} 1.271 + 1.272 operand immP31() 1.273 %{ 1.274 predicate(!n->as_Type()->type()->isa_oopptr() 1.275 @@ -4431,6 +4491,7 @@ 1.276 interface(CONST_INTER); 1.277 %} 1.278 1.279 + 1.280 // Long Immediate 1.281 operand immL() 1.282 %{ 1.283 @@ -4767,6 +4828,23 @@ 1.284 interface(REG_INTER); 1.285 %} 1.286 1.287 + 1.288 +operand r12RegL() %{ 1.289 + constraint(ALLOC_IN_RC(long_r12_reg)); 1.290 + match(RegL); 1.291 + 1.292 + format %{ %} 1.293 + interface(REG_INTER); 1.294 +%} 1.295 + 1.296 +operand rRegN() %{ 1.297 + constraint(ALLOC_IN_RC(int_reg)); 1.298 + match(RegN); 1.299 + 1.300 + format %{ %} 1.301 + interface(REG_INTER); 1.302 +%} 1.303 + 1.304 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP? 1.305 // Answer: Operand match rules govern the DFA as it processes instruction inputs. 1.306 // It's fine for an instruction input which expects rRegP to match a r15_RegP. 1.307 @@ -4822,6 +4900,18 @@ 1.308 interface(REG_INTER); 1.309 %} 1.310 1.311 +// Special Registers 1.312 +// Return a compressed pointer value 1.313 +operand rax_RegN() 1.314 +%{ 1.315 + constraint(ALLOC_IN_RC(int_rax_reg)); 1.316 + match(RegN); 1.317 + match(rRegN); 1.318 + 1.319 + format %{ %} 1.320 + interface(REG_INTER); 1.321 +%} 1.322 + 1.323 // Used in AtomicAdd 1.324 operand rbx_RegP() 1.325 %{ 1.326 @@ -5112,6 +5202,21 @@ 1.327 %} 1.328 %} 1.329 1.330 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 1.331 +operand indIndexScaleOffsetComp(rRegN src, immL32 off, r12RegL base) %{ 1.332 + constraint(ALLOC_IN_RC(ptr_reg)); 1.333 + match(AddP (DecodeN src base) off); 1.334 + 1.335 + op_cost(10); 1.336 + format %{"[$base + $src << 3 + $off] (compressed)" %} 1.337 + interface(MEMORY_INTER) %{ 1.338 + base($base); 1.339 + index($src); 1.340 + scale(0x3); 1.341 + disp($off); 1.342 + %} 1.343 +%} 1.344 + 1.345 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand 1.346 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) 1.347 %{ 1.348 @@ -5259,7 +5364,8 @@ 1.349 // case of this is memory operands. 1.350 1.351 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, 1.352 - indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset); 1.353 + indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset, 1.354 + indIndexScaleOffsetComp); 1.355 1.356 //----------PIPELINE----------------------------------------------------------- 1.357 // Rules which define the behavior of the target architectures pipeline. 1.358 @@ -5937,10 +6043,28 @@ 1.359 ins_pipe(ialu_reg_mem); // XXX 1.360 %} 1.361 1.362 +// Load Compressed Pointer 1.363 +instruct loadN(rRegN dst, memory mem, rFlagsReg cr) 1.364 +%{ 1.365 + match(Set dst (LoadN mem)); 1.366 + effect(KILL cr); 1.367 + 1.368 + ins_cost(125); // XXX 1.369 + format %{ "movl $dst, $mem\t# compressed ptr" %} 1.370 + ins_encode %{ 1.371 + Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.372 + Register dst = as_Register($dst$$reg); 1.373 + __ movl(dst, addr); 1.374 + %} 1.375 + ins_pipe(ialu_reg_mem); // XXX 1.376 +%} 1.377 + 1.378 + 1.379 // Load Klass Pointer 1.380 instruct loadKlass(rRegP dst, memory mem) 1.381 %{ 1.382 match(Set dst (LoadKlass mem)); 1.383 + predicate(!n->in(MemNode::Address)->bottom_type()->is_narrow()); 1.384 1.385 ins_cost(125); // XXX 1.386 format %{ "movq $dst, $mem\t# class" %} 1.387 @@ -5949,6 +6073,25 @@ 1.388 ins_pipe(ialu_reg_mem); // XXX 1.389 %} 1.390 1.391 +// Load Klass Pointer 1.392 +instruct loadKlassComp(rRegP dst, memory mem) 1.393 +%{ 1.394 + match(Set dst (LoadKlass mem)); 1.395 + predicate(n->in(MemNode::Address)->bottom_type()->is_narrow()); 1.396 + 1.397 + ins_cost(125); // XXX 1.398 + format %{ "movl $dst, $mem\t# compressed class" %} 1.399 + ins_encode %{ 1.400 + Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.401 + Register dst = as_Register($dst$$reg); 1.402 + __ movl(dst, addr); 1.403 + // klass is never null in the header but this is generated for all 1.404 + // klass loads not just the _klass field in the header. 1.405 + __ decode_heap_oop(dst); 1.406 + %} 1.407 + ins_pipe(ialu_reg_mem); // XXX 1.408 +%} 1.409 + 1.410 // Load Float 1.411 instruct loadF(regF dst, memory mem) 1.412 %{ 1.413 @@ -6203,6 +6346,35 @@ 1.414 ins_pipe(pipe_slow); 1.415 %} 1.416 1.417 +instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{ 1.418 + match(Set dst src); 1.419 + effect(KILL cr); 1.420 + format %{ "xorq $dst, $src\t# compressed ptr" %} 1.421 + ins_encode %{ 1.422 + Register dst = $dst$$Register; 1.423 + __ xorq(dst, dst); 1.424 + %} 1.425 + ins_pipe(ialu_reg); 1.426 +%} 1.427 + 1.428 +instruct loadConN(rRegN dst, immN src) %{ 1.429 + match(Set dst src); 1.430 + 1.431 + ins_cost(125); 1.432 + format %{ "movl $dst, $src\t# compressed ptr" %} 1.433 + ins_encode %{ 1.434 + address con = (address)$src$$constant; 1.435 + Register dst = $dst$$Register; 1.436 + if (con == NULL) { 1.437 + ShouldNotReachHere(); 1.438 + } else { 1.439 + __ movoop(dst, (jobject)$src$$constant); 1.440 + __ encode_heap_oop_not_null(dst); 1.441 + } 1.442 + %} 1.443 + ins_pipe(ialu_reg_fat); // XXX 1.444 +%} 1.445 + 1.446 instruct loadConF0(regF dst, immF0 src) 1.447 %{ 1.448 match(Set dst src); 1.449 @@ -6458,6 +6630,22 @@ 1.450 ins_pipe(ialu_mem_imm); 1.451 %} 1.452 1.453 +// Store Compressed Pointer 1.454 +instruct storeN(memory mem, rRegN src, rFlagsReg cr) 1.455 +%{ 1.456 + match(Set mem (StoreN mem src)); 1.457 + effect(KILL cr); 1.458 + 1.459 + ins_cost(125); // XXX 1.460 + format %{ "movl $mem, $src\t# ptr" %} 1.461 + ins_encode %{ 1.462 + Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 1.463 + Register src = as_Register($src$$reg); 1.464 + __ movl(addr, src); 1.465 + %} 1.466 + ins_pipe(ialu_mem_reg); 1.467 +%} 1.468 + 1.469 // Store Integer Immediate 1.470 instruct storeImmI(memory mem, immI src) 1.471 %{ 1.472 @@ -6805,6 +6993,39 @@ 1.473 ins_pipe(ialu_reg_reg); // XXX 1.474 %} 1.475 1.476 + 1.477 +// Convert oop pointer into compressed form 1.478 +instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{ 1.479 + match(Set dst (EncodeP src)); 1.480 + effect(KILL cr); 1.481 + format %{ "encode_heap_oop $dst,$src" %} 1.482 + ins_encode %{ 1.483 + Register s = $src$$Register; 1.484 + Register d = $dst$$Register; 1.485 + if (s != d) { 1.486 + __ movq(d, s); 1.487 + } 1.488 + __ encode_heap_oop(d); 1.489 + %} 1.490 + ins_pipe(ialu_reg_long); 1.491 +%} 1.492 + 1.493 +instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{ 1.494 + match(Set dst (DecodeN src)); 1.495 + effect(KILL cr); 1.496 + format %{ "decode_heap_oop $dst,$src" %} 1.497 + ins_encode %{ 1.498 + Register s = $src$$Register; 1.499 + Register d = $dst$$Register; 1.500 + if (s != d) { 1.501 + __ movq(d, s); 1.502 + } 1.503 + __ decode_heap_oop(d); 1.504 + %} 1.505 + ins_pipe(ialu_reg_long); 1.506 +%} 1.507 + 1.508 + 1.509 //----------Conditional Move--------------------------------------------------- 1.510 // Jump 1.511 // dummy instruction for generating temp registers 1.512 @@ -7521,6 +7742,28 @@ 1.513 %} 1.514 1.515 1.516 +instruct compareAndSwapN(rRegI res, 1.517 + memory mem_ptr, 1.518 + rax_RegN oldval, rRegN newval, 1.519 + rFlagsReg cr) %{ 1.520 + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); 1.521 + effect(KILL cr, KILL oldval); 1.522 + 1.523 + format %{ "cmpxchgl $mem_ptr,$newval\t# " 1.524 + "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" 1.525 + "sete $res\n\t" 1.526 + "movzbl $res, $res" %} 1.527 + opcode(0x0F, 0xB1); 1.528 + ins_encode(lock_prefix, 1.529 + REX_reg_mem(newval, mem_ptr), 1.530 + OpcP, OpcS, 1.531 + reg_mem(newval, mem_ptr), 1.532 + REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete 1.533 + REX_reg_breg(res, res), // movzbl 1.534 + Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); 1.535 + ins_pipe( pipe_cmpxchg ); 1.536 +%} 1.537 + 1.538 //----------Subtraction Instructions------------------------------------------- 1.539 1.540 // Integer Subtraction Instructions 1.541 @@ -10771,6 +11014,14 @@ 1.542 ins_pipe(ialu_cr_reg_imm); 1.543 %} 1.544 1.545 +instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{ 1.546 + match(Set cr (CmpN src zero)); 1.547 + 1.548 + format %{ "testl $src, $src" %} 1.549 + ins_encode %{ __ testl($src$$Register, $src$$Register); %} 1.550 + ins_pipe(ialu_cr_reg_imm); 1.551 +%} 1.552 + 1.553 // Yanked all unsigned pointer compare operations. 1.554 // Pointer compares are done with CmpP which is already unsigned. 1.555 1.556 @@ -11018,6 +11269,7 @@ 1.557 rdi_RegP result) 1.558 %{ 1.559 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 1.560 + predicate(!UseCompressedOops); // decoding oop kills condition codes 1.561 effect(KILL rcx, KILL result); 1.562 1.563 ins_cost(1000);