1.1 --- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp Tue Dec 10 14:29:43 2013 +0100 1.2 +++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp Wed Dec 11 00:06:11 2013 +0100 1.3 @@ -97,8 +97,10 @@ 1.4 } 1.5 } 1.6 1.7 -void MacroAssembler::align(int modulus) { 1.8 - while (offset() % modulus != 0) nop(); 1.9 +void MacroAssembler::align(int modulus, int max, int rem) { 1.10 + int padding = (rem + modulus - (offset() % modulus)) % modulus; 1.11 + if (padding > max) return; 1.12 + for (int c = (padding >> 2); c > 0; --c) { nop(); } 1.13 } 1.14 1.15 // Issue instructions that calculate given TOC from global TOC. 1.16 @@ -186,16 +188,25 @@ 1.17 1.18 #ifdef _LP64 1.19 // Patch compressed oops or klass constants. 1.20 +// Assembler sequence is 1.21 +// 1) compressed oops: 1.22 +// lis rx = const.hi 1.23 +// ori rx = rx | const.lo 1.24 +// 2) compressed klass: 1.25 +// lis rx = const.hi 1.26 +// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional 1.27 +// ori rx = rx | const.lo 1.28 +// Clrldi will be passed by. 1.29 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { 1.30 assert(UseCompressedOops, "Should only patch compressed oops"); 1.31 1.32 const address inst2_addr = a; 1.33 const int inst2 = *(int *)inst2_addr; 1.34 1.35 - // The relocation points to the second instruction, the addi, 1.36 - // and the addi reads and writes the same register dst. 1.37 - const int dst = inv_rt_field(inst2); 1.38 - assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); 1.39 + // The relocation points to the second instruction, the ori, 1.40 + // and the ori reads and writes the same register dst. 1.41 + const int dst = inv_rta_field(inst2); 1.42 + assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); 1.43 // Now, find the preceding addis which writes to dst. 1.44 int inst1 = 0; 1.45 address inst1_addr = inst2_addr - BytesPerInstWord; 1.46 @@ -210,8 +221,9 @@ 1.47 int xc = (data >> 16) & 0xffff; 1.48 int xd = (data >> 0) & 0xffff; 1.49 1.50 - set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2 1.51 + set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo 1.52 set_imm((int *)inst2_addr, (short)(xd)); 1.53 + 1.54 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); 1.55 } 1.56 1.57 @@ -222,10 +234,10 @@ 1.58 const address inst2_addr = a; 1.59 const int inst2 = *(int *)inst2_addr; 1.60 1.61 - // The relocation points to the second instruction, the addi, 1.62 - // and the addi reads and writes the same register dst. 1.63 - const int dst = inv_rt_field(inst2); 1.64 - assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); 1.65 + // The relocation points to the second instruction, the ori, 1.66 + // and the ori reads and writes the same register dst. 1.67 + const int dst = inv_rta_field(inst2); 1.68 + assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); 1.69 // Now, find the preceding lis which writes to dst. 1.70 int inst1 = 0; 1.71 address inst1_addr = inst2_addr - BytesPerInstWord; 1.72 @@ -238,8 +250,9 @@ 1.73 } 1.74 assert(inst1_found, "inst is not lis"); 1.75 1.76 - uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff)); 1.77 - uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16); 1.78 + uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff)); 1.79 + uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16); 1.80 + 1.81 return (int) (xl | xh); 1.82 } 1.83 #endif // _LP64 1.84 @@ -252,13 +265,10 @@ 1.85 // FIXME: We should insert relocation information for oops at the constant 1.86 // pool entries instead of inserting it at the loads; patching of a constant 1.87 // pool entry should be less expensive. 1.88 - Unimplemented(); 1.89 - if (false) { 1.90 - address oop_address = address_constant((address)a.value(), RelocationHolder::none); 1.91 - // Relocate at the pc of the load. 1.92 - relocate(a.rspec()); 1.93 - toc_offset = (int)(oop_address - code()->consts()->start()); 1.94 - } 1.95 + address oop_address = address_constant((address)a.value(), RelocationHolder::none); 1.96 + // Relocate at the pc of the load. 1.97 + relocate(a.rspec()); 1.98 + toc_offset = (int)(oop_address - code()->consts()->start()); 1.99 ld_largeoffset_unchecked(dst, toc_offset, toc, true); 1.100 } 1.101 1.102 @@ -532,7 +542,7 @@ 1.103 masm.b(dest); 1.104 } 1.105 } 1.106 - ICache::invalidate_range(instruction_addr, code_size); 1.107 + ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); 1.108 } 1.109 1.110 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. 1.111 @@ -673,7 +683,7 @@ 1.112 CodeBuffer buf(instruction_addr, code_size); 1.113 MacroAssembler masm(&buf); 1.114 masm.bxx64_patchable(dest, relocInfo::none, link); 1.115 - ICache::invalidate_range(instruction_addr, code_size); 1.116 + ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); 1.117 } 1.118 1.119 // Get dest address of a bxx64_patchable instruction. 1.120 @@ -964,6 +974,14 @@ 1.121 /*load env=*/true); 1.122 } 1.123 1.124 +address MacroAssembler::call_c_and_return_to_caller(Register fd) { 1.125 + return branch_to(fd, /*and_link=*/false, 1.126 + /*save toc=*/false, 1.127 + /*restore toc=*/false, 1.128 + /*load toc=*/true, 1.129 + /*load env=*/true); 1.130 +} 1.131 + 1.132 address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) { 1.133 if (rt != relocInfo::none) { 1.134 // this call needs to be relocatable 1.135 @@ -2315,7 +2333,7 @@ 1.136 if (last_Java_pc != noreg) 1.137 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); 1.138 1.139 - // set last_Java_sp last 1.140 + // Set last_Java_sp last. 1.141 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); 1.142 } 1.143 1.144 @@ -2454,6 +2472,57 @@ 1.145 } 1.146 } 1.147 1.148 +// Clear Array 1.149 +// Kills both input registers. tmp == R0 is allowed. 1.150 +void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) { 1.151 + // Procedure for large arrays (uses data cache block zero instruction). 1.152 + Label startloop, fast, fastloop, small_rest, restloop, done; 1.153 + const int cl_size = VM_Version::get_cache_line_size(), 1.154 + cl_dwords = cl_size>>3, 1.155 + cl_dw_addr_bits = exact_log2(cl_dwords), 1.156 + dcbz_min = 1; // Min count of dcbz executions, needs to be >0. 1.157 + 1.158 +//2: 1.159 + cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included). 1.160 + blt(CCR1, small_rest); // Too small. 1.161 + rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line. 1.162 + beq(CCR0, fast); // Already 128byte aligned. 1.163 + 1.164 + subfic(tmp, tmp, cl_dwords); 1.165 + mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords). 1.166 + subf(cnt_dwords, tmp, cnt_dwords); // rest. 1.167 + li(tmp, 0); 1.168 +//10: 1.169 + bind(startloop); // Clear at the beginning to reach 128byte boundary. 1.170 + std(tmp, 0, base_ptr); // Clear 8byte aligned block. 1.171 + addi(base_ptr, base_ptr, 8); 1.172 + bdnz(startloop); 1.173 +//13: 1.174 + bind(fast); // Clear 128byte blocks. 1.175 + srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0). 1.176 + andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords. 1.177 + mtctr(tmp); // Load counter. 1.178 +//16: 1.179 + bind(fastloop); 1.180 + dcbz(base_ptr); // Clear 128byte aligned block. 1.181 + addi(base_ptr, base_ptr, cl_size); 1.182 + bdnz(fastloop); 1.183 + if (InsertEndGroupPPC64) { endgroup(); } else { nop(); } 1.184 +//20: 1.185 + bind(small_rest); 1.186 + cmpdi(CCR0, cnt_dwords, 0); // size 0? 1.187 + beq(CCR0, done); // rest == 0 1.188 + li(tmp, 0); 1.189 + mtctr(cnt_dwords); // Load counter. 1.190 +//24: 1.191 + bind(restloop); // Clear rest. 1.192 + std(tmp, 0, base_ptr); // Clear 8byte aligned block. 1.193 + addi(base_ptr, base_ptr, 8); 1.194 + bdnz(restloop); 1.195 +//27: 1.196 + bind(done); 1.197 +} 1.198 + 1.199 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// 1.200 1.201 // Search for a single jchar in an jchar[]. 1.202 @@ -2926,12 +2995,11 @@ 1.203 if (!VerifyOops) { 1.204 return; 1.205 } 1.206 - // will be preserved. 1.207 + // Will be preserved. 1.208 Register tmp = R11; 1.209 assert(oop != tmp, "precondition"); 1.210 unsigned int nbytes_save = 10*8; // 10 volatile gprs 1.211 - address/* FunctionDescriptor** */fd = 1.212 - StubRoutines::verify_oop_subroutine_entry_address(); 1.213 + address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address(); 1.214 // save tmp 1.215 mr(R0, tmp); 1.216 // kill tmp