--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,474 @@

/*
 * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_nativeInst_x86.cpp.incl"

void NativeInstruction::wrote(int offset) {
  ICache::invalidate_word(addr_at(offset));
}


void NativeCall::verify() {
  // Make sure code pattern is actually a call imm32 instruction.
  int inst = ubyte_at(0);
  if (inst != instruction_code) {
    tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", instruction_address(),
                  inst);
    fatal("not a call disp32");
  }
}

address NativeCall::destination() const {
  // Getting the destination of a call isn't safe because the call can be
  // patched while it is being read.  There are only special places where
  // this can safely be called, and they are not automatically verifiable by
  // checking which locks are held.  The real solution is true atomic
  // patching on x86, not yet implemented.
  return return_address() + displacement();
}

void NativeCall::print() {
  tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT,
                instruction_address(), destination());
}

// Inserts a native call instruction at a given pc
void NativeCall::insert(address code_pos, address entry) {
  intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4);
#ifdef AMD64
  guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset");
#endif // AMD64
  *code_pos = instruction_code;
  *((int32_t *)(code_pos+1)) = (int32_t) disp;
  ICache::invalidate_range(code_pos, instruction_size);
}

// MT-safe patching of a call instruction.
// First patches the first word of the instruction to two jmps that jump to
// themselves (a spinlock).  Then patches the last byte, and finally
// atomically replaces the jmps with the first 4 bytes of the new instruction.
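//
// Editor's sketch (not part of the original change): assuming the old call
// is E8 o0 o1 o2 o3 and the staged replacement in code_buffer is
// E8 n0 n1 n2 n3, the call site goes through these states:
//
//   original:  E8 o0 o1 o2 o3   call rel32
//   step 1:    EB FE EB FE o3   two jmp-to-self guards (atomic 4-byte write)
//   step 2:    EB FE EB FE n3   the 5th byte is rewritten
//   step 3:    E8 n0 n1 n2 n3   atomic 4-byte write publishes the new call
//
// A thread that reaches the site during steps 1-2 spins in the jmp-to-self
// until step 3 makes the complete new instruction visible.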
void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) {
  assert(Patching_lock->is_locked() ||
         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
  assert(instr_addr != NULL, "illegal address for code patching");

  NativeCall* n_call = nativeCall_at(instr_addr); // checking that it is a call
  if (os::is_MP()) {
    guarantee((intptr_t)instr_addr % BytesPerWord == 0, "must be aligned");
  }

  // Build the dummy jmp-to-self patch (two jmp rel8 instructions that spin).
  unsigned char patch[4];
  assert(sizeof(patch) == sizeof(jint), "sanity check");
  patch[0] = 0xEB;  // jmp rel8
  patch[1] = 0xFE;  // jmp to self
  patch[2] = 0xEB;
  patch[3] = 0xFE;

  // First patch the dummy jmps in place.
  *(jint*)instr_addr = *(jint*)patch;

  // Invalidate.  Opteron requires a flush after every write.
  n_call->wrote(0);

  // Patch the 5th byte (index 4).
  instr_addr[4] = code_buffer[4];

  n_call->wrote(4);

  // Patch bytes 0-3 atomically, removing the spinlock.
  *(jint*)instr_addr = *(jint*)code_buffer;

  n_call->wrote(0);

#ifdef ASSERT
  // verify patching
  for (int i = 0; i < instruction_size; i++) {
    address ptr = (address)((intptr_t)code_buffer + i);
    int a_byte = (*ptr) & 0xFF;
    assert(*((address)((intptr_t)instr_addr + i)) == a_byte, "mt safe patching failed");
  }
#endif
}


// Similar to replace_mt_safe, but just changes the destination.  The
// important thing is that free-running threads are able to execute this
// call instruction at all times.  If the displacement field is aligned
// we can simply rely on atomicity of 32-bit writes to make sure other threads
// will see no intermediate states.  Otherwise, the first two bytes of the
// call are guaranteed to be aligned, and can be atomically patched to a
// self-loop to guard the instruction while we change the other bytes.
//
// We cannot rely on locks here, since the free-running threads must run at
// full speed.
//
// Used in the runtime linkage of calls; see class CompiledIC.
// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
void NativeCall::set_destination_mt_safe(address dest) {
  debug_only(verify());
  // Make sure patching code is locked.  No two threads can patch at the same
  // time but one may be executing this code.
  assert(Patching_lock->is_locked() ||
         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
  // Both C1 and C2 should now be generating code which aligns the patched
  // address to be within a single cache line, except that C1 does not do the
  // alignment on uniprocessor systems.
  bool is_aligned = ((uintptr_t)displacement_address() + 0) / cache_line_size ==
                    ((uintptr_t)displacement_address() + 3) / cache_line_size;

  guarantee(!os::is_MP() || is_aligned, "destination must be aligned");

  if (is_aligned) {
    // Simple case:  The destination lies within a single cache line.
    set_destination(dest);
  } else if ((uintptr_t)instruction_address() / cache_line_size ==
             ((uintptr_t)instruction_address()+1) / cache_line_size) {
    // Tricky case:  The instruction prefix lies within a single cache line.
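    // Editor's sketch (not part of the original change): assuming the old
    // call is E8 o0 o1 o2 o3 and the new displacement bytes are n0 n1 n2 n3,
    // this path moves through these states:
    //
    //   step 1:  EB FE o1 o2 o3   first two bytes become a jmp-to-self guard
    //   step 2:  EB FE n1 n2 n3   last three bytes get the new displacement
    //   step 3:  E8 n0 n1 n2 n3   atomic 2-byte write restores the opcode
    //                             and the first displacement byte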
    intptr_t disp = dest - return_address();
#ifdef AMD64
    guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset");
#endif // AMD64

    int call_opcode = instruction_address()[0];

    // First patch dummy jump in place:
    {
      u_char patch_jump[2];
      patch_jump[0] = 0xEB;  // jmp rel8
      patch_jump[1] = 0xFE;  // jmp to self

      assert(sizeof(patch_jump) == sizeof(short), "sanity check");
      *(short*)instruction_address() = *(short*)patch_jump;
    }
    // Invalidate.  Opteron requires a flush after every write.
    wrote(0);

    // (Note: We assume any reader which has already started to read
    // the unpatched call will completely read the whole unpatched call
    // without seeing the next writes we are about to make.)

    // Next, patch the last three bytes:
    u_char patch_disp[5];
    patch_disp[0] = call_opcode;
    *(int32_t*)&patch_disp[1] = (int32_t)disp;
    assert(sizeof(patch_disp) == instruction_size, "sanity check");
    for (int i = sizeof(short); i < instruction_size; i++)
      instruction_address()[i] = patch_disp[i];

    // Invalidate.  Opteron requires a flush after every write.
    wrote(sizeof(short));

    // (Note: We assume that any reader which reads the opcode we are
    // about to repatch will also read the writes we just made.)

    // Finally, overwrite the jump:
    *(short*)instruction_address() = *(short*)patch_disp;
    // Invalidate.  Opteron requires a flush after every write.
    wrote(0);

    debug_only(verify());
    guarantee(destination() == dest, "patch succeeded");
  } else {
    // Impossible:  One or the other must be atomically writable.
    ShouldNotReachHere();
  }
}


void NativeMovConstReg::verify() {
#ifdef AMD64
  // make sure code pattern is actually a mov reg64, imm64 instruction
  if ((ubyte_at(0) != Assembler::REX_W && ubyte_at(0) != Assembler::REX_WB) ||
      (ubyte_at(1) & (0xff ^ register_mask)) != 0xB8) {
    print();
    fatal("not a REX.W[B] mov reg64, imm64");
  }
#else
  // make sure code pattern is actually a mov reg, imm32 instruction
  u_char test_byte = *(u_char*)instruction_address();
  u_char test_byte_2 = test_byte & (0xff ^ register_mask);
  if (test_byte_2 != instruction_code) fatal("not a mov reg, imm32");
#endif // AMD64
}


void NativeMovConstReg::print() {
  tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
                instruction_address(), data());
}

//-------------------------------------------------------------------

#ifndef AMD64

void NativeMovRegMem::copy_instruction_to(address new_instruction_address) {
  int inst_size = instruction_size;

  // See if there's an instruction size prefix override.
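  // (Editor's note, an assumption about the encodings involved: 0x66 is the
  //  IA-32 operand-size prefix and 0x0F the two-byte opcode escape, each of
  //  which lengthens the encoding by one byte beyond the base
  //  instruction_size.  0x66 followed by 0x0F begins an SSE opcode whose
  //  length is already accounted for, hence the extra check below.)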
  if ( *(address(this)) == instruction_operandsize_prefix &&
       *(address(this)+1) != instruction_code_xmm_code ) { // Not an SSE instruction
    inst_size += 1;
  }
  if ( *(address(this)) == instruction_extended_prefix ) inst_size += 1;

  // Copy the full (possibly prefixed) instruction.
  for (int i = 0; i < inst_size; i++) {
    *(new_instruction_address + i) = *(address(this) + i);
  }
}

void NativeMovRegMem::verify() {
  // make sure code pattern is actually a mov [reg+offset], reg instruction
  u_char test_byte = *(u_char*)instruction_address();
  if ( ! ( (test_byte == instruction_code_reg2memb)
      || (test_byte == instruction_code_mem2regb)
      || (test_byte == instruction_code_mem2regl)
      || (test_byte == instruction_code_reg2meml)
      || (test_byte == instruction_code_mem2reg_movzxb )
      || (test_byte == instruction_code_mem2reg_movzxw )
      || (test_byte == instruction_code_mem2reg_movsxb )
      || (test_byte == instruction_code_mem2reg_movsxw )
      || (test_byte == instruction_code_float_s)
      || (test_byte == instruction_code_float_d)
      || (test_byte == instruction_code_long_volatile) ) )
  {
    u_char byte1 = ((u_char*)instruction_address())[1];
    u_char byte2 = ((u_char*)instruction_address())[2];
    if ((test_byte != instruction_code_xmm_ss_prefix &&
         test_byte != instruction_code_xmm_sd_prefix &&
         test_byte != instruction_operandsize_prefix) ||
        byte1 != instruction_code_xmm_code ||
        (byte2 != instruction_code_xmm_load &&
         byte2 != instruction_code_xmm_lpd &&
         byte2 != instruction_code_xmm_store)) {
      fatal ("not a mov [reg+offs], reg instruction");
    }
  }
}


void NativeMovRegMem::print() {
  tty->print_cr("0x%x: mov reg, [reg + %x]", instruction_address(), offset());
}

//-------------------------------------------------------------------

void NativeLoadAddress::verify() {
  // make sure code pattern is actually a lea reg, [reg+offset] instruction
  u_char test_byte = *(u_char*)instruction_address();
  if ( ! (test_byte == instruction_code) ) {
    fatal ("not a lea reg, [reg+offs] instruction");
  }
}


void NativeLoadAddress::print() {
  tty->print_cr("0x%x: lea [reg + %x], reg", instruction_address(), offset());
}

#endif // !AMD64

//--------------------------------------------------------------------------------

void NativeJump::verify() {
  if (*(u_char*)instruction_address() != instruction_code) {
    fatal("not a jump instruction");
  }
}


void NativeJump::insert(address code_pos, address entry) {
  intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4);
#ifdef AMD64
  guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset");
#endif // AMD64

  *code_pos = instruction_code;
  *((int32_t*)(code_pos + 1)) = (int32_t)disp;

  ICache::invalidate_range(code_pos, instruction_size);
}

void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
  // Patching to not_entrant can happen while activations of the method are
  // in use.  The patching in that instance must happen only when certain
  // alignment restrictions are true.
  // These guarantees check those conditions.
#ifdef AMD64
  const int linesize = 64;
#else
  const int linesize = 32;
#endif // AMD64

  // Must be wordSize aligned
  guarantee(((uintptr_t) verified_entry & (wordSize - 1)) == 0,
            "illegal address for code patching 2");
  // First 5 bytes must be within the same cache line - 4827828
  guarantee((uintptr_t) verified_entry / linesize ==
            ((uintptr_t) verified_entry + 4) / linesize,
            "illegal address for code patching 3");
}


// MT-safe insertion of a jump over an unknown instruction sequence (used by nmethod::makeZombie).
// The problem: jmp <dest> is a 5-byte instruction, but an atomic write can only cover 4 bytes.
// First patches the first word atomically to be a jump to itself.
// Then patches the last byte, and then atomically patches the first word (4 bytes),
// thus inserting the desired jump.
// This code is mt-safe under the following conditions: the entry point is
// 4-byte aligned, the entry point is in the same cache line as the unverified
// entry point, and the instruction being patched is >= 5 bytes (the size of
// the patch).
//
// In C2 the 5+ byte sized instruction is enforced by code in MachPrologNode::emit.
// In C1 the restriction is enforced by CodeEmitter::method_entry.
//
void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
  // The complete jump instruction (to be inserted) is built in code_buffer.
  unsigned char code_buffer[5];
  code_buffer[0] = instruction_code;
  intptr_t disp = (intptr_t)dest - ((intptr_t)verified_entry + 1 + 4);
#ifdef AMD64
  guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset");
#endif // AMD64
  *(int32_t*)(code_buffer + 1) = (int32_t)disp;

  check_verified_entry_alignment(entry, verified_entry);

  // Can't call nativeJump_at() because it asserts that a jump instruction is
  // already present.
  NativeJump* n_jump = (NativeJump*) verified_entry;

  // Build the dummy jmp-to-self patch (two jmp rel8 instructions that spin).
  unsigned char patch[4];
  assert(sizeof(patch) == sizeof(int32_t), "sanity check");
  patch[0] = 0xEB;  // jmp rel8
  patch[1] = 0xFE;  // jmp to self
  patch[2] = 0xEB;
  patch[3] = 0xFE;

  // First patch the dummy jmps in place.
  *(int32_t*)verified_entry = *(int32_t*)patch;

  n_jump->wrote(0);

  // Patch the 5th byte (from the jump instruction).
  verified_entry[4] = code_buffer[4];

  n_jump->wrote(4);

  // Patch bytes 0-3 (from the jump instruction).
  *(int32_t*)verified_entry = *(int32_t*)code_buffer;
  // Invalidate.  Opteron requires a flush after every write.
  n_jump->wrote(0);
}

void NativePopReg::insert(address code_pos, Register reg) {
  assert(reg->encoding() < 8, "no space for REX");
  assert(NativePopReg::instruction_size == sizeof(char), "right address unit for update");
  *code_pos = (u_char)(instruction_code | reg->encoding());
  ICache::invalidate_range(code_pos, instruction_size);
}


void NativeIllegalInstruction::insert(address code_pos) {
  assert(NativeIllegalInstruction::instruction_size == sizeof(short), "right address unit for update");
  *(short *)code_pos = instruction_code;
  ICache::invalidate_range(code_pos, instruction_size);
}

void NativeGeneralJump::verify() {
  assert(((NativeInstruction *)this)->is_jump() ||
         ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction");
}


void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
  intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4);
#ifdef AMD64
  guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset");
#endif // AMD64

  *code_pos = unconditional_long_jump;
  *((int32_t *)(code_pos+1)) = (int32_t) disp;
  ICache::invalidate_range(code_pos, instruction_size);
}


// MT-safe patching of a long jump instruction.
// First patches the first word of the instruction to two jmps that jump to
// themselves (a spinlock).  Then patches the last byte, and finally
// atomically replaces the jmps with the first 4 bytes of the new instruction.
void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
  assert(instr_addr != NULL, "illegal address for code patching (4)");
  NativeGeneralJump* n_jump = nativeGeneralJump_at(instr_addr); // checking that it is a jump

  // Temporary code: two jmp-to-self instructions.
  unsigned char patch[4];
  assert(sizeof(patch) == sizeof(int32_t), "sanity check");
  patch[0] = 0xEB;  // jmp rel8
  patch[1] = 0xFE;  // jmp to self
  patch[2] = 0xEB;
  patch[3] = 0xFE;

  // First patch the dummy jmps in place.
  *(int32_t*)instr_addr = *(int32_t*)patch;
  n_jump->wrote(0);

  // Patch the 5th byte (index 4).
  instr_addr[4] = code_buffer[4];

  n_jump->wrote(4);

  // Patch bytes 0-3 atomically, removing the spinlock.
  *(jint*)instr_addr = *(jint*)code_buffer;

  n_jump->wrote(0);

#ifdef ASSERT
  // verify patching
  for (int i = 0; i < instruction_size; i++) {
    address ptr = (address)((intptr_t)code_buffer + i);
    int a_byte = (*ptr) & 0xFF;
    assert(*((address)((intptr_t)instr_addr + i)) == a_byte, "mt safe patching failed");
  }
#endif
}


address NativeGeneralJump::jump_destination() const {
  int op_code = ubyte_at(0);
  bool is_rel32off = (op_code == 0xE9 || op_code == 0x0F);
  int offset = (op_code == 0x0F) ? 2 : 1;
  int length = offset + ((is_rel32off) ? 4 : 1);

  if (is_rel32off)
    return addr_at(0) + length + int_at(offset);
  else
    return addr_at(0) + length + sbyte_at(offset);
}
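
// Editor's worked example (hypothetical values, not part of the original
// change) of how jump_destination() decodes the supported encodings:
//
//   NativeGeneralJump* jump = nativeGeneralJump_at(pc);
//   address dest = jump->jump_destination();
//
// For a long conditional jump  0F 84 10 00 00 00  (je rel32) at pc, op_code
// is 0x0F, so offset = 2, length = 6, and dest = pc + 6 + 0x10.  For the
// short self-loop  EB FE  installed by the patching code above, offset = 1,
// length = 2, and dest = pc + 2 + (-2) = pc, i.e. the jump spins on itself.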