1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Wed Feb 15 16:29:40 2012 -0800 1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Feb 15 21:37:49 2012 -0800 1.3 @@ -236,6 +236,16 @@ 1.4 } 1.5 } 1.6 1.7 +// Force generation of a 4-byte immediate value even if it fits into 8 bits 1.8 +void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { 1.9 + assert(isByte(op1) && isByte(op2), "wrong opcode"); 1.10 + assert((op1 & 0x01) == 1, "should be 32bit operation"); 1.11 + assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 1.12 + emit_byte(op1); 1.13 + emit_byte(op2 | encode(dst)); 1.14 + emit_long(imm32); 1.15 +} 1.16 + 1.17 // immediate-to-memory forms 1.18 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 1.19 assert((op1 & 0x01) == 1, "should be 32bit operation"); 1.20 @@ -939,6 +949,7 @@ 1.21 } 1.22 1.23 void Assembler::addr_nop_4() { 1.24 + assert(UseAddressNop, "no CPU support"); 1.25 // 4 bytes: NOP DWORD PTR [EAX+0] 1.26 emit_byte(0x0F); 1.27 emit_byte(0x1F); 1.28 @@ -947,6 +958,7 @@ 1.29 } 1.30 1.31 void Assembler::addr_nop_5() { 1.32 + assert(UseAddressNop, "no CPU support"); 1.33 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 1.34 emit_byte(0x0F); 1.35 emit_byte(0x1F); 1.36 @@ -956,6 +968,7 @@ 1.37 } 1.38 1.39 void Assembler::addr_nop_7() { 1.40 + assert(UseAddressNop, "no CPU support"); 1.41 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 1.42 emit_byte(0x0F); 1.43 emit_byte(0x1F); 1.44 @@ -964,6 +977,7 @@ 1.45 } 1.46 1.47 void Assembler::addr_nop_8() { 1.48 + assert(UseAddressNop, "no CPU support"); 1.49 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 1.50 emit_byte(0x0F); 1.51 emit_byte(0x1F); 1.52 @@ -2769,6 +2783,12 @@ 1.53 emit_arith(0x81, 0xE8, dst, imm32); 1.54 } 1.55 1.56 +// Force generation of a 4-byte immediate value even if it fits into 8 bits 1.57 +void Assembler::subl_imm32(Register dst, int32_t imm32) { 1.58 + prefix(dst); 1.59 + emit_arith_imm32(0x81, 0xE8, dst, 
imm32); 1.60 +} 1.61 + 1.62 void Assembler::subl(Register dst, Address src) { 1.63 InstructionMark im(this); 1.64 prefix(src, dst); 1.65 @@ -4760,6 +4780,12 @@ 1.66 emit_arith(0x81, 0xE8, dst, imm32); 1.67 } 1.68 1.69 +// Force generation of a 4-byte immediate value even if it fits into 8 bits 1.70 +void Assembler::subq_imm32(Register dst, int32_t imm32) { 1.71 + (void) prefixq_and_encode(dst->encoding()); 1.72 + emit_arith_imm32(0x81, 0xE8, dst, imm32); 1.73 +} 1.74 + 1.75 void Assembler::subq(Register dst, Address src) { 1.76 InstructionMark im(this); 1.77 prefixq(src, dst); 1.78 @@ -5101,15 +5127,6 @@ 1.79 } 1.80 } 1.81 1.82 -void MacroAssembler::fat_nop() { 1.83 - // A 5 byte nop that is safe for patching (see patch_verified_entry) 1.84 - emit_byte(0x26); // es: 1.85 - emit_byte(0x2e); // cs: 1.86 - emit_byte(0x64); // fs: 1.87 - emit_byte(0x65); // gs: 1.88 - emit_byte(0x90); 1.89 -} 1.90 - 1.91 void MacroAssembler::jC2(Register tmp, Label& L) { 1.92 // set parity bit if FPU flag C2 is set (via rax) 1.93 save_rax(tmp); 1.94 @@ -5704,17 +5721,6 @@ 1.95 /* else */ { subq(dst, value) ; return; } 1.96 } 1.97 1.98 -void MacroAssembler::fat_nop() { 1.99 - // A 5 byte nop that is safe for patching (see patch_verified_entry) 1.100 - // Recommened sequence from 'Software Optimization Guide for the AMD 1.101 - // Hammer Processor' 1.102 - emit_byte(0x66); 1.103 - emit_byte(0x66); 1.104 - emit_byte(0x90); 1.105 - emit_byte(0x66); 1.106 - emit_byte(0x90); 1.107 -} 1.108 - 1.109 void MacroAssembler::incrementq(Register reg, int value) { 1.110 if (value == min_jint) { addq(reg, value); return; } 1.111 if (value < 0) { decrementq(reg, -value); return; } 1.112 @@ -6766,6 +6772,19 @@ 1.113 mov(rbp, rsp); 1.114 } 1.115 1.116 +// A 5-byte nop that is safe for patching (see patch_verified_entry) 1.117 +void MacroAssembler::fat_nop() { 1.118 + if (UseAddressNop) { 1.119 + addr_nop_5(); 1.120 + } else { 1.121 + emit_byte(0x26); // es: 1.122 + emit_byte(0x2e); // cs: 1.123 + 
emit_byte(0x64); // fs: 1.124 + emit_byte(0x65); // gs: 1.125 + emit_byte(0x90); 1.126 + } 1.127 +} 1.128 + 1.129 void MacroAssembler::fcmp(Register tmp) { 1.130 fcmp(tmp, 1, true, true); 1.131 } 1.132 @@ -7825,6 +7844,11 @@ 1.133 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 1.134 } 1.135 1.136 +// Force generation of a 4-byte immediate value even if it fits into 8 bits 1.137 +void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 1.138 + LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 1.139 +} 1.140 + 1.141 void MacroAssembler::subptr(Register dst, Register src) { 1.142 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 1.143 } 1.144 @@ -9292,6 +9316,80 @@ 1.145 } 1.146 #endif // _LP64 1.147 1.148 + 1.149 +// C2 compiled method's prolog code: optional stack bang, frame allocation and rbp save. 1.150 +void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 1.151 + 1.152 + // WARNING: Initial instruction MUST be 5 bytes or longer so that 1.153 + // NativeJump::patch_verified_entry will be able to patch out the entry 1.154 + // code safely. The push to verify stack depth is ok at 5 bytes, 1.155 + // the frame allocation can be either 3 or 6 bytes. So if we don't do 1.156 + // stack bang then we must use the 6 byte frame allocation even if 1.157 + // we have no frame. :-( 1.158 + 1.159 + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 1.160 + // Remove word for return address 1.161 + framesize -= wordSize; 1.162 + 1.163 + // Calls to C2R adapters often do not accept exceptional returns. 1.164 + // We require that their callers must bang for them. But be careful, because 1.165 + // some VM calls (such as call site linkage) can use several kilobytes of 1.166 + // stack. But the stack safety zone should account for that. 1.167 + // See bugs 4446381, 4468289, 4497237. 
1.168 + if (stack_bang) { 1.169 + generate_stack_overflow_check(framesize); 1.170 + 1.171 + // We always push rbp so that, on return to the interpreter, rbp will be 1.172 + // restored correctly and we can correct the stack. 1.173 + push(rbp); 1.174 + // Remove word for rbp 1.175 + framesize -= wordSize; 1.176 + 1.177 + // Create frame 1.178 + if (framesize) { 1.179 + subptr(rsp, framesize); 1.180 + } 1.181 + } else { 1.182 + // Create frame (force generation of a 4-byte immediate value) 1.183 + subptr_imm32(rsp, framesize); 1.184 + 1.185 + // Save RBP register now. 1.186 + framesize -= wordSize; 1.187 + movptr(Address(rsp, framesize), rbp); 1.188 + } 1.189 + 1.190 + if (VerifyStackAtCalls) { // Magic cookie to verify stack depth 1.191 + framesize -= wordSize; 1.192 + movptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1.193 + } 1.194 + 1.195 +#ifndef _LP64 1.196 + // If the method sets the FPU control word, do it now 1.197 + if (fp_mode_24b) { 1.198 + fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1.199 + } 1.200 + if (UseSSE >= 2 && VerifyFPU) { 1.201 + verify_FPU(0, "FPU stack must be clean on entry"); 1.202 + } 1.203 +#endif 1.204 + 1.205 +#ifdef ASSERT 1.206 + if (VerifyStackAtCalls) { 1.207 + Label L; 1.208 + push(rax); 1.209 + mov(rax, rsp); 1.210 + andptr(rax, StackAlignmentInBytes-1); 1.211 + cmpptr(rax, StackAlignmentInBytes-wordSize); 1.212 + pop(rax); 1.213 + jcc(Assembler::equal, L); 1.214 + stop("Stack is not properly aligned!"); 1.215 + bind(L); 1.216 + } 1.217 +#endif 1.218 + 1.219 +} 1.220 + 1.221 + 1.222 // IndexOf for constant substrings with size >= 8 chars 1.223 // which don't need to be loaded through stack. 1.224 void MacroAssembler::string_indexofC8(Register str1, Register str2,