1.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Sep 17 17:02:10 2012 -0700 1.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Sep 17 19:39:07 2012 -0700 1.3 @@ -46,11 +46,11 @@ 1.4 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 1.5 1.6 class RegisterSaver { 1.7 - enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ }; 1.8 // Capture info about frame layout 1.9 +#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off 1.10 enum layout { 1.11 fpu_state_off = 0, 1.12 - fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, 1.13 + fpu_state_end = fpu_state_off+FPUStateSizeInWords, 1.14 st0_off, st0H_off, 1.15 st1_off, st1H_off, 1.16 st2_off, st2H_off, 1.17 @@ -59,16 +59,16 @@ 1.18 st5_off, st5H_off, 1.19 st6_off, st6H_off, 1.20 st7_off, st7H_off, 1.21 - 1.22 - xmm0_off, xmm0H_off, 1.23 - xmm1_off, xmm1H_off, 1.24 - xmm2_off, xmm2H_off, 1.25 - xmm3_off, xmm3H_off, 1.26 - xmm4_off, xmm4H_off, 1.27 - xmm5_off, xmm5H_off, 1.28 - xmm6_off, xmm6H_off, 1.29 - xmm7_off, xmm7H_off, 1.30 - flags_off, 1.31 + xmm_off, 1.32 + DEF_XMM_OFFS(0), 1.33 + DEF_XMM_OFFS(1), 1.34 + DEF_XMM_OFFS(2), 1.35 + DEF_XMM_OFFS(3), 1.36 + DEF_XMM_OFFS(4), 1.37 + DEF_XMM_OFFS(5), 1.38 + DEF_XMM_OFFS(6), 1.39 + DEF_XMM_OFFS(7), 1.40 + flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word 1.41 rdi_off, 1.42 rsi_off, 1.43 ignore_off, // extra copy of rbp, 1.44 @@ -83,13 +83,13 @@ 1.45 rbp_off, 1.46 return_off, // slot for return address 1.47 reg_save_size }; 1.48 - 1.49 + enum { FPU_regs_live = flags_off - fpu_state_end }; 1.50 1.51 public: 1.52 1.53 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, 1.54 - int* total_frame_words, bool verify_fpu = true); 1.55 - static void restore_live_registers(MacroAssembler* masm); 1.56 + int* total_frame_words, bool verify_fpu = true, bool save_vectors = false); 1.57 + static void
restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); 1.58 1.59 static int rax_offset() { return rax_off; } 1.60 static int rbx_offset() { return rbx_off; } 1.61 @@ -113,9 +113,20 @@ 1.62 }; 1.63 1.64 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, 1.65 - int* total_frame_words, bool verify_fpu) { 1.66 - 1.67 - int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; 1.68 + int* total_frame_words, bool verify_fpu, bool save_vectors) { 1.69 + int vect_words = 0; 1.70 +#ifdef COMPILER2 1.71 + if (save_vectors) { 1.72 + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); 1.73 + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); 1.74 + // Save upper half of YMM registes 1.75 + vect_words = 8 * 16 / wordSize; 1.76 + additional_frame_words += vect_words; 1.77 + } 1.78 +#else 1.79 + assert(!save_vectors, "vectors are generated only by C2"); 1.80 +#endif 1.81 + int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; 1.82 int frame_words = frame_size_in_bytes / wordSize; 1.83 *total_frame_words = frame_words; 1.84 1.85 @@ -129,7 +140,7 @@ 1.86 __ enter(); 1.87 __ pusha(); 1.88 __ pushf(); 1.89 - __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space 1.90 + __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space 1.91 __ push_FPU_state(); // Save FPU state & init 1.92 1.93 if (verify_fpu) { 1.94 @@ -183,14 +194,28 @@ 1.95 __ movflt(Address(rsp,xmm6_off*wordSize),xmm6); 1.96 __ movflt(Address(rsp,xmm7_off*wordSize),xmm7); 1.97 } else if( UseSSE >= 2 ) { 1.98 - __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0); 1.99 - __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1); 1.100 - __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2); 1.101 - __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3); 1.102 - __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4); 1.103 - __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5); 1.104 - __ 
movdbl(Address(rsp,xmm6_off*wordSize),xmm6); 1.105 - __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7); 1.106 + // Save whole 128bit (16 bytes) XMM registers 1.107 + __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0); 1.108 + __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1); 1.109 + __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2); 1.110 + __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3); 1.111 + __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4); 1.112 + __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5); 1.113 + __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6); 1.114 + __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7); 1.115 + } 1.116 + 1.117 + if (vect_words > 0) { 1.118 + assert(vect_words*wordSize == 128, ""); 1.119 + __ subptr(rsp, 128); // Save upper half of YMM registers 1.120 + __ vextractf128h(Address(rsp, 0),xmm0); 1.121 + __ vextractf128h(Address(rsp, 16),xmm1); 1.122 + __ vextractf128h(Address(rsp, 32),xmm2); 1.123 + __ vextractf128h(Address(rsp, 48),xmm3); 1.124 + __ vextractf128h(Address(rsp, 64),xmm4); 1.125 + __ vextractf128h(Address(rsp, 80),xmm5); 1.126 + __ vextractf128h(Address(rsp, 96),xmm6); 1.127 + __ vextractf128h(Address(rsp,112),xmm7); 1.128 } 1.129 1.130 // Set an oopmap for the call site. 
This oopmap will map all 1.131 @@ -253,10 +278,20 @@ 1.132 1.133 } 1.134 1.135 -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { 1.136 - 1.137 +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { 1.138 // Recover XMM & FPU state 1.139 - if( UseSSE == 1 ) { 1.140 + int additional_frame_bytes = 0; 1.141 +#ifdef COMPILER2 1.142 + if (restore_vectors) { 1.143 + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); 1.144 + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); 1.145 + additional_frame_bytes = 128; 1.146 + } 1.147 +#else 1.148 + assert(!restore_vectors, "vectors are generated only by C2"); 1.149 +#endif 1.150 + if (UseSSE == 1) { 1.151 + assert(additional_frame_bytes == 0, ""); 1.152 __ movflt(xmm0,Address(rsp,xmm0_off*wordSize)); 1.153 __ movflt(xmm1,Address(rsp,xmm1_off*wordSize)); 1.154 __ movflt(xmm2,Address(rsp,xmm2_off*wordSize)); 1.155 @@ -265,18 +300,33 @@ 1.156 __ movflt(xmm5,Address(rsp,xmm5_off*wordSize)); 1.157 __ movflt(xmm6,Address(rsp,xmm6_off*wordSize)); 1.158 __ movflt(xmm7,Address(rsp,xmm7_off*wordSize)); 1.159 - } else if( UseSSE >= 2 ) { 1.160 - __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize)); 1.161 - __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize)); 1.162 - __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize)); 1.163 - __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize)); 1.164 - __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize)); 1.165 - __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize)); 1.166 - __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize)); 1.167 - __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize)); 1.168 + } else if (UseSSE >= 2) { 1.169 +#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes) 1.170 + __ movdqu(xmm0,STACK_ADDRESS(xmm0_off)); 1.171 + __ movdqu(xmm1,STACK_ADDRESS(xmm1_off)); 1.172 + __ movdqu(xmm2,STACK_ADDRESS(xmm2_off)); 1.173 + __ movdqu(xmm3,STACK_ADDRESS(xmm3_off)); 1.174 + __ movdqu(xmm4,STACK_ADDRESS(xmm4_off)); 1.175 + __ 
movdqu(xmm5,STACK_ADDRESS(xmm5_off)); 1.176 + __ movdqu(xmm6,STACK_ADDRESS(xmm6_off)); 1.177 + __ movdqu(xmm7,STACK_ADDRESS(xmm7_off)); 1.178 +#undef STACK_ADDRESS 1.179 + } 1.180 + if (restore_vectors) { 1.181 + // Restore upper half of YMM registers. 1.182 + assert(additional_frame_bytes == 128, ""); 1.183 + __ vinsertf128h(xmm0, Address(rsp, 0)); 1.184 + __ vinsertf128h(xmm1, Address(rsp, 16)); 1.185 + __ vinsertf128h(xmm2, Address(rsp, 32)); 1.186 + __ vinsertf128h(xmm3, Address(rsp, 48)); 1.187 + __ vinsertf128h(xmm4, Address(rsp, 64)); 1.188 + __ vinsertf128h(xmm5, Address(rsp, 80)); 1.189 + __ vinsertf128h(xmm6, Address(rsp, 96)); 1.190 + __ vinsertf128h(xmm7, Address(rsp,112)); 1.191 + __ addptr(rsp, additional_frame_bytes); 1.192 } 1.193 __ pop_FPU_state(); 1.194 - __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers 1.195 + __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers 1.196 1.197 __ popf(); 1.198 __ popa(); 1.199 @@ -308,6 +358,13 @@ 1.200 __ addptr(rsp, return_off * wordSize); 1.201 } 1.202 1.203 +// Is vector's size (in bytes) bigger than a size saved by default? 1.204 +// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions. 1.205 +// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated. 1.206 +bool SharedRuntime::is_wide_vector(int size) { 1.207 + return size > 16; 1.208 +} 1.209 + 1.210 // The java_calling_convention describes stack locations as ideal slots on 1.211 // a frame with no abi restrictions. Since we must observe abi restrictions 1.212 // (like the placement of the register window) the slots must be biased by 1.213 @@ -2732,7 +2789,6 @@ 1.214 return 0; 1.215 } 1.216 1.217 - 1.218 //------------------------------generate_deopt_blob---------------------------- 1.219 void SharedRuntime::generate_deopt_blob() { 1.220 // allocate space for the code 1.221 @@ -3270,7 +3326,7 @@ 1.222 // setup oopmap, and calls safepoint code to stop the compiled code for 1.223 // a safepoint. 
1.224 // 1.225 -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { 1.226 +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 1.227 1.228 // Account for thread arg in our frame 1.229 const int additional_words = 1; 1.230 @@ -3290,17 +3346,18 @@ 1.231 const Register java_thread = rdi; // callee-saved for VC++ 1.232 address start = __ pc(); 1.233 address call_pc = NULL; 1.234 - 1.235 + bool cause_return = (poll_type == POLL_AT_RETURN); 1.236 + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); 1.237 // If cause_return is true we are at a poll_return and there is 1.238 // the return address on the stack to the caller on the nmethod 1.239 // that is safepoint. We can leave this return on the stack and 1.240 // effectively complete the return and safepoint in the caller. 1.241 // Otherwise we push space for a return address that the safepoint 1.242 // handler will install later to make the stack walking sensible. 1.243 - if( !cause_return ) 1.244 - __ push(rbx); // Make room for return address (or push it again) 1.245 - 1.246 - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); 1.247 + if (!cause_return) 1.248 + __ push(rbx); // Make room for return address (or push it again) 1.249 + 1.250 + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors); 1.251 1.252 // The following is basically a call_VM. However, we need the precise 1.253 // address of the call in order to generate an oopmap. Hence, we do all the 1.254 @@ -3312,7 +3369,7 @@ 1.255 __ set_last_Java_frame(java_thread, noreg, noreg, NULL); 1.256 1.257 // if this was not a poll_return then we need to correct the return address now. 
1.258 - if( !cause_return ) { 1.259 + if (!cause_return) { 1.260 __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset())); 1.261 __ movptr(Address(rbp, wordSize), rax); 1.262 } 1.263 @@ -3340,15 +3397,14 @@ 1.264 __ jcc(Assembler::equal, noException); 1.265 1.266 // Exception pending 1.267 - 1.268 - RegisterSaver::restore_live_registers(masm); 1.269 + RegisterSaver::restore_live_registers(masm, save_vectors); 1.270 1.271 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1.272 1.273 __ bind(noException); 1.274 1.275 // Normal exit, register restoring and exit 1.276 - RegisterSaver::restore_live_registers(masm); 1.277 + RegisterSaver::restore_live_registers(masm, save_vectors); 1.278 1.279 __ ret(0); 1.280