src/cpu/x86/vm/sharedRuntime_x86_32.cpp

changeset 4103
137868b7aa6f
parent 4101
2cb2f30450c7
child 4251
18fb7da42534
     1.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Sep 17 17:02:10 2012 -0700
     1.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Sep 17 19:39:07 2012 -0700
     1.3 @@ -46,11 +46,11 @@
     1.4  const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
     1.5  
     1.6  class RegisterSaver {
     1.7 -  enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
     1.8    // Capture info about frame layout
     1.9 +#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
    1.10    enum layout {
    1.11                  fpu_state_off = 0,
    1.12 -                fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
    1.13 +                fpu_state_end = fpu_state_off+FPUStateSizeInWords,
    1.14                  st0_off, st0H_off,
    1.15                  st1_off, st1H_off,
    1.16                  st2_off, st2H_off,
    1.17 @@ -59,16 +59,16 @@
    1.18                  st5_off, st5H_off,
    1.19                  st6_off, st6H_off,
    1.20                  st7_off, st7H_off,
    1.21 -
    1.22 -                xmm0_off, xmm0H_off,
    1.23 -                xmm1_off, xmm1H_off,
    1.24 -                xmm2_off, xmm2H_off,
    1.25 -                xmm3_off, xmm3H_off,
    1.26 -                xmm4_off, xmm4H_off,
    1.27 -                xmm5_off, xmm5H_off,
    1.28 -                xmm6_off, xmm6H_off,
    1.29 -                xmm7_off, xmm7H_off,
    1.30 -                flags_off,
    1.31 +                xmm_off,
    1.32 +                DEF_XMM_OFFS(0),
    1.33 +                DEF_XMM_OFFS(1),
    1.34 +                DEF_XMM_OFFS(2),
    1.35 +                DEF_XMM_OFFS(3),
    1.36 +                DEF_XMM_OFFS(4),
    1.37 +                DEF_XMM_OFFS(5),
    1.38 +                DEF_XMM_OFFS(6),
    1.39 +                DEF_XMM_OFFS(7),
    1.40 +                flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word
    1.41                  rdi_off,
    1.42                  rsi_off,
    1.43                  ignore_off,  // extra copy of rbp,
    1.44 @@ -83,13 +83,13 @@
    1.45                  rbp_off,
    1.46                  return_off,      // slot for return address
    1.47                  reg_save_size };
    1.48 -
    1.49 +  enum { FPU_regs_live = flags_off - fpu_state_end };
    1.50  
    1.51    public:
    1.52  
    1.53    static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
    1.54 -                                     int* total_frame_words, bool verify_fpu = true);
    1.55 -  static void restore_live_registers(MacroAssembler* masm);
    1.56 +                                     int* total_frame_words, bool verify_fpu = true, bool save_vectors = false);
    1.57 +  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
    1.58  
    1.59    static int rax_offset() { return rax_off; }
    1.60    static int rbx_offset() { return rbx_off; }
    1.61 @@ -113,9 +113,20 @@
    1.62  };
    1.63  
    1.64  OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
    1.65 -                                           int* total_frame_words, bool verify_fpu) {
    1.66 -
    1.67 -  int frame_size_in_bytes =  (reg_save_size + additional_frame_words) * wordSize;
    1.68 +                                           int* total_frame_words, bool verify_fpu, bool save_vectors) {
    1.69 +  int vect_words = 0;
    1.70 +#ifdef COMPILER2
    1.71 +  if (save_vectors) {
    1.72 +    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
    1.73 +    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
     1.74 +    // Save upper half of YMM registers
    1.75 +    vect_words = 8 * 16 / wordSize;
    1.76 +    additional_frame_words += vect_words;
    1.77 +  }
    1.78 +#else
    1.79 +  assert(!save_vectors, "vectors are generated only by C2");
    1.80 +#endif
    1.81 +  int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
    1.82    int frame_words = frame_size_in_bytes / wordSize;
    1.83    *total_frame_words = frame_words;
    1.84  
    1.85 @@ -129,7 +140,7 @@
    1.86    __ enter();
    1.87    __ pusha();
    1.88    __ pushf();
    1.89 -  __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
    1.90 +  __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space
    1.91    __ push_FPU_state();          // Save FPU state & init
    1.92  
    1.93    if (verify_fpu) {
    1.94 @@ -183,14 +194,28 @@
    1.95      __ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
    1.96      __ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
    1.97    } else if( UseSSE >= 2 ) {
    1.98 -    __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
    1.99 -    __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
   1.100 -    __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
   1.101 -    __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
   1.102 -    __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
   1.103 -    __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
   1.104 -    __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
   1.105 -    __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
    1.106 +    // Save whole 128bit (16 bytes) XMM registers
   1.107 +    __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0);
   1.108 +    __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1);
   1.109 +    __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2);
   1.110 +    __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3);
   1.111 +    __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4);
   1.112 +    __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5);
   1.113 +    __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6);
   1.114 +    __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7);
   1.115 +  }
   1.116 +
   1.117 +  if (vect_words > 0) {
   1.118 +    assert(vect_words*wordSize == 128, "");
    1.119 +    __ subptr(rsp, 128); // Save upper half of YMM registers
   1.120 +    __ vextractf128h(Address(rsp,  0),xmm0);
   1.121 +    __ vextractf128h(Address(rsp, 16),xmm1);
   1.122 +    __ vextractf128h(Address(rsp, 32),xmm2);
   1.123 +    __ vextractf128h(Address(rsp, 48),xmm3);
   1.124 +    __ vextractf128h(Address(rsp, 64),xmm4);
   1.125 +    __ vextractf128h(Address(rsp, 80),xmm5);
   1.126 +    __ vextractf128h(Address(rsp, 96),xmm6);
   1.127 +    __ vextractf128h(Address(rsp,112),xmm7);
   1.128    }
   1.129  
   1.130    // Set an oopmap for the call site.  This oopmap will map all
   1.131 @@ -253,10 +278,20 @@
   1.132  
   1.133  }
   1.134  
   1.135 -void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
   1.136 -
   1.137 +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   1.138    // Recover XMM & FPU state
   1.139 -  if( UseSSE == 1 ) {
   1.140 +  int additional_frame_bytes = 0;
   1.141 +#ifdef COMPILER2
   1.142 +  if (restore_vectors) {
   1.143 +    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
   1.144 +    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
   1.145 +    additional_frame_bytes = 128;
   1.146 +  }
   1.147 +#else
   1.148 +  assert(!restore_vectors, "vectors are generated only by C2");
   1.149 +#endif
   1.150 +  if (UseSSE == 1) {
   1.151 +    assert(additional_frame_bytes == 0, "");
   1.152      __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
   1.153      __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
   1.154      __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
   1.155 @@ -265,18 +300,33 @@
   1.156      __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
   1.157      __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
   1.158      __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
   1.159 -  } else if( UseSSE >= 2 ) {
   1.160 -    __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
   1.161 -    __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
   1.162 -    __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
   1.163 -    __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
   1.164 -    __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
   1.165 -    __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
   1.166 -    __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
   1.167 -    __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
   1.168 +  } else if (UseSSE >= 2) {
   1.169 +#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes)
   1.170 +    __ movdqu(xmm0,STACK_ADDRESS(xmm0_off));
   1.171 +    __ movdqu(xmm1,STACK_ADDRESS(xmm1_off));
   1.172 +    __ movdqu(xmm2,STACK_ADDRESS(xmm2_off));
   1.173 +    __ movdqu(xmm3,STACK_ADDRESS(xmm3_off));
   1.174 +    __ movdqu(xmm4,STACK_ADDRESS(xmm4_off));
   1.175 +    __ movdqu(xmm5,STACK_ADDRESS(xmm5_off));
   1.176 +    __ movdqu(xmm6,STACK_ADDRESS(xmm6_off));
   1.177 +    __ movdqu(xmm7,STACK_ADDRESS(xmm7_off));
   1.178 +#undef STACK_ADDRESS
   1.179 +  }
   1.180 +  if (restore_vectors) {
    1.181 +    // Restore upper half of YMM registers.
   1.182 +    assert(additional_frame_bytes == 128, "");
   1.183 +    __ vinsertf128h(xmm0, Address(rsp,  0));
   1.184 +    __ vinsertf128h(xmm1, Address(rsp, 16));
   1.185 +    __ vinsertf128h(xmm2, Address(rsp, 32));
   1.186 +    __ vinsertf128h(xmm3, Address(rsp, 48));
   1.187 +    __ vinsertf128h(xmm4, Address(rsp, 64));
   1.188 +    __ vinsertf128h(xmm5, Address(rsp, 80));
   1.189 +    __ vinsertf128h(xmm6, Address(rsp, 96));
   1.190 +    __ vinsertf128h(xmm7, Address(rsp,112));
   1.191 +    __ addptr(rsp, additional_frame_bytes);
   1.192    }
   1.193    __ pop_FPU_state();
   1.194 -  __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
   1.195 +  __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
   1.196  
   1.197    __ popf();
   1.198    __ popa();
   1.199 @@ -308,6 +358,13 @@
   1.200    __ addptr(rsp, return_off * wordSize);
   1.201  }
   1.202  
   1.203 +// Is vector's size (in bytes) bigger than a size saved by default?
   1.204 +// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions.
   1.205 +// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated.
   1.206 +bool SharedRuntime::is_wide_vector(int size) {
   1.207 +  return size > 16;
   1.208 +}
   1.209 +
   1.210  // The java_calling_convention describes stack locations as ideal slots on
   1.211  // a frame with no abi restrictions. Since we must observe abi restrictions
   1.212  // (like the placement of the register window) the slots must be biased by
   1.213 @@ -2732,7 +2789,6 @@
   1.214    return 0;
   1.215  }
   1.216  
   1.217 -
   1.218  //------------------------------generate_deopt_blob----------------------------
   1.219  void SharedRuntime::generate_deopt_blob() {
   1.220    // allocate space for the code
   1.221 @@ -3270,7 +3326,7 @@
   1.222  // setup oopmap, and calls safepoint code to stop the compiled code for
   1.223  // a safepoint.
   1.224  //
   1.225 -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
   1.226 +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
   1.227  
   1.228    // Account for thread arg in our frame
   1.229    const int additional_words = 1;
   1.230 @@ -3290,17 +3346,18 @@
   1.231    const Register java_thread = rdi; // callee-saved for VC++
   1.232    address start   = __ pc();
   1.233    address call_pc = NULL;
   1.234 -
   1.235 +  bool cause_return = (poll_type == POLL_AT_RETURN);
   1.236 +  bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
   1.237    // If cause_return is true we are at a poll_return and there is
   1.238    // the return address on the stack to the caller on the nmethod
   1.239    // that is safepoint. We can leave this return on the stack and
   1.240    // effectively complete the return and safepoint in the caller.
   1.241    // Otherwise we push space for a return address that the safepoint
   1.242    // handler will install later to make the stack walking sensible.
   1.243 -  if( !cause_return )
   1.244 -    __ push(rbx);                // Make room for return address (or push it again)
   1.245 -
   1.246 -  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
   1.247 +  if (!cause_return)
   1.248 +    __ push(rbx);  // Make room for return address (or push it again)
   1.249 +
   1.250 +  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors);
   1.251  
   1.252    // The following is basically a call_VM. However, we need the precise
   1.253    // address of the call in order to generate an oopmap. Hence, we do all the
   1.254 @@ -3312,7 +3369,7 @@
   1.255    __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
   1.256  
   1.257    // if this was not a poll_return then we need to correct the return address now.
   1.258 -  if( !cause_return ) {
   1.259 +  if (!cause_return) {
   1.260      __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
   1.261      __ movptr(Address(rbp, wordSize), rax);
   1.262    }
   1.263 @@ -3340,15 +3397,14 @@
   1.264    __ jcc(Assembler::equal, noException);
   1.265  
   1.266    // Exception pending
   1.267 -
   1.268 -  RegisterSaver::restore_live_registers(masm);
   1.269 +  RegisterSaver::restore_live_registers(masm, save_vectors);
   1.270  
   1.271    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
   1.272  
   1.273    __ bind(noException);
   1.274  
   1.275    // Normal exit, register restoring and exit
   1.276 -  RegisterSaver::restore_live_registers(masm);
   1.277 +  RegisterSaver::restore_live_registers(masm, save_vectors);
   1.278  
   1.279    __ ret(0);
   1.280  

mercurial