8068945: Use RBP register as proper frame pointer in JIT compiled code on x86

Fri, 29 May 2015 10:58:45 +0200

author
zmajo
date
Fri, 29 May 2015 10:58:45 +0200
changeset 7854
e8260b6328fb
parent 7853
a1642365d69f
child 7855
62df92c92d33

8068945: Use RBP register as proper frame pointer in JIT compiled code on x86
Summary: Introduce the PreserveFramePointer flag to control if RBP is used as the frame pointer or as a general purpose register.
Reviewed-by: kvn, roland, dlong, enevill, shade

agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java file | annotate | diff | comparison | revisions
src/cpu/ppc/vm/globals_ppc.hpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/globals_sparc.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/c1_FrameMap_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/c1_MacroAssembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/c1_Runtime1_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/frame_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/frame_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/frame_x86.inline.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/globals_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/macroAssembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/methodHandles_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/runtime_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/sharedRuntime_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_64.ad file | annotate | diff | comparison | revisions
src/share/vm/c1/c1_GraphBuilder.cpp file | annotate | diff | comparison | revisions
src/share/vm/c1/c1_LIR.cpp file | annotate | diff | comparison | revisions
src/share/vm/c1/c1_LIR.hpp file | annotate | diff | comparison | revisions
src/share/vm/c1/c1_LIRGenerator.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/bytecodeInfo.cpp file | annotate | diff | comparison | revisions
src/share/vm/prims/forte.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/sharedRuntime.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/vframe.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Fri Mar 27 10:57:42 2015 +0100
     1.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Fri May 29 10:58:45 2015 +0200
     1.3 @@ -317,26 +317,17 @@
     1.4    //------------------------------------------------------------------------------
     1.5    // frame::adjust_unextended_sp
     1.6    private void adjustUnextendedSP() {
     1.7 -    // If we are returning to a compiled MethodHandle call site, the
     1.8 -    // saved_fp will in fact be a saved value of the unextended SP.  The
     1.9 -    // simplest way to tell whether we are returning to such a call site
    1.10 -    // is as follows:
    1.11 +    // On x86, sites calling method handle intrinsics and lambda forms are treated
    1.12 +    // as any other call site. Therefore, no special action is needed when we are
    1.13 +    // returning to any of these call sites.
    1.14  
    1.15      CodeBlob cb = cb();
    1.16      NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
    1.17      if (senderNm != null) {
    1.18 -      // If the sender PC is a deoptimization point, get the original
    1.19 -      // PC.  For MethodHandle call site the unextended_sp is stored in
    1.20 -      // saved_fp.
    1.21 -      if (senderNm.isDeoptMhEntry(getPC())) {
    1.22 -        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
    1.23 -        raw_unextendedSP = getFP();
    1.24 -      }
    1.25 -      else if (senderNm.isDeoptEntry(getPC())) {
    1.26 -        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
    1.27 -      }
    1.28 -      else if (senderNm.isMethodHandleReturn(getPC())) {
    1.29 -        raw_unextendedSP = getFP();
    1.30 +      // If the sender PC is a deoptimization point, get the original PC.
    1.31 +      if (senderNm.isDeoptEntry(getPC()) ||
    1.32 +          senderNm.isDeoptMhEntry(getPC())) {
    1.33 +        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
    1.34        }
    1.35      }
    1.36    }
     2.1 --- a/src/cpu/ppc/vm/globals_ppc.hpp	Fri Mar 27 10:57:42 2015 +0100
     2.2 +++ b/src/cpu/ppc/vm/globals_ppc.hpp	Fri May 29 10:58:45 2015 +0200
     2.3 @@ -55,6 +55,8 @@
     2.4  
     2.5  define_pd_global(bool, UseMembar,             false);
     2.6  
     2.7 +define_pd_global(bool, PreserveFramePointer,  false);
     2.8 +
     2.9  // GC Ergo Flags
    2.10  define_pd_global(uintx, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
    2.11  
     3.1 --- a/src/cpu/sparc/vm/globals_sparc.hpp	Fri Mar 27 10:57:42 2015 +0100
     3.2 +++ b/src/cpu/sparc/vm/globals_sparc.hpp	Fri May 29 10:58:45 2015 +0200
     3.3 @@ -74,6 +74,8 @@
     3.4  
     3.5  define_pd_global(bool, UseMembar,            false);
     3.6  
     3.7 +define_pd_global(bool, PreserveFramePointer, false);
     3.8 +
     3.9  // GC Ergo Flags
    3.10  define_pd_global(uintx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
    3.11  
     4.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 27 10:57:42 2015 +0100
     4.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri May 29 10:58:45 2015 +0200
     4.3 @@ -141,8 +141,10 @@
     4.4  
     4.5  #endif // _LP64
     4.6  
     4.7 -// JSR 292 fixed register usages:
     4.8 -REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
     4.9 +// JSR 292
    4.10 +// On x86, the SP does not have to be saved when invoking method handle intrinsics
    4.11 +// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
    4.12 +REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
    4.13  
    4.14  // Address is an abstraction used to represent a memory location
    4.15  // using any of the amd64 addressing modes with one object.
     5.1 --- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
     5.2 +++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Fri May 29 10:58:45 2015 +0200
     5.3 @@ -343,14 +343,13 @@
     5.4    return FrameMap::rsp_opr;
     5.5  }
     5.6  
     5.7 -
     5.8  // JSR 292
     5.9 +// On x86, there is no need to save the SP, because neither
    5.10 +// method handle intrinsics, nor compiled lambda forms modify it.
    5.11  LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
    5.12 -  assert(rbp == rbp_mh_SP_save, "must be same register");
    5.13 -  return rbp_opr;
    5.14 +  return LIR_OprFact::illegalOpr;
    5.15  }
    5.16  
    5.17 -
    5.18  bool FrameMap::validate_frame() {
    5.19    return true;
    5.20  }
     6.1 --- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
     6.2 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Fri May 29 10:58:45 2015 +0200
     6.3 @@ -359,6 +359,9 @@
     6.4    generate_stack_overflow_check(bang_size_in_bytes);
     6.5  
     6.6    push(rbp);
     6.7 +  if (PreserveFramePointer) {
     6.8 +    mov(rbp, rsp);
     6.9 +  }
    6.10  #ifdef TIERED
    6.11    // c2 leaves fpu stack dirty. Clean it on entry
    6.12    if (UseSSE < 2 ) {
     7.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
     7.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri May 29 10:58:45 2015 +0200
     7.3 @@ -754,14 +754,9 @@
     7.4      // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
     7.5      // since we do a leave anyway.
     7.6  
     7.7 -    // Pop the return address since we are possibly changing SP (restoring from BP).
     7.8 +    // Pop the return address.
     7.9      __ leave();
    7.10      __ pop(rcx);
    7.11 -
    7.12 -    // Restore SP from BP if the exception PC is a method handle call site.
    7.13 -    NOT_LP64(__ get_thread(thread);)
    7.14 -    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
    7.15 -    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
    7.16      __ jmp(rcx);  // jump to exception handler
    7.17      break;
    7.18    default:  ShouldNotReachHere();
    7.19 @@ -832,11 +827,6 @@
    7.20    // the pop is also necessary to simulate the effect of a ret(0)
    7.21    __ pop(exception_pc);
    7.22  
    7.23 -  // Restore SP from BP if the exception PC is a method handle call site.
    7.24 -  NOT_LP64(__ get_thread(thread);)
    7.25 -  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
    7.26 -  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
    7.27 -
    7.28    // continue at exception handler (return address removed)
    7.29    // note: do *not* remove arguments when unwinding the
    7.30    //       activation since the caller assumes having
     8.1 --- a/src/cpu/x86/vm/frame_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
     8.2 +++ b/src/cpu/x86/vm/frame_x86.cpp	Fri May 29 10:58:45 2015 +0200
     8.3 @@ -216,7 +216,8 @@
     8.4      if (sender_blob->is_nmethod()) {
     8.5          nmethod* nm = sender_blob->as_nmethod_or_null();
     8.6          if (nm != NULL) {
     8.7 -            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
     8.8 +            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
     8.9 +                nm->method()->is_method_handle_intrinsic()) {
    8.10                  return false;
    8.11              }
    8.12          }
    8.13 @@ -383,10 +384,9 @@
    8.14  // frame::verify_deopt_original_pc
    8.15  //
    8.16  // Verifies the calculated original PC of a deoptimization PC for the
    8.17 -// given unextended SP.  The unextended SP might also be the saved SP
    8.18 -// for MethodHandle call sites.
    8.19 +// given unextended SP.
    8.20  #ifdef ASSERT
    8.21 -void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
    8.22 +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
    8.23    frame fr;
    8.24  
    8.25    // This is ugly but it's better than to change {get,set}_original_pc
    8.26 @@ -396,33 +396,23 @@
    8.27  
    8.28    address original_pc = nm->get_original_pc(&fr);
    8.29    assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
    8.30 -  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
    8.31  }
    8.32  #endif
    8.33  
    8.34  //------------------------------------------------------------------------------
    8.35  // frame::adjust_unextended_sp
    8.36  void frame::adjust_unextended_sp() {
    8.37 -  // If we are returning to a compiled MethodHandle call site, the
    8.38 -  // saved_fp will in fact be a saved value of the unextended SP.  The
    8.39 -  // simplest way to tell whether we are returning to such a call site
    8.40 -  // is as follows:
    8.41 +  // On x86, sites calling method handle intrinsics and lambda forms are treated
    8.42 +  // as any other call site. Therefore, no special action is needed when we are
    8.43 +  // returning to any of these call sites.
    8.44  
    8.45    nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
    8.46    if (sender_nm != NULL) {
    8.47 -    // If the sender PC is a deoptimization point, get the original
    8.48 -    // PC.  For MethodHandle call site the unextended_sp is stored in
    8.49 -    // saved_fp.
    8.50 -    if (sender_nm->is_deopt_mh_entry(_pc)) {
    8.51 -      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
    8.52 -      _unextended_sp = _fp;
    8.53 -    }
    8.54 -    else if (sender_nm->is_deopt_entry(_pc)) {
    8.55 +    // If the sender PC is a deoptimization point, get the original PC.
    8.56 +    if (sender_nm->is_deopt_entry(_pc) ||
    8.57 +        sender_nm->is_deopt_mh_entry(_pc)) {
    8.58        DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
    8.59      }
    8.60 -    else if (sender_nm->is_method_handle_return(_pc)) {
    8.61 -      _unextended_sp = _fp;
    8.62 -    }
    8.63    }
    8.64  }
    8.65  
     9.1 --- a/src/cpu/x86/vm/frame_x86.hpp	Fri Mar 27 10:57:42 2015 +0100
     9.2 +++ b/src/cpu/x86/vm/frame_x86.hpp	Fri May 29 10:58:45 2015 +0200
     9.3 @@ -76,11 +76,11 @@
     9.4  //    [locals and parameters   ]
     9.5  //                               <- sender sp
     9.6  
     9.7 -// [1] When the c++ interpreter calls a new method it returns to the frame
     9.8 +// [1] When the C++ interpreter calls a new method it returns to the frame
     9.9  //     manager which allocates a new frame on the stack. In that case there
    9.10  //     is no real callee of this newly allocated frame. The frame manager is
    9.11 -//     aware of the  additional frame(s) and will pop them as nested calls
    9.12 -//     complete. Howevers tTo make it look good in the debugger the frame
    9.13 +//     aware of the additional frame(s) and will pop them as nested calls
    9.14 +//     complete. However, to make it look good in the debugger the frame
    9.15  //     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
    9.16  //     with a fake interpreter_state* parameter to make it easy to debug
    9.17  //     nested calls.
    9.18 @@ -88,7 +88,7 @@
    9.19  // Note that contrary to the layout for the assembly interpreter the
    9.20  // expression stack allocated for the C++ interpreter is full sized.
    9.21  // However this is not as bad as it seems as the interpreter frame_manager
    9.22 -// will truncate the unused space on succesive method calls.
    9.23 +// will truncate the unused space on successive method calls.
    9.24  //
    9.25  // ------------------------------ C++ interpreter ----------------------------------------
    9.26  
    9.27 @@ -172,10 +172,7 @@
    9.28  
    9.29  #ifdef ASSERT
    9.30    // Used in frame::sender_for_{interpreter,compiled}_frame
    9.31 -  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
    9.32 -  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
    9.33 -    verify_deopt_original_pc(nm, unextended_sp, true);
    9.34 -  }
    9.35 +  static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
    9.36  #endif
    9.37  
    9.38   public:
    10.1 --- a/src/cpu/x86/vm/frame_x86.inline.hpp	Fri Mar 27 10:57:42 2015 +0100
    10.2 +++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Fri May 29 10:58:45 2015 +0200
    10.3 @@ -93,7 +93,7 @@
    10.4    // find_blob call. This is also why we can have no asserts on the validity
    10.5    // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
    10.6    // -> pd_last_frame should use a specialized version of pd_last_frame which could
    10.7 -  // call a specilaized frame constructor instead of this one.
    10.8 +  // call a specialized frame constructor instead of this one.
    10.9    // Then we could use the assert below. However this assert is of somewhat dubious
   10.10    // value.
   10.11    // assert(_pc != NULL, "no pc?");
    11.1 --- a/src/cpu/x86/vm/globals_x86.hpp	Fri Mar 27 10:57:42 2015 +0100
    11.2 +++ b/src/cpu/x86/vm/globals_x86.hpp	Fri May 29 10:58:45 2015 +0200
    11.3 @@ -82,6 +82,8 @@
    11.4  
    11.5  define_pd_global(uintx, TypeProfileLevel, 111);
    11.6  
    11.7 +define_pd_global(bool, PreserveFramePointer, false);
    11.8 +
    11.9  #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
   11.10                                                                              \
   11.11    develop(bool, IEEEPrecision, true,                                        \
    12.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
    12.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 29 10:58:45 2015 +0200
    12.3 @@ -6122,6 +6122,10 @@
    12.4      // We always push rbp, so that on return to interpreter rbp, will be
    12.5      // restored correctly and we can correct the stack.
    12.6      push(rbp);
    12.7 +    // Save caller's stack pointer into RBP if the frame pointer is preserved.
    12.8 +    if (PreserveFramePointer) {
    12.9 +      mov(rbp, rsp);
   12.10 +    }
   12.11      // Remove word for ebp
   12.12      framesize -= wordSize;
   12.13  
   12.14 @@ -6136,6 +6140,11 @@
   12.15      // Save RBP register now.
   12.16      framesize -= wordSize;
   12.17      movptr(Address(rsp, framesize), rbp);
   12.18 +    // Save caller's stack pointer into RBP if the frame pointer is preserved.
   12.19 +    if (PreserveFramePointer) {
   12.20 +      movptr(rbp, rsp);
   12.21 +      addptr(rbp, framesize + wordSize);
   12.22 +    }
   12.23    }
   12.24  
   12.25    if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    13.1 --- a/src/cpu/x86/vm/methodHandles_x86.cpp	Fri Mar 27 10:57:42 2015 +0100
    13.2 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Fri May 29 10:58:45 2015 +0200
    13.3 @@ -373,7 +373,7 @@
    13.4      //  member_reg - MemberName that was the trailing argument
    13.5      //  temp1_recv_klass - klass of stacked receiver, if needed
    13.6      //  rsi/r13 - interpreter linkage (if interpreted)
    13.7 -    //  rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
    13.8 +    //  rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
    13.9  
   13.10      Label L_incompatible_class_change_error;
   13.11      switch (iid) {
    14.1 --- a/src/cpu/x86/vm/runtime_x86_32.cpp	Fri Mar 27 10:57:42 2015 +0100
    14.2 +++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Fri May 29 10:58:45 2015 +0200
    14.3 @@ -126,10 +126,6 @@
    14.4  
    14.5    // rax: exception handler for given <exception oop/exception pc>
    14.6  
    14.7 -  // Restore SP from BP if the exception PC is a MethodHandle call site.
    14.8 -  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
    14.9 -  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
   14.10 -
   14.11    // We have a handler in rax, (could be deopt blob)
   14.12    // rdx - throwing pc, deopt blob will need it.
   14.13  
    15.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Mar 27 10:57:42 2015 +0100
    15.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri May 29 10:58:45 2015 +0200
    15.3 @@ -4017,8 +4017,8 @@
    15.4  
    15.5    // Save callee-saved registers.  See x86_64.ad.
    15.6  
    15.7 -  // rbp is an implicitly saved callee saved register (i.e. the calling
    15.8 -  // convention will save restore it in prolog/epilog) Other than that
    15.9 +  // rbp is an implicitly saved callee saved register (i.e., the calling
   15.10 +  // convention will save/restore it in the prolog/epilog). Other than that
   15.11    // there are no callee save registers now that adapter frames are gone.
   15.12  
   15.13    __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
   15.14 @@ -4060,9 +4060,9 @@
   15.15  
   15.16    // Restore callee-saved registers
   15.17  
   15.18 -  // rbp is an implicitly saved callee saved register (i.e. the calling
   15.19 +  // rbp is an implicitly saved callee-saved register (i.e., the calling
   15.20    // convention will save restore it in prolog/epilog) Other than that
   15.21 -  // there are no callee save registers no that adapter frames are gone.
   15.22 +  // there are no callee save registers now that adapter frames are gone.
   15.23  
   15.24    __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
   15.25  
   15.26 @@ -4071,10 +4071,6 @@
   15.27  
   15.28    // rax: exception handler
   15.29  
   15.30 -  // Restore SP from BP if the exception PC is a MethodHandle call site.
   15.31 -  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
   15.32 -  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
   15.33 -
   15.34    // We have a handler in rax (could be deopt blob).
   15.35    __ mov(r8, rax);
   15.36  
    16.1 --- a/src/cpu/x86/vm/x86.ad	Fri Mar 27 10:57:42 2015 +0100
    16.2 +++ b/src/cpu/x86/vm/x86.ad	Fri May 29 10:58:45 2015 +0200
    16.3 @@ -912,21 +912,6 @@
    16.4  
    16.5  encode %{
    16.6  
    16.7 -  enc_class preserve_SP %{
    16.8 -    debug_only(int off0 = cbuf.insts_size());
    16.9 -    MacroAssembler _masm(&cbuf);
   16.10 -    // RBP is preserved across all calls, even compiled calls.
   16.11 -    // Use it to preserve RSP in places where the callee might change the SP.
   16.12 -    __ movptr(rbp_mh_SP_save, rsp);
   16.13 -    debug_only(int off1 = cbuf.insts_size());
   16.14 -    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
   16.15 -  %}
   16.16 -
   16.17 -  enc_class restore_SP %{
   16.18 -    MacroAssembler _masm(&cbuf);
   16.19 -    __ movptr(rsp, rbp_mh_SP_save);
   16.20 -  %}
   16.21 -
   16.22    enc_class call_epilog %{
   16.23      if (VerifyStackAtCalls) {
   16.24        // Check that stack depth is unchanged: find majik cookie on stack
    17.1 --- a/src/cpu/x86/vm/x86_32.ad	Fri Mar 27 10:57:42 2015 +0100
    17.2 +++ b/src/cpu/x86/vm/x86_32.ad	Fri May 29 10:58:45 2015 +0200
    17.3 @@ -123,50 +123,94 @@
    17.4  // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
    17.5  // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
    17.6  //
    17.7 +// Class for no registers (empty set).
    17.8 +reg_class no_reg();
    17.9 +
   17.10  // Class for all registers
   17.11 -reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
   17.12 +reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
   17.13 +// Class for all registers (excluding EBP)
   17.14 +reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
   17.15 +// Dynamic register class that selects at runtime between register classes
   17.16 +// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
   17.17 +// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
   17.18 +reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
   17.19 +
   17.20  // Class for general registers
   17.21 -reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
   17.22 -// Class for general registers which may be used for implicit null checks on win95
   17.23 -// Also safe for use by tailjump. We don't want to allocate in rbp,
   17.24 -reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
   17.25 +reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
   17.26 +// Class for general registers (excluding EBP).
   17.27 +// This register class can be used for implicit null checks on win95.
   17.28 +// It is also safe for use by tailjumps (we don't want to allocate in ebp).
   17.29 +// Used also if the PreserveFramePointer flag is true.
   17.30 +reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
   17.31 +// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
   17.32 +reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
   17.33 +
   17.34  // Class of "X" registers
   17.35  reg_class int_x_reg(EBX, ECX, EDX, EAX);
   17.36 +
   17.37  // Class of registers that can appear in an address with no offset.
   17.38  // EBP and ESP require an extra instruction byte for zero offset.
   17.39  // Used in fast-unlock
   17.40  reg_class p_reg(EDX, EDI, ESI, EBX);
   17.41 -// Class for general registers not including ECX
   17.42 -reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
   17.43 -// Class for general registers not including EAX
   17.44 +
   17.45 +// Class for general registers excluding ECX
   17.46 +reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
   17.47 +// Class for general registers excluding ECX (and EBP)
   17.48 +reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
   17.49 +// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
   17.50 +reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
   17.51 +
   17.52 +// Class for general registers excluding EAX
   17.53  reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
   17.54 -// Class for general registers not including EAX or EBX.
   17.55 -reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
   17.56 +
   17.57 +// Class for general registers excluding EAX and EBX.
   17.58 +reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
   17.59 +// Class for general registers excluding EAX and EBX (and EBP)
   17.60 +reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
   17.61 +// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
   17.62 +reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
   17.63 +
   17.64  // Class of EAX (for multiply and divide operations)
   17.65  reg_class eax_reg(EAX);
   17.66 +
   17.67  // Class of EBX (for atomic add)
   17.68  reg_class ebx_reg(EBX);
   17.69 +
   17.70  // Class of ECX (for shift and JCXZ operations and cmpLTMask)
   17.71  reg_class ecx_reg(ECX);
   17.72 +
   17.73  // Class of EDX (for multiply and divide operations)
   17.74  reg_class edx_reg(EDX);
   17.75 +
   17.76  // Class of EDI (for synchronization)
   17.77  reg_class edi_reg(EDI);
   17.78 +
   17.79  // Class of ESI (for synchronization)
   17.80  reg_class esi_reg(ESI);
   17.81 -// Singleton class for interpreter's stack pointer
   17.82 -reg_class ebp_reg(EBP);
   17.83 +
   17.84  // Singleton class for stack pointer
   17.85  reg_class sp_reg(ESP);
   17.86 +
   17.87  // Singleton class for instruction pointer
   17.88  // reg_class ip_reg(EIP);
   17.89 +
   17.90  // Class of integer register pairs
   17.91 -reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
   17.92 +reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
   17.93 +// Class of integer register pairs (excluding EBP and EDI).
   17.94 +reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
   17.95 +// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
   17.96 +reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
   17.97 +
   17.98  // Class of integer register pairs that aligns with calling convention
   17.99  reg_class eadx_reg( EAX,EDX );
  17.100  reg_class ebcx_reg( ECX,EBX );
  17.101 +
  17.102  // Not AX or DX, used in divides
  17.103 -reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
  17.104 +reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  17.105 +// Not AX or DX (nor EBP), used in divides
  17.106 +reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  17.107 +// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
  17.108 +reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  17.109  
  17.110  // Floating point registers.  Notice FPR0 is not a choice.
  17.111  // FPR0 is not ever allocated; we use clever encodings to fake
  17.112 @@ -240,18 +284,11 @@
  17.113    return size;
  17.114  }
  17.115  
  17.116 -static int preserve_SP_size() {
  17.117 -  return 2;  // op, rm(reg/reg)
  17.118 -}
  17.119 -
  17.120  // !!!!! Special hack to get all type of calls to specify the byte offset
  17.121  //       from the start of the call to the point where the return address
  17.122  //       will point.
  17.123  int MachCallStaticJavaNode::ret_addr_offset() {
  17.124 -  int offset = 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  17.125 -  if (_method_handle_invoke)
  17.126 -    offset += preserve_SP_size();
  17.127 -  return offset;
  17.128 +  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points  
  17.129  }
  17.130  
  17.131  int MachCallDynamicJavaNode::ret_addr_offset() {
  17.132 @@ -285,15 +322,6 @@
  17.133  
  17.134  // The address of the call instruction needs to be 4-byte aligned to
  17.135  // ensure that it does not span a cache line so that it can be patched.
  17.136 -int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
  17.137 -  current_offset += pre_call_resets_size();  // skip fldcw, if any
  17.138 -  current_offset += preserve_SP_size();   // skip mov rbp, rsp
  17.139 -  current_offset += 1;      // skip call opcode byte
  17.140 -  return round_to(current_offset, alignment_required()) - current_offset;
  17.141 -}
  17.142 -
  17.143 -// The address of the call instruction needs to be 4-byte aligned to
  17.144 -// ensure that it does not span a cache line so that it can be patched.
  17.145  int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  17.146    current_offset += pre_call_resets_size();  // skip fldcw, if any
  17.147    current_offset += 5;      // skip MOV instruction
  17.148 @@ -523,6 +551,10 @@
  17.149      st->print("# stack bang (%d bytes)", bangsize);
  17.150      st->print("\n\t");
  17.151      st->print("PUSH   EBP\t# Save EBP");
  17.152 +    if (PreserveFramePointer) {
  17.153 +      st->print("\n\t");
  17.154 +      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  17.155 +    }
  17.156      if (framesize) {
  17.157        st->print("\n\t");
  17.158        st->print("SUB    ESP, #%d\t# Create frame",framesize);
  17.159 @@ -532,6 +564,10 @@
  17.160      st->print("\n\t");
  17.161      framesize -= wordSize;
  17.162      st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  17.163 +    if (PreserveFramePointer) {
  17.164 +      st->print("\n\t");
  17.165 +      st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
  17.166 +    }
  17.167    }
  17.168  
  17.169    if (VerifyStackAtCalls) {
  17.170 @@ -1488,7 +1524,7 @@
  17.171  }
  17.172  
  17.173  const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  17.174 -  return EBP_REG_mask();
  17.175 +  return NO_REG_mask();
  17.176  }
  17.177  
  17.178  // Returns true if the high 32 bits of the value is known to be zero.
  17.179 @@ -3734,7 +3770,7 @@
  17.180  
  17.181  // On windows95, EBP is not safe to use for implicit null tests.
  17.182  operand eRegP_no_EBP() %{
  17.183 -  constraint(ALLOC_IN_RC(int_reg_no_rbp));
  17.184 +  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  17.185    match(RegP);
  17.186    match(eAXRegP);
  17.187    match(eBXRegP);
  17.188 @@ -3823,13 +3859,6 @@
  17.189    interface(REG_INTER);
  17.190  %}
  17.191  
  17.192 -operand eBPRegP() %{
  17.193 -  constraint(ALLOC_IN_RC(ebp_reg));
  17.194 -  match(RegP);
  17.195 -  format %{ "EBP" %}
  17.196 -  interface(REG_INTER);
  17.197 -%}
  17.198 -
  17.199  operand eRegL() %{
  17.200    constraint(ALLOC_IN_RC(long_reg));
  17.201    match(RegL);
  17.202 @@ -12708,7 +12737,6 @@
  17.203  //       compute_padding() functions will have to be adjusted.
  17.204  instruct CallStaticJavaDirect(method meth) %{
  17.205    match(CallStaticJava);
  17.206 -  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  17.207    effect(USE meth);
  17.208  
  17.209    ins_cost(300);
  17.210 @@ -12722,29 +12750,6 @@
  17.211    ins_alignment(4);
  17.212  %}
  17.213  
  17.214 -// Call Java Static Instruction (method handle version)
  17.215 -// Note: If this code changes, the corresponding ret_addr_offset() and
  17.216 -//       compute_padding() functions will have to be adjusted.
  17.217 -instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  17.218 -  match(CallStaticJava);
  17.219 -  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  17.220 -  effect(USE meth);
  17.221 -  // EBP is saved by all callees (for interpreter stack correction).
  17.222 -  // We use it here for a similar purpose, in {preserve,restore}_SP.
  17.223 -
  17.224 -  ins_cost(300);
  17.225 -  format %{ "CALL,static/MethodHandle " %}
  17.226 -  opcode(0xE8); /* E8 cd */
  17.227 -  ins_encode( pre_call_resets,
  17.228 -              preserve_SP,
  17.229 -              Java_Static_Call( meth ),
  17.230 -              restore_SP,
  17.231 -              call_epilog,
  17.232 -              post_call_FPU );
  17.233 -  ins_pipe( pipe_slow );
  17.234 -  ins_alignment(4);
  17.235 -%}
  17.236 -
  17.237  // Call Java Dynamic Instruction
  17.238  // Note: If this code changes, the corresponding ret_addr_offset() and
  17.239  //       compute_padding() functions will have to be adjusted.
    18.1 --- a/src/cpu/x86/vm/x86_64.ad	Fri Mar 27 10:57:42 2015 +0100
    18.2 +++ b/src/cpu/x86/vm/x86_64.ad	Fri May 29 10:58:45 2015 +0200
    18.3 @@ -166,42 +166,67 @@
    18.4  // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
    18.5  //
    18.6  
    18.7 -// Class for all pointer registers (including RSP)
    18.8 -reg_class any_reg(RAX, RAX_H,
    18.9 -                  RDX, RDX_H,
   18.10 -                  RBP, RBP_H,
   18.11 -                  RDI, RDI_H,
   18.12 -                  RSI, RSI_H,
   18.13 -                  RCX, RCX_H,
   18.14 -                  RBX, RBX_H,
   18.15 -                  RSP, RSP_H,
   18.16 -                  R8,  R8_H,
   18.17 -                  R9,  R9_H,
   18.18 -                  R10, R10_H,
   18.19 -                  R11, R11_H,
   18.20 -                  R12, R12_H,
   18.21 -                  R13, R13_H,
   18.22 -                  R14, R14_H,
   18.23 -                  R15, R15_H);
   18.24 -
   18.25 -// Class for all pointer registers except RSP
   18.26 -reg_class ptr_reg(RAX, RAX_H,
   18.27 -                  RDX, RDX_H,
   18.28 -                  RBP, RBP_H,
   18.29 -                  RDI, RDI_H,
   18.30 -                  RSI, RSI_H,
   18.31 -                  RCX, RCX_H,
   18.32 -                  RBX, RBX_H,
   18.33 -                  R8,  R8_H,
   18.34 -                  R9,  R9_H,
   18.35 -                  R10, R10_H,
   18.36 -                  R11, R11_H,
   18.37 -                  R13, R13_H,
   18.38 -                  R14, R14_H);
   18.39 -
   18.40 -// Class for all pointer registers except RAX and RSP
   18.41 -reg_class ptr_no_rax_reg(RDX, RDX_H,
   18.42 -                         RBP, RBP_H,
   18.43 +// Empty register class.
   18.44 +reg_class no_reg();
   18.45 +
   18.46 +// Class for all pointer registers (including RSP and RBP)
   18.47 +reg_class any_reg_with_rbp(RAX, RAX_H,
   18.48 +                           RDX, RDX_H,
   18.49 +                           RBP, RBP_H,               
   18.50 +                           RDI, RDI_H,
   18.51 +                           RSI, RSI_H,
   18.52 +                           RCX, RCX_H,
   18.53 +                           RBX, RBX_H,
   18.54 +                           RSP, RSP_H,
   18.55 +                           R8,  R8_H,
   18.56 +                           R9,  R9_H,
   18.57 +                           R10, R10_H,
   18.58 +                           R11, R11_H,
   18.59 +                           R12, R12_H,
   18.60 +                           R13, R13_H,
   18.61 +                           R14, R14_H,
   18.62 +                           R15, R15_H);
   18.63 +
   18.64 +// Class for all pointer registers (including RSP, but excluding RBP)
   18.65 +reg_class any_reg_no_rbp(RAX, RAX_H,
   18.66 +                         RDX, RDX_H,                
   18.67 +                         RDI, RDI_H,
   18.68 +                         RSI, RSI_H,
   18.69 +                         RCX, RCX_H,
   18.70 +                         RBX, RBX_H,
   18.71 +                         RSP, RSP_H,
   18.72 +                         R8,  R8_H,
   18.73 +                         R9,  R9_H,
   18.74 +                         R10, R10_H,
   18.75 +                         R11, R11_H,
   18.76 +                         R12, R12_H,
   18.77 +                         R13, R13_H,
   18.78 +                         R14, R14_H,
   18.79 +                         R15, R15_H);
   18.80 +
   18.81 +// Dynamic register class that selects at runtime between register classes
   18.82 +// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). 
   18.83 +// Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
   18.84 +reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
   18.85 +                  
   18.86 +// Class for all pointer registers (excluding RSP)
   18.87 +reg_class ptr_reg_with_rbp(RAX, RAX_H,
   18.88 +                           RDX, RDX_H,
   18.89 +                           RBP, RBP_H,
   18.90 +                           RDI, RDI_H,
   18.91 +                           RSI, RSI_H,
   18.92 +                           RCX, RCX_H,
   18.93 +                           RBX, RBX_H,
   18.94 +                           R8,  R8_H,
   18.95 +                           R9,  R9_H,
   18.96 +                           R10, R10_H,
   18.97 +                           R11, R11_H,
   18.98 +                           R13, R13_H,
   18.99 +                           R14, R14_H);
  18.100 +
  18.101 +// Class for all pointer registers (excluding RSP and RBP)
  18.102 +reg_class ptr_reg_no_rbp(RAX, RAX_H,
  18.103 +                         RDX, RDX_H,                         
  18.104                           RDI, RDI_H,
  18.105                           RSI, RSI_H,
  18.106                           RCX, RCX_H,
  18.107 @@ -213,31 +238,66 @@
  18.108                           R13, R13_H,
  18.109                           R14, R14_H);
  18.110  
  18.111 -reg_class ptr_no_rbp_reg(RDX, RDX_H,
  18.112 -                         RAX, RAX_H,
  18.113 -                         RDI, RDI_H,
  18.114 -                         RSI, RSI_H,
  18.115 -                         RCX, RCX_H,
  18.116 -                         RBX, RBX_H,
  18.117 -                         R8,  R8_H,
  18.118 -                         R9,  R9_H,
  18.119 -                         R10, R10_H,
  18.120 -                         R11, R11_H,
  18.121 -                         R13, R13_H,
  18.122 -                         R14, R14_H);
  18.123 -
  18.124 -// Class for all pointer registers except RAX, RBX and RSP
  18.125 -reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
  18.126 -                             RBP, RBP_H,
  18.127 -                             RDI, RDI_H,
  18.128 -                             RSI, RSI_H,
  18.129 -                             RCX, RCX_H,
  18.130 -                             R8,  R8_H,
  18.131 -                             R9,  R9_H,
  18.132 -                             R10, R10_H,
  18.133 -                             R11, R11_H,
  18.134 -                             R13, R13_H,
  18.135 -                             R14, R14_H);
  18.136 +// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
  18.137 +reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
  18.138 +
  18.139 +// Class for all pointer registers (excluding RAX and RSP)
  18.140 +reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
  18.141 +                                  RBP, RBP_H,
  18.142 +                                  RDI, RDI_H,
  18.143 +                                  RSI, RSI_H,
  18.144 +                                  RCX, RCX_H,
  18.145 +                                  RBX, RBX_H,
  18.146 +                                  R8,  R8_H,
  18.147 +                                  R9,  R9_H,
  18.148 +                                  R10, R10_H,
  18.149 +                                  R11, R11_H,
  18.150 +                                  R13, R13_H,
  18.151 +                                  R14, R14_H);
  18.152 +
  18.153 +// Class for all pointer registers (excluding RAX, RSP, and RBP)
  18.154 +reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
  18.155 +                                RDI, RDI_H,
  18.156 +                                RSI, RSI_H,
  18.157 +                                RCX, RCX_H,
  18.158 +                                RBX, RBX_H,
  18.159 +                                R8,  R8_H,
  18.160 +                                R9,  R9_H,
  18.161 +                                R10, R10_H,
  18.162 +                                R11, R11_H,
  18.163 +                                R13, R13_H,
  18.164 +                                R14, R14_H);
  18.165 +
  18.166 +// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
  18.167 +reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
  18.168 +
  18.169 +// Class for all pointer registers (excluding RAX, RBX, and RSP)
  18.170 +reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
  18.171 +                                      RBP, RBP_H,
  18.172 +                                      RDI, RDI_H,
  18.173 +                                      RSI, RSI_H,
  18.174 +                                      RCX, RCX_H,
  18.175 +                                      R8,  R8_H,
  18.176 +                                      R9,  R9_H,
  18.177 +                                      R10, R10_H,
  18.178 +                                      R11, R11_H,
  18.179 +                                      R13, R13_H,
  18.180 +                                      R14, R14_H);
  18.181 +
  18.182 +// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
  18.183 +reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
  18.184 +                                    RDI, RDI_H,
  18.185 +                                    RSI, RSI_H,
  18.186 +                                    RCX, RCX_H,
  18.187 +                                    R8,  R8_H,
  18.188 +                                    R9,  R9_H,
  18.189 +                                    R10, R10_H,
  18.190 +                                    R11, R11_H,
  18.191 +                                    R13, R13_H,
  18.192 +                                    R14, R14_H);
  18.193 +
  18.194 +// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
  18.195 +reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
  18.196  
  18.197  // Singleton class for RAX pointer register
  18.198  reg_class ptr_rax_reg(RAX, RAX_H);
  18.199 @@ -251,59 +311,29 @@
  18.200  // Singleton class for RDI pointer register
  18.201  reg_class ptr_rdi_reg(RDI, RDI_H);
  18.202  
  18.203 -// Singleton class for RBP pointer register
  18.204 -reg_class ptr_rbp_reg(RBP, RBP_H);
  18.205 -
  18.206  // Singleton class for stack pointer
  18.207  reg_class ptr_rsp_reg(RSP, RSP_H);
  18.208  
  18.209  // Singleton class for TLS pointer
  18.210  reg_class ptr_r15_reg(R15, R15_H);
  18.211  
  18.212 -// Class for all long registers (except RSP)
  18.213 -reg_class long_reg(RAX, RAX_H,
  18.214 -                   RDX, RDX_H,
  18.215 -                   RBP, RBP_H,
  18.216 -                   RDI, RDI_H,
  18.217 -                   RSI, RSI_H,
  18.218 -                   RCX, RCX_H,
  18.219 -                   RBX, RBX_H,
  18.220 -                   R8,  R8_H,
  18.221 -                   R9,  R9_H,
  18.222 -                   R10, R10_H,
  18.223 -                   R11, R11_H,
  18.224 -                   R13, R13_H,
  18.225 -                   R14, R14_H);
  18.226 -
  18.227 -// Class for all long registers except RAX, RDX (and RSP)
  18.228 -reg_class long_no_rax_rdx_reg(RBP, RBP_H,
  18.229 -                              RDI, RDI_H,
  18.230 -                              RSI, RSI_H,
  18.231 -                              RCX, RCX_H,
  18.232 -                              RBX, RBX_H,
  18.233 -                              R8,  R8_H,
  18.234 -                              R9,  R9_H,
  18.235 -                              R10, R10_H,
  18.236 -                              R11, R11_H,
  18.237 -                              R13, R13_H,
  18.238 -                              R14, R14_H);
  18.239 -
  18.240 -// Class for all long registers except RCX (and RSP)
  18.241 -reg_class long_no_rcx_reg(RBP, RBP_H,
  18.242 -                          RDI, RDI_H,
  18.243 -                          RSI, RSI_H,
  18.244 -                          RAX, RAX_H,
  18.245 -                          RDX, RDX_H,
  18.246 -                          RBX, RBX_H,
  18.247 -                          R8,  R8_H,
  18.248 -                          R9,  R9_H,
  18.249 -                          R10, R10_H,
  18.250 -                          R11, R11_H,
  18.251 -                          R13, R13_H,
  18.252 -                          R14, R14_H);
  18.253 -
  18.254 -// Class for all long registers except RAX (and RSP)
  18.255 -reg_class long_no_rax_reg(RBP, RBP_H,
  18.256 +// Class for all long registers (excluding RSP)
  18.257 +reg_class long_reg_with_rbp(RAX, RAX_H,
  18.258 +                            RDX, RDX_H,
  18.259 +                            RBP, RBP_H,
  18.260 +                            RDI, RDI_H,
  18.261 +                            RSI, RSI_H,
  18.262 +                            RCX, RCX_H,
  18.263 +                            RBX, RBX_H,
  18.264 +                            R8,  R8_H,
  18.265 +                            R9,  R9_H,
  18.266 +                            R10, R10_H,
  18.267 +                            R11, R11_H,
  18.268 +                            R13, R13_H,
  18.269 +                            R14, R14_H);
  18.270 +
  18.271 +// Class for all long registers (excluding RSP and RBP)
  18.272 +reg_class long_reg_no_rbp(RAX, RAX_H,
  18.273                            RDX, RDX_H,
  18.274                            RDI, RDI_H,
  18.275                            RSI, RSI_H,
  18.276 @@ -316,6 +346,67 @@
  18.277                            R13, R13_H,
  18.278                            R14, R14_H);
  18.279  
  18.280 +// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
  18.281 +reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
  18.282 +
  18.283 +// Class for all long registers (excluding RAX, RDX and RSP)
  18.284 +reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
  18.285 +                                       RDI, RDI_H,
  18.286 +                                       RSI, RSI_H,
  18.287 +                                       RCX, RCX_H,
  18.288 +                                       RBX, RBX_H,
  18.289 +                                       R8,  R8_H,
  18.290 +                                       R9,  R9_H,
  18.291 +                                       R10, R10_H,
  18.292 +                                       R11, R11_H,
  18.293 +                                       R13, R13_H,
  18.294 +                                       R14, R14_H);
  18.295 +
  18.296 +// Class for all long registers (excluding RAX, RDX, RSP, and RBP)
  18.297 +reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
  18.298 +                                     RSI, RSI_H,
  18.299 +                                     RCX, RCX_H,
  18.300 +                                     RBX, RBX_H,
  18.301 +                                     R8,  R8_H,
  18.302 +                                     R9,  R9_H,
  18.303 +                                     R10, R10_H,
  18.304 +                                     R11, R11_H,
  18.305 +                                     R13, R13_H,
  18.306 +                                     R14, R14_H);
  18.307 +
  18.308 +// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
  18.309 +reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
  18.310 +
  18.311 +// Class for all long registers (excluding RCX and RSP)
  18.312 +reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
  18.313 +                                   RDI, RDI_H,
  18.314 +                                   RSI, RSI_H,
  18.315 +                                   RAX, RAX_H,
  18.316 +                                   RDX, RDX_H,
  18.317 +                                   RBX, RBX_H,
  18.318 +                                   R8,  R8_H,
  18.319 +                                   R9,  R9_H,
  18.320 +                                   R10, R10_H,
  18.321 +                                   R11, R11_H,
  18.322 +                                   R13, R13_H,
  18.323 +                                   R14, R14_H);
  18.324 +
  18.325 +// Class for all long registers (excluding RCX, RSP, and RBP)
  18.326 +reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
  18.327 +                                 RSI, RSI_H,
  18.328 +                                 RAX, RAX_H,
  18.329 +                                 RDX, RDX_H,
  18.330 +                                 RBX, RBX_H,
  18.331 +                                 R8,  R8_H,
  18.332 +                                 R9,  R9_H,
  18.333 +                                 R10, R10_H,
  18.334 +                                 R11, R11_H,
  18.335 +                                 R13, R13_H,
  18.336 +                                 R14, R14_H);
  18.337 +
  18.338 +// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
  18.339 +reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
  18.340 +
  18.341  // Singleton class for RAX long register
  18.342  reg_class long_rax_reg(RAX, RAX_H);
  18.343  
  18.344 @@ -325,27 +416,27 @@
  18.345  // Singleton class for RDX long register
  18.346  reg_class long_rdx_reg(RDX, RDX_H);
  18.347  
  18.348 -// Class for all int registers (except RSP)
  18.349 -reg_class int_reg(RAX,
  18.350 -                  RDX,
  18.351 -                  RBP,
  18.352 -                  RDI,
  18.353 -                  RSI,
  18.354 -                  RCX,
  18.355 -                  RBX,
  18.356 -                  R8,
  18.357 -                  R9,
  18.358 -                  R10,
  18.359 -                  R11,
  18.360 -                  R13,
  18.361 -                  R14);
  18.362 -
  18.363 -// Class for all int registers except RCX (and RSP)
  18.364 -reg_class int_no_rcx_reg(RAX,
  18.365 +// Class for all int registers (excluding RSP)
  18.366 +reg_class int_reg_with_rbp(RAX,
  18.367 +                           RDX,
  18.368 +                           RBP,
  18.369 +                           RDI,
  18.370 +                           RSI,
  18.371 +                           RCX,
  18.372 +                           RBX,
  18.373 +                           R8,
  18.374 +                           R9,
  18.375 +                           R10,
  18.376 +                           R11,
  18.377 +                           R13,
  18.378 +                           R14);
  18.379 +
  18.380 +// Class for all int registers (excluding RSP and RBP)
  18.381 +reg_class int_reg_no_rbp(RAX,
  18.382                           RDX,
  18.383 -                         RBP,
  18.384                           RDI,
  18.385                           RSI,
  18.386 +                         RCX,
  18.387                           RBX,
  18.388                           R8,
  18.389                           R9,
  18.390 @@ -354,18 +445,66 @@
  18.391                           R13,
  18.392                           R14);
  18.393  
  18.394 -// Class for all int registers except RAX, RDX (and RSP)
  18.395 -reg_class int_no_rax_rdx_reg(RBP,
  18.396 -                             RDI,
  18.397 -                             RSI,
  18.398 -                             RCX,
  18.399 -                             RBX,
  18.400 -                             R8,
  18.401 -                             R9,
  18.402 -                             R10,
  18.403 -                             R11,
  18.404 -                             R13,
  18.405 -                             R14);
  18.406 +// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
  18.407 +reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
  18.408 +
  18.409 +// Class for all int registers (excluding RCX and RSP)
  18.410 +reg_class int_no_rcx_reg_with_rbp(RAX,
  18.411 +                                  RDX,
  18.412 +                                  RBP,
  18.413 +                                  RDI,
  18.414 +                                  RSI,
  18.415 +                                  RBX,
  18.416 +                                  R8,
  18.417 +                                  R9,
  18.418 +                                  R10,
  18.419 +                                  R11,
  18.420 +                                  R13,
  18.421 +                                  R14);
  18.422 +
  18.423 +// Class for all int registers (excluding RCX, RSP, and RBP)
  18.424 +reg_class int_no_rcx_reg_no_rbp(RAX,
  18.425 +                                RDX,
  18.426 +                                RDI,
  18.427 +                                RSI,
  18.428 +                                RBX,
  18.429 +                                R8,
  18.430 +                                R9,
  18.431 +                                R10,
  18.432 +                                R11,
  18.433 +                                R13,
  18.434 +                                R14);
  18.435 +
  18.436 +// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
  18.437 +reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
  18.438 +
  18.439 +// Class for all int registers (excluding RAX, RDX, and RSP)
  18.440 +reg_class int_no_rax_rdx_reg_with_rbp(RBP,
  18.441 +                                      RDI,
  18.442 +                                      RSI,
  18.443 +                                      RCX,
  18.444 +                                      RBX,
  18.445 +                                      R8,
  18.446 +                                      R9,
  18.447 +                                      R10,
  18.448 +                                      R11,
  18.449 +                                      R13,
  18.450 +                                      R14);
  18.451 +
  18.452 +// Class for all int registers (excluding RAX, RDX, RSP, and RBP)
  18.453 +reg_class int_no_rax_rdx_reg_no_rbp(RDI,
  18.454 +                                    RSI,
  18.455 +                                    RCX,
  18.456 +                                    RBX,
  18.457 +                                    R8,
  18.458 +                                    R9,
  18.459 +                                    R10,
  18.460 +                                    R11,
  18.461 +                                    R13,
  18.462 +                                    R14);
  18.463 +
  18.464 +// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
  18.465 +reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
  18.466  
  18.467  // Singleton class for RAX int register
  18.468  reg_class int_rax_reg(RAX);
  18.469 @@ -396,9 +535,6 @@
  18.470  
  18.471  #define __ _masm.
  18.472  
  18.473 -static int preserve_SP_size() {
  18.474 -  return 3;  // rex.w, op, rm(reg/reg)
  18.475 -}
  18.476  static int clear_avx_size() {
  18.477    return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
  18.478  }
  18.479 @@ -409,9 +545,7 @@
  18.480  int MachCallStaticJavaNode::ret_addr_offset()
  18.481  {
  18.482    int offset = 5; // 5 bytes from start of call to where return address points
  18.483 -  offset += clear_avx_size();
  18.484 -  if (_method_handle_invoke)
  18.485 -    offset += preserve_SP_size();
  18.486 +  offset += clear_avx_size();  
  18.487    return offset;
  18.488  }
  18.489  
  18.490 @@ -450,16 +584,6 @@
  18.491  
  18.492  // The address of the call instruction needs to be 4-byte aligned to
  18.493  // ensure that it does not span a cache line so that it can be patched.
  18.494 -int CallStaticJavaHandleNode::compute_padding(int current_offset) const
  18.495 -{
  18.496 -  current_offset += preserve_SP_size();   // skip mov rbp, rsp
  18.497 -  current_offset += clear_avx_size(); // skip vzeroupper
  18.498 -  current_offset += 1; // skip call opcode byte
  18.499 -  return round_to(current_offset, alignment_required()) - current_offset;
  18.500 -}
  18.501 -
  18.502 -// The address of the call instruction needs to be 4-byte aligned to
  18.503 -// ensure that it does not span a cache line so that it can be patched.
  18.504  int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  18.505  {
  18.506    current_offset += clear_avx_size(); // skip vzeroupper
  18.507 @@ -724,6 +848,10 @@
  18.508      st->print("# stack bang (%d bytes)", bangsize);
  18.509      st->print("\n\t");
  18.510      st->print("pushq   rbp\t# Save rbp");
  18.511 +    if (PreserveFramePointer) {
  18.512 +        st->print("\n\t");
  18.513 +        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  18.514 +    }
  18.515      if (framesize) {
  18.516        st->print("\n\t");
  18.517        st->print("subq    rsp, #%d\t# Create frame",framesize);
  18.518 @@ -732,7 +860,11 @@
  18.519      st->print("subq    rsp, #%d\t# Create frame",framesize);
  18.520      st->print("\n\t");
  18.521      framesize -= wordSize;
  18.522 -    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  18.523 +    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);    
  18.524 +    if (PreserveFramePointer) {
  18.525 +      st->print("\n\t");
  18.526 +      st->print("movq    rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize));
  18.527 +    }
  18.528    }
  18.529  
  18.530    if (VerifyStackAtCalls) {
  18.531 @@ -1598,8 +1730,9 @@
  18.532    return LONG_RDX_REG_mask();
  18.533  }
  18.534  
  18.535 +// Register mask for saving SP on method handle invokes. Not used on x86_64.
  18.536  const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  18.537 -  return PTR_RBP_REG_mask();
  18.538 +    return NO_REG_mask();
  18.539  }
  18.540  
  18.541  %}
  18.542 @@ -3202,7 +3335,7 @@
  18.543  // Pointer Register
  18.544  operand any_RegP()
  18.545  %{
  18.546 -  constraint(ALLOC_IN_RC(any_reg));
  18.547 +  constraint(ALLOC_IN_RC(any_reg));  
  18.548    match(RegP);
  18.549    match(rax_RegP);
  18.550    match(rbx_RegP);
  18.551 @@ -3224,8 +3357,8 @@
  18.552    match(rbx_RegP);
  18.553    match(rdi_RegP);
  18.554    match(rsi_RegP);
  18.555 -  match(rbp_RegP);
  18.556 -  match(r15_RegP);  // See Q&A below about r15_RegP.
  18.557 +  match(rbp_RegP);  // See Q&A below about
  18.558 +  match(r15_RegP);  // r15_RegP and rbp_RegP.
  18.559  
  18.560    format %{ %}
  18.561    interface(REG_INTER);
  18.562 @@ -3241,11 +3374,14 @@
  18.563  
  18.564  // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
  18.565  // Answer: Operand match rules govern the DFA as it processes instruction inputs.
  18.566 -// It's fine for an instruction input which expects rRegP to match a r15_RegP.
  18.567 +// It's fine for an instruction input that expects rRegP to match a r15_RegP.
  18.568  // The output of an instruction is controlled by the allocator, which respects
  18.569  // register class masks, not match rules.  Unless an instruction mentions
  18.570  // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
  18.571  // by the allocator as an input.
  18.572 +// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
  18.573 +// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
  18.574 +// result, RBP is not used as the output of an instruction either.
  18.575  
  18.576  operand no_rax_RegP()
  18.577  %{
  18.578 @@ -3259,9 +3395,11 @@
  18.579    interface(REG_INTER);
  18.580  %}
  18.581  
  18.582 +// This operand is not allowed to use RBP even if
  18.583 +// RBP is not used to hold the frame pointer.
  18.584  operand no_rbp_RegP()
  18.585  %{
  18.586 -  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
  18.587 +  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  18.588    match(RegP);
  18.589    match(rbx_RegP);
  18.590    match(rsi_RegP);
  18.591 @@ -3338,16 +3476,6 @@
  18.592    interface(REG_INTER);
  18.593  %}
  18.594  
  18.595 -operand rbp_RegP()
  18.596 -%{
  18.597 -  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  18.598 -  match(RegP);
  18.599 -  match(rRegP);
  18.600 -
  18.601 -  format %{ %}
  18.602 -  interface(REG_INTER);
  18.603 -%}
  18.604 -
  18.605  operand r15_RegP()
  18.606  %{
  18.607    constraint(ALLOC_IN_RC(ptr_r15_reg));
  18.608 @@ -11414,7 +11542,6 @@
  18.609  //       compute_padding() functions will have to be adjusted.
  18.610  instruct CallStaticJavaDirect(method meth) %{
  18.611    match(CallStaticJava);
  18.612 -  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
  18.613    effect(USE meth);
  18.614  
  18.615    ins_cost(300);
  18.616 @@ -11425,27 +11552,6 @@
  18.617    ins_alignment(4);
  18.618  %}
  18.619  
  18.620 -// Call Java Static Instruction (method handle version)
  18.621 -// Note: If this code changes, the corresponding ret_addr_offset() and
  18.622 -//       compute_padding() functions will have to be adjusted.
  18.623 -instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
  18.624 -  match(CallStaticJava);
  18.625 -  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
  18.626 -  effect(USE meth);
  18.627 -  // RBP is saved by all callees (for interpreter stack correction).
  18.628 -  // We use it here for a similar purpose, in {preserve,restore}_SP.
  18.629 -
  18.630 -  ins_cost(300);
  18.631 -  format %{ "call,static/MethodHandle " %}
  18.632 -  opcode(0xE8); /* E8 cd */
  18.633 -  ins_encode(clear_avx, preserve_SP,
  18.634 -             Java_Static_Call(meth),
  18.635 -             restore_SP,
  18.636 -             call_epilog);
  18.637 -  ins_pipe(pipe_slow);
  18.638 -  ins_alignment(4);
  18.639 -%}
  18.640 -
  18.641  // Call Java Dynamic Instruction
  18.642  // Note: If this code changes, the corresponding ret_addr_offset() and
  18.643  //       compute_padding() functions will have to be adjusted.
    19.1 --- a/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Mar 27 10:57:42 2015 +0100
    19.2 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri May 29 10:58:45 2015 +0200
    19.3 @@ -4064,7 +4064,7 @@
    19.4        ValueType* type = apop()->type();
    19.5        if (type->is_constant()) {
    19.6          ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
    19.7 -        // If the target is another method handle invoke try recursivly to get
    19.8 +        // If the target is another method handle invoke, try to recursively get
    19.9          // a better target.
   19.10          if (target->is_method_handle_intrinsic()) {
   19.11            if (try_method_handle_inline(target)) {
    20.1 --- a/src/share/vm/c1/c1_LIR.cpp	Fri Mar 27 10:57:42 2015 +0100
    20.2 +++ b/src/share/vm/c1/c1_LIR.cpp	Fri May 29 10:58:45 2015 +0200
    20.3 @@ -454,7 +454,7 @@
    20.4  //-------------------visits--------------------------
    20.5  
    20.6  // complete rework of LIR instruction visitor.
    20.7 -// The virtual calls for each instruction type is replaced by a big
    20.8 +// The virtual call for each instruction type is replaced by a big
    20.9  // switch that adds the operands for each instruction
   20.10  
   20.11  void LIR_OpVisitState::visit(LIR_Op* op) {
   20.12 @@ -823,7 +823,8 @@
   20.13        }
   20.14  
   20.15        if (opJavaCall->_info)                     do_info(opJavaCall->_info);
   20.16 -      if (opJavaCall->is_method_handle_invoke()) {
   20.17 +      if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr &&
   20.18 +          opJavaCall->is_method_handle_invoke()) {
   20.19          opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr();
   20.20          do_temp(opJavaCall->_method_handle_invoke_SP_save_opr);
   20.21        }
    21.1 --- a/src/share/vm/c1/c1_LIR.hpp	Fri Mar 27 10:57:42 2015 +0100
    21.2 +++ b/src/share/vm/c1/c1_LIR.hpp	Fri May 29 10:58:45 2015 +0200
    21.3 @@ -1216,10 +1216,8 @@
    21.4    // JSR 292 support.
    21.5    bool is_invokedynamic() const                  { return code() == lir_dynamic_call; }
    21.6    bool is_method_handle_invoke() const {
    21.7 -    return
    21.8 -      method()->is_compiled_lambda_form()  // Java-generated adapter
    21.9 -      ||
   21.10 -      method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   21.11 +    return method()->is_compiled_lambda_form() ||   // Java-generated lambda form
   21.12 +           method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   21.13    }
   21.14  
   21.15    intptr_t vtable_offset() const {
    22.1 --- a/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Mar 27 10:57:42 2015 +0100
    22.2 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri May 29 10:58:45 2015 +0200
    22.3 @@ -2887,7 +2887,7 @@
    22.4  //   g) lock result registers and emit call operation
    22.5  //
    22.6  // Before issuing a call, we must spill-save all values on stack
    22.7 -// that are in caller-save register. "spill-save" moves thos registers
    22.8 +// that are in caller-save register. "spill-save" moves those registers
    22.9  // either in a free callee-save register or spills them if no free
   22.10  // callee save register is available.
   22.11  //
   22.12 @@ -2895,7 +2895,7 @@
   22.13  // - if invoked between e) and f), we may lock callee save
   22.14  //   register in "spill-save" that destroys the receiver register
   22.15  //   before f) is executed
   22.16 -// - if we rearange the f) to be earlier, by loading %o0, it
   22.17 +// - if we rearrange f) to be earlier (by loading %o0) it
   22.18  //   may destroy a value on the stack that is currently in %o0
   22.19  //   and is waiting to be spilled
   22.20  // - if we keep the receiver locked while doing spill-save,
   22.21 @@ -2928,14 +2928,16 @@
   22.22    assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
   22.23  
   22.24    // JSR 292
   22.25 -  // Preserve the SP over MethodHandle call sites.
   22.26 +  // Preserve the SP over MethodHandle call sites, if needed.
   22.27    ciMethod* target = x->target();
   22.28    bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
   22.29                                    target->is_method_handle_intrinsic() ||
   22.30                                    target->is_compiled_lambda_form());
   22.31    if (is_method_handle_invoke) {
   22.32      info->set_is_method_handle_invoke(true);
   22.33 -    __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
   22.34 +    if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
   22.35 +        __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
   22.36 +    }
   22.37    }
   22.38  
   22.39    switch (x->code()) {
   22.40 @@ -2975,8 +2977,9 @@
   22.41    }
   22.42  
   22.43    // JSR 292
   22.44 -  // Restore the SP after MethodHandle call sites.
   22.45 -  if (is_method_handle_invoke) {
   22.46 +  // Restore the SP after MethodHandle call sites, if needed.
   22.47 +  if (is_method_handle_invoke
   22.48 +      && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
   22.49      __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
   22.50    }
   22.51  
    23.1 --- a/src/share/vm/opto/bytecodeInfo.cpp	Fri Mar 27 10:57:42 2015 +0100
    23.2 +++ b/src/share/vm/opto/bytecodeInfo.cpp	Fri May 29 10:58:45 2015 +0200
    23.3 @@ -608,11 +608,11 @@
    23.4    }
    23.5    int max_inline_level_adjust = 0;
    23.6    if (caller_jvms->method() != NULL) {
    23.7 -    if (caller_jvms->method()->is_compiled_lambda_form())
    23.8 +    if (caller_jvms->method()->is_compiled_lambda_form()) {
    23.9        max_inline_level_adjust += 1;  // don't count actions in MH or indy adapter frames
   23.10 -    else if (callee_method->is_method_handle_intrinsic() ||
   23.11 -             callee_method->is_compiled_lambda_form()) {
   23.12 -      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implem
   23.13 +    } else if (callee_method->is_method_handle_intrinsic() ||
   23.14 +               callee_method->is_compiled_lambda_form()) {
   23.15 +      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implementation
   23.16      }
   23.17      if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) {
   23.18        CompileTask::print_inline_indent(inline_level());
    24.1 --- a/src/share/vm/prims/forte.cpp	Fri Mar 27 10:57:42 2015 +0100
    24.2 +++ b/src/share/vm/prims/forte.cpp	Fri May 29 10:58:45 2015 +0200
    24.3 @@ -172,8 +172,27 @@
    24.4    // Now do we have a useful PcDesc?
    24.5    if (pc_desc == NULL ||
    24.6        pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
    24.7 -    // No debug information available for this pc
    24.8 -    // vframeStream would explode if we try and walk the frames.
    24.9 +    // No debug information is available for this PC.
   24.10 +    //
   24.11 +    // vframeStreamCommon::fill_from_frame() will decode the frame depending
   24.12 +    // on the state of the thread.
   24.13 +    //
   24.14 +    // Case #1: If the thread is in Java (state == _thread_in_Java), then
   24.15 +    // the vframeStreamCommon object will be filled as if the frame were a native
   24.16 +    // compiled frame. Therefore, no debug information is needed.
   24.17 +    //
   24.18 +    // Case #2: If the thread is in any other state, then two steps will be performed:
   24.19 +    // - if asserts are enabled, found_bad_method_frame() will be called and
   24.20 +    //   the assert in found_bad_method_frame() will be triggered;
   24.21 +    // - if asserts are disabled, the vframeStreamCommon object will be filled
   24.22 +    //   as if it were a native compiled frame.
   24.23 +    //
   24.24 +    // Case #2 is similar to the way interpreter frames are processed in
   24.25 +    // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI
   24.26 +    // was found for an interpreted frame. If asserts are enabled, the assert
   24.27 +    // in found_bad_method_frame() will be triggered. If asserts are disabled,
   24.28 +    // the vframeStreamCommon object will be filled afterwards as if the
   24.29 +    // interpreter were at the point of entering into the method.
   24.30      return false;
   24.31    }
   24.32  
   24.33 @@ -230,9 +249,10 @@
   24.34      // a valid method. Then again we may have caught an interpreter
   24.35      // frame in the middle of construction and the bci field is
   24.36      // not yet valid.
   24.37 -
   24.38 -    *method_p = method;
   24.39      if (!method->is_valid_method()) return false;
   24.40 +    *method_p = method; // If the Method* found is invalid, it is
   24.41 +                        // ignored by forte_fill_call_trace_given_top().
   24.42 +                        // So set method_p only if the Method is valid.
   24.43  
   24.44      intptr_t bcx = fr->interpreter_frame_bcx();
   24.45  
   24.46 @@ -247,18 +267,33 @@
   24.47  }
   24.48  
   24.49  
   24.50 -// Determine if 'fr' can be used to find an initial Java frame.
   24.51 -// Return false if it can not find a fully decipherable Java frame
   24.52 -// (in other words a frame that isn't safe to use in a vframe stream).
   24.53 -// Obviously if it can't even find a Java frame false will also be returned.
   24.54 +// Determine if a Java frame can be found starting with the frame 'fr'.
   24.55  //
   24.56 -// If we find a Java frame decipherable or not then by definition we have
   24.57 -// identified a method and that will be returned to the caller via method_p.
   24.58 -// If we can determine a bci that is returned also. (Hmm is it possible
   24.59 -// to return a method and bci and still return false? )
   24.60 +// Check the return value of find_initial_Java_frame and the value of
   24.61 +// 'method_p' to decide on how to use the results returned by this method.
   24.62  //
   24.63 -// The initial Java frame we find (if any) is return via initial_frame_p.
   24.64 +// If 'method_p' is not NULL, an initial Java frame has been found and
   24.65 +// the stack can be walked starting from that initial frame. In this case,
   24.66 +// 'method_p' points to the Method that the initial frame belongs to and
   24.67 +// the initial Java frame is returned in initial_frame_p.
   24.68  //
   24.69 +// find_initial_Java_frame() returns true if a Method has been found (i.e.,
   24.70 +// 'method_p' is not NULL) and the initial frame that belongs to that Method
   24.71 +// is decipherable.
   24.72 +//
   24.73 +// A frame is considered to be decipherable:
   24.74 +//
   24.75 +// - if the frame is a compiled frame and a PCDesc is available;
   24.76 +//
   24.77 +// - if the frame is an interpreter frame that is valid or the thread is in
   24.78 +//   state (_thread_in_native || _thread_in_vm || _thread_blocked).
   24.79 +//
   24.80 +// Note that find_initial_Java_frame() can return false even if an initial
   24.81 +// Java method was found (e.g., there is no PCDesc available for the method).
   24.82 +//
   24.83 +// If 'method_p' is NULL, it was not possible to find a Java frame when
   24.84 +// walking the stack starting from 'fr'. In this case find_initial_Java_frame
   24.85 +// returns false.
   24.86  
   24.87  static bool find_initial_Java_frame(JavaThread* thread,
   24.88                                      frame* fr,
   24.89 @@ -278,8 +313,6 @@
   24.90    // recognizable to us. This should only happen if we are in a JRT_LEAF
   24.91    // or something called by a JRT_LEAF method.
   24.92  
   24.93 -
   24.94 -
   24.95    frame candidate = *fr;
   24.96  
   24.97    // If the starting frame we were given has no codeBlob associated with
   24.98 @@ -334,9 +367,11 @@
   24.99        nmethod* nm = (nmethod*) candidate.cb();
  24.100        *method_p = nm->method();
  24.101  
  24.102 -      // If the frame isn't fully decipherable then the default
  24.103 -      // value for the bci is a signal that we don't have a bci.
  24.104 -      // If we have a decipherable frame this bci value will
  24.105 +      // If the frame is not decipherable, then the value of -1
  24.106 +      // for the BCI is used to signal that no BCI is available.
  24.107 +      // Furthermore, the method returns false in this case.
  24.108 +      //
  24.109 +      // If a decipherable frame is available, the BCI value will
  24.110        // not be used.
  24.111  
  24.112        *bci_p = -1;
  24.113 @@ -347,9 +382,9 @@
  24.114  
  24.115        if (nm->is_native_method()) return true;
  24.116  
  24.117 -      // If it isn't decipherable then we have found a pc that doesn't
  24.118 -      // have a PCDesc that can get us a bci however we did find
  24.119 -      // a method
  24.120 +      // If the frame is not decipherable, then a PC was found
  24.121 +      // that does not have a PCDesc from which a BCI can be obtained.
  24.122 +      // Nevertheless, a Method was found.
  24.123  
  24.124        if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
  24.125          return false;
  24.126 @@ -358,7 +393,7 @@
  24.127        // is_decipherable_compiled_frame may modify candidate's pc
  24.128        *initial_frame_p = candidate;
  24.129  
  24.130 -      assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
  24.131 +      assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable");
  24.132  
  24.133        return true;
  24.134      }
  24.135 @@ -388,17 +423,17 @@
  24.136  
  24.137    frame initial_Java_frame;
  24.138    Method* method;
  24.139 -  int bci;
  24.140 +  int bci = -1; // assume BCI is not available for method
  24.141 +                // update with correct information if available
  24.142    int count;
  24.143  
  24.144    count = 0;
  24.145    assert(trace->frames != NULL, "trace->frames must be non-NULL");
  24.146  
  24.147 -  bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
  24.148 +  // Walk the stack starting from 'top_frame' and search for an initial Java frame.
  24.149 +  find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
  24.150  
  24.151 -  // The frame might not be walkable but still recovered a method
  24.152 -  // (e.g. an nmethod with no scope info for the pc)
  24.153 -
  24.154 +  // Check if a Java Method has been found.
  24.155    if (method == NULL) return;
  24.156  
  24.157    if (!method->is_valid_method()) {
  24.158 @@ -406,29 +441,6 @@
  24.159      return;
  24.160    }
  24.161  
  24.162 -  // We got a Java frame however it isn't fully decipherable
  24.163 -  // so it won't necessarily be safe to use it for the
  24.164 -  // initial frame in the vframe stream.
  24.165 -
  24.166 -  if (!fully_decipherable) {
  24.167 -    // Take whatever method the top-frame decoder managed to scrape up.
  24.168 -    // We look further at the top frame only if non-safepoint
  24.169 -    // debugging information is available.
  24.170 -    count++;
  24.171 -    trace->num_frames = count;
  24.172 -    trace->frames[0].method_id = method->find_jmethod_id_or_null();
  24.173 -    if (!method->is_native()) {
  24.174 -      trace->frames[0].lineno = bci;
  24.175 -    } else {
  24.176 -      trace->frames[0].lineno = -3;
  24.177 -    }
  24.178 -
  24.179 -    if (!initial_Java_frame.safe_for_sender(thd)) return;
  24.180 -
  24.181 -    RegisterMap map(thd, false);
  24.182 -    initial_Java_frame = initial_Java_frame.sender(&map);
  24.183 -  }
  24.184 -
  24.185    vframeStreamForte st(thd, initial_Java_frame, false);
  24.186  
  24.187    for (; !st.at_end() && count < depth; st.forte_next(), count++) {
    25.1 --- a/src/share/vm/runtime/globals.hpp	Fri Mar 27 10:57:42 2015 +0100
    25.2 +++ b/src/share/vm/runtime/globals.hpp	Fri May 29 10:58:45 2015 +0200
    25.3 @@ -3940,7 +3940,11 @@
    25.4            "Enable event-based tracing")                                     \
    25.5                                                                              \
    25.6    product(bool, UseLockedTracing, false,                                    \
    25.7 -          "Use locked-tracing when doing event-based tracing")
    25.8 +          "Use locked-tracing when doing event-based tracing")              \
    25.9 +                                                                            \
   25.10 +  product_pd(bool, PreserveFramePointer,                                    \
   25.11 +             "Use the FP register for holding the frame pointer "           \
   25.12 +             "and not as a general purpose register.")
   25.13  
   25.14  /*
   25.15   *  Macros for factoring of globals
    26.1 --- a/src/share/vm/runtime/sharedRuntime.cpp	Fri Mar 27 10:57:42 2015 +0100
    26.2 +++ b/src/share/vm/runtime/sharedRuntime.cpp	Fri May 29 10:58:45 2015 +0200
    26.3 @@ -1230,7 +1230,7 @@
    26.4  #endif
    26.5  
    26.6    // JSR 292 key invariant:
    26.7 -  // If the resolved method is a MethodHandle invoke target the call
    26.8 +  // If the resolved method is a MethodHandle invoke target, the call
    26.9    // site must be a MethodHandle call site, because the lambda form might tail-call
   26.10    // leaving the stack in a state unknown to either caller or callee
   26.11    // TODO detune for now but we might need it again
    27.1 --- a/src/share/vm/runtime/vframe.hpp	Fri Mar 27 10:57:42 2015 +0100
    27.2 +++ b/src/share/vm/runtime/vframe.hpp	Fri May 29 10:58:45 2015 +0200
    27.3 @@ -389,12 +389,12 @@
    27.4        decode_offset < 0 ||
    27.5        decode_offset >= nm()->scopes_data_size()) {
    27.6      // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
    27.7 -    // If we attempt to read nmethod::scopes_data at serialized_null (== 0),
    27.8 -    // or if we read some at other crazy offset,
    27.9 -    // we will decode garbage and make wild references into the heap,
   27.10 -    // leading to crashes in product mode.
   27.11 -    // (This isn't airtight, of course, since there are internal
   27.12 -    // offsets which are also crazy.)
   27.13 +    // If we read nmethod::scopes_data at serialized_null (== 0)
   27.14 +    // or if we read at some other invalid offset, invalid values will be decoded.
   27.15 +    // Based on these values, invalid heap locations could be referenced
   27.16 +    // that could lead to crashes in product mode.
   27.17 +    // Therefore, do not use the decode offset if invalid, but fill the frame
   27.18 +    // as if it were a native compiled frame (no Java-level assumptions).
   27.19  #ifdef ASSERT
   27.20      if (WizardMode) {
   27.21        tty->print_cr("Error in fill_from_frame: pc_desc for "
   27.22 @@ -514,9 +514,15 @@
   27.23    intptr_t  bcx    = _frame.interpreter_frame_bcx();
   27.24    int       bci    = method->validate_bci_from_bcx(bcx);
   27.25    // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
   27.26 +  // AsyncGetCallTrace interrupts the VM asynchronously. As a result
   27.27 +  // it is possible to access an interpreter frame for which
   27.28 +  // no Java-level information is yet available (e.g., because
   27.29 +  // the frame was being created when the VM interrupted it).
   27.30 +  // In this scenario, pretend that the interpreter is at the point
   27.31 +  // of entering the method.
   27.32    if (bci < 0) {
   27.33      found_bad_method_frame();
   27.34 -    bci = 0;  // pretend it's on the point of entering
   27.35 +    bci = 0;
   27.36    }
   27.37    _mode   = interpreted_mode;
   27.38    _method = method;

mercurial