src/share/vm/runtime/vframeArray.cpp

Fri, 22 Oct 2010 15:59:34 -0400

author
acorn
date
Fri, 22 Oct 2010 15:59:34 -0400
changeset 2233
fa83ab460c54
parent 2169
fd5d4527cdf5
child 2314
f95d63e2154a
permissions
-rw-r--r--

6988353: refactor contended sync subsystem
Summary: reduce complexity by factoring synchronizer.cpp
Reviewed-by: dholmes, never, coleenp

     1 /*
     2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 # include "incls/_precompiled.incl"
    26 # include "incls/_vframeArray.cpp.incl"
    29 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
    31 void vframeArrayElement::free_monitors(JavaThread* jt) {
    32   if (_monitors != NULL) {
    33      MonitorChunk* chunk = _monitors;
    34      _monitors = NULL;
    35      jt->remove_monitor_chunk(chunk);
    36      delete chunk;
    37   }
    38 }
    40 void vframeArrayElement::fill_in(compiledVFrame* vf) {
    42 // Copy the information from the compiled vframe to the
    43 // interpreter frame we will be creating to replace vf
    45   _method = vf->method();
    46   _bci    = vf->raw_bci();
    47   _reexecute = vf->should_reexecute();
    49   int index;
    51   // Get the monitors off-stack
    53   GrowableArray<MonitorInfo*>* list = vf->monitors();
    54   if (list->is_empty()) {
    55     _monitors = NULL;
    56   } else {
    58     // Allocate monitor chunk
    59     _monitors = new MonitorChunk(list->length());
    60     vf->thread()->add_monitor_chunk(_monitors);
    62     // Migrate the BasicLocks from the stack to the monitor chunk
    63     for (index = 0; index < list->length(); index++) {
    64       MonitorInfo* monitor = list->at(index);
    65       assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
    66       assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
    67       BasicObjectLock* dest = _monitors->at(index);
    68       dest->set_obj(monitor->owner());
    69       monitor->lock()->move_to(monitor->owner(), dest->lock());
    70     }
    71   }
    73   // Convert the vframe locals and expressions to off stack
    74   // values. Because we will not gc all oops can be converted to
    75   // intptr_t (i.e. a stack slot) and we are fine. This is
    76   // good since we are inside a HandleMark and the oops in our
    77   // collection would go away between packing them here and
    78   // unpacking them in unpack_on_stack.
    80   // First the locals go off-stack
    82   // FIXME this seems silly it creates a StackValueCollection
    83   // in order to get the size to then copy them and
    84   // convert the types to intptr_t size slots. Seems like it
    85   // could do it in place... Still uses less memory than the
    86   // old way though
    88   StackValueCollection *locs = vf->locals();
    89   _locals = new StackValueCollection(locs->size());
    90   for(index = 0; index < locs->size(); index++) {
    91     StackValue* value = locs->at(index);
    92     switch(value->type()) {
    93       case T_OBJECT:
    94         assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
    95         // preserve object type
    96         _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
    97         break;
    98       case T_CONFLICT:
    99         // A dead local.  Will be initialized to null/zero.
   100         _locals->add( new StackValue());
   101         break;
   102       case T_INT:
   103         _locals->add( new StackValue(value->get_int()));
   104         break;
   105       default:
   106         ShouldNotReachHere();
   107     }
   108   }
   110   // Now the expressions off-stack
   111   // Same silliness as above
   113   StackValueCollection *exprs = vf->expressions();
   114   _expressions = new StackValueCollection(exprs->size());
   115   for(index = 0; index < exprs->size(); index++) {
   116     StackValue* value = exprs->at(index);
   117     switch(value->type()) {
   118       case T_OBJECT:
   119         assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
   120         // preserve object type
   121         _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
   122         break;
   123       case T_CONFLICT:
   124         // A dead stack element.  Will be initialized to null/zero.
   125         // This can occur when the compiler emits a state in which stack
   126         // elements are known to be dead (because of an imminent exception).
   127         _expressions->add( new StackValue());
   128         break;
   129       case T_INT:
   130         _expressions->add( new StackValue(value->get_int()));
   131         break;
   132       default:
   133         ShouldNotReachHere();
   134     }
   135   }
   136 }
   138 int unpack_counter = 0;
   140 void vframeArrayElement::unpack_on_stack(int callee_parameters,
   141                                          int callee_locals,
   142                                          frame* caller,
   143                                          bool is_top_frame,
   144                                          int exec_mode) {
   145   JavaThread* thread = (JavaThread*) Thread::current();
   147   // Look at bci and decide on bcp and continuation pc
   148   address bcp;
   149   // C++ interpreter doesn't need a pc since it will figure out what to do when it
   150   // begins execution
   151   address pc;
   152   bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
   153                              // rather than the one associated with bcp
   154   if (raw_bci() == SynchronizationEntryBCI) {
   155     // We are deoptimizing while hanging in prologue code for synchronized method
   156     bcp = method()->bcp_from(0); // first byte code
   157     pc  = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
   158   } else if (should_reexecute()) { //reexecute this bytecode
   159     assert(is_top_frame, "reexecute allowed only for the top frame");
   160     bcp = method()->bcp_from(bci());
   161     pc  = Interpreter::deopt_reexecute_entry(method(), bcp);
   162   } else {
   163     bcp = method()->bcp_from(bci());
   164     pc  = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
   165     use_next_mdp = true;
   166   }
   167   assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
   169   // Monitorenter and pending exceptions:
   170   //
   171   // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
   172   // because there is no safepoint at the null pointer check (it is either handled explicitly
   173   // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
   174   // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER).  If an asynchronous
   175   // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
   176   // the monitorenter to place it in the proper exception range.
   177   //
   178   // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
   179   // in which case bcp should point to the monitorenter since it is within the exception's range.
   181   assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
   182   assert(thread->deopt_nmethod() != NULL, "nmethod should be known");
   183   guarantee(!(thread->deopt_nmethod()->is_compiled_by_c2() &&
   184               *bcp == Bytecodes::_monitorenter             &&
   185               exec_mode == Deoptimization::Unpack_exception),
   186             "shouldn't get exception during monitorenter");
   188   int popframe_preserved_args_size_in_bytes = 0;
   189   int popframe_preserved_args_size_in_words = 0;
   190   if (is_top_frame) {
   191     JvmtiThreadState *state = thread->jvmti_thread_state();
   192     if (JvmtiExport::can_pop_frame() &&
   193         (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
   194       if (thread->has_pending_popframe()) {
   195         // Pop top frame after deoptimization
   196 #ifndef CC_INTERP
   197         pc = Interpreter::remove_activation_preserving_args_entry();
   198 #else
   199         // Do an uncommon trap type entry. c++ interpreter will know
   200         // to pop frame and preserve the args
   201         pc = Interpreter::deopt_entry(vtos, 0);
   202         use_next_mdp = false;
   203 #endif
   204       } else {
   205         // Reexecute invoke in top frame
   206         pc = Interpreter::deopt_entry(vtos, 0);
   207         use_next_mdp = false;
   208         popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
   209         // Note: the PopFrame-related extension of the expression stack size is done in
   210         // Deoptimization::fetch_unroll_info_helper
   211         popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
   212       }
   213     } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
   214       // Force early return from top frame after deoptimization
   215 #ifndef CC_INTERP
   216       pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
   217 #else
   218      // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
   219 #endif
   220     } else {
   221       // Possibly override the previous pc computation of the top (youngest) frame
   222       switch (exec_mode) {
   223       case Deoptimization::Unpack_deopt:
   224         // use what we've got
   225         break;
   226       case Deoptimization::Unpack_exception:
   227         // exception is pending
   228         pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc);
   229         // [phh] We're going to end up in some handler or other, so it doesn't
   230         // matter what mdp we point to.  See exception_handler_for_exception()
   231         // in interpreterRuntime.cpp.
   232         break;
   233       case Deoptimization::Unpack_uncommon_trap:
   234       case Deoptimization::Unpack_reexecute:
   235         // redo last byte code
   236         pc  = Interpreter::deopt_entry(vtos, 0);
   237         use_next_mdp = false;
   238         break;
   239       default:
   240         ShouldNotReachHere();
   241       }
   242     }
   243   }
   245   // Setup the interpreter frame
   247   assert(method() != NULL, "method must exist");
   248   int temps = expressions()->size();
   250   int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
   252   Interpreter::layout_activation(method(),
   253                                  temps + callee_parameters,
   254                                  popframe_preserved_args_size_in_words,
   255                                  locks,
   256                                  callee_parameters,
   257                                  callee_locals,
   258                                  caller,
   259                                  iframe(),
   260                                  is_top_frame);
   262   // Update the pc in the frame object and overwrite the temporary pc
   263   // we placed in the skeletal frame now that we finally know the
   264   // exact interpreter address we should use.
   266   _frame.patch_pc(thread, pc);
   268   assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");
   270   BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
   271   for (int index = 0; index < locks; index++) {
   272     top = iframe()->previous_monitor_in_interpreter_frame(top);
   273     BasicObjectLock* src = _monitors->at(index);
   274     top->set_obj(src->obj());
   275     src->lock()->move_to(src->obj(), top->lock());
   276   }
   277   if (ProfileInterpreter) {
   278     iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
   279   }
   280   iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
   281   if (ProfileInterpreter) {
   282     methodDataOop mdo = method()->method_data();
   283     if (mdo != NULL) {
   284       int bci = iframe()->interpreter_frame_bci();
   285       if (use_next_mdp) ++bci;
   286       address mdp = mdo->bci_to_dp(bci);
   287       iframe()->interpreter_frame_set_mdp(mdp);
   288     }
   289   }
   291   // Unpack expression stack
   292   // If this is an intermediate frame (i.e. not top frame) then this
   293   // only unpacks the part of the expression stack not used by callee
   294   // as parameters. The callee parameters are unpacked as part of the
   295   // callee locals.
   296   int i;
   297   for(i = 0; i < expressions()->size(); i++) {
   298     StackValue *value = expressions()->at(i);
   299     intptr_t*   addr  = iframe()->interpreter_frame_expression_stack_at(i);
   300     switch(value->type()) {
   301       case T_INT:
   302         *addr = value->get_int();
   303         break;
   304       case T_OBJECT:
   305         *addr = value->get_int(T_OBJECT);
   306         break;
   307       case T_CONFLICT:
   308         // A dead stack slot.  Initialize to null in case it is an oop.
   309         *addr = NULL_WORD;
   310         break;
   311       default:
   312         ShouldNotReachHere();
   313     }
   314   }
   317   // Unpack the locals
   318   for(i = 0; i < locals()->size(); i++) {
   319     StackValue *value = locals()->at(i);
   320     intptr_t* addr  = iframe()->interpreter_frame_local_at(i);
   321     switch(value->type()) {
   322       case T_INT:
   323         *addr = value->get_int();
   324         break;
   325       case T_OBJECT:
   326         *addr = value->get_int(T_OBJECT);
   327         break;
   328       case T_CONFLICT:
   329         // A dead location. If it is an oop then we need a NULL to prevent GC from following it
   330         *addr = NULL_WORD;
   331         break;
   332       default:
   333         ShouldNotReachHere();
   334     }
   335   }
   337   if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
   338     // An interpreted frame was popped but it returns to a deoptimized
   339     // frame. The incoming arguments to the interpreted activation
   340     // were preserved in thread-local storage by the
   341     // remove_activation_preserving_args_entry in the interpreter; now
   342     // we put them back into the just-unpacked interpreter frame.
   343     // Note that this assumes that the locals arena grows toward lower
   344     // addresses.
   345     if (popframe_preserved_args_size_in_words != 0) {
   346       void* saved_args = thread->popframe_preserved_args();
   347       assert(saved_args != NULL, "must have been saved by interpreter");
   348 #ifdef ASSERT
   349       assert(popframe_preserved_args_size_in_words <=
   350              iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords,
   351              "expression stack size should have been extended");
   352 #endif // ASSERT
   353       int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
   354       intptr_t* base;
   355       if (frame::interpreter_frame_expression_stack_direction() < 0) {
   356         base = iframe()->interpreter_frame_expression_stack_at(top_element);
   357       } else {
   358         base = iframe()->interpreter_frame_expression_stack();
   359       }
   360       Copy::conjoint_jbytes(saved_args,
   361                             base,
   362                             popframe_preserved_args_size_in_bytes);
   363       thread->popframe_free_preserved_args();
   364     }
   365   }
   367 #ifndef PRODUCT
   368   if (TraceDeoptimization && Verbose) {
   369     ttyLocker ttyl;
   370     tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
   371     iframe()->print_on(tty);
   372     RegisterMap map(thread);
   373     vframe* f = vframe::new_vframe(iframe(), &map, thread);
   374     f->print();
   376     tty->print_cr("locals size     %d", locals()->size());
   377     tty->print_cr("expression size %d", expressions()->size());
   379     method()->print_value();
   380     tty->cr();
   381     // method()->print_codes();
   382   } else if (TraceDeoptimization) {
   383     tty->print("     ");
   384     method()->print_value();
   385     Bytecodes::Code code = Bytecodes::java_code_at(bcp);
   386     int bci = method()->bci_from(bcp);
   387     tty->print(" - %s", Bytecodes::name(code));
   388     tty->print(" @ bci %d ", bci);
   389     tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
   390   }
   391 #endif // PRODUCT
   393   // The expression stack and locals are in the resource area don't leave
   394   // a dangling pointer in the vframeArray we leave around for debug
   395   // purposes
   397   _locals = _expressions = NULL;
   399 }
   401 int vframeArrayElement::on_stack_size(int callee_parameters,
   402                                       int callee_locals,
   403                                       bool is_top_frame,
   404                                       int popframe_extra_stack_expression_els) const {
   405   assert(method()->max_locals() == locals()->size(), "just checking");
   406   int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
   407   int temps = expressions()->size();
   408   return Interpreter::size_activation(method(),
   409                                       temps + callee_parameters,
   410                                       popframe_extra_stack_expression_els,
   411                                       locks,
   412                                       callee_parameters,
   413                                       callee_locals,
   414                                       is_top_frame);
   415 }
   419 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
   420                                    RegisterMap *reg_map, frame sender, frame caller, frame self) {
   422   // Allocate the vframeArray
   423   vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
   424                                                      sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
   425                                                      "vframeArray::allocate");
   426   result->_frames = chunk->length();
   427   result->_owner_thread = thread;
   428   result->_sender = sender;
   429   result->_caller = caller;
   430   result->_original = self;
   431   result->set_unroll_block(NULL); // initialize it
   432   result->fill_in(thread, frame_size, chunk, reg_map);
   433   return result;
   434 }
   436 void vframeArray::fill_in(JavaThread* thread,
   437                           int frame_size,
   438                           GrowableArray<compiledVFrame*>* chunk,
   439                           const RegisterMap *reg_map) {
   440   // Set owner first, it is used when adding monitor chunks
   442   _frame_size = frame_size;
   443   for(int i = 0; i < chunk->length(); i++) {
   444     element(i)->fill_in(chunk->at(i));
   445   }
   447   // Copy registers for callee-saved registers
   448   if (reg_map != NULL) {
   449     for(int i = 0; i < RegisterMap::reg_count; i++) {
   450 #ifdef AMD64
   451       // The register map has one entry for every int (32-bit value), so
   452       // 64-bit physical registers have two entries in the map, one for
   453       // each half.  Ignore the high halves of 64-bit registers, just like
   454       // frame::oopmapreg_to_location does.
   455       //
   456       // [phh] FIXME: this is a temporary hack!  This code *should* work
   457       // correctly w/o this hack, possibly by changing RegisterMap::pd_location
   458       // in frame_amd64.cpp and the values of the phantom high half registers
   459       // in amd64.ad.
   460       //      if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
   461         intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
   462         _callee_registers[i] = src != NULL ? *src : NULL_WORD;
   463         //      } else {
   464         //      jint* src = (jint*) reg_map->location(VMReg::Name(i));
   465         //      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
   466         //      }
   467 #else
   468       jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
   469       _callee_registers[i] = src != NULL ? *src : NULL_WORD;
   470 #endif
   471       if (src == NULL) {
   472         set_location_valid(i, false);
   473       } else {
   474         set_location_valid(i, true);
   475         jint* dst = (jint*) register_location(i);
   476         *dst = *src;
   477       }
   478     }
   479   }
   480 }
   482 void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
   483   // stack picture
   484   //   unpack_frame
   485   //   [new interpreter frames ] (frames are skeletal but walkable)
   486   //   caller_frame
   487   //
   488   //  This routine fills in the missing data for the skeletal interpreter frames
   489   //  in the above picture.
   491   // Find the skeletal interpreter frames to unpack into
   492   RegisterMap map(JavaThread::current(), false);
   493   // Get the youngest frame we will unpack (last to be unpacked)
   494   frame me = unpack_frame.sender(&map);
   495   int index;
   496   for (index = 0; index < frames(); index++ ) {
   497     *element(index)->iframe() = me;
   498     // Get the caller frame (possibly skeletal)
   499     me = me.sender(&map);
   500   }
   502   frame caller_frame = me;
   504   // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
   506   // Unpack the frames from the oldest (frames() -1) to the youngest (0)
   508   for (index = frames() - 1; index >= 0 ; index--) {
   509     int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
   510     int callee_locals     = index == 0 ? 0 : element(index-1)->method()->max_locals();
   511     element(index)->unpack_on_stack(callee_parameters,
   512                                     callee_locals,
   513                                     &caller_frame,
   514                                     index == 0,
   515                                     exec_mode);
   516     if (index == frames() - 1) {
   517       Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
   518     }
   519     caller_frame = *element(index)->iframe();
   520   }
   523   deallocate_monitor_chunks();
   524 }
   526 void vframeArray::deallocate_monitor_chunks() {
   527   JavaThread* jt = JavaThread::current();
   528   for (int index = 0; index < frames(); index++ ) {
   529      element(index)->free_monitors(jt);
   530   }
   531 }
   533 #ifndef PRODUCT
   535 bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
   536   if (owner_thread() != thread) return false;
   537   int index = 0;
   538 #if 0 // FIXME can't do this comparison
   540   // Compare only within vframe array.
   541   for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
   542     if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
   543     index++;
   544   }
   545   if (index != chunk->length()) return false;
   546 #endif
   548   return true;
   549 }
   551 #endif
   553 address vframeArray::register_location(int i) const {
   554   assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
   555   return (address) & _callee_registers[i];
   556 }
   559 #ifndef PRODUCT
   561 // Printing
   563 // Note: we cannot have print_on as const, as we allocate inside the method
   564 void vframeArray::print_on_2(outputStream* st)  {
   565   st->print_cr(" - sp: " INTPTR_FORMAT, sp());
   566   st->print(" - thread: ");
   567   Thread::current()->print();
   568   st->print_cr(" - frame size: %d", frame_size());
   569   for (int index = 0; index < frames() ; index++ ) {
   570     element(index)->print(st);
   571   }
   572 }
   574 void vframeArrayElement::print(outputStream* st) {
   575   st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp());
   576 }
   578 void vframeArray::print_value_on(outputStream* st) const {
   579   st->print_cr("vframeArray [%d] ", frames());
   580 }
   583 #endif

mercurial