Mon, 09 Mar 2020 12:54:53 +0000
8240295: hs_err elapsed time in seconds is not accurate enough
Reviewed-by: dholmes, sspitsyn
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/vmSymbols.hpp"
27 #include "interpreter/bytecode.hpp"
28 #include "interpreter/interpreter.hpp"
29 #include "memory/allocation.inline.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "memory/universe.inline.hpp"
32 #include "oops/methodData.hpp"
33 #include "oops/oop.inline.hpp"
34 #include "prims/jvmtiThreadState.hpp"
35 #include "runtime/handles.inline.hpp"
36 #include "runtime/monitorChunk.hpp"
37 #include "runtime/sharedRuntime.hpp"
38 #include "runtime/vframe.hpp"
39 #include "runtime/vframeArray.hpp"
40 #include "runtime/vframe_hp.hpp"
41 #include "utilities/events.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
46 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
48 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
50 void vframeArrayElement::free_monitors(JavaThread* jt) {
51 if (_monitors != NULL) {
52 MonitorChunk* chunk = _monitors;
53 _monitors = NULL;
54 jt->remove_monitor_chunk(chunk);
55 delete chunk;
56 }
57 }
59 void vframeArrayElement::fill_in(compiledVFrame* vf, bool realloc_failures) {
61 // Copy the information from the compiled vframe to the
62 // interpreter frame we will be creating to replace vf
64 _method = vf->method();
65 _bci = vf->raw_bci();
66 _reexecute = vf->should_reexecute();
67 #ifdef ASSERT
68 _removed_monitors = false;
69 #endif
71 int index;
73 // Get the monitors off-stack
75 GrowableArray<MonitorInfo*>* list = vf->monitors();
76 if (list->is_empty()) {
77 _monitors = NULL;
78 } else {
80 // Allocate monitor chunk
81 _monitors = new MonitorChunk(list->length());
82 vf->thread()->add_monitor_chunk(_monitors);
84 // Migrate the BasicLocks from the stack to the monitor chunk
85 for (index = 0; index < list->length(); index++) {
86 MonitorInfo* monitor = list->at(index);
87 assert(!monitor->owner_is_scalar_replaced() || realloc_failures, "object should be reallocated already");
88 BasicObjectLock* dest = _monitors->at(index);
89 if (monitor->owner_is_scalar_replaced()) {
90 dest->set_obj(NULL);
91 } else {
92 assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
93 dest->set_obj(monitor->owner());
94 monitor->lock()->move_to(monitor->owner(), dest->lock());
95 }
96 }
97 }
99 // Convert the vframe locals and expressions to off stack
100 // values. Because we will not gc all oops can be converted to
101 // intptr_t (i.e. a stack slot) and we are fine. This is
102 // good since we are inside a HandleMark and the oops in our
103 // collection would go away between packing them here and
104 // unpacking them in unpack_on_stack.
106 // First the locals go off-stack
108 // FIXME this seems silly it creates a StackValueCollection
109 // in order to get the size to then copy them and
110 // convert the types to intptr_t size slots. Seems like it
111 // could do it in place... Still uses less memory than the
112 // old way though
114 StackValueCollection *locs = vf->locals();
115 _locals = new StackValueCollection(locs->size());
116 for(index = 0; index < locs->size(); index++) {
117 StackValue* value = locs->at(index);
118 switch(value->type()) {
119 case T_OBJECT:
120 assert(!value->obj_is_scalar_replaced() || realloc_failures, "object should be reallocated already");
121 // preserve object type
122 _locals->add( new StackValue(cast_from_oop<intptr_t>((value->get_obj()())), T_OBJECT ));
123 break;
124 case T_CONFLICT:
125 // A dead local. Will be initialized to null/zero.
126 _locals->add( new StackValue());
127 break;
128 case T_INT:
129 _locals->add( new StackValue(value->get_int()));
130 break;
131 default:
132 ShouldNotReachHere();
133 }
134 }
136 // Now the expressions off-stack
137 // Same silliness as above
139 StackValueCollection *exprs = vf->expressions();
140 _expressions = new StackValueCollection(exprs->size());
141 for(index = 0; index < exprs->size(); index++) {
142 StackValue* value = exprs->at(index);
143 switch(value->type()) {
144 case T_OBJECT:
145 assert(!value->obj_is_scalar_replaced() || realloc_failures, "object should be reallocated already");
146 // preserve object type
147 _expressions->add( new StackValue(cast_from_oop<intptr_t>((value->get_obj()())), T_OBJECT ));
148 break;
149 case T_CONFLICT:
150 // A dead stack element. Will be initialized to null/zero.
151 // This can occur when the compiler emits a state in which stack
152 // elements are known to be dead (because of an imminent exception).
153 _expressions->add( new StackValue());
154 break;
155 case T_INT:
156 _expressions->add( new StackValue(value->get_int()));
157 break;
158 default:
159 ShouldNotReachHere();
160 }
161 }
162 }
// Sequence number used for the "[%d Interpreted Frame]" trace line. Within this
// file it is only incremented by the TraceDeoptimization && Verbose path of
// vframeArrayElement::unpack_on_stack (non-PRODUCT builds).
164 int unpack_counter = 0;
// Fills in the skeletal interpreter frame of this element (iframe()) from the
// state packed by fill_in(). In order, it:
//   1. chooses the bytecode pointer (bcp) and the interpreter continuation pc,
//   2. for the top frame, possibly overrides the pc for PopFrame, ForceEarlyReturn
//      or the given exec_mode,
//   3. lays out the activation and patches the pc into the frame,
//   4. moves the saved monitors from the monitor chunk into the frame,
//   5. sets bcx and, when profiling, the mdp,
//   6. copies the expression stack and the locals into the frame,
//   7. for the top frame, restores arguments preserved by PopFrame,
//   8. clears _locals and _expressions.
//
// Parameters:
//   caller_actual_parameters, callee_parameters, callee_locals, caller,
//   is_bottom_frame - forwarded to Interpreter::layout_activation;
//                     callee_parameters is also used to pick the continuation pc
//   is_top_frame    - true for the youngest frame; enables the JVMTI and
//                     exec_mode pc overrides
//   exec_mode       - one of the Deoptimization::Unpack_* values handled in the
//                     switch below
166 void vframeArrayElement::unpack_on_stack(int caller_actual_parameters,
167 int callee_parameters,
168 int callee_locals,
169 frame* caller,
170 bool is_top_frame,
171 bool is_bottom_frame,
172 int exec_mode) {
173 JavaThread* thread = (JavaThread*) Thread::current();
175 // Look at bci and decide on bcp and continuation pc
176 address bcp;
177 // C++ interpreter doesn't need a pc since it will figure out what to do when it
178 // begins execution
179 address pc;
180 bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
181 // rather than the one associated with bcp
182 if (raw_bci() == SynchronizationEntryBCI) {
183 // We are deoptimizing while hanging in prologue code for synchronized method
184 bcp = method()->bcp_from(0); // first byte code
185 pc = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
186 } else if (should_reexecute()) { //reexecute this bytecode
187 assert(is_top_frame, "reexecute allowed only for the top frame");
188 bcp = method()->bcp_from(bci());
189 pc = Interpreter::deopt_reexecute_entry(method(), bcp);
190 } else {
191 bcp = method()->bcp_from(bci());
192 pc = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
193 use_next_mdp = true;
194 }
195 assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
197 // Monitorenter and pending exceptions:
198 //
199 // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
200 // because there is no safepoint at the null pointer check (it is either handled explicitly
201 // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
202 // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER). If an asynchronous
203 // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
204 // the monitorenter to place it in the proper exception range.
205 //
206 // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
207 // in which case bcp should point to the monitorenter since it is within the exception's range.
209 assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
210 assert(thread->deopt_nmethod() != NULL, "nmethod should be known");
211 guarantee(!(thread->deopt_nmethod()->is_compiled_by_c2() &&
212 *bcp == Bytecodes::_monitorenter &&
213 exec_mode == Deoptimization::Unpack_exception),
214 "shouldn't get exception during monitorenter");
// Size of the PopFrame-preserved arguments; both stay 0 unless the
// "reexecute invoke in top frame" branch below sets them.
216 int popframe_preserved_args_size_in_bytes = 0;
217 int popframe_preserved_args_size_in_words = 0;
218 if (is_top_frame) {
219 JvmtiThreadState *state = thread->jvmti_thread_state();
220 if (JvmtiExport::can_pop_frame() &&
221 (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
222 if (thread->has_pending_popframe()) {
223 // Pop top frame after deoptimization
224 #ifndef CC_INTERP
225 pc = Interpreter::remove_activation_preserving_args_entry();
226 #else
227 // Do an uncommon trap type entry. c++ interpreter will know
228 // to pop frame and preserve the args
229 pc = Interpreter::deopt_entry(vtos, 0);
230 use_next_mdp = false;
231 #endif
232 } else {
233 // Reexecute invoke in top frame
234 pc = Interpreter::deopt_entry(vtos, 0);
235 use_next_mdp = false;
236 popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
237 // Note: the PopFrame-related extension of the expression stack size is done in
238 // Deoptimization::fetch_unroll_info_helper
239 popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
240 }
241 } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
242 // Force early return from top frame after deoptimization
243 #ifndef CC_INTERP
244 pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
245 #endif
246 } else {
247 // Possibly override the previous pc computation of the top (youngest) frame
248 switch (exec_mode) {
249 case Deoptimization::Unpack_deopt:
250 // use what we've got
251 break;
252 case Deoptimization::Unpack_exception:
253 // exception is pending
254 pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc);
255 // [phh] We're going to end up in some handler or other, so it doesn't
256 // matter what mdp we point to. See exception_handler_for_exception()
257 // in interpreterRuntime.cpp.
258 break;
259 case Deoptimization::Unpack_uncommon_trap:
260 case Deoptimization::Unpack_reexecute:
261 // redo last byte code
262 pc = Interpreter::deopt_entry(vtos, 0);
263 use_next_mdp = false;
264 break;
265 default:
266 ShouldNotReachHere();
267 }
268 }
269 }
271 // Setup the interpreter frame
273 assert(method() != NULL, "method must exist");
274 int temps = expressions()->size();
276 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
278 Interpreter::layout_activation(method(),
279 temps + callee_parameters,
280 popframe_preserved_args_size_in_words,
281 locks,
282 caller_actual_parameters,
283 callee_parameters,
284 callee_locals,
285 caller,
286 iframe(),
287 is_top_frame,
288 is_bottom_frame);
290 // Update the pc in the frame object and overwrite the temporary pc
291 // we placed in the skeletal frame now that we finally know the
292 // exact interpreter address we should use.
294 _frame.patch_pc(thread, pc);
296 assert (!method()->is_synchronized() || locks > 0 || _removed_monitors, "synchronized methods must have monitors");
// Move each saved monitor from the off-stack chunk into the frame: start at
// interpreter_frame_monitor_begin() and step with
// previous_monitor_in_interpreter_frame() once per monitor.
298 BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
299 for (int index = 0; index < locks; index++) {
300 top = iframe()->previous_monitor_in_interpreter_frame(top);
301 BasicObjectLock* src = _monitors->at(index);
302 top->set_obj(src->obj());
303 src->lock()->move_to(src->obj(), top->lock());
304 }
305 if (ProfileInterpreter) {
306 iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
307 }
308 iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
309 if (ProfileInterpreter) {
310 MethodData* mdo = method()->method_data();
311 if (mdo != NULL) {
312 int bci = iframe()->interpreter_frame_bci();
313 if (use_next_mdp) ++bci;
314 address mdp = mdo->bci_to_dp(bci);
315 iframe()->interpreter_frame_set_mdp(mdp);
316 }
317 }
319 // Unpack expression stack
320 // If this is an intermediate frame (i.e. not top frame) then this
321 // only unpacks the part of the expression stack not used by callee
322 // as parameters. The callee parameters are unpacked as part of the
323 // callee locals.
324 int i;
325 for(i = 0; i < expressions()->size(); i++) {
326 StackValue *value = expressions()->at(i);
327 intptr_t* addr = iframe()->interpreter_frame_expression_stack_at(i);
328 switch(value->type()) {
329 case T_INT:
330 *addr = value->get_int();
331 break;
332 case T_OBJECT:
// fill_in() stored the oop as an intptr_t slot tagged T_OBJECT; copy that word back.
333 *addr = value->get_int(T_OBJECT);
334 break;
335 case T_CONFLICT:
336 // A dead stack slot. Initialize to null in case it is an oop.
337 *addr = NULL_WORD;
338 break;
339 default:
340 ShouldNotReachHere();
341 }
342 }
345 // Unpack the locals
346 for(i = 0; i < locals()->size(); i++) {
347 StackValue *value = locals()->at(i);
348 intptr_t* addr = iframe()->interpreter_frame_local_at(i);
349 switch(value->type()) {
350 case T_INT:
351 *addr = value->get_int();
352 break;
353 case T_OBJECT:
354 *addr = value->get_int(T_OBJECT);
355 break;
356 case T_CONFLICT:
357 // A dead location. If it is an oop then we need a NULL to prevent GC from following it
358 *addr = NULL_WORD;
359 break;
360 default:
361 ShouldNotReachHere();
362 }
363 }
365 if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
366 // An interpreted frame was popped but it returns to a deoptimized
367 // frame. The incoming arguments to the interpreted activation
368 // were preserved in thread-local storage by the
369 // remove_activation_preserving_args_entry in the interpreter; now
370 // we put them back into the just-unpacked interpreter frame.
371 // Note that this assumes that the locals arena grows toward lower
372 // addresses.
373 if (popframe_preserved_args_size_in_words != 0) {
374 void* saved_args = thread->popframe_preserved_args();
375 assert(saved_args != NULL, "must have been saved by interpreter");
376 #ifdef ASSERT
377 assert(popframe_preserved_args_size_in_words <=
378 iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords,
379 "expression stack size should have been extended");
380 #endif // ASSERT
381 int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
// Destination of the copy: the address of the top element when the stack
// direction is negative, otherwise the expression stack base.
382 intptr_t* base;
383 if (frame::interpreter_frame_expression_stack_direction() < 0) {
384 base = iframe()->interpreter_frame_expression_stack_at(top_element);
385 } else {
386 base = iframe()->interpreter_frame_expression_stack();
387 }
388 Copy::conjoint_jbytes(saved_args,
389 base,
390 popframe_preserved_args_size_in_bytes);
391 thread->popframe_free_preserved_args();
392 }
393 }
395 #ifndef PRODUCT
396 if (TraceDeoptimization && Verbose) {
397 ttyLocker ttyl;
398 tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
399 iframe()->print_on(tty);
400 RegisterMap map(thread);
401 vframe* f = vframe::new_vframe(iframe(), &map, thread);
402 f->print();
404 tty->print_cr("locals size %d", locals()->size());
405 tty->print_cr("expression size %d", expressions()->size());
407 method()->print_value();
408 tty->cr();
409 // method()->print_codes();
410 } else if (TraceDeoptimization) {
411 tty->print(" ");
412 method()->print_value();
413 Bytecodes::Code code = Bytecodes::java_code_at(method(), bcp);
414 int bci = method()->bci_from(bcp);
415 tty->print(" - %s", Bytecodes::name(code));
416 tty->print(" @ bci %d ", bci);
417 tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
418 }
419 #endif // PRODUCT
421 // The expression stack and locals are in the resource area don't leave
422 // a dangling pointer in the vframeArray we leave around for debug
423 // purposes
425 _locals = _expressions = NULL;
427 }
429 int vframeArrayElement::on_stack_size(int callee_parameters,
430 int callee_locals,
431 bool is_top_frame,
432 int popframe_extra_stack_expression_els) const {
433 assert(method()->max_locals() == locals()->size(), "just checking");
434 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
435 int temps = expressions()->size();
436 return Interpreter::size_activation(method()->max_stack(),
437 temps + callee_parameters,
438 popframe_extra_stack_expression_els,
439 locks,
440 callee_parameters,
441 callee_locals,
442 is_top_frame);
443 }
447 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
448 RegisterMap *reg_map, frame sender, frame caller, frame self,
449 bool realloc_failures) {
451 // Allocate the vframeArray
452 vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
453 sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
454 mtCompiler);
455 result->_frames = chunk->length();
456 result->_owner_thread = thread;
457 result->_sender = sender;
458 result->_caller = caller;
459 result->_original = self;
460 result->set_unroll_block(NULL); // initialize it
461 result->fill_in(thread, frame_size, chunk, reg_map, realloc_failures);
462 return result;
463 }
465 void vframeArray::fill_in(JavaThread* thread,
466 int frame_size,
467 GrowableArray<compiledVFrame*>* chunk,
468 const RegisterMap *reg_map,
469 bool realloc_failures) {
470 // Set owner first, it is used when adding monitor chunks
472 _frame_size = frame_size;
473 for(int i = 0; i < chunk->length(); i++) {
474 element(i)->fill_in(chunk->at(i), realloc_failures);
475 }
477 // Copy registers for callee-saved registers
478 if (reg_map != NULL) {
479 for(int i = 0; i < RegisterMap::reg_count; i++) {
480 #ifdef AMD64
481 // The register map has one entry for every int (32-bit value), so
482 // 64-bit physical registers have two entries in the map, one for
483 // each half. Ignore the high halves of 64-bit registers, just like
484 // frame::oopmapreg_to_location does.
485 //
486 // [phh] FIXME: this is a temporary hack! This code *should* work
487 // correctly w/o this hack, possibly by changing RegisterMap::pd_location
488 // in frame_amd64.cpp and the values of the phantom high half registers
489 // in amd64.ad.
490 // if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
491 intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
492 _callee_registers[i] = src != NULL ? *src : NULL_WORD;
493 // } else {
494 // jint* src = (jint*) reg_map->location(VMReg::Name(i));
495 // _callee_registers[i] = src != NULL ? *src : NULL_WORD;
496 // }
497 #else
498 jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
499 _callee_registers[i] = src != NULL ? *src : NULL_WORD;
500 #endif
501 if (src == NULL) {
502 set_location_valid(i, false);
503 } else {
504 set_location_valid(i, true);
505 jint* dst = (jint*) register_location(i);
506 *dst = *src;
507 }
508 }
509 }
510 }
512 void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode, int caller_actual_parameters) {
513 // stack picture
514 // unpack_frame
515 // [new interpreter frames ] (frames are skeletal but walkable)
516 // caller_frame
517 //
518 // This routine fills in the missing data for the skeletal interpreter frames
519 // in the above picture.
521 // Find the skeletal interpreter frames to unpack into
522 JavaThread* THREAD = JavaThread::current();
523 RegisterMap map(THREAD, false);
524 // Get the youngest frame we will unpack (last to be unpacked)
525 frame me = unpack_frame.sender(&map);
526 int index;
527 for (index = 0; index < frames(); index++ ) {
528 *element(index)->iframe() = me;
529 // Get the caller frame (possibly skeletal)
530 me = me.sender(&map);
531 }
533 // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
534 // Unpack the frames from the oldest (frames() -1) to the youngest (0)
535 frame* caller_frame = &me;
536 for (index = frames() - 1; index >= 0 ; index--) {
537 vframeArrayElement* elem = element(index); // caller
538 int callee_parameters, callee_locals;
539 if (index == 0) {
540 callee_parameters = callee_locals = 0;
541 } else {
542 methodHandle caller = elem->method();
543 methodHandle callee = element(index - 1)->method();
544 Bytecode_invoke inv(caller, elem->bci());
545 // invokedynamic instructions don't have a class but obviously don't have a MemberName appendix.
546 // NOTE: Use machinery here that avoids resolving of any kind.
547 const bool has_member_arg =
548 !inv.is_invokedynamic() && MethodHandles::has_member_arg(inv.klass(), inv.name());
549 callee_parameters = callee->size_of_parameters() + (has_member_arg ? 1 : 0);
550 callee_locals = callee->max_locals();
551 }
552 elem->unpack_on_stack(caller_actual_parameters,
553 callee_parameters,
554 callee_locals,
555 caller_frame,
556 index == 0,
557 index == frames() - 1,
558 exec_mode);
559 if (index == frames() - 1) {
560 Deoptimization::unwind_callee_save_values(elem->iframe(), this);
561 }
562 caller_frame = elem->iframe();
563 caller_actual_parameters = callee_parameters;
564 }
565 deallocate_monitor_chunks();
566 }
568 void vframeArray::deallocate_monitor_chunks() {
569 JavaThread* jt = JavaThread::current();
570 for (int index = 0; index < frames(); index++ ) {
571 element(index)->free_monitors(jt);
572 }
573 }
575 #ifndef PRODUCT
577 bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
578 if (owner_thread() != thread) return false;
579 int index = 0;
580 #if 0 // FIXME can't do this comparison
582 // Compare only within vframe array.
583 for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
584 if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
585 index++;
586 }
587 if (index != chunk->length()) return false;
588 #endif
590 return true;
591 }
593 #endif
595 address vframeArray::register_location(int i) const {
596 assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
597 return (address) & _callee_registers[i];
598 }
601 #ifndef PRODUCT
603 // Printing
605 // Note: we cannot have print_on as const, as we allocate inside the method
606 void vframeArray::print_on_2(outputStream* st) {
607 st->print_cr(" - sp: " INTPTR_FORMAT, sp());
608 st->print(" - thread: ");
609 Thread::current()->print();
610 st->print_cr(" - frame size: %d", frame_size());
611 for (int index = 0; index < frames() ; index++ ) {
612 element(index)->print(st);
613 }
614 }
616 void vframeArrayElement::print(outputStream* st) {
617 st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp());
618 }
620 void vframeArray::print_value_on(outputStream* st) const {
621 st->print_cr("vframeArray [%d] ", frames());
622 }
625 #endif