Tue, 15 Mar 2011 06:35:10 -0700
7024234: 2/3 jvmti tests fail assert(!_oops_are_stale) failed: oops are stale on Win-AMD64
Summary: Move initialization of the '_instance' field to avoid race with ServiceThread start.
Reviewed-by: dholmes, kamg, never, dsamersoff, ysr, coleenp, acorn
1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/vmSymbols.hpp"
27 #include "interpreter/interpreter.hpp"
28 #include "memory/allocation.inline.hpp"
29 #include "memory/resourceArea.hpp"
30 #include "memory/universe.inline.hpp"
31 #include "oops/methodDataOop.hpp"
32 #include "oops/oop.inline.hpp"
33 #include "prims/jvmtiThreadState.hpp"
34 #include "runtime/handles.inline.hpp"
35 #include "runtime/monitorChunk.hpp"
36 #include "runtime/sharedRuntime.hpp"
37 #include "runtime/vframe.hpp"
38 #include "runtime/vframeArray.hpp"
39 #include "runtime/vframe_hp.hpp"
40 #include "utilities/events.hpp"
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #endif
46 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
48 void vframeArrayElement::free_monitors(JavaThread* jt) {
49 if (_monitors != NULL) {
50 MonitorChunk* chunk = _monitors;
51 _monitors = NULL;
52 jt->remove_monitor_chunk(chunk);
53 delete chunk;
54 }
55 }
57 void vframeArrayElement::fill_in(compiledVFrame* vf) {
59 // Copy the information from the compiled vframe to the
60 // interpreter frame we will be creating to replace vf
62 _method = vf->method();
63 _bci = vf->raw_bci();
64 _reexecute = vf->should_reexecute();
66 int index;
68 // Get the monitors off-stack
70 GrowableArray<MonitorInfo*>* list = vf->monitors();
71 if (list->is_empty()) {
72 _monitors = NULL;
73 } else {
75 // Allocate monitor chunk
76 _monitors = new MonitorChunk(list->length());
77 vf->thread()->add_monitor_chunk(_monitors);
79 // Migrate the BasicLocks from the stack to the monitor chunk
80 for (index = 0; index < list->length(); index++) {
81 MonitorInfo* monitor = list->at(index);
82 assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
83 assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
84 BasicObjectLock* dest = _monitors->at(index);
85 dest->set_obj(monitor->owner());
86 monitor->lock()->move_to(monitor->owner(), dest->lock());
87 }
88 }
90 // Convert the vframe locals and expressions to off stack
91 // values. Because we will not gc all oops can be converted to
92 // intptr_t (i.e. a stack slot) and we are fine. This is
93 // good since we are inside a HandleMark and the oops in our
94 // collection would go away between packing them here and
95 // unpacking them in unpack_on_stack.
97 // First the locals go off-stack
99 // FIXME this seems silly it creates a StackValueCollection
100 // in order to get the size to then copy them and
101 // convert the types to intptr_t size slots. Seems like it
102 // could do it in place... Still uses less memory than the
103 // old way though
105 StackValueCollection *locs = vf->locals();
106 _locals = new StackValueCollection(locs->size());
107 for(index = 0; index < locs->size(); index++) {
108 StackValue* value = locs->at(index);
109 switch(value->type()) {
110 case T_OBJECT:
111 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
112 // preserve object type
113 _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
114 break;
115 case T_CONFLICT:
116 // A dead local. Will be initialized to null/zero.
117 _locals->add( new StackValue());
118 break;
119 case T_INT:
120 _locals->add( new StackValue(value->get_int()));
121 break;
122 default:
123 ShouldNotReachHere();
124 }
125 }
127 // Now the expressions off-stack
128 // Same silliness as above
130 StackValueCollection *exprs = vf->expressions();
131 _expressions = new StackValueCollection(exprs->size());
132 for(index = 0; index < exprs->size(); index++) {
133 StackValue* value = exprs->at(index);
134 switch(value->type()) {
135 case T_OBJECT:
136 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
137 // preserve object type
138 _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
139 break;
140 case T_CONFLICT:
141 // A dead stack element. Will be initialized to null/zero.
142 // This can occur when the compiler emits a state in which stack
143 // elements are known to be dead (because of an imminent exception).
144 _expressions->add( new StackValue());
145 break;
146 case T_INT:
147 _expressions->add( new StackValue(value->get_int()));
148 break;
149 default:
150 ShouldNotReachHere();
151 }
152 }
153 }
155 int unpack_counter = 0;
157 void vframeArrayElement::unpack_on_stack(int callee_parameters,
158 int callee_locals,
159 frame* caller,
160 bool is_top_frame,
161 int exec_mode) {
162 JavaThread* thread = (JavaThread*) Thread::current();
164 // Look at bci and decide on bcp and continuation pc
165 address bcp;
166 // C++ interpreter doesn't need a pc since it will figure out what to do when it
167 // begins execution
168 address pc;
169 bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
170 // rather than the one associated with bcp
171 if (raw_bci() == SynchronizationEntryBCI) {
172 // We are deoptimizing while hanging in prologue code for synchronized method
173 bcp = method()->bcp_from(0); // first byte code
174 pc = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
175 } else if (should_reexecute()) { //reexecute this bytecode
176 assert(is_top_frame, "reexecute allowed only for the top frame");
177 bcp = method()->bcp_from(bci());
178 pc = Interpreter::deopt_reexecute_entry(method(), bcp);
179 } else {
180 bcp = method()->bcp_from(bci());
181 pc = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
182 use_next_mdp = true;
183 }
184 assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");
186 // Monitorenter and pending exceptions:
187 //
188 // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
189 // because there is no safepoint at the null pointer check (it is either handled explicitly
190 // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
191 // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER). If an asynchronous
192 // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
193 // the monitorenter to place it in the proper exception range.
194 //
195 // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
196 // in which case bcp should point to the monitorenter since it is within the exception's range.
198 assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
199 assert(thread->deopt_nmethod() != NULL, "nmethod should be known");
200 guarantee(!(thread->deopt_nmethod()->is_compiled_by_c2() &&
201 *bcp == Bytecodes::_monitorenter &&
202 exec_mode == Deoptimization::Unpack_exception),
203 "shouldn't get exception during monitorenter");
205 int popframe_preserved_args_size_in_bytes = 0;
206 int popframe_preserved_args_size_in_words = 0;
207 if (is_top_frame) {
208 JvmtiThreadState *state = thread->jvmti_thread_state();
209 if (JvmtiExport::can_pop_frame() &&
210 (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
211 if (thread->has_pending_popframe()) {
212 // Pop top frame after deoptimization
213 #ifndef CC_INTERP
214 pc = Interpreter::remove_activation_preserving_args_entry();
215 #else
216 // Do an uncommon trap type entry. c++ interpreter will know
217 // to pop frame and preserve the args
218 pc = Interpreter::deopt_entry(vtos, 0);
219 use_next_mdp = false;
220 #endif
221 } else {
222 // Reexecute invoke in top frame
223 pc = Interpreter::deopt_entry(vtos, 0);
224 use_next_mdp = false;
225 popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
226 // Note: the PopFrame-related extension of the expression stack size is done in
227 // Deoptimization::fetch_unroll_info_helper
228 popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
229 }
230 } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
231 // Force early return from top frame after deoptimization
232 #ifndef CC_INTERP
233 pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
234 #else
235 // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
236 #endif
237 } else {
238 // Possibly override the previous pc computation of the top (youngest) frame
239 switch (exec_mode) {
240 case Deoptimization::Unpack_deopt:
241 // use what we've got
242 break;
243 case Deoptimization::Unpack_exception:
244 // exception is pending
245 pc = SharedRuntime::raw_exception_handler_for_return_address(thread, pc);
246 // [phh] We're going to end up in some handler or other, so it doesn't
247 // matter what mdp we point to. See exception_handler_for_exception()
248 // in interpreterRuntime.cpp.
249 break;
250 case Deoptimization::Unpack_uncommon_trap:
251 case Deoptimization::Unpack_reexecute:
252 // redo last byte code
253 pc = Interpreter::deopt_entry(vtos, 0);
254 use_next_mdp = false;
255 break;
256 default:
257 ShouldNotReachHere();
258 }
259 }
260 }
262 // Setup the interpreter frame
264 assert(method() != NULL, "method must exist");
265 int temps = expressions()->size();
267 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
269 Interpreter::layout_activation(method(),
270 temps + callee_parameters,
271 popframe_preserved_args_size_in_words,
272 locks,
273 callee_parameters,
274 callee_locals,
275 caller,
276 iframe(),
277 is_top_frame);
279 // Update the pc in the frame object and overwrite the temporary pc
280 // we placed in the skeletal frame now that we finally know the
281 // exact interpreter address we should use.
283 _frame.patch_pc(thread, pc);
285 assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");
287 BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
288 for (int index = 0; index < locks; index++) {
289 top = iframe()->previous_monitor_in_interpreter_frame(top);
290 BasicObjectLock* src = _monitors->at(index);
291 top->set_obj(src->obj());
292 src->lock()->move_to(src->obj(), top->lock());
293 }
294 if (ProfileInterpreter) {
295 iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
296 }
297 iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
298 if (ProfileInterpreter) {
299 methodDataOop mdo = method()->method_data();
300 if (mdo != NULL) {
301 int bci = iframe()->interpreter_frame_bci();
302 if (use_next_mdp) ++bci;
303 address mdp = mdo->bci_to_dp(bci);
304 iframe()->interpreter_frame_set_mdp(mdp);
305 }
306 }
308 // Unpack expression stack
309 // If this is an intermediate frame (i.e. not top frame) then this
310 // only unpacks the part of the expression stack not used by callee
311 // as parameters. The callee parameters are unpacked as part of the
312 // callee locals.
313 int i;
314 for(i = 0; i < expressions()->size(); i++) {
315 StackValue *value = expressions()->at(i);
316 intptr_t* addr = iframe()->interpreter_frame_expression_stack_at(i);
317 switch(value->type()) {
318 case T_INT:
319 *addr = value->get_int();
320 break;
321 case T_OBJECT:
322 *addr = value->get_int(T_OBJECT);
323 break;
324 case T_CONFLICT:
325 // A dead stack slot. Initialize to null in case it is an oop.
326 *addr = NULL_WORD;
327 break;
328 default:
329 ShouldNotReachHere();
330 }
331 }
334 // Unpack the locals
335 for(i = 0; i < locals()->size(); i++) {
336 StackValue *value = locals()->at(i);
337 intptr_t* addr = iframe()->interpreter_frame_local_at(i);
338 switch(value->type()) {
339 case T_INT:
340 *addr = value->get_int();
341 break;
342 case T_OBJECT:
343 *addr = value->get_int(T_OBJECT);
344 break;
345 case T_CONFLICT:
346 // A dead location. If it is an oop then we need a NULL to prevent GC from following it
347 *addr = NULL_WORD;
348 break;
349 default:
350 ShouldNotReachHere();
351 }
352 }
354 if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
355 // An interpreted frame was popped but it returns to a deoptimized
356 // frame. The incoming arguments to the interpreted activation
357 // were preserved in thread-local storage by the
358 // remove_activation_preserving_args_entry in the interpreter; now
359 // we put them back into the just-unpacked interpreter frame.
360 // Note that this assumes that the locals arena grows toward lower
361 // addresses.
362 if (popframe_preserved_args_size_in_words != 0) {
363 void* saved_args = thread->popframe_preserved_args();
364 assert(saved_args != NULL, "must have been saved by interpreter");
365 #ifdef ASSERT
366 assert(popframe_preserved_args_size_in_words <=
367 iframe()->interpreter_frame_expression_stack_size()*Interpreter::stackElementWords,
368 "expression stack size should have been extended");
369 #endif // ASSERT
370 int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
371 intptr_t* base;
372 if (frame::interpreter_frame_expression_stack_direction() < 0) {
373 base = iframe()->interpreter_frame_expression_stack_at(top_element);
374 } else {
375 base = iframe()->interpreter_frame_expression_stack();
376 }
377 Copy::conjoint_jbytes(saved_args,
378 base,
379 popframe_preserved_args_size_in_bytes);
380 thread->popframe_free_preserved_args();
381 }
382 }
384 #ifndef PRODUCT
385 if (TraceDeoptimization && Verbose) {
386 ttyLocker ttyl;
387 tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
388 iframe()->print_on(tty);
389 RegisterMap map(thread);
390 vframe* f = vframe::new_vframe(iframe(), &map, thread);
391 f->print();
393 tty->print_cr("locals size %d", locals()->size());
394 tty->print_cr("expression size %d", expressions()->size());
396 method()->print_value();
397 tty->cr();
398 // method()->print_codes();
399 } else if (TraceDeoptimization) {
400 tty->print(" ");
401 method()->print_value();
402 Bytecodes::Code code = Bytecodes::java_code_at(method(), bcp);
403 int bci = method()->bci_from(bcp);
404 tty->print(" - %s", Bytecodes::name(code));
405 tty->print(" @ bci %d ", bci);
406 tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
407 }
408 #endif // PRODUCT
410 // The expression stack and locals are in the resource area don't leave
411 // a dangling pointer in the vframeArray we leave around for debug
412 // purposes
414 _locals = _expressions = NULL;
416 }
418 int vframeArrayElement::on_stack_size(int callee_parameters,
419 int callee_locals,
420 bool is_top_frame,
421 int popframe_extra_stack_expression_els) const {
422 assert(method()->max_locals() == locals()->size(), "just checking");
423 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
424 int temps = expressions()->size();
425 return Interpreter::size_activation(method(),
426 temps + callee_parameters,
427 popframe_extra_stack_expression_els,
428 locks,
429 callee_parameters,
430 callee_locals,
431 is_top_frame);
432 }
436 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
437 RegisterMap *reg_map, frame sender, frame caller, frame self) {
439 // Allocate the vframeArray
440 vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
441 sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
442 "vframeArray::allocate");
443 result->_frames = chunk->length();
444 result->_owner_thread = thread;
445 result->_sender = sender;
446 result->_caller = caller;
447 result->_original = self;
448 result->set_unroll_block(NULL); // initialize it
449 result->fill_in(thread, frame_size, chunk, reg_map);
450 return result;
451 }
453 void vframeArray::fill_in(JavaThread* thread,
454 int frame_size,
455 GrowableArray<compiledVFrame*>* chunk,
456 const RegisterMap *reg_map) {
457 // Set owner first, it is used when adding monitor chunks
459 _frame_size = frame_size;
460 for(int i = 0; i < chunk->length(); i++) {
461 element(i)->fill_in(chunk->at(i));
462 }
464 // Copy registers for callee-saved registers
465 if (reg_map != NULL) {
466 for(int i = 0; i < RegisterMap::reg_count; i++) {
467 #ifdef AMD64
468 // The register map has one entry for every int (32-bit value), so
469 // 64-bit physical registers have two entries in the map, one for
470 // each half. Ignore the high halves of 64-bit registers, just like
471 // frame::oopmapreg_to_location does.
472 //
473 // [phh] FIXME: this is a temporary hack! This code *should* work
474 // correctly w/o this hack, possibly by changing RegisterMap::pd_location
475 // in frame_amd64.cpp and the values of the phantom high half registers
476 // in amd64.ad.
477 // if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
478 intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
479 _callee_registers[i] = src != NULL ? *src : NULL_WORD;
480 // } else {
481 // jint* src = (jint*) reg_map->location(VMReg::Name(i));
482 // _callee_registers[i] = src != NULL ? *src : NULL_WORD;
483 // }
484 #else
485 jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
486 _callee_registers[i] = src != NULL ? *src : NULL_WORD;
487 #endif
488 if (src == NULL) {
489 set_location_valid(i, false);
490 } else {
491 set_location_valid(i, true);
492 jint* dst = (jint*) register_location(i);
493 *dst = *src;
494 }
495 }
496 }
497 }
499 void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
500 // stack picture
501 // unpack_frame
502 // [new interpreter frames ] (frames are skeletal but walkable)
503 // caller_frame
504 //
505 // This routine fills in the missing data for the skeletal interpreter frames
506 // in the above picture.
508 // Find the skeletal interpreter frames to unpack into
509 RegisterMap map(JavaThread::current(), false);
510 // Get the youngest frame we will unpack (last to be unpacked)
511 frame me = unpack_frame.sender(&map);
512 int index;
513 for (index = 0; index < frames(); index++ ) {
514 *element(index)->iframe() = me;
515 // Get the caller frame (possibly skeletal)
516 me = me.sender(&map);
517 }
519 frame caller_frame = me;
521 // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee
523 // Unpack the frames from the oldest (frames() -1) to the youngest (0)
525 for (index = frames() - 1; index >= 0 ; index--) {
526 int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
527 int callee_locals = index == 0 ? 0 : element(index-1)->method()->max_locals();
528 element(index)->unpack_on_stack(callee_parameters,
529 callee_locals,
530 &caller_frame,
531 index == 0,
532 exec_mode);
533 if (index == frames() - 1) {
534 Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
535 }
536 caller_frame = *element(index)->iframe();
537 }
540 deallocate_monitor_chunks();
541 }
543 void vframeArray::deallocate_monitor_chunks() {
544 JavaThread* jt = JavaThread::current();
545 for (int index = 0; index < frames(); index++ ) {
546 element(index)->free_monitors(jt);
547 }
548 }
550 #ifndef PRODUCT
552 bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
553 if (owner_thread() != thread) return false;
554 int index = 0;
555 #if 0 // FIXME can't do this comparison
557 // Compare only within vframe array.
558 for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
559 if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
560 index++;
561 }
562 if (index != chunk->length()) return false;
563 #endif
565 return true;
566 }
568 #endif
570 address vframeArray::register_location(int i) const {
571 assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
572 return (address) & _callee_registers[i];
573 }
576 #ifndef PRODUCT
578 // Printing
580 // Note: we cannot have print_on as const, as we allocate inside the method
581 void vframeArray::print_on_2(outputStream* st) {
582 st->print_cr(" - sp: " INTPTR_FORMAT, sp());
583 st->print(" - thread: ");
584 Thread::current()->print();
585 st->print_cr(" - frame size: %d", frame_size());
586 for (int index = 0; index < frames() ; index++ ) {
587 element(index)->print(st);
588 }
589 }
591 void vframeArrayElement::print(outputStream* st) {
592 st->print_cr(" - interpreter_frame -> sp: " INTPTR_FORMAT, iframe()->sp());
593 }
595 void vframeArray::print_value_on(outputStream* st) const {
596 st->print_cr("vframeArray [%d] ", frames());
597 }
600 #endif