Mon, 25 Jan 2010 18:03:29 -0500
6919980: G1: remove +UseG1GC from under experimental options (second attempt)
Summary: Trying this again, as the original change was lost.
Reviewed-by: ysr, jmasa
1 /*
2 * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 # include "incls/_precompiled.incl"
26 # include "incls/_vframeArray.cpp.incl"
29 int vframeArrayElement:: bci(void) const { return (_bci == SynchronizationEntryBCI ? 0 : _bci); }
31 void vframeArrayElement::free_monitors(JavaThread* jt) {
32 if (_monitors != NULL) {
33 MonitorChunk* chunk = _monitors;
34 _monitors = NULL;
35 jt->remove_monitor_chunk(chunk);
36 delete chunk;
37 }
38 }
40 void vframeArrayElement::fill_in(compiledVFrame* vf) {
42 // Copy the information from the compiled vframe to the
43 // interpreter frame we will be creating to replace vf
45 _method = vf->method();
46 _bci = vf->raw_bci();
47 _reexecute = vf->should_reexecute();
49 int index;
51 // Get the monitors off-stack
53 GrowableArray<MonitorInfo*>* list = vf->monitors();
54 if (list->is_empty()) {
55 _monitors = NULL;
56 } else {
58 // Allocate monitor chunk
59 _monitors = new MonitorChunk(list->length());
60 vf->thread()->add_monitor_chunk(_monitors);
62 // Migrate the BasicLocks from the stack to the monitor chunk
63 for (index = 0; index < list->length(); index++) {
64 MonitorInfo* monitor = list->at(index);
65 assert(!monitor->owner_is_scalar_replaced(), "object should be reallocated already");
66 assert(monitor->owner() == NULL || (!monitor->owner()->is_unlocked() && !monitor->owner()->has_bias_pattern()), "object must be null or locked, and unbiased");
67 BasicObjectLock* dest = _monitors->at(index);
68 dest->set_obj(monitor->owner());
69 monitor->lock()->move_to(monitor->owner(), dest->lock());
70 }
71 }
73 // Convert the vframe locals and expressions to off stack
74 // values. Because we will not gc all oops can be converted to
75 // intptr_t (i.e. a stack slot) and we are fine. This is
76 // good since we are inside a HandleMark and the oops in our
77 // collection would go away between packing them here and
78 // unpacking them in unpack_on_stack.
80 // First the locals go off-stack
82 // FIXME this seems silly it creates a StackValueCollection
83 // in order to get the size to then copy them and
84 // convert the types to intptr_t size slots. Seems like it
85 // could do it in place... Still uses less memory than the
86 // old way though
88 StackValueCollection *locs = vf->locals();
89 _locals = new StackValueCollection(locs->size());
90 for(index = 0; index < locs->size(); index++) {
91 StackValue* value = locs->at(index);
92 switch(value->type()) {
93 case T_OBJECT:
94 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
95 // preserve object type
96 _locals->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
97 break;
98 case T_CONFLICT:
99 // A dead local. Will be initialized to null/zero.
100 _locals->add( new StackValue());
101 break;
102 case T_INT:
103 _locals->add( new StackValue(value->get_int()));
104 break;
105 default:
106 ShouldNotReachHere();
107 }
108 }
110 // Now the expressions off-stack
111 // Same silliness as above
113 StackValueCollection *exprs = vf->expressions();
114 _expressions = new StackValueCollection(exprs->size());
115 for(index = 0; index < exprs->size(); index++) {
116 StackValue* value = exprs->at(index);
117 switch(value->type()) {
118 case T_OBJECT:
119 assert(!value->obj_is_scalar_replaced(), "object should be reallocated already");
120 // preserve object type
121 _expressions->add( new StackValue((intptr_t) (value->get_obj()()), T_OBJECT ));
122 break;
123 case T_CONFLICT:
124 // A dead stack element. Will be initialized to null/zero.
125 // This can occur when the compiler emits a state in which stack
126 // elements are known to be dead (because of an imminent exception).
127 _expressions->add( new StackValue());
128 break;
129 case T_INT:
130 _expressions->add( new StackValue(value->get_int()));
131 break;
132 default:
133 ShouldNotReachHere();
134 }
135 }
136 }
// Running count of interpreter frames unpacked; used only by the
// TraceDeoptimization && Verbose debug output in unpack_on_stack.
int unpack_counter = 0;
// Unpack this element into the skeletal interpreter frame that was laid out
// for it on the stack (see vframeArray::unpack_to_stack).  Steps:
//   1. Choose the bcp and interpreter continuation pc, handling the
//      synchronized-method-prologue sentinel, bytecode reexecution, JVMTI
//      PopFrame / ForceEarlyReturn, and the deoptimization exec_mode.
//   2. Lay out the activation via Interpreter::layout_activation and patch
//      the final pc into the frame.
//   3. Copy monitors, expression stack and locals into the new frame.
// On return _locals and _expressions are cleared because they live in the
// resource area (see comment at the bottom).
void vframeArrayElement::unpack_on_stack(int callee_parameters,
                                         int callee_locals,
                                         frame* caller,
                                         bool is_top_frame,
                                         int exec_mode) {
  JavaThread* thread = (JavaThread*) Thread::current();

  // Look at bci and decide on bcp and continuation pc
  address bcp;
  // C++ interpreter doesn't need a pc since it will figure out what to do when it
  // begins execution
  address pc;
  bool use_next_mdp = false; // true if we should use the mdp associated with the next bci
                             // rather than the one associated with bcp
  if (raw_bci() == SynchronizationEntryBCI) {
    // We are deoptimizing while hanging in prologue code for synchronized method
    bcp = method()->bcp_from(0); // first byte code
    pc = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode
  } else if (should_reexecute()) { //reexecute this bytecode
    assert(is_top_frame, "reexecute allowed only for the top frame");
    bcp = method()->bcp_from(bci());
    pc = Interpreter::deopt_reexecute_entry(method(), bcp);
  } else {
    bcp = method()->bcp_from(bci());
    pc = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame);
    use_next_mdp = true;
  }
  assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode");

  // Monitorenter and pending exceptions:
  //
  // For Compiler2, there should be no pending exception when deoptimizing at monitorenter
  // because there is no safepoint at the null pointer check (it is either handled explicitly
  // or prior to the monitorenter) and asynchronous exceptions are not made "pending" by the
  // runtime interface for the slow case (see JRT_ENTRY_FOR_MONITORENTER). If an asynchronous
  // exception was processed, the bytecode pointer would have to be extended one bytecode beyond
  // the monitorenter to place it in the proper exception range.
  //
  // For Compiler1, deoptimization can occur while throwing a NullPointerException at monitorenter,
  // in which case bcp should point to the monitorenter since it is within the exception's range.

  assert(*bcp != Bytecodes::_monitorenter || is_top_frame, "a _monitorenter must be a top frame");
  // TIERED Must know the compiler of the deoptee QQQ
  COMPILER2_PRESENT(guarantee(*bcp != Bytecodes::_monitorenter || exec_mode != Deoptimization::Unpack_exception,
                              "shouldn't get exception during monitorenter");)

  int popframe_preserved_args_size_in_bytes = 0;
  int popframe_preserved_args_size_in_words = 0;
  if (is_top_frame) {
    // Only the top (youngest) frame can be affected by JVMTI PopFrame /
    // ForceEarlyReturn or by the exec_mode-specific pc overrides below.
    JvmtiThreadState *state = thread->jvmti_thread_state();
    if (JvmtiExport::can_pop_frame() &&
        (thread->has_pending_popframe() || thread->popframe_forcing_deopt_reexecution())) {
      if (thread->has_pending_popframe()) {
        // Pop top frame after deoptimization
#ifndef CC_INTERP
        pc = Interpreter::remove_activation_preserving_args_entry();
#else
        // Do an uncommon trap type entry. c++ interpreter will know
        // to pop frame and preserve the args
        pc = Interpreter::deopt_entry(vtos, 0);
        use_next_mdp = false;
#endif
      } else {
        // Reexecute invoke in top frame
        pc = Interpreter::deopt_entry(vtos, 0);
        use_next_mdp = false;
        popframe_preserved_args_size_in_bytes = in_bytes(thread->popframe_preserved_args_size());
        // Note: the PopFrame-related extension of the expression stack size is done in
        // Deoptimization::fetch_unroll_info_helper
        popframe_preserved_args_size_in_words = in_words(thread->popframe_preserved_args_size_in_words());
      }
    } else if (JvmtiExport::can_force_early_return() && state != NULL && state->is_earlyret_pending()) {
      // Force early return from top frame after deoptimization
#ifndef CC_INTERP
      pc = Interpreter::remove_activation_early_entry(state->earlyret_tos());
#else
      // TBD: Need to implement ForceEarlyReturn for CC_INTERP (ia64)
#endif
    } else {
      // Possibly override the previous pc computation of the top (youngest) frame
      switch (exec_mode) {
      case Deoptimization::Unpack_deopt:
        // use what we've got
        break;
      case Deoptimization::Unpack_exception:
        // exception is pending
        pc = SharedRuntime::raw_exception_handler_for_return_address(pc);
        // [phh] We're going to end up in some handler or other, so it doesn't
        // matter what mdp we point to. See exception_handler_for_exception()
        // in interpreterRuntime.cpp.
        break;
      case Deoptimization::Unpack_uncommon_trap:
      case Deoptimization::Unpack_reexecute:
        // redo last byte code
        pc = Interpreter::deopt_entry(vtos, 0);
        use_next_mdp = false;
        break;
      default:
        ShouldNotReachHere();
      }
    }
  }

  // Setup the interpreter frame

  assert(method() != NULL, "method must exist");
  int temps = expressions()->size();

  int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();

  Interpreter::layout_activation(method(),
                                 temps + callee_parameters,
                                 popframe_preserved_args_size_in_words,
                                 locks,
                                 callee_parameters,
                                 callee_locals,
                                 caller,
                                 iframe(),
                                 is_top_frame);

  // Update the pc in the frame object and overwrite the temporary pc
  // we placed in the skeletal frame now that we finally know the
  // exact interpreter address we should use.

  _frame.patch_pc(thread, pc);

  assert (!method()->is_synchronized() || locks > 0, "synchronized methods must have monitors");

  // Move the BasicLocks back from the off-stack monitor chunk into the
  // interpreter frame's monitor area (walked youngest-to-oldest).
  BasicObjectLock* top = iframe()->interpreter_frame_monitor_begin();
  for (int index = 0; index < locks; index++) {
    top = iframe()->previous_monitor_in_interpreter_frame(top);
    BasicObjectLock* src = _monitors->at(index);
    top->set_obj(src->obj());
    src->lock()->move_to(src->obj(), top->lock());
  }
  if (ProfileInterpreter) {
    iframe()->interpreter_frame_set_mdx(0); // clear out the mdp.
  }
  iframe()->interpreter_frame_set_bcx((intptr_t)bcp); // cannot use bcp because frame is not initialized yet
  if (ProfileInterpreter) {
    methodDataOop mdo = method()->method_data();
    if (mdo != NULL) {
      int bci = iframe()->interpreter_frame_bci();
      if (use_next_mdp) ++bci;
      address mdp = mdo->bci_to_dp(bci);
      iframe()->interpreter_frame_set_mdp(mdp);
    }
  }

  // Unpack expression stack
  // If this is an intermediate frame (i.e. not top frame) then this
  // only unpacks the part of the expression stack not used by callee
  // as parameters. The callee parameters are unpacked as part of the
  // callee locals.
  int i;
  for(i = 0; i < expressions()->size(); i++) {
    StackValue *value = expressions()->at(i);
    intptr_t* addr = iframe()->interpreter_frame_expression_stack_at(i);
    switch(value->type()) {
    case T_INT:
      *addr = value->get_int();
      break;
    case T_OBJECT:
      *addr = value->get_int(T_OBJECT);
      break;
    case T_CONFLICT:
      // A dead stack slot. Initialize to null in case it is an oop.
      *addr = NULL_WORD;
      break;
    default:
      ShouldNotReachHere();
    }
    if (TaggedStackInterpreter) {
      // Write tag to the stack
      iframe()->interpreter_frame_set_expression_stack_tag(i,
                                  frame::tag_for_basic_type(value->type()));
    }
  }


  // Unpack the locals
  for(i = 0; i < locals()->size(); i++) {
    StackValue *value = locals()->at(i);
    intptr_t* addr = iframe()->interpreter_frame_local_at(i);
    switch(value->type()) {
    case T_INT:
      *addr = value->get_int();
      break;
    case T_OBJECT:
      *addr = value->get_int(T_OBJECT);
      break;
    case T_CONFLICT:
      // A dead location. If it is an oop then we need a NULL to prevent GC from following it
      *addr = NULL_WORD;
      break;
    default:
      ShouldNotReachHere();
    }
    if (TaggedStackInterpreter) {
      // Write tag to stack
      iframe()->interpreter_frame_set_local_tag(i,
                                  frame::tag_for_basic_type(value->type()));
    }
  }

  if (is_top_frame && JvmtiExport::can_pop_frame() && thread->popframe_forcing_deopt_reexecution()) {
    // An interpreted frame was popped but it returns to a deoptimized
    // frame. The incoming arguments to the interpreted activation
    // were preserved in thread-local storage by the
    // remove_activation_preserving_args_entry in the interpreter; now
    // we put them back into the just-unpacked interpreter frame.
    // Note that this assumes that the locals arena grows toward lower
    // addresses.
    if (popframe_preserved_args_size_in_words != 0) {
      void* saved_args = thread->popframe_preserved_args();
      assert(saved_args != NULL, "must have been saved by interpreter");
#ifdef ASSERT
      int stack_words = Interpreter::stackElementWords();
      assert(popframe_preserved_args_size_in_words <=
             iframe()->interpreter_frame_expression_stack_size()*stack_words,
             "expression stack size should have been extended");
#endif // ASSERT
      int top_element = iframe()->interpreter_frame_expression_stack_size()-1;
      intptr_t* base;
      if (frame::interpreter_frame_expression_stack_direction() < 0) {
        base = iframe()->interpreter_frame_expression_stack_at(top_element);
      } else {
        base = iframe()->interpreter_frame_expression_stack();
      }
      Copy::conjoint_bytes(saved_args,
                           base,
                           popframe_preserved_args_size_in_bytes);
      thread->popframe_free_preserved_args();
    }
  }

#ifndef PRODUCT
  if (TraceDeoptimization && Verbose) {
    ttyLocker ttyl;
    tty->print_cr("[%d Interpreted Frame]", ++unpack_counter);
    iframe()->print_on(tty);
    RegisterMap map(thread);
    vframe* f = vframe::new_vframe(iframe(), &map, thread);
    f->print();
    iframe()->interpreter_frame_print_on(tty);

    tty->print_cr("locals size %d", locals()->size());
    tty->print_cr("expression size %d", expressions()->size());

    method()->print_value();
    tty->cr();
    // method()->print_codes();
  } else if (TraceDeoptimization) {
    tty->print(" ");
    method()->print_value();
    Bytecodes::Code code = Bytecodes::java_code_at(bcp);
    int bci = method()->bci_from(bcp);
    tty->print(" - %s", Bytecodes::name(code));
    tty->print(" @ bci %d ", bci);
    tty->print_cr("sp = " PTR_FORMAT, iframe()->sp());
  }
#endif // PRODUCT

  // The expression stack and locals are in the resource area don't leave
  // a dangling pointer in the vframeArray we leave around for debug
  // purposes

  _locals = _expressions = NULL;

}
411 int vframeArrayElement::on_stack_size(int callee_parameters,
412 int callee_locals,
413 bool is_top_frame,
414 int popframe_extra_stack_expression_els) const {
415 assert(method()->max_locals() == locals()->size(), "just checking");
416 int locks = monitors() == NULL ? 0 : monitors()->number_of_monitors();
417 int temps = expressions()->size();
418 return Interpreter::size_activation(method(),
419 temps + callee_parameters,
420 popframe_extra_stack_expression_els,
421 locks,
422 callee_parameters,
423 callee_locals,
424 is_top_frame);
425 }
429 vframeArray* vframeArray::allocate(JavaThread* thread, int frame_size, GrowableArray<compiledVFrame*>* chunk,
430 RegisterMap *reg_map, frame sender, frame caller, frame self) {
432 // Allocate the vframeArray
433 vframeArray * result = (vframeArray*) AllocateHeap(sizeof(vframeArray) + // fixed part
434 sizeof(vframeArrayElement) * (chunk->length() - 1), // variable part
435 "vframeArray::allocate");
436 result->_frames = chunk->length();
437 result->_owner_thread = thread;
438 result->_sender = sender;
439 result->_caller = caller;
440 result->_original = self;
441 result->set_unroll_block(NULL); // initialize it
442 result->fill_in(thread, frame_size, chunk, reg_map);
443 return result;
444 }
// Fill in every element from the corresponding compiled vframe in chunk,
// then save the callee-saved register values from reg_map into
// _callee_registers, recording per-register validity.
// NOTE(review): the "Set owner first" comment below refers to
// _owner_thread, which is assigned in allocate() before this is called;
// element fill_in adds monitor chunks to that owner thread.
void vframeArray::fill_in(JavaThread* thread,
                          int frame_size,
                          GrowableArray<compiledVFrame*>* chunk,
                          const RegisterMap *reg_map) {
  // Set owner first, it is used when adding monitor chunks

  _frame_size = frame_size;
  for(int i = 0; i < chunk->length(); i++) {
    element(i)->fill_in(chunk->at(i));
  }

  // Copy registers for callee-saved registers
  if (reg_map != NULL) {
    for(int i = 0; i < RegisterMap::reg_count; i++) {
#ifdef AMD64
      // The register map has one entry for every int (32-bit value), so
      // 64-bit physical registers have two entries in the map, one for
      // each half. Ignore the high halves of 64-bit registers, just like
      // frame::oopmapreg_to_location does.
      //
      // [phh] FIXME: this is a temporary hack! This code *should* work
      // correctly w/o this hack, possibly by changing RegisterMap::pd_location
      // in frame_amd64.cpp and the values of the phantom high half registers
      // in amd64.ad.
      //      if (VMReg::Name(i) < SharedInfo::stack0 && is_even(i)) {
      intptr_t* src = (intptr_t*) reg_map->location(VMRegImpl::as_VMReg(i));
      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
      //      } else {
      //      jint* src = (jint*) reg_map->location(VMReg::Name(i));
      //      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
      //      }
#else
      jint* src = (jint*) reg_map->location(VMRegImpl::as_VMReg(i));
      _callee_registers[i] = src != NULL ? *src : NULL_WORD;
#endif
      // A NULL source means the register's value was not saved anywhere we
      // can find it; mark the slot invalid so readers know not to trust it.
      if (src == NULL) {
        set_location_valid(i, false);
      } else {
        set_location_valid(i, true);
        jint* dst = (jint*) register_location(i);
        *dst = *src;
      }
    }
  }
}
// Unpack all elements into the skeletal interpreter frames already laid out
// on the stack.  First the frames are located youngest-first by walking
// senders from unpack_frame; then they are unpacked oldest-first (index
// frames()-1 down to 0) so each frame knows its caller.  Finally the
// off-stack monitor chunks are freed.
void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
  // stack picture
  //   unpack_frame
  //   [new interpreter frames ] (frames are skeletal but walkable)
  //   caller_frame
  //
  // This routine fills in the missing data for the skeletal interpreter frames
  // in the above picture.

  // Find the skeletal interpreter frames to unpack into
  RegisterMap map(JavaThread::current(), false);
  // Get the youngest frame we will unpack (last to be unpacked)
  frame me = unpack_frame.sender(&map);
  int index;
  for (index = 0; index < frames(); index++ ) {
    *element(index)->iframe() = me;
    // Get the caller frame (possibly skeletal)
    me = me.sender(&map);
  }

  // After the walk, 'me' is the first real (non-skeletal) caller frame.
  frame caller_frame = me;

  // Do the unpacking of interpreter frames; the frame at index 0 represents the top activation, so it has no callee

  // Unpack the frames from the oldest (frames() -1) to the youngest (0)

  for (index = frames() - 1; index >= 0 ; index--) {
    int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
    int callee_locals = index == 0 ? 0 : element(index-1)->method()->max_locals();
    element(index)->unpack_on_stack(callee_parameters,
                                    callee_locals,
                                    &caller_frame,
                                    index == 0,
                                    exec_mode);
    if (index == frames() - 1) {
      // Oldest new frame: restore the callee-saved register values recorded
      // in this vframeArray beneath it.
      Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
    }
    caller_frame = *element(index)->iframe();
  }


  deallocate_monitor_chunks();
}
536 void vframeArray::deallocate_monitor_chunks() {
537 JavaThread* jt = JavaThread::current();
538 for (int index = 0; index < frames(); index++ ) {
539 element(index)->free_monitors(jt);
540 }
541 }
543 #ifndef PRODUCT
// Debug check: returns true iff this vframeArray was created by 'thread'
// and structurally matches the given chunk of compiled vframes.  The
// element-by-element comparison is disabled (#if 0 below), so currently
// only the owner-thread check is actually performed.
bool vframeArray::structural_compare(JavaThread* thread, GrowableArray<compiledVFrame*>* chunk) {
  if (owner_thread() != thread) return false;
  int index = 0;
#if 0 // FIXME can't do this comparison

  // Compare only within vframe array.
  for (deoptimizedVFrame* vf = deoptimizedVFrame::cast(vframe_at(first_index())); vf; vf = vf->deoptimized_sender_or_null()) {
    if (index >= chunk->length() || !vf->structural_compare(chunk->at(index))) return false;
    index++;
  }
  if (index != chunk->length()) return false;
#endif

  return true;
}
561 #endif
// Return the address of the slot holding the saved value of callee-saved
// register i.  The slot contents are meaningful only if fill_in recorded
// set_location_valid(i, true) for this register.
address vframeArray::register_location(int i) const {
  assert(0 <= i && i < RegisterMap::reg_count, "index out of bounds");
  return (address) & _callee_registers[i];
}
569 #ifndef PRODUCT
571 // Printing
573 // Note: we cannot have print_on as const, as we allocate inside the method
574 void vframeArray::print_on_2(outputStream* st) {
575 st->print_cr(" - sp: " INTPTR_FORMAT, sp());
576 st->print(" - thread: ");
577 Thread::current()->print();
578 st->print_cr(" - frame size: %d", frame_size());
579 for (int index = 0; index < frames() ; index++ ) {
580 element(index)->print(st);
581 }
582 }
584 void vframeArrayElement::print(outputStream* st) {
585 st->print_cr(" - interpreter_frame -> sp: ", INTPTR_FORMAT, iframe()->sp());
586 }
// Print a short value description: just the number of frames in the array.
void vframeArray::print_value_on(outputStream* st) const {
  st->print_cr("vframeArray [%d] ", frames());
}
593 #endif