Thu, 07 Apr 2011 09:53:20 -0700
7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "interpreter/bytecodeHistogram.hpp"
28 #include "interpreter/interpreter.hpp"
29 #include "interpreter/interpreterGenerator.hpp"
30 #include "interpreter/interpreterRuntime.hpp"
31 #include "interpreter/templateTable.hpp"
32 #include "oops/arrayOop.hpp"
33 #include "oops/methodDataOop.hpp"
34 #include "oops/methodOop.hpp"
35 #include "oops/oop.inline.hpp"
36 #include "prims/jvmtiExport.hpp"
37 #include "prims/jvmtiThreadState.hpp"
38 #include "prims/methodHandles.hpp"
39 #include "runtime/arguments.hpp"
40 #include "runtime/deoptimization.hpp"
41 #include "runtime/frame.inline.hpp"
42 #include "runtime/sharedRuntime.hpp"
43 #include "runtime/stubRoutines.hpp"
44 #include "runtime/synchronizer.hpp"
45 #include "runtime/timer.hpp"
46 #include "runtime/vframeArray.hpp"
47 #include "utilities/debug.hpp"
48 #ifdef COMPILER1
49 #include "c1/c1_Runtime1.hpp"
50 #endif
54 // Generation of Interpreter
55 //
56 // The InterpreterGenerator generates the interpreter into Interpreter::_code.
59 #define __ _masm->
62 //----------------------------------------------------------------------------------------------------
67 int AbstractInterpreter::BasicType_as_index(BasicType type) {
68 int i = 0;
69 switch (type) {
70 case T_BOOLEAN: i = 0; break;
71 case T_CHAR : i = 1; break;
72 case T_BYTE : i = 2; break;
73 case T_SHORT : i = 3; break;
74 case T_INT : i = 4; break;
75 case T_LONG : i = 5; break;
76 case T_VOID : i = 6; break;
77 case T_FLOAT : i = 7; break;
78 case T_DOUBLE : i = 8; break;
79 case T_OBJECT : i = 9; break;
80 case T_ARRAY : i = 9; break;
81 default : ShouldNotReachHere();
82 }
83 assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
84 return i;
85 }
88 #ifndef _LP64
// Emits the slow signature handler stub for the non-LP64 build and returns
// the address of its first instruction.
//
// The emitted code records the last Java frame, pushes a new frame, calls
// InterpreterRuntime::slow_signature_handler, reloads the register arguments
// from their frame slots, and returns with Lscratch of the caller's register
// window set from O0.
89 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
// Start address of the stub; this is the value handed back to the caller.
90 address entry = __ pc();
// NOTE(review): presumably "incoming argument 0" — confirm the meaning of the
// second constructor parameter against the Argument class.
91 Argument argv(0, true);
93 // We are in the jni transition frame. Save the last_java_frame corresponding to the
94 // outer interpreter frame
95 //
96 __ set_last_Java_frame(FP, noreg);
97 // make sure the interpreter frame we've pushed has a valid return pc
98 __ mov(O7, I7);
// Stash Lmethod and Llocals in scratch globals before save_frame so they can
// be moved into the outgoing registers afterwards.
99 __ mov(Lmethod, G3_scratch);
100 __ mov(Llocals, G4_scratch);
101 __ save_frame(0);
// Keep a copy of G2_thread in L7_thread_cache; it is copied back after the call.
102 __ mov(G2_thread, L7_thread_cache);
// Outgoing registers for the runtime call:
//   O0 = G2_thread, O1 = method (via G3_scratch),
//   O2 = locals (via G4_scratch, set in the delay slot),
//   O3 = address of argv's frame slot.
103 __ add(argv.address_in_frame(), O3);
104 __ mov(G2_thread, O0);
105 __ mov(G3_scratch, O1);
106 __ call(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), relocInfo::runtime_call_type);
107 __ delayed()->mov(G4_scratch, O2);
// Back from the runtime: restore G2_thread from its cached copy and clear the
// last-Java-frame record set above.
108 __ mov(L7_thread_cache, G2_thread);
109 __ reset_last_Java_frame();
111 // load the register arguments (the C code packed them as varargs)
// The loop runs at code-generation time and emits one load per argument,
// starting with the successor of argv and stopping at the first argument
// that is not passed in a register.
112 for (Argument ldarg = argv.successor(); ldarg.is_register(); ldarg = ldarg.successor()) {
113 __ ld_ptr(ldarg.address_in_frame(), ldarg.as_register());
114 }
// Return; the restore in the delay slot copies O0 into Lscratch.
115 __ ret();
116 __ delayed()->
117 restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler
118 return entry;
119 }
122 #else
123 // LP64 passes floating point arguments in F1, F3, F5, etc. instead of
124 // O0, O1, O2 etc..
125 // Doubles are passed in D0, D2, D4
126 // We store the signature of the first 16 arguments in the first argument
127 // slot because it will be overwritten prior to calling the native
128 // function, with the pointer to the JNIEnv.
129 // If LP64 there can be up to 16 floating point arguments in registers
130 // or 6 integer registers.
// Emits the slow signature handler stub for the LP64 build and returns the
// address of its first instruction.
//
// After calling InterpreterRuntime::slow_signature_handler, the emitted code
// reads a signature word from argument slot 0 and consumes it two bits per
// argument to decide whether each argument is loaded into an integer
// register, a single-precision float register, or a double register.
131 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
// Two-bit per-argument codes found in the signature word; sig_mask extracts
// one code from the low bits.
133 enum {
134 non_float = 0,
135 float_sig = 1,
136 double_sig = 2,
137 sig_mask = 3
138 };
// Start address of the stub; this is the value handed back to the caller.
140 address entry = __ pc();
// NOTE(review): presumably "incoming argument 0" — confirm the meaning of the
// second constructor parameter against the Argument class.
141 Argument argv(0, true);
143 // We are in the jni transition frame. Save the last_java_frame corresponding to the
144 // outer interpreter frame
145 //
146 __ set_last_Java_frame(FP, noreg);
147 // make sure the interpreter frame we've pushed has a valid return pc
148 __ mov(O7, I7);
// Stash Lmethod and Llocals in scratch globals before save_frame so they can
// be moved into the outgoing registers afterwards.
149 __ mov(Lmethod, G3_scratch);
150 __ mov(Llocals, G4_scratch);
151 __ save_frame(0);
// Keep a copy of G2_thread in L7_thread_cache; it is copied back after the call.
152 __ mov(G2_thread, L7_thread_cache);
// Outgoing registers for the runtime call:
//   O0 = G2_thread, O1 = method (via G3_scratch),
//   O2 = locals (via G4_scratch, set in the delay slot),
//   O3 = address of argv's frame slot.
153 __ add(argv.address_in_frame(), O3);
154 __ mov(G2_thread, O0);
155 __ mov(G3_scratch, O1);
156 __ call(CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), relocInfo::runtime_call_type);
157 __ delayed()->mov(G4_scratch, O2);
// Back from the runtime: restore G2_thread from its cached copy and clear the
// last-Java-frame record set above.
158 __ mov(L7_thread_cache, G2_thread);
159 __ reset_last_Java_frame();
162 // load the register arguments (the C code packed them as varargs)
163 Address Sig = argv.address_in_frame(); // Argument 0 holds the signature
164 __ ld_ptr( Sig, G3_scratch ); // Get register argument signature word into G3_scratch
// G4_scratch holds the not-yet-consumed signature bits; G3_scratch is reused
// below as the per-argument code.
165 __ mov( G3_scratch, G4_scratch);
166 __ srl( G4_scratch, 2, G4_scratch); // Skip Arg 0
// Target for the early exit taken once no signature bits remain.
167 Label done;
// This C++ loop runs at code-generation time: it emits one classify-and-load
// sequence per argument for as long as the argument has a float register.
168 for (Argument ldarg = argv.successor(); ldarg.is_float_register(); ldarg = ldarg.successor()) {
169 Label NonFloatArg;
170 Label LoadFloatArg;
171 Label LoadDoubleArg;
172 Label NextArg;
173 Address a = ldarg.address_in_frame();
// Extract this argument's two-bit code into G3_scratch; zero means non-float.
174 __ andcc(G4_scratch, sig_mask, G3_scratch);
175 __ br(Assembler::zero, false, Assembler::pt, NonFloatArg);
176 __ delayed()->nop();
178 __ cmp(G3_scratch, float_sig );
179 __ br(Assembler::equal, false, Assembler::pt, LoadFloatArg);
180 __ delayed()->nop();
182 __ cmp(G3_scratch, double_sig );
183 __ br(Assembler::equal, false, Assembler::pt, LoadDoubleArg);
184 __ delayed()->nop();
// A code that matched neither float_sig nor double_sig falls through into
// the non-float path.
186 __ bind(NonFloatArg);
187 // There are only 6 integer register arguments!
// This `if` is evaluated while generating code, so each emitted sequence
// contains either the integer load or the early-exit test, never both.
188 if ( ldarg.is_register() )
189 __ ld_ptr(ldarg.address_in_frame(), ldarg.as_register());
190 else {
191 // Optimization, see if there are any more args and get out prior to checking
192 // all 16 float registers. My guess is that this is rare.
193 // If is_register is false, then we are done with the first six integer args.
194 __ tst(G4_scratch);
195 __ brx(Assembler::zero, false, Assembler::pt, done);
196 __ delayed()->nop();
198 }
// Each path ends by branching to NextArg; the delay-slot srl drops the two
// signature bits just consumed.
199 __ ba(false, NextArg);
200 __ delayed()->srl( G4_scratch, 2, G4_scratch );
202 __ bind(LoadFloatArg);
// NOTE(review): the single-precision load uses offset 4 into the slot,
// presumably because the float sits in the second half of an 8-byte slot —
// confirm.
203 __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4);
204 __ ba(false, NextArg);
205 __ delayed()->srl( G4_scratch, 2, G4_scratch );
207 __ bind(LoadDoubleArg);
208 __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() );
209 __ ba(false, NextArg);
210 __ delayed()->srl( G4_scratch, 2, G4_scratch );
212 __ bind(NextArg);
214 }
216 __ bind(done);
// Return; the restore in the delay slot copies O0 into Lscratch.
217 __ ret();
218 __ delayed()->
219 restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler
220 return entry;
221 }
222 #endif
// Emits the counter-overflow sequence: a VM call to
// InterpreterRuntime::frequency_counter_overflow followed by an unconditional
// branch to Lcontinue.
//
// Lcontinue: label at which the generated code resumes after the VM call.
224 void InterpreterGenerator::generate_counter_overflow(Label& Lcontinue) {
226 // Generate code to initiate compilation on the counter overflow.
228 // InterpreterRuntime::frequency_counter_overflow takes two arguments,
229 // the first indicates if the counter overflow occurs at a backwards branch (NULL bcp)
230 // and the second is only used when the first is true. We pass zero for both.
231 // The call returns the address of the verified entry point for the method or NULL
232 // if the compilation did not complete (either went background or bailed out).
// O2 is loaded with zero and passed as both arguments.
233 __ set((int)false, O2);
// No result register is requested (noreg).
// NOTE(review): the trailing `true` is presumably call_VM's check_exceptions
// flag — confirm against MacroAssembler::call_VM.
234 __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), O2, O2, true);
235 // returns verified_entry_point or NULL
236 // we ignore it in any case
// Unconditionally continue at Lcontinue; the delay slot is filled with a nop.
237 __ ba(false, Lcontinue);
238 __ delayed()->nop();
240 }
243 // End of helpers
245 // Various method entries
247 // Abstract method entry
248 // Attempt to execute abstract method. Throw exception
249 //
// Emits the entry point used for abstract methods and returns its address.
// The emitted code calls InterpreterRuntime::throw_AbstractMethodError and
// then traps if control ever comes back.
250 address InterpreterGenerator::generate_abstract_entry(void) {
// Start address of the entry; this is the value handed back to the caller.
251 address entry = __ pc();
252 // abstract method entry
253 // throw exception
254 __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
255 // the call_VM checks for exception, so we should never return here.
256 __ should_not_reach_here();
257 return entry;
259 }
262 // Method handle invoker
263 // Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
264 address InterpreterGenerator::generate_method_handle_entry(void) {
265 if (!EnableMethodHandles) {
266 return generate_abstract_entry();
267 }
269 return MethodHandles::generate_method_handle_interpreter_entry(_masm);
270 }
273 //----------------------------------------------------------------------------------------------------
274 // Entry points & stack frame layout
275 //
276 // Here we generate the various kind of entries into the interpreter.
277 // The two main entry types are generic bytecode methods and native call methods.
278 // These both come in synchronized and non-synchronized versions but the
279 // frame layout they create is very similar. The other method entry
280 // types are really just special purpose entries that are really entry
281 // and interpretation all in one. These are for trivial methods like
282 // accessor, empty, or special math methods.
283 //
284 // When control flow reaches any of the entry types for the interpreter
285 // the following holds ->
286 //
287 // C2 Calling Conventions:
288 //
289 // The entry code below assumes that the following registers are set
290 // when coming in:
291 // G5_method: holds the methodOop of the method to call
292 // Lesp: points to the TOS of the callers expression stack
293 // after having pushed all the parameters
294 //
295 // The entry code does the following to setup an interpreter frame
296 // pop parameters from the callers stack by adjusting Lesp
297 // set O0 to Lesp
298 // compute X = (max_locals - num_parameters)
299 // bump SP up by X to accommodate the extra locals
300 // compute X = max_expression_stack
301 // + vm_local_words
302 // + 16 words of register save area
303 // save frame doing a save sp, -X, sp growing towards lower addresses
304 // set Lbcp, Lmethod, LcpoolCache
305 // set Llocals to i0
306 // set Lmonitors to FP - rounded_vm_local_words
307 // set Lesp to Lmonitors - 4
308 //
309 // The frame has now been setup to do the rest of the entry code
311 // Try this optimization: Most method entries could live in a
312 // "one size fits all" stack frame without all the dynamic size
313 // calculations. It might be profitable to do all this calculation
314 // statically and approximately for "small enough" methods.
316 //-----------------------------------------------------------------------------------------------
318 // C1 Calling conventions
319 //
320 // Upon method entry, the following registers are setup:
321 //
322 // g2 G2_thread: current thread
323 // g5 G5_method: method to activate
324 // g4 Gargs : pointer to last argument
325 //
326 //
327 // Stack:
328 //
329 // +---------------+ <--- sp
330 // | |
331 // : reg save area :
332 // | |
333 // +---------------+ <--- sp + 0x40
334 // | |
335 // : extra 7 slots : note: these slots are not really needed for the interpreter (fix later)
336 // | |
337 // +---------------+ <--- sp + 0x5c
338 // | |
339 // : free :
340 // | |
341 // +---------------+ <--- Gargs
342 // | |
343 // : arguments :
344 // | |
345 // +---------------+
346 // | |
347 //
348 //
349 //
350 // AFTER FRAME HAS BEEN SETUP for method interpretation the stack looks like:
351 //
352 // +---------------+ <--- sp
353 // | |
354 // : reg save area :
355 // | |
356 // +---------------+ <--- sp + 0x40
357 // | |
358 // : extra 7 slots : note: these slots are not really needed for the interpreter (fix later)
359 // | |
360 // +---------------+ <--- sp + 0x5c
361 // | |
362 // : :
363 // | | <--- Lesp
364 // +---------------+ <--- Lmonitors (fp - 0x18)
365 // | VM locals |
366 // +---------------+ <--- fp
367 // | |
368 // : reg save area :
369 // | |
370 // +---------------+ <--- fp + 0x40
371 // | |
372 // : extra 7 slots : note: these slots are not really needed for the interpreter (fix later)
373 // | |
374 // +---------------+ <--- fp + 0x5c
375 // | |
376 // : free :
377 // | |
378 // +---------------+
379 // | |
380 // : nonarg locals :
381 // | |
382 // +---------------+
383 // | |
384 // : arguments :
385 // | | <--- Llocals
386 // +---------------+ <--- Gargs
387 // | |
389 address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) {
390 // determine code generation flags
391 bool synchronized = false;
392 address entry_point = NULL;
394 switch (kind) {
395 case Interpreter::zerolocals : break;
396 case Interpreter::zerolocals_synchronized: synchronized = true; break;
397 case Interpreter::native : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
398 case Interpreter::native_synchronized : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); break;
399 case Interpreter::empty : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); break;
400 case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break;
401 case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break;
402 case Interpreter::method_handle : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
403 case Interpreter::java_lang_math_sin : break;
404 case Interpreter::java_lang_math_cos : break;
405 case Interpreter::java_lang_math_tan : break;
406 case Interpreter::java_lang_math_sqrt : break;
407 case Interpreter::java_lang_math_abs : break;
408 case Interpreter::java_lang_math_log : break;
409 case Interpreter::java_lang_math_log10 : break;
410 case Interpreter::java_lang_ref_reference_get
411 : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
412 default : ShouldNotReachHere(); break;
413 }
415 if (entry_point) return entry_point;
417 return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized);
418 }
421 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
422 // No special entry points that preclude compilation
423 return true;
424 }
426 // This method tells the deoptimizer how big an interpreted frame must be:
427 int AbstractInterpreter::size_activation(methodOop method,
428 int tempcount,
429 int popframe_extra_args,
430 int moncount,
431 int callee_param_count,
432 int callee_locals,
433 bool is_top_frame) {
434 return layout_activation(method,
435 tempcount,
436 popframe_extra_args,
437 moncount,
438 callee_param_count,
439 callee_locals,
440 (frame*)NULL,
441 (frame*)NULL,
442 is_top_frame);
443 }
445 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
447 // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
448 // the days we had adapter frames. When we deoptimize a situation where a
449 // compiled caller calls a compiled caller will have registers it expects
450 // to survive the call to the callee. If we deoptimize the callee the only
451 // way we can restore these registers is to have the oldest interpreter
452 // frame that we create restore these values. That is what this routine
453 // will accomplish.
455 // At the moment we have modified c2 to not have any callee save registers
456 // so this problem does not exist and this routine is just a place holder.
458 assert(f->is_interpreted_frame(), "must be interpreted");
459 }
462 //----------------------------------------------------------------------------------------------------
463 // Exceptions