Thu, 07 Apr 2011 09:53:20 -0700
7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking, but is re-attached to the strongly reachable object graph during marking, may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method and by intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
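Conceptually, the fix makes every read of a non-null referent visible to the concurrent marker. Below is a minimal, self-contained sketch of that idea; every name in it is a hypothetical stand-in for illustration, not HotSpot's actual types or the code in this file:

#include <cstddef>
struct oop;                               // stand-in for an object pointer
extern oop** satb_buffer;                 // stand-in for the (per-thread) SATB queue
extern size_t satb_index;
extern bool marking_active;               // true while concurrent marking is running

oop* reference_get(oop** referent_field) {
  oop* referent = *referent_field;        // the ordinary field load
  if (marking_active && referent != NULL) {
    satb_buffer[satb_index++] = referent; // log it so marking treats it as live
  }
  return referent;
}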
1 /*
2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "assembler_sparc.inline.hpp"
28 #include "code/debugInfoRec.hpp"
29 #include "code/icBuffer.hpp"
30 #include "code/vtableStubs.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "oops/compiledICHolderOop.hpp"
33 #include "prims/jvmtiRedefineClassesTrace.hpp"
34 #include "runtime/sharedRuntime.hpp"
35 #include "runtime/vframeArray.hpp"
36 #include "vmreg_sparc.inline.hpp"
37 #ifdef COMPILER1
38 #include "c1/c1_Runtime1.hpp"
39 #endif
40 #ifdef COMPILER2
41 #include "opto/runtime.hpp"
42 #endif
43 #ifdef SHARK
44 #include "compiler/compileBroker.hpp"
45 #include "shark/sharkCompiler.hpp"
46 #endif
48 #define __ masm->
50 #ifdef COMPILER2
51 UncommonTrapBlob* SharedRuntime::_uncommon_trap_blob;
52 #endif // COMPILER2
54 DeoptimizationBlob* SharedRuntime::_deopt_blob;
55 SafepointBlob* SharedRuntime::_polling_page_safepoint_handler_blob;
56 SafepointBlob* SharedRuntime::_polling_page_return_handler_blob;
57 RuntimeStub* SharedRuntime::_wrong_method_blob;
58 RuntimeStub* SharedRuntime::_ic_miss_blob;
59 RuntimeStub* SharedRuntime::_resolve_opt_virtual_call_blob;
60 RuntimeStub* SharedRuntime::_resolve_virtual_call_blob;
61 RuntimeStub* SharedRuntime::_resolve_static_call_blob;
63 class RegisterSaver {
65 // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
66 // The Oregs are problematic. In the 32bit build the compiler can
67 // have O registers live with 64 bit quantities. A window save will
68 // cut the heads off of the registers. We have to do a very extensive
69 // stack dance to save and restore these properly.
71 // Note that the Oregs problem only exists if we block at either a polling
72 // page exception or a compiled-code safepoint that was not originally a call,
73 // or if we deoptimize following one of these kinds of safepoints.
75 // Lots of registers to save. For all builds, a window save will preserve
76 // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit
77 // builds a window-save will preserve the %o registers. In the LION build
78 // we need to save the 64-bit %o registers which requires we save them
79 // before the window-save (as then they become %i registers and get their
80 // heads chopped off on interrupt). We have to save some %g registers here
81 // as well.
82 enum {
83 // This frame's save area. Includes extra space for the native call:
84 // vararg's layout space and the like. Briefly holds the caller's
85 // register save area.
86 call_args_area = frame::register_save_words_sp_offset +
87 frame::memory_parameter_word_sp_offset*wordSize,
88 // Make sure save locations are always 8 byte aligned.
89 // Can't use round_to because it doesn't produce a compile-time constant.
90 start_of_extra_save_area = ((call_args_area + 7) & ~7),
91 g1_offset = start_of_extra_save_area, // g-regs needing saving
92 g3_offset = g1_offset+8,
93 g4_offset = g3_offset+8,
94 g5_offset = g4_offset+8,
95 o0_offset = g5_offset+8,
96 o1_offset = o0_offset+8,
97 o2_offset = o1_offset+8,
98 o3_offset = o2_offset+8,
99 o4_offset = o3_offset+8,
100 o5_offset = o4_offset+8,
101 start_of_flags_save_area = o5_offset+8,
102 ccr_offset = start_of_flags_save_area,
103 fsr_offset = ccr_offset + 8,
104 d00_offset = fsr_offset+8, // Start of float save area
105 register_save_size = d00_offset+8*32
106 };
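// Editor's note: a quick check of the align-up idiom used for
// start_of_extra_save_area above -- plain arithmetic, independent of the
// actual frame constants. ((x + 7) & ~7) rounds x up to the next multiple
// of 8: x = 53 gives (53 + 7) & ~7 = 60 & ~7 = 56, and x = 56 stays 56
// since 63 & ~7 = 56. round_to() would compute the same value but is not
// usable here because an enumerator initializer must be a compile-time
// constant expression.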
109 public:
111 static int Oexception_offset() { return o0_offset; };
112 static int G3_offset() { return g3_offset; };
113 static int G5_offset() { return g5_offset; };
114 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
115 static void restore_live_registers(MacroAssembler* masm);
117 // During deoptimization only the result registers need to be restored;
118 // all the other values have already been extracted.
120 static void restore_result_registers(MacroAssembler* masm);
121 };
123 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
124 // Record volatile registers as callee-save values in an OopMap so their save locations will be
125 // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
126 // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
127 // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
128 // (as the stub's I's) when the runtime routine called by the stub creates its frame.
129 int i;
130 // Always make the frame size 16 byte aligned.
131 int frame_size = round_to(additional_frame_words + register_save_size, 16);
132 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
133 int frame_size_in_slots = frame_size / sizeof(jint);
134 // CodeBlob frame size is in words.
135 *total_frame_words = frame_size / wordSize;
136 // OopMap* map = new OopMap(*total_frame_words, 0);
137 OopMap* map = new OopMap(frame_size_in_slots, 0);
139 #if !defined(_LP64)
141 // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
142 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
143 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
144 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
145 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
146 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
147 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
148 #endif /* _LP64 */
150 __ save(SP, -frame_size, SP);
152 #ifndef _LP64
153 // Reload the 64-bit Oregs. Although they are now Iregs we load them
154 // into Oregs here to avoid interrupts cutting off their heads.
156 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
157 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
158 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
159 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
160 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
161 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
163 __ stx(O0, SP, o0_offset+STACK_BIAS);
164 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
166 __ stx(O1, SP, o1_offset+STACK_BIAS);
168 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
170 __ stx(O2, SP, o2_offset+STACK_BIAS);
171 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
173 __ stx(O3, SP, o3_offset+STACK_BIAS);
174 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
176 __ stx(O4, SP, o4_offset+STACK_BIAS);
177 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
179 __ stx(O5, SP, o5_offset+STACK_BIAS);
180 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
181 #endif /* _LP64 */
184 #ifdef _LP64
185 int debug_offset = 0;
186 #else
187 int debug_offset = 4;
188 #endif
189 // Save the G's
190 __ stx(G1, SP, g1_offset+STACK_BIAS);
191 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
193 __ stx(G3, SP, g3_offset+STACK_BIAS);
194 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
196 __ stx(G4, SP, g4_offset+STACK_BIAS);
197 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
199 __ stx(G5, SP, g5_offset+STACK_BIAS);
200 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
202 // This is really a waste but we'll keep things as they were for now
203 if (true) {
204 #ifndef _LP64
205 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
206 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
207 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
208 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
209 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
210 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
211 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
212 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
213 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
214 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
215 #endif /* _LP64 */
216 }
219 // Save the flags
220 __ rdccr( G5 );
221 __ stx(G5, SP, ccr_offset+STACK_BIAS);
222 __ stxfsr(SP, fsr_offset+STACK_BIAS);
224 // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
225 int offset = d00_offset;
226 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
227 FloatRegister f = as_FloatRegister(i);
228 __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
229 // Record as callee saved both halves of double registers (2 float registers).
230 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
231 map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
232 offset += sizeof(double);
233 }
235 // And we're done.
237 return map;
238 }
241 // Pop the current frame and restore all the registers that we
242 // saved.
243 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
245 // Restore all the FP registers
246 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
247 __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
248 }
250 __ ldx(SP, ccr_offset+STACK_BIAS, G1);
251 __ wrccr (G1) ;
253 // Restore the G's
254 // Note that G2 (AKA GThread) must be saved and restored separately.
255 // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
257 __ ldx(SP, g1_offset+STACK_BIAS, G1);
258 __ ldx(SP, g3_offset+STACK_BIAS, G3);
259 __ ldx(SP, g4_offset+STACK_BIAS, G4);
260 __ ldx(SP, g5_offset+STACK_BIAS, G5);
263 #if !defined(_LP64)
264 // Restore the 64-bit O's.
265 __ ldx(SP, o0_offset+STACK_BIAS, O0);
266 __ ldx(SP, o1_offset+STACK_BIAS, O1);
267 __ ldx(SP, o2_offset+STACK_BIAS, O2);
268 __ ldx(SP, o3_offset+STACK_BIAS, O3);
269 __ ldx(SP, o4_offset+STACK_BIAS, O4);
270 __ ldx(SP, o5_offset+STACK_BIAS, O5);
272 // And temporarily place them in TLS
274 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
275 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
276 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
277 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
278 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
279 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
280 #endif /* _LP64 */
282 // Restore flags
284 __ ldxfsr(SP, fsr_offset+STACK_BIAS);
286 __ restore();
288 #if !defined(_LP64)
289 // Now reload the 64-bit Oregs after we've restored the window.
290 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
291 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
292 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
293 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
294 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
295 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
296 #endif /* _LP64 */
298 }
300 // Pop the current frame and restore the registers that might be holding
301 // a result.
302 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
304 #if !defined(_LP64)
305 // 32bit build returns longs in G1
306 __ ldx(SP, g1_offset+STACK_BIAS, G1);
308 // Retrieve the 64-bit O's.
309 __ ldx(SP, o0_offset+STACK_BIAS, O0);
310 __ ldx(SP, o1_offset+STACK_BIAS, O1);
311 // and save to TLS
312 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
313 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
314 #endif /* _LP64 */
316 __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
318 __ restore();
320 #if !defined(_LP64)
321 // Now reload the 64-bit Oregs after we've restored the window.
322 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
323 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
324 #endif /* _LP64 */
326 }
328 // The java_calling_convention describes stack locations as ideal slots on
329 // a frame with no abi restrictions. Since we must observe abi restrictions
330 // (like the placement of the register window) the slots must be biased by
331 // the following value.
332 static int reg2offset(VMReg r) {
333 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
334 }
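// Editor's worked example for reg2offset(): assuming
// out_preserve_stack_slots() returns 16 on a 32-bit SPARC build (one
// 4-byte slot per register-save word -- an assumption, not established in
// this file), a VMReg in stack slot 3 maps to (3 + 16) * 4 = 76 bytes
// above SP, before STACK_BIAS is applied.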
336 // ---------------------------------------------------------------------------
337 // Read the array of BasicTypes from a signature, and compute where the
338 // arguments should go. Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
339 // quantities. Values less than VMRegImpl::stack0 are registers, those above
340 // refer to 4-byte stack slots. All stack slots are based off of the window
341 // top. VMRegImpl::stack0 refers to the first slot past the 16-word window,
342 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
343 // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
344 // integer registers. Values 64-95 are the (32-bit only) float registers.
345 // Each 32-bit quantity is given its own number, so the integer registers
346 // (in either 32- or 64-bit builds) use 2 numbers. For example, there is
347 // an O0-low and an O0-high. Essentially, all int register numbers are doubled.
349 // Register results are passed in O0-O5, for outgoing call arguments. To
350 // convert to incoming arguments, convert all O's to I's. The regs array
351 // refer to the low and hi 32-bit words of 64-bit registers or stack slots.
352 // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
353 // 32-bit value was passed). If both are VMRegImpl::Bad(), it means no value was
354 // passed (used as a placeholder for the other half of longs and doubles in
355 // the 64-bit build). regs[].second() is either VMRegImpl::Bad() or regs[].second() is
356 // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
357 // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
358 // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
359 // same VMRegPair.
361 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
362 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
363 // units regardless of build.
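// Editor's worked example of the doubled numbering described above: a
// 64-bit long in O0 occupies both of O0's numbers (O0-high and O0-low); a
// 32-bit int in O1 uses one of O1's numbers, with regs[].second() left as
// VMRegImpl::Bad(); and a value in VMRegImpl::stack0 + 2 lives 8 bytes
// (two 4-byte slots) above the first slot past the 16-word window top.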
366 // ---------------------------------------------------------------------------
367 // The compiled Java calling convention. The Java convention always passes
368 // 64-bit values in adjacent aligned locations (either registers or stack),
369 // floats in float registers and doubles in aligned float pairs. Values are
370 // packed in the registers. There is no backing varargs store for values in
371 // registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be
372 // passed in I's, because longs in I's get their heads chopped off at
373 // interrupt).
374 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
375 VMRegPair *regs,
376 int total_args_passed,
377 int is_outgoing) {
378 assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
380 // Convention is to pack the first 6 int/oop args into the first 6 registers
381 // (I0-I5), extras spill to the stack. Then pack the first 8 float args
382 // into F0-F7, extras spill to the stack. Then pad all register sets to
383 // align. Then put longs and doubles into the same registers as they fit,
384 // else spill to the stack.
385 const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
386 const int flt_reg_max = 8;
387 //
388 // Where 32-bit 1-reg longs start being passed
389 // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
390 // So make it look like we've filled all the G regs that c2 wants to use.
391 Register g_reg = TieredCompilation ? noreg : G1;
393 // Count int/oop and float args. See how many stack slots we'll need and
394 // where the longs & doubles will go.
395 int int_reg_cnt = 0;
396 int flt_reg_cnt = 0;
397 // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
398 // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
399 int stk_reg_pairs = 0;
400 for (int i = 0; i < total_args_passed; i++) {
401 switch (sig_bt[i]) {
402 case T_LONG: // LP64, longs compete with int args
403 assert(sig_bt[i+1] == T_VOID, "");
404 #ifdef _LP64
405 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
406 #endif
407 break;
408 case T_OBJECT:
409 case T_ARRAY:
410 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
411 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
412 #ifndef _LP64
413 else stk_reg_pairs++;
414 #endif
415 break;
416 case T_INT:
417 case T_SHORT:
418 case T_CHAR:
419 case T_BYTE:
420 case T_BOOLEAN:
421 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
422 else stk_reg_pairs++;
423 break;
424 case T_FLOAT:
425 if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
426 else stk_reg_pairs++;
427 break;
428 case T_DOUBLE:
429 assert(sig_bt[i+1] == T_VOID, "");
430 break;
431 case T_VOID:
432 break;
433 default:
434 ShouldNotReachHere();
435 }
436 }
438 // This is where the longs/doubles start on the stack.
439 stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
441 int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
442 int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
444 // int stk_reg = frame::register_save_words*(wordSize>>2);
445 // int stk_reg = SharedRuntime::out_preserve_stack_slots();
446 int stk_reg = 0;
447 int int_reg = 0;
448 int flt_reg = 0;
450 // Now do the signature layout
451 for (int i = 0; i < total_args_passed; i++) {
452 switch (sig_bt[i]) {
453 case T_INT:
454 case T_SHORT:
455 case T_CHAR:
456 case T_BYTE:
457 case T_BOOLEAN:
458 #ifndef _LP64
459 case T_OBJECT:
460 case T_ARRAY:
461 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
462 #endif // _LP64
463 if (int_reg < int_reg_max) {
464 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
465 regs[i].set1(r->as_VMReg());
466 } else {
467 regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
468 }
469 break;
471 #ifdef _LP64
472 case T_OBJECT:
473 case T_ARRAY:
474 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
475 if (int_reg < int_reg_max) {
476 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
477 regs[i].set2(r->as_VMReg());
478 } else {
479 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
480 stk_reg_pairs += 2;
481 }
482 break;
483 #endif // _LP64
485 case T_LONG:
486 assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
487 #ifdef _LP64
488 if (int_reg < int_reg_max) {
489 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
490 regs[i].set2(r->as_VMReg());
491 } else {
492 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
493 stk_reg_pairs += 2;
494 }
495 #else
496 #ifdef COMPILER2
497 // For 32-bit build, can't pass longs in O-regs because they become
498 // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost
499 // spare and available. This convention isn't used by the Sparc ABI or
500 // anywhere else. If we're tiered then we don't use G-regs because c1
501 // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
502 // G0: zero
503 // G1: 1st Long arg
504 // G2: global allocated to TLS
505 // G3: used in inline cache check
506 // G4: 2nd Long arg
507 // G5: used in inline cache check
508 // G6: used by OS
509 // G7: used by OS
511 if (g_reg == G1) {
512 regs[i].set2(G1->as_VMReg()); // This long arg in G1
513 g_reg = G4; // Where the next arg goes
514 } else if (g_reg == G4) {
515 regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
516 g_reg = noreg; // No more longs in registers
517 } else {
518 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
519 stk_reg_pairs += 2;
520 }
521 #else // COMPILER2
522 if (int_reg_pairs + 1 < int_reg_max) {
523 if (is_outgoing) {
524 regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
525 } else {
526 regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
527 }
528 int_reg_pairs += 2;
529 } else {
530 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
531 stk_reg_pairs += 2;
532 }
533 #endif // COMPILER2
534 #endif // _LP64
535 break;
537 case T_FLOAT:
538 if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
539 else regs[i].set1( VMRegImpl::stack2reg(stk_reg++));
540 break;
541 case T_DOUBLE:
542 assert(sig_bt[i+1] == T_VOID, "expecting half");
543 if (flt_reg_pairs + 1 < flt_reg_max) {
544 regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
545 flt_reg_pairs += 2;
546 } else {
547 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
548 stk_reg_pairs += 2;
549 }
550 break;
551 case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles
552 default:
553 ShouldNotReachHere();
554 }
555 }
557 // Return the amount of stack space these arguments will need.
558 return stk_reg_pairs;
560 }
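// Editor's example of the layout loop above (LP64 build, incoming args;
// nothing else assumed about the platform): for sig_bt = { T_INT, T_LONG,
// T_VOID, T_DOUBLE, T_VOID }, the int gets I0 via set1, the long gets I1
// via set2, the double gets the F0:F1 pair (D0) via set2, no stack slots
// are consumed, and the function returns 0.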
562 // Helper class mostly to avoid passing masm everywhere, and handle
563 // store displacement overflow logic.
564 class AdapterGenerator {
565 MacroAssembler *masm;
566 Register Rdisp;
567 void set_Rdisp(Register r) { Rdisp = r; }
569 void patch_callers_callsite();
571 // base+st_off points to top of argument
572 int arg_offset(const int st_off) { return st_off; }
573 int next_arg_offset(const int st_off) {
574 return st_off - Interpreter::stackElementSize;
575 }
577 // Argument slot values may be loaded first into a register because
578 // they might not fit into the displacement field.
579 RegisterOrConstant arg_slot(const int st_off);
580 RegisterOrConstant next_arg_slot(const int st_off);
582 // Stores long into offset pointed to by base
583 void store_c2i_long(Register r, Register base,
584 const int st_off, bool is_stack);
585 void store_c2i_object(Register r, Register base,
586 const int st_off);
587 void store_c2i_int(Register r, Register base,
588 const int st_off);
589 void store_c2i_double(VMReg r_2,
590 VMReg r_1, Register base, const int st_off);
591 void store_c2i_float(FloatRegister f, Register base,
592 const int st_off);
594 public:
595 void gen_c2i_adapter(int total_args_passed,
596 // VMReg max_arg,
597 int comp_args_on_stack, // VMRegStackSlots
598 const BasicType *sig_bt,
599 const VMRegPair *regs,
600 Label& skip_fixup);
601 void gen_i2c_adapter(int total_args_passed,
602 // VMReg max_arg,
603 int comp_args_on_stack, // VMRegStackSlots
604 const BasicType *sig_bt,
605 const VMRegPair *regs);
607 AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
608 };
611 // Patch the caller's callsite with entry to compiled code if it exists.
612 void AdapterGenerator::patch_callers_callsite() {
613 Label L;
614 __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
615 __ br_null(G3_scratch, false, __ pt, L);
616 // Schedule the branch target address early.
617 __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
618 // Call into the VM to patch the caller, then jump to compiled callee
619 __ save_frame(4); // Args in compiled layout; do not blow them
621 // Must save all the live Gregs the list is:
622 // G1: 1st Long arg (32bit build)
623 // G2: global allocated to TLS
624 // G3: used in inline cache check (scratch)
625 // G4: 2nd Long arg (32bit build);
626 // G5: used in inline cache check (methodOop)
628 // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
630 #ifdef _LP64
631 // mov(s,d)
632 __ mov(G1, L1);
633 __ mov(G4, L4);
634 __ mov(G5_method, L5);
635 __ mov(G5_method, O0); // VM needs target method
636 __ mov(I7, O1); // VM needs caller's callsite
637 // Must be a leaf call...
638 // can be very far once the blob has been relocated
639 AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
640 __ relocate(relocInfo::runtime_call_type);
641 __ jumpl_to(dest, O7, O7);
642 __ delayed()->mov(G2_thread, L7_thread_cache);
643 __ mov(L7_thread_cache, G2_thread);
644 __ mov(L1, G1);
645 __ mov(L4, G4);
646 __ mov(L5, G5_method);
647 #else
648 __ stx(G1, FP, -8 + STACK_BIAS);
649 __ stx(G4, FP, -16 + STACK_BIAS);
650 __ mov(G5_method, L5);
651 __ mov(G5_method, O0); // VM needs target method
652 __ mov(I7, O1); // VM needs caller's callsite
653 // Must be a leaf call...
654 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
655 __ delayed()->mov(G2_thread, L7_thread_cache);
656 __ mov(L7_thread_cache, G2_thread);
657 __ ldx(FP, -8 + STACK_BIAS, G1);
658 __ ldx(FP, -16 + STACK_BIAS, G4);
659 __ mov(L5, G5_method);
660 __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
661 #endif /* _LP64 */
663 __ restore(); // Restore args
664 __ bind(L);
665 }
668 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
669 RegisterOrConstant roc(arg_offset(st_off));
670 return __ ensure_simm13_or_reg(roc, Rdisp);
671 }
673 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
674 RegisterOrConstant roc(next_arg_offset(st_off));
675 return __ ensure_simm13_or_reg(roc, Rdisp);
676 }
679 // Stores long into offset pointed to by base
680 void AdapterGenerator::store_c2i_long(Register r, Register base,
681 const int st_off, bool is_stack) {
682 #ifdef _LP64
683 // In V9, longs are given 2 64-bit slots in the interpreter, but the
684 // data is passed in only 1 slot.
685 __ stx(r, base, next_arg_slot(st_off));
686 #else
687 #ifdef COMPILER2
688 // Misaligned store of 64-bit data
689 __ stw(r, base, arg_slot(st_off)); // lo bits
690 __ srlx(r, 32, r);
691 __ stw(r, base, next_arg_slot(st_off)); // hi bits
692 #else
693 if (is_stack) {
694 // Misaligned store of 64-bit data
695 __ stw(r, base, arg_slot(st_off)); // lo bits
696 __ srlx(r, 32, r);
697 __ stw(r, base, next_arg_slot(st_off)); // hi bits
698 } else {
699 __ stw(r->successor(), base, arg_slot(st_off) ); // lo bits
700 __ stw(r , base, next_arg_slot(st_off)); // hi bits
701 }
702 #endif // COMPILER2
703 #endif // _LP64
704 }
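// Editor's note on the misaligned store above: stw writes only the low 32
// bits of the 64-bit register; the srlx by 32 then exposes the high word
// so the second stw can place it in the long's other interpreter slot.
// Two word-sized stores avoid the 8-byte alignment that stx would require.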
706 void AdapterGenerator::store_c2i_object(Register r, Register base,
707 const int st_off) {
708 __ st_ptr (r, base, arg_slot(st_off));
709 }
711 void AdapterGenerator::store_c2i_int(Register r, Register base,
712 const int st_off) {
713 __ st (r, base, arg_slot(st_off));
714 }
716 // Stores into offset pointed to by base
717 void AdapterGenerator::store_c2i_double(VMReg r_2,
718 VMReg r_1, Register base, const int st_off) {
719 #ifdef _LP64
720 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
721 // data is passed in only 1 slot.
722 __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
723 #else
724 // Need to marshal 64-bit value from misaligned Lesp loads
725 __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
726 __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
727 #endif
728 }
730 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
731 const int st_off) {
732 __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
733 }
735 void AdapterGenerator::gen_c2i_adapter(
736 int total_args_passed,
737 // VMReg max_arg,
738 int comp_args_on_stack, // VMRegStackSlots
739 const BasicType *sig_bt,
740 const VMRegPair *regs,
741 Label& skip_fixup) {
743 // Before we get into the guts of the C2I adapter, see if we should be here
744 // at all. We've come from compiled code and are attempting to jump to the
745 // interpreter, which means the caller made a static call to get here
746 // (vcalls always get a compiled target if there is one). Check for a
747 // compiled target. If there is one, we need to patch the caller's call.
748 // However we will run interpreted if we come thru here. The next pass
749 // thru the call site will run compiled. If we ran compiled here then
750 // we can (theoretically) do endless i2c->c2i->i2c transitions during
751 // deopt/uncommon trap cycles. If we always go interpreted here then
752 // we can have at most one and don't need to play any tricks to keep
753 // from endlessly growing the stack.
754 //
755 // Actually if we detected that we had an i2c->c2i transition here we
756 // ought to be able to reset the world back to the state of the interpreted
757 // call and not bother building another interpreter arg area. We don't
758 // do that at this point.
760 patch_callers_callsite();
762 __ bind(skip_fixup);
764 // Since all args are passed on the stack, total_args_passed*wordSize is the
765 // space we need. Add in varargs area needed by the interpreter. Round up
766 // to stack alignment.
767 const int arg_size = total_args_passed * Interpreter::stackElementSize;
768 const int varargs_area =
769 (frame::varargs_offset - frame::register_save_words)*wordSize;
770 const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
772 int bias = STACK_BIAS;
773 const int interp_arg_offset = frame::varargs_offset*wordSize +
774 (total_args_passed-1)*Interpreter::stackElementSize;
776 Register base = SP;
778 #ifdef _LP64
779 // In the 64-bit build, because of wider slots and STACK_BIAS, we can run
780 // out of bits in the displacement to do loads and stores. Use g3 as
781 // temporary displacement.
782 if (! __ is_simm13(extraspace)) {
783 __ set(extraspace, G3_scratch);
784 __ sub(SP, G3_scratch, SP);
785 } else {
786 __ sub(SP, extraspace, SP);
787 }
788 set_Rdisp(G3_scratch);
789 #else
790 __ sub(SP, extraspace, SP);
791 #endif // _LP64
793 // First write G1 (if used) to wherever it must go.
794 for (int i=0; i<total_args_passed; i++) {
795 const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
796 VMReg r_1 = regs[i].first();
797 VMReg r_2 = regs[i].second();
798 if (r_1 == G1_scratch->as_VMReg()) {
799 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
800 store_c2i_object(G1_scratch, base, st_off);
801 } else if (sig_bt[i] == T_LONG) {
802 assert(!TieredCompilation, "should not use register args for longs");
803 store_c2i_long(G1_scratch, base, st_off, false);
804 } else {
805 store_c2i_int(G1_scratch, base, st_off);
806 }
807 }
808 }
810 // Now write the args into the outgoing interpreter space
811 for (int i=0; i<total_args_passed; i++) {
812 const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
813 VMReg r_1 = regs[i].first();
814 VMReg r_2 = regs[i].second();
815 if (!r_1->is_valid()) {
816 assert(!r_2->is_valid(), "");
817 continue;
818 }
819 // Skip G1 if found as we did it first in order to free it up
820 if (r_1 == G1_scratch->as_VMReg()) {
821 continue;
822 }
823 #ifdef ASSERT
824 bool G1_forced = false;
825 #endif // ASSERT
826 if (r_1->is_stack()) { // Pretend stack targets are loaded into G1
827 #ifdef _LP64
828 Register ld_off = Rdisp;
829 __ set(reg2offset(r_1) + extraspace + bias, ld_off);
830 #else
831 int ld_off = reg2offset(r_1) + extraspace + bias;
832 #endif // _LP64
833 #ifdef ASSERT
834 G1_forced = true;
835 #endif // ASSERT
836 r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
837 if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
838 else __ ldx(base, ld_off, G1_scratch);
839 }
841 if (r_1->is_Register()) {
842 Register r = r_1->as_Register()->after_restore();
843 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
844 store_c2i_object(r, base, st_off);
845 } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
846 #ifndef _LP64
847 if (TieredCompilation) {
848 assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
849 }
850 #endif // _LP64
851 store_c2i_long(r, base, st_off, r_2->is_stack());
852 } else {
853 store_c2i_int(r, base, st_off);
854 }
855 } else {
856 assert(r_1->is_FloatRegister(), "");
857 if (sig_bt[i] == T_FLOAT) {
858 store_c2i_float(r_1->as_FloatRegister(), base, st_off);
859 } else {
860 assert(sig_bt[i] == T_DOUBLE, "wrong type");
861 store_c2i_double(r_2, r_1, base, st_off);
862 }
863 }
864 }
866 #ifdef _LP64
867 // Need to reload G3_scratch, used for temporary displacements.
868 __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
870 // Pass O5_savedSP as an argument to the interpreter.
871 // The interpreter will restore SP to this value before returning.
872 __ set(extraspace, G1);
873 __ add(SP, G1, O5_savedSP);
874 #else
875 // Pass O5_savedSP as an argument to the interpreter.
876 // The interpreter will restore SP to this value before returning.
877 __ add(SP, extraspace, O5_savedSP);
878 #endif // _LP64
880 __ mov((frame::varargs_offset)*wordSize -
881 1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
882 // Jump to the interpreter just as if interpreter was doing it.
883 __ jmpl(G3_scratch, 0, G0);
884 // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp
885 // (really L0) is in use by the compiled frame as a generic temp. However,
886 // the interpreter does not know where its args are without some kind of
887 // arg pointer being passed in. Pass it in Gargs.
888 __ delayed()->add(SP, G1, Gargs);
889 }
891 void AdapterGenerator::gen_i2c_adapter(
892 int total_args_passed,
893 // VMReg max_arg,
894 int comp_args_on_stack, // VMRegStackSlots
895 const BasicType *sig_bt,
896 const VMRegPair *regs) {
898 // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
899 // layout. Lesp was saved by the calling I-frame and will be restored on
900 // return. Meanwhile, outgoing arg space is all owned by the callee
901 // C-frame, so we can mangle it at will. After adjusting the frame size,
902 // hoist register arguments and repack other args according to the compiled
903 // code convention. Finally, end in a jump to the compiled code. The entry
904 // point address is the start of the buffer.
906 // We will only enter here from an interpreted frame and never from after
907 // passing thru a c2i. Azul allowed this but we do not. If we lose the
908 // race and use a c2i we will remain interpreted for the race loser(s).
909 // This removes all sorts of headaches on the x86 side and also eliminates
910 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
912 // As you can see from the list of inputs & outputs there are not a lot
913 // of temp registers to work with: mostly G1, G3 & G4.
915 // Inputs:
916 // G2_thread - TLS
917 // G5_method - Method oop
918 // G4 (Gargs) - Pointer to interpreter's args
919 // O0..O4 - free for scratch
920 // O5_savedSP - Caller's saved SP, to be restored if needed
921 // O6 - Current SP!
922 // O7 - Valid return address
923 // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)
925 // Outputs:
926 // G2_thread - TLS
927 // G1, G4 - Outgoing long args in 32-bit build
928 // O0-O5 - Outgoing args in compiled layout
929 // O6 - Adjusted or restored SP
930 // O7 - Valid return address
931 // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)
932 // F0-F7 - more outgoing args
935 // Gargs is the incoming argument base, and also an outgoing argument.
936 __ sub(Gargs, BytesPerWord, Gargs);
938 // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
939 // WITH O7 HOLDING A VALID RETURN PC
940 //
941 // | |
942 // : java stack :
943 // | |
944 // +--------------+ <--- start of outgoing args
945 // | receiver | |
946 // : rest of args : |---size is java-arg-words
947 // | | |
948 // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
949 // | | |
950 // : unused : |---Space for max Java stack, plus stack alignment
951 // | | |
952 // +--------------+ <--- SP + 16*wordsize
953 // | |
954 // : window :
955 // | |
956 // +--------------+ <--- SP
958 // WE REPACK THE STACK. We use the common calling convention layout as
959 // discovered by calling SharedRuntime::calling_convention. We assume it
960 // causes an arbitrary shuffle of memory, which may require some register
961 // temps to do the shuffle. We hope for (and optimize for) the case where
962 // temps are not needed. We may have to resize the stack slightly, in case
963 // we need alignment padding (32-bit interpreter can pass longs & doubles
964 // misaligned, but the compilers expect them aligned).
965 //
966 // | |
967 // : java stack :
968 // | |
969 // +--------------+ <--- start of outgoing args
970 // | pad, align | |
971 // +--------------+ |
972 // | ints, floats | |---Outgoing stack args, packed low.
973 // +--------------+ | First few args in registers.
974 // : doubles : |
975 // | longs | |
976 // +--------------+ <--- SP' + 16*wordsize
977 // | |
978 // : window :
979 // | |
980 // +--------------+ <--- SP'
982 // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
983 // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
984 // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
986 // Cut-out for having no stack args. Since up to 6 args are passed
987 // in registers, we will commonly have no stack args.
988 if (comp_args_on_stack > 0) {
990 // Convert VMReg stack slots to words.
991 int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
992 // Round up to minimum stack alignment, in wordSize units.
993 comp_words_on_stack = round_to(comp_words_on_stack, 2);
994 // Now compute the distance from Lesp to SP. This calculation does not
995 // include the space for total_args_passed because Lesp has not yet popped
996 // the arguments.
997 __ sub(SP, (comp_words_on_stack)*wordSize, SP);
998 }
1000 // Will jump to the compiled code just as if compiled code was doing it.
1001 // Pre-load the register-jump target early, to schedule it better.
1002 __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
1004 // Now generate the shuffle code. Pick up all register args and move the
1005 // rest through G1_scratch.
1006 for (int i=0; i<total_args_passed; i++) {
1007 if (sig_bt[i] == T_VOID) {
1008 // Longs and doubles are passed in native word order, but misaligned
1009 // in the 32-bit build.
1010 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
1011 continue;
1012 }
1014 // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the
1015 // 32-bit build and aligned in the 64-bit build. Look for the obvious
1016 // ldx/lddf optimizations.
1018 // Load in argument order going down.
1019 const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
1020 set_Rdisp(G1_scratch);
1022 VMReg r_1 = regs[i].first();
1023 VMReg r_2 = regs[i].second();
1024 if (!r_1->is_valid()) {
1025 assert(!r_2->is_valid(), "");
1026 continue;
1027 }
1028 if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9
1029 r_1 = F8->as_VMReg(); // as part of the load/store shuffle
1030 if (r_2->is_valid()) r_2 = r_1->next();
1031 }
1032 if (r_1->is_Register()) { // Register argument
1033 Register r = r_1->as_Register()->after_restore();
1034 if (!r_2->is_valid()) {
1035 __ ld(Gargs, arg_slot(ld_off), r);
1036 } else {
1037 #ifdef _LP64
1038 // In V9, longs are given 2 64-bit slots in the interpreter, but the
1039 // data is passed in only 1 slot.
1040 RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
1041 next_arg_slot(ld_off) : arg_slot(ld_off);
1042 __ ldx(Gargs, slot, r);
1043 #else
1044 // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
1045 // stack shuffle. Load the first 2 longs into G1/G4 later.
1046 #endif
1047 }
1048 } else {
1049 assert(r_1->is_FloatRegister(), "");
1050 if (!r_2->is_valid()) {
1051 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
1052 } else {
1053 #ifdef _LP64
1054 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
1055 // data is passed in only 1 slot. This code also handles longs that
1056 // are passed on the stack, but need a stack-to-stack move through a
1057 // spare float register.
1058 RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
1059 next_arg_slot(ld_off) : arg_slot(ld_off);
1060 __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
1061 #else
1062 // Need to marshal 64-bit value from misaligned Lesp loads
1063 __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
1064 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
1065 #endif
1066 }
1067 }
1068 // Was the argument really intended to be on the stack, but was loaded
1069 // into F8/F9?
1070 if (regs[i].first()->is_stack()) {
1071 assert(r_1->as_FloatRegister() == F8, "fix this code");
1072 // Convert stack slot to an SP offset
1073 int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
1074 // Store down the shuffled stack word. Target address _is_ aligned.
1075 RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
1076 if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
1077 else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
1078 }
1079 }
1080 bool made_space = false;
1081 #ifndef _LP64
1082 // May need to pick up a few long args in G1/G4
1083 bool g4_crushed = false;
1084 bool g3_crushed = false;
1085 for (int i=0; i<total_args_passed; i++) {
1086 if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
1087 // Load in argument order going down
1088 int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
1089 // Need to marshal 64-bit value from misaligned Lesp loads
1090 Register r = regs[i].first()->as_Register()->after_restore();
1091 if (r == G1 || r == G4) {
1092 assert(!g4_crushed, "ordering problem");
1093 if (r == G4){
1094 g4_crushed = true;
1095 __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits
1096 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
1097 } else {
1098 // better schedule this way
1099 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
1100 __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits
1101 }
1102 g3_crushed = true;
1103 __ sllx(r, 32, r);
1104 __ or3(G3_scratch, r, r);
1105 } else {
1106 assert(r->is_out(), "longs passed in two O registers");
1107 __ ld (Gargs, arg_slot(ld_off) , r->successor()); // Load lo bits
1108 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
1109 }
1110 }
1111 }
1112 #endif
1114 // Jump to the compiled code just as if compiled code was doing it.
1115 //
1116 #ifndef _LP64
1117 if (g3_crushed) {
1118 // Rats, the load was wasted; at least it is in cache...
1119 __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3);
1120 }
1121 #endif /* _LP64 */
1123 // 6243940 We might end up in handle_wrong_method if
1124 // the callee is deoptimized as we race thru here. If that
1125 // happens we don't want to take a safepoint because the
1126 // caller frame will look interpreted and arguments are now
1127 // "compiled" so it is much better to make this transition
1128 // invisible to the stack walking code. Unfortunately if
1129 // we try and find the callee by normal means a safepoint
1130 // is possible. So we stash the desired callee in the thread
1131 // and the VM will find it there should this case occur.
1132 Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
1133 __ st_ptr(G5_method, callee_target_addr);
1135 if (StressNonEntrant) {
1136 // Open a big window for deopt failure
1137 __ save_frame(0);
1138 __ mov(G0, L0);
1139 Label loop;
1140 __ bind(loop);
1141 __ sub(L0, 1, L0);
1142 __ br_null(L0, false, Assembler::pt, loop);
1143 __ delayed()->nop();
1145 __ restore();
1146 }
1149 __ jmpl(G3, 0, G0);
1150 __ delayed()->nop();
1151 }
1153 // ---------------------------------------------------------------
1154 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
1155 int total_args_passed,
1156 // VMReg max_arg,
1157 int comp_args_on_stack, // VMRegStackSlots
1158 const BasicType *sig_bt,
1159 const VMRegPair *regs,
1160 AdapterFingerPrint* fingerprint) {
1161 address i2c_entry = __ pc();
1163 AdapterGenerator agen(masm);
1165 agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
1168 // -------------------------------------------------------------------------
1169 // Generate a C2I adapter. On entry we know G5 holds the methodOop. The
1170 // args start out packed in the compiled layout. They need to be unpacked
1171 // into the interpreter layout. This will almost always require some stack
1172 // space. We grow the current (compiled) stack, then repack the args. We
1173 // finally end in a jump to the generic interpreter entry point. On exit
1174 // from the interpreter, the interpreter will restore our SP (lest the
1175 // compiled code, which relies solely on SP and not FP, get sick).
1177 address c2i_unverified_entry = __ pc();
1178 Label skip_fixup;
1179 {
1180 #if !defined(_LP64) && defined(COMPILER2)
1181 Register R_temp = L0; // another scratch register
1182 #else
1183 Register R_temp = G1; // another scratch register
1184 #endif
1186 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
1188 __ verify_oop(O0);
1189 __ verify_oop(G5_method);
1190 __ load_klass(O0, G3_scratch);
1191 __ verify_oop(G3_scratch);
1193 #if !defined(_LP64) && defined(COMPILER2)
1194 __ save(SP, -frame::register_save_words*wordSize, SP);
1195 __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
1196 __ verify_oop(R_temp);
1197 __ cmp(G3_scratch, R_temp);
1198 __ restore();
1199 #else
1200 __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
1201 __ verify_oop(R_temp);
1202 __ cmp(G3_scratch, R_temp);
1203 #endif
1205 Label ok, ok2;
1206 __ brx(Assembler::equal, false, Assembler::pt, ok);
1207 __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
1208 __ jump_to(ic_miss, G3_scratch);
1209 __ delayed()->nop();
1211 __ bind(ok);
1212 // The method might have been compiled since the call site was patched to
1213 // interpreted; if that is the case, treat it as a miss so we can get
1214 // the call site corrected.
1215 __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
1216 __ bind(ok2);
1217 __ br_null(G3_scratch, false, __ pt, skip_fixup);
1218 __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
1219 __ jump_to(ic_miss, G3_scratch);
1220 __ delayed()->nop();
1222 }
1224 address c2i_entry = __ pc();
1226 agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1228 __ flush();
1229 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1231 }
1233 // Helper function for native calling conventions
1234 static VMReg int_stk_helper( int i ) {
1235 // Bias any stack based VMReg we get by ignoring the window area
1236 // but not the register parameter save area.
1237 //
1238 // This is strange for the following reasons. We'd normally expect
1239 // the calling convention to return an VMReg for a stack slot
1240 // completely ignoring any abi reserved area. C2 thinks of that
1241 // abi area as only out_preserve_stack_slots. This does not include
1242 // the area allocated by the C abi to store down integer arguments
1243 // because the java calling convention does not use it. So
1244 // since c2 assumes that there are only out_preserve_stack_slots
1245 // biasing the optoregs (which impacts VMRegs), the C calling convention
1246 // must add in this bias amount when referencing any actual stack
1247 // location, to make up for the fact that out_preserve_stack_slots is
1248 // insufficient for C calls. What a mess. I sure hope those 6
1249 // stack words were worth it on every java call!
1251 // Another way of cleaning this up would be for out_preserve_stack_slots
1252 // to take a parameter to say whether it was C or java calling conventions.
1253 // Then things might look a little better (but not much).
1255 int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
1256 if( mem_parm_offset < 0 ) {
1257 return as_oRegister(i)->as_VMReg();
1258 } else {
1259 int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
1260 // Now return a biased offset that will be correct when out_preserve_slots is added back in
1261 return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
1262 }
1263 }
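// Editor's example for int_stk_helper() (SPARC_ARGS_IN_REGS_NUM is 6 per
// the register convention described above): i = 3 gives
// mem_parm_offset = -3, so the argument is passed in O3. i = 7 gives
// mem_parm_offset = 1, the second memory-parameter word on the stack; the
// returned stack slot has out_preserve_stack_slots() pre-subtracted so
// that adding the bias back in later yields the true offset.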
1266 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1267 VMRegPair *regs,
1268 int total_args_passed) {
1270 // Return the number of VMReg stack_slots needed for the args.
1271 // This value does not include an abi space (like register window
1272 // save area).
1274 // The native convention is V8 if !LP64.
1275 // The LP64 convention is the V9 convention, which is slightly more sane.
1277 // We return the amount of VMReg stack slots we need to reserve for all
1278 // the arguments NOT counting out_preserve_stack_slots. Since we always
1279 // have space for storing at least 6 registers to memory we start with that.
1280 // See int_stk_helper for a further discussion.
1281 int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
1283 #ifdef _LP64
1284 // V9 convention: All things "as-if" on double-wide stack slots.
1285 // Hoist any int/ptr/long's in the first 6 to int regs.
1286 // Hoist any flt/dbl's in the first 16 dbl regs.
1287 int j = 0; // Count of actual args, not HALVES
1288 for( int i=0; i<total_args_passed; i++, j++ ) {
1289 switch( sig_bt[i] ) {
1290 case T_BOOLEAN:
1291 case T_BYTE:
1292 case T_CHAR:
1293 case T_INT:
1294 case T_SHORT:
1295 regs[i].set1( int_stk_helper( j ) ); break;
1296 case T_LONG:
1297 assert( sig_bt[i+1] == T_VOID, "expecting half" );
1298 case T_ADDRESS: // raw pointers, like current thread, for VM calls
1299 case T_ARRAY:
1300 case T_OBJECT:
1301 regs[i].set2( int_stk_helper( j ) );
1302 break;
1303 case T_FLOAT:
1304 if ( j < 16 ) {
1305 // V9ism: floats go in ODD registers
1306 regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
1307 } else {
1308 // V9ism: floats go in ODD stack slot
1309 regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
1310 }
1311 break;
1312 case T_DOUBLE:
1313 assert( sig_bt[i+1] == T_VOID, "expecting half" );
1314 if ( j < 16 ) {
1315 // V9ism: doubles go in EVEN/ODD regs
1316 regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
1317 } else {
1318 // V9ism: doubles go in EVEN/ODD stack slots
1319 regs[i].set2(VMRegImpl::stack2reg(j<<1));
1320 }
1321 break;
1322 case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES
1323 default:
1324 ShouldNotReachHere();
1325 }
1326 if (regs[i].first()->is_stack()) {
1327 int off = regs[i].first()->reg2stack();
1328 if (off > max_stack_slots) max_stack_slots = off;
1329 }
1330 if (regs[i].second()->is_stack()) {
1331 int off = regs[i].second()->reg2stack();
1332 if (off > max_stack_slots) max_stack_slots = off;
1333 }
1334 }
1336 #else // _LP64
1337 // V8 convention: first 6 things in O-regs, rest on stack.
1338 // Alignment is willy-nilly.
1339 for( int i=0; i<total_args_passed; i++ ) {
1340 switch( sig_bt[i] ) {
1341 case T_ADDRESS: // raw pointers, like current thread, for VM calls
1342 case T_ARRAY:
1343 case T_BOOLEAN:
1344 case T_BYTE:
1345 case T_CHAR:
1346 case T_FLOAT:
1347 case T_INT:
1348 case T_OBJECT:
1349 case T_SHORT:
1350 regs[i].set1( int_stk_helper( i ) );
1351 break;
1352 case T_DOUBLE:
1353 case T_LONG:
1354 assert( sig_bt[i+1] == T_VOID, "expecting half" );
1355 regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
1356 break;
1357 case T_VOID: regs[i].set_bad(); break;
1358 default:
1359 ShouldNotReachHere();
1360 }
1361 if (regs[i].first()->is_stack()) {
1362 int off = regs[i].first()->reg2stack();
1363 if (off > max_stack_slots) max_stack_slots = off;
1364 }
1365 if (regs[i].second()->is_stack()) {
1366 int off = regs[i].second()->reg2stack();
1367 if (off > max_stack_slots) max_stack_slots = off;
1368 }
1369 }
1370 #endif // _LP64
1372 return round_to(max_stack_slots + 1, 2);
1374 }
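// Editor's example of the V9 placement rules above: for the second actual
// argument (j == 1), a T_FLOAT lands in F3 (1 + (1 << 1), the odd half of
// the second double-wide slot), while a T_DOUBLE lands in the even/odd
// pair F2:F3 (register number j << 1).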
1377 // ---------------------------------------------------------------------------
1378 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1379 switch (ret_type) {
1380 case T_FLOAT:
1381 __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1382 break;
1383 case T_DOUBLE:
1384 __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1385 break;
1386 }
1387 }
1389 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1390 switch (ret_type) {
1391 case T_FLOAT:
1392 __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
1393 break;
1394 case T_DOUBLE:
1395 __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
1396 break;
1397 }
1398 }
1400 // Check and forward any pending exception. The thread is stored in
1401 // L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there
1402 // is no exception handler. We merely pop this frame off and throw the
1403 // exception in the caller's frame.
1404 static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
1405 Label L;
1406 __ br_null(Rex_oop, false, Assembler::pt, L);
1407 __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
1408 // Since this is a native call, we *know* the proper exception handler
1409 // without calling into the VM: it's the empty function. Just pop this
1410 // frame and then jump to forward_exception_entry; O7 will contain the
1411 // native caller's return PC.
1412 AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
1413 __ jump_to(exception_entry, G3_scratch);
1414 __ delayed()->restore(); // Pop this frame off.
1415 __ bind(L);
1416 }
1418 // A simple move of an integer-like type.
1419 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1420 if (src.first()->is_stack()) {
1421 if (dst.first()->is_stack()) {
1422 // stack to stack
1423 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1424 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1425 } else {
1426 // stack to reg
1427 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1428 }
1429 } else if (dst.first()->is_stack()) {
1430 // reg to stack
1431 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1432 } else {
1433 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1434 }
1435 }
1437 // On 64-bit we store integer-like items to the stack as 64-bit
1438 // items (SPARC ABI) even though Java would only store 32 bits
1439 // for a parameter. On 32-bit it is simply 32 bits.
1440 // So this routine does 32->32 on 32-bit and 32->64 on 64-bit.
1441 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1442 if (src.first()->is_stack()) {
1443 if (dst.first()->is_stack()) {
1444 // stack to stack
1445 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1446 __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1447 } else {
1448 // stack to reg
1449 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1450 }
1451 } else if (dst.first()->is_stack()) {
1452 // reg to stack
1453 __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1454 } else {
1455 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1456 }
1457 }
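// Sketch of the difference (illustrative only): for a stack destination,
// simple_move32 emits a 32-bit 'st' while move32_64 emits 'st_ptr', which on
// an _LP64 build expands to 'stx' and fills the full 64-bit ABI slot:
//   simple_move32: st  L5, [SP + off]   // 4 bytes
//   move32_64:     stx L5, [SP + off]   // 8 bytes on _LP64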
1460 // An oop arg. Must pass a handle, not the oop itself.
1461 static void object_move(MacroAssembler* masm,
1462 OopMap* map,
1463 int oop_handle_offset,
1464 int framesize_in_slots,
1465 VMRegPair src,
1466 VMRegPair dst,
1467 bool is_receiver,
1468 int* receiver_offset) {
1470 // Must pass a handle. First figure out the location we will use as the handle.
1472 if (src.first()->is_stack()) {
1473 // Oop is already on the stack
1474 Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1475 __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1476 __ ld_ptr(rHandle, 0, L4);
1477 #ifdef _LP64
1478 __ movr( Assembler::rc_z, L4, G0, rHandle );
1479 #else
1480 __ tst( L4 );
1481 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1482 #endif
1483 if (dst.first()->is_stack()) {
1484 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1485 }
1486 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1487 if (is_receiver) {
1488 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1489 }
1490 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1491 } else {
1492 // Oop is in an input register; we must flush it to the stack
1493 const Register rOop = src.first()->as_Register();
1494 const Register rHandle = L5;
1495 int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1496 int offset = oop_slot*VMRegImpl::stack_slot_size;
1497 Label skip;
1498 __ st_ptr(rOop, SP, offset + STACK_BIAS);
1499 if (is_receiver) {
1500 *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
1501 }
1502 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1503 __ add(SP, offset + STACK_BIAS, rHandle);
1504 #ifdef _LP64
1505 __ movr( Assembler::rc_z, rOop, G0, rHandle );
1506 #else
1507 __ tst( rOop );
1508 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1509 #endif
1511 if (dst.first()->is_stack()) {
1512 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1513 } else {
1514 __ mov(rHandle, dst.first()->as_Register());
1515 }
1516 }
1517 }
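// Conceptually (illustrative only), the native callee receives a jobject that
// is the address of a slot holding the oop, or NULL for a NULL oop:
//   handle = (oop == NULL) ? NULL : &slot_containing_oop;
// The movr/movcc sequences above implement exactly that NULL check without a
// branch, and the OopMap entry lets the GC update the slot if the oop moves.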
1519 // A float arg may have to do a float reg to int reg conversion
1520 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1521 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1523 if (src.first()->is_stack()) {
1524 if (dst.first()->is_stack()) {
1525 // stack to stack the easiest of the bunch
1526 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1527 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1528 } else {
1529 // stack to reg
1530 if (dst.first()->is_Register()) {
1531 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1532 } else {
1533 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1534 }
1535 }
1536 } else if (dst.first()->is_stack()) {
1537 // reg to stack
1538 if (src.first()->is_Register()) {
1539 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1540 } else {
1541 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1542 }
1543 } else {
1544 // reg to reg
1545 if (src.first()->is_Register()) {
1546 if (dst.first()->is_Register()) {
1547 // gpr -> gpr
1548 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1549 } else {
1550 // gpr -> fpr
1551 __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
1552 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
1553 }
1554 } else if (dst.first()->is_Register()) {
1555 // fpr -> gpr
1556 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS);
1557 __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register());
1558 } else {
1559 // fpr -> fpr
1560 // In theory these overlap but the ordering is such that this is likely a nop
1561 if ( src.first() != dst.first()) {
1562 __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
1563 }
1564 }
1565 }
1566 }
1568 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1569 VMRegPair src_lo(src.first());
1570 VMRegPair src_hi(src.second());
1571 VMRegPair dst_lo(dst.first());
1572 VMRegPair dst_hi(dst.second());
1573 simple_move32(masm, src_lo, dst_lo);
1574 simple_move32(masm, src_hi, dst_hi);
1575 }
1577 // A long move
1578 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1580 // Do the simple ones here; else do two int moves
1581 if (src.is_single_phys_reg() ) {
1582 if (dst.is_single_phys_reg()) {
1583 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1584 } else {
1585 // split src into two separate registers
1586 // Remember hi means hi address or lsw on sparc
1587 // Move msw to lsw
1588 if (dst.second()->is_reg()) {
1589 // MSW -> MSW
1590 __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
1591 // Now LSW -> LSW
1592 // this will only move lo -> lo and ignore hi
1593 VMRegPair split(dst.second());
1594 simple_move32(masm, src, split);
1595 } else {
1596 VMRegPair split(src.first(), L4->as_VMReg());
1597 // MSW -> MSW (lo ie. first word)
1598 __ srax(src.first()->as_Register(), 32, L4);
1599 split_long_move(masm, split, dst);
1600 }
1601 }
1602 } else if (dst.is_single_phys_reg()) {
1603 if (src.is_adjacent_aligned_on_stack(2)) {
1604 __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1605 } else {
1606 // dst is a single reg.
1607 // Remember lo is low address not msb for stack slots
1608 // and lo is the "real" register for registers
1609 // src is split across two locations (registers and/or stack slots)
1611 VMRegPair split;
1613 if (src.first()->is_reg()) {
1614 // src.lo (msw) is a reg, src.hi is stk/reg
1615 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
1616 split.set_pair(dst.first(), src.first());
1617 } else {
1618 // msw is stack move to L5
1619 // lsw is stack move to dst.lo (real reg)
1620 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
1621 split.set_pair(dst.first(), L5->as_VMReg());
1622 }
1624 // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
1625 // msw -> src.lo/L5, lsw -> dst.lo
1626 split_long_move(masm, src, split);
1628 // So dst now has the low-order half correct; position the
1629 // msw half
1630 __ sllx(split.first()->as_Register(), 32, L5);
1632 const Register d = dst.first()->as_Register();
1633 __ or3(L5, d, d);
1634 }
1635 } else {
1636 // For LP64 we can probably do better.
1637 split_long_move(masm, src, dst);
1638 }
1639 }
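// Example (illustrative only, 32-bit build): moving a 64-bit value held in a
// single 64-bit register G1 into the O0/O1 pair splits it as
//   srax G1, 32, O0   // MSW
//   mov  G1,     O1   // LSW, via simple_move32
// while the opposite direction (pair -> single register) recombines the
// halves with the sllx/or3 sequence above.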
1641 // A double move
1642 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1644 // The painful thing here is that like long_move a VMRegPair might be
1645 // 1: a single physical register
1646 // 2: two physical registers (v8)
1647 // 3: a physical reg [lo] and a stack slot [hi] (v8)
1648 // 4: two stack slots
1650 // Since src always follows the Java calling convention we know that the src
1651 // pair is always either all registers or all stack (and aligned?), never split
1653 // between a register [lo] and a stack slot [hi]
1654 if (src.first()->is_stack()) {
1655 if (dst.first()->is_stack()) {
1656 // stack to stack the easiest of the bunch
1657 // There ought to be a way to do this where, if alignment is ok, we use ldd/std when possible
1658 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1659 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1660 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1661 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1662 } else {
1663 // stack to reg
1664 if (dst.second()->is_stack()) {
1665 // stack -> reg, stack -> stack
1666 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1667 if (dst.first()->is_Register()) {
1668 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1669 } else {
1670 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1671 }
1672 // This was missing. (very rare case)
1673 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1674 } else {
1675 // stack -> reg
1676 // Eventually optimize for alignment QQQ
1677 if (dst.first()->is_Register()) {
1678 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1679 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
1680 } else {
1681 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1682 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
1683 }
1684 }
1685 }
1686 } else if (dst.first()->is_stack()) {
1687 // reg to stack
1688 if (src.first()->is_Register()) {
1689 // Eventually optimize for alignment QQQ
1690 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1691 if (src.second()->is_stack()) {
1692 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1693 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1694 } else {
1695 __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
1696 }
1697 } else {
1698 // fpr to stack
1699 if (src.second()->is_stack()) {
1700 ShouldNotReachHere();
1701 } else {
1702 // Is the stack aligned?
1703 if (reg2offset(dst.first()) & 0x7) {
1704 // No: do as pairs
1705 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1706 __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
1707 } else {
1708 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1709 }
1710 }
1711 }
1712 } else {
1713 // reg to reg
1714 if (src.first()->is_Register()) {
1715 if (dst.first()->is_Register()) {
1716 // gpr -> gpr
1717 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1718 __ mov(src.second()->as_Register(), dst.second()->as_Register());
1719 } else {
1720 // gpr -> fpr
1721 // ought to be able to do a single store
1722 __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
1723 __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
1724 // ought to be able to do a single load
1725 __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
1726 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
1727 }
1728 } else if (dst.first()->is_Register()) {
1729 // fpr -> gpr
1730 // ought to be able to do a single store
1731 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
1732 // ought to be able to do a single load
1733 // REMEMBER first() is low address not LSB
1734 __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
1735 if (dst.second()->is_Register()) {
1736 __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
1737 } else {
1738 __ ld(FP, -4 + STACK_BIAS, L4);
1739 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1740 }
1741 } else {
1742 // fpr -> fpr
1743 // In theory these overlap but the ordering is such that this is likely a nop
1744 if ( src.first() != dst.first()) {
1745 __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
1746 }
1747 }
1748 }
1749 }
1751 // Creates an inner frame if one hasn't already been created, and
1752 // saves a copy of the thread in L7_thread_cache
1753 static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
1754 if (!*already_created) {
1755 __ save_frame(0);
1756 // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
1757 // Don't use save_thread because it smashes G2 and we merely want to save a
1758 // copy
1759 __ mov(G2_thread, L7_thread_cache);
1760 *already_created = true;
1761 }
1762 }
1764 // ---------------------------------------------------------------------------
1765 // Generate a native wrapper for a given method. The method takes arguments
1766 // in the Java compiled code convention, marshals them to the native
1767 // convention (handlizes oops, etc), transitions to native, makes the call,
1768 // returns to java state (possibly blocking), unhandlizes any result and
1769 // returns.
1770 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1771 methodHandle method,
1772 int total_in_args,
1773 int comp_args_on_stack, // in VMRegStackSlots
1774 BasicType *in_sig_bt,
1775 VMRegPair *in_regs,
1776 BasicType ret_type) {
1778 // Native nmethod wrappers never take possession of the oop arguments.
1779 // So the caller will gc the arguments. The only thing we need an
1780 // oopMap for is if the call is static.
1781 //
1782 // An OopMap for lock (and class if static), and one for the VM call itself
1783 OopMapSet *oop_maps = new OopMapSet();
1784 intptr_t start = (intptr_t)__ pc();
1786 // First thing make an ic check to see if we should even be here
1787 {
1788 Label L;
1789 const Register temp_reg = G3_scratch;
1790 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
1791 __ verify_oop(O0);
1792 __ load_klass(O0, temp_reg);
1793 __ cmp(temp_reg, G5_inline_cache_reg);
1794 __ brx(Assembler::equal, true, Assembler::pt, L);
1795 __ delayed()->nop();
1797 __ jump_to(ic_miss, temp_reg);
1798 __ delayed()->nop();
1799 __ align(CodeEntryAlignment);
1800 __ bind(L);
1801 }
1803 int vep_offset = ((intptr_t)__ pc()) - start;
1805 #ifdef COMPILER1
1806 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1807 // Object.hashCode can pull the hashCode from the header word
1808 // instead of doing a full VM transition once it's been computed.
1809 // Since hashCode is usually polymorphic at call sites we can't do
1810 // this optimization at the call site without a lot of work.
1811 Label slowCase;
1812 Register receiver = O0;
1813 Register result = O0;
1814 Register header = G3_scratch;
1815 Register hash = G3_scratch; // overwrite header value with hash value
1816 Register mask = G1; // to get hash field from header
1818 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
1819 // We depend on hash_mask being at most 32 bits and avoid the use of
1820 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
1821 // vm: see markOop.hpp.
1822 __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
1823 __ sethi(markOopDesc::hash_mask, mask);
1824 __ btst(markOopDesc::unlocked_value, header);
1825 __ br(Assembler::zero, false, Assembler::pn, slowCase);
1826 if (UseBiasedLocking) {
1827 // Check if biased and fall through to runtime if so
1828 __ delayed()->nop();
1829 __ btst(markOopDesc::biased_lock_bit_in_place, header);
1830 __ br(Assembler::notZero, false, Assembler::pn, slowCase);
1831 }
1832 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
1834 // Check for a valid (non-zero) hash code and get its value.
1835 #ifdef _LP64
1836 __ srlx(header, markOopDesc::hash_shift, hash);
1837 #else
1838 __ srl(header, markOopDesc::hash_shift, hash);
1839 #endif
1840 __ andcc(hash, mask, hash);
1841 __ br(Assembler::equal, false, Assembler::pn, slowCase);
1842 __ delayed()->nop();
1844 // leaf return.
1845 __ retl();
1846 __ delayed()->mov(hash, result);
1847 __ bind(slowCase);
1848 }
1849 #endif // COMPILER1
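// For reference (illustrative only), the fast path above computes
//   hash = (mark >> hash_shift) & hash_mask;
// and falls through to the slow case when the object is locked (or biased)
// or when the stored hash is zero, i.e. not yet assigned.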
1852 // We have received a description of where all the java args are located
1853 // on entry to the wrapper. We need to convert these args to where
1854 // the jni function will expect them. To figure out where they go
1855 // we convert the java signature to a C signature by inserting
1856 // the hidden arguments as arg[0] and possibly arg[1] (static method)
1858 int total_c_args = total_in_args + 1;
1859 if (method->is_static()) {
1860 total_c_args++;
1861 }
1863 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1864 VMRegPair * out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1866 int argc = 0;
1867 out_sig_bt[argc++] = T_ADDRESS;
1868 if (method->is_static()) {
1869 out_sig_bt[argc++] = T_OBJECT;
1870 }
1872 for (int i = 0; i < total_in_args ; i++ ) {
1873 out_sig_bt[argc++] = in_sig_bt[i];
1874 }
1876 // Now figure out where the args must be stored and how much stack space
1877 // they require (neglecting out_preserve_stack_slots but including space for
1878 // storing the 1st six register arguments). It's weird; see int_stk_helper.
1879 //
1880 int out_arg_slots;
1881 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
1883 // Compute framesize for the wrapper. We need to handlize all oops in
1884 // registers. We must create space for them here that is disjoint from
1885 // the windowed save area because we have no control over when we might
1886 // flush the window again and overwrite values that gc has since modified.
1887 // (The live window race)
1888 //
1889 // We always just allocate 6 words for storing down these objects. This allows
1890 // us to simply record the base and use the Ireg number to decide which
1891 // slot to use. (Note that the reg number is the inbound number, not the
1892 // outbound number.)
1893 // We must shuffle args to match the native convention, and include var-args space.
1895 // Calculate the total number of stack slots we will need.
1897 // First count the abi requirement plus all of the outgoing args
1898 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1900 // Now the space for the inbound oop handle area
1902 int oop_handle_offset = stack_slots;
1903 stack_slots += 6*VMRegImpl::slots_per_word;
1905 // Now any space we need for handlizing a klass if this is a static method
1907 int oop_temp_slot_offset = 0;
1908 int klass_slot_offset = 0;
1909 int klass_offset = -1;
1910 int lock_slot_offset = 0;
1911 bool is_static = false;
1913 if (method->is_static()) {
1914 klass_slot_offset = stack_slots;
1915 stack_slots += VMRegImpl::slots_per_word;
1916 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1917 is_static = true;
1918 }
1920 // Plus a lock if needed
1922 if (method->is_synchronized()) {
1923 lock_slot_offset = stack_slots;
1924 stack_slots += VMRegImpl::slots_per_word;
1925 }
1927 // Now a place to save the return value, or a temporary for any gpr -> fpr moves
1928 stack_slots += 2;
1930 // Ok The space we have allocated will look like:
1931 //
1932 //
1933 // FP-> | |
1934 // |---------------------|
1935 // | 2 slots for moves |
1936 // |---------------------|
1937 // | lock box (if sync) |
1938 // |---------------------| <- lock_slot_offset
1939 // | klass (if static) |
1940 // |---------------------| <- klass_slot_offset
1941 // | oopHandle area |
1942 // |---------------------| <- oop_handle_offset
1943 // | outbound memory |
1944 // | based arguments |
1945 // | |
1946 // |---------------------|
1947 // | vararg area |
1948 // |---------------------|
1949 // | |
1950 // SP-> | out_preserved_slots |
1951 //
1952 //
1955 // Now compute the actual number of stack words we need, rounding to keep
1956 // the stack properly aligned.
1957 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
1959 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
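// Example (illustrative): with slots_per_word == 2 (_LP64), a raw count of 37
// slots rounds up to 40, so stack_size = 40 * 4 = 160 bytes and the frame
// size stays a multiple of 16 bytes.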
1961 // Generate stack overflow check before creating frame
1962 __ generate_stack_overflow_check(stack_size);
1964 // Generate a new frame for the wrapper.
1965 __ save(SP, -stack_size, SP);
1967 int frame_complete = ((intptr_t)__ pc()) - start;
1969 __ verify_thread();
1972 //
1973 // We immediately shuffle the arguments so that any vm call we have to
1974 // make from here on out (sync slow path, jvmti, etc.) we will have
1975 // captured the oops from our caller and have a valid oopMap for
1976 // them.
1978 // -----------------
1979 // The Grand Shuffle
1980 //
1981 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1982 // (derived from JavaThread* which is in L7_thread_cache) and, if static,
1983 // the class mirror instead of a receiver. This pretty much guarantees that
1984 // register layout will not match. We ignore these extra arguments during
1985 // the shuffle. The shuffle is described by the two calling convention
1986 // vectors we have in our possession. We simply walk the java vector to
1987 // get the source locations and the c vector to get the destinations.
1988 // Because we have a new window and the argument registers are completely
1989 // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
1990 // here.
1992 // This is a trick. We double the stack slots so we can claim
1993 // the oops in the caller's frame. Since we are sure to have
1994 // more args than the caller, doubling is enough to make
1995 // sure we can capture all the incoming oop args from the
1996 // caller.
1997 //
1998 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1999 int c_arg = total_c_args - 1;
2000 // Record sp-based slot for receiver on stack for non-static methods
2001 int receiver_offset = -1;
2003 // We move the arguments backward because the floating point register
2004 // destination will always be a register with a greater or equal register
2005 // number, or the stack.
2007 #ifdef ASSERT
2008 bool reg_destroyed[RegisterImpl::number_of_registers];
2009 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2010 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
2011 reg_destroyed[r] = false;
2012 }
2013 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
2014 freg_destroyed[f] = false;
2015 }
2017 #endif /* ASSERT */
2019 for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
2021 #ifdef ASSERT
2022 if (in_regs[i].first()->is_Register()) {
2023 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
2024 } else if (in_regs[i].first()->is_FloatRegister()) {
2025 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
2026 }
2027 if (out_regs[c_arg].first()->is_Register()) {
2028 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2029 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
2030 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
2031 }
2032 #endif /* ASSERT */
2034 switch (in_sig_bt[i]) {
2035 case T_ARRAY:
2036 case T_OBJECT:
2037 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2038 ((i == 0) && (!is_static)),
2039 &receiver_offset);
2040 break;
2041 case T_VOID:
2042 break;
2044 case T_FLOAT:
2045 float_move(masm, in_regs[i], out_regs[c_arg]);
2046 break;
2048 case T_DOUBLE:
2049 assert( i + 1 < total_in_args &&
2050 in_sig_bt[i + 1] == T_VOID &&
2051 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2052 double_move(masm, in_regs[i], out_regs[c_arg]);
2053 break;
2055 case T_LONG :
2056 long_move(masm, in_regs[i], out_regs[c_arg]);
2057 break;
2059 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2061 default:
2062 move32_64(masm, in_regs[i], out_regs[c_arg]);
2063 }
2064 }
2066 // Pre-load a static method's oop into O1. Used both by locking code and
2067 // the normal JNI call code.
2068 if (method->is_static()) {
2069 __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
2071 // Now handlize the static class mirror in O1. It's known not-null.
2072 __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
2073 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2074 __ add(SP, klass_offset + STACK_BIAS, O1);
2075 }
2078 const Register L6_handle = L6;
2080 if (method->is_synchronized()) {
2081 __ mov(O1, L6_handle);
2082 }
2084 // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
2085 // except O6/O7. So if we must call out we must push a new frame. We immediately
2086 // push a new frame and flush the windows.
2088 #ifdef _LP64
2089 intptr_t thepc = (intptr_t) __ pc();
2090 {
2091 address here = __ pc();
2092 // Call the next instruction
2093 __ call(here + 8, relocInfo::none);
2094 __ delayed()->nop();
2095 }
2096 #else
2097 intptr_t thepc = __ load_pc_address(O7, 0);
2098 #endif /* _LP64 */
2100 // We use the same pc/oopMap repeatedly when we call out
2101 oop_maps->add_gc_map(thepc - start, map);
2103 // O7 now has the pc loaded that we will use when we finally call to native.
2105 // Save thread in L7; it crosses a bunch of VM calls below
2106 // Don't use save_thread because it smashes G2 and we merely
2107 // want to save a copy
2108 __ mov(G2_thread, L7_thread_cache);
2111 // If we create an inner frame, once is plenty;
2112 // when we create it we must also save G2_thread.
2113 bool inner_frame_created = false;
2115 // dtrace method entry support
2116 {
2117 SkipIfEqual skip_if(
2118 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
2119 // create inner frame
2120 __ save_frame(0);
2121 __ mov(G2_thread, L7_thread_cache);
2122 __ set_oop_constant(JNIHandles::make_local(method()), O1);
2123 __ call_VM_leaf(L7_thread_cache,
2124 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2125 G2_thread, O1);
2126 __ restore();
2127 }
2129 // RedefineClasses() tracing support for obsolete method entry
2130 if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
2131 // create inner frame
2132 __ save_frame(0);
2133 __ mov(G2_thread, L7_thread_cache);
2134 __ set_oop_constant(JNIHandles::make_local(method()), O1);
2135 __ call_VM_leaf(L7_thread_cache,
2136 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
2137 G2_thread, O1);
2138 __ restore();
2139 }
2141 // We are in the jni frame unless saved_frame is true, in which case
2142 // we are one frame deeper (the "inner" frame). If we are in the
2143 // "inner" frame the args are in the Iregs; in the jni frame
2144 // they are in the Oregs.
2145 // If we ever need to go to the VM (for locking, jvmti) then
2146 // we will always be in the "inner" frame.
2148 // Lock a synchronized method
2149 int lock_offset = -1; // Set if locked
2150 if (method->is_synchronized()) {
2151 Register Roop = O1;
2152 const Register L3_box = L3;
2154 create_inner_frame(masm, &inner_frame_created);
2156 __ ld_ptr(I1, 0, O1);
2157 Label done;
2159 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2160 __ add(FP, lock_offset+STACK_BIAS, L3_box);
2161 #ifdef ASSERT
2162 if (UseBiasedLocking) {
2163 // making the box point to itself will make it clear it went unused
2164 // but also be obviously invalid
2165 __ st_ptr(L3_box, L3_box, 0);
2166 }
2167 #endif // ASSERT
2168 //
2169 // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
2170 //
2171 __ compiler_lock_object(Roop, L1, L3_box, L2);
2172 __ br(Assembler::equal, false, Assembler::pt, done);
2173 __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);
2176 // None of the above fast optimizations worked so we have to get into the
2177 // slow case of monitor enter. Inline a special case of call_VM that
2178 // disallows any pending_exception.
2179 __ mov(Roop, O0); // Need oop in O0
2180 __ mov(L3_box, O1);
2182 // Record last_Java_sp, in case the VM code releases the JVM lock.
2184 __ set_last_Java_frame(FP, I7);
2186 // do the call
2187 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
2188 __ delayed()->mov(L7_thread_cache, O2);
2190 __ restore_thread(L7_thread_cache); // restore G2_thread
2191 __ reset_last_Java_frame();
2193 #ifdef ASSERT
2194 { Label L;
2195 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
2196 __ br_null(O0, false, Assembler::pt, L);
2197 __ delayed()->nop();
2198 __ stop("no pending exception allowed on exit from IR::monitorenter");
2199 __ bind(L);
2200 }
2201 #endif
2202 __ bind(done);
2203 }
2206 // Finally just about ready to make the JNI call
2208 __ flush_windows();
2209 if (inner_frame_created) {
2210 __ restore();
2211 } else {
2212 // Store only what we need from this frame
2213 // QQQ I think that on non-v9 (like we care) we don't need these saves
2214 // either as the flush traps and the current window goes too.
2215 __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2216 __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2217 }
2219 // get JNIEnv* which is first argument to native
2221 __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2223 // Use that pc we placed in O7 a while back as the current frame anchor
2225 __ set_last_Java_frame(SP, O7);
2227 // Transition from _thread_in_Java to _thread_in_native.
2228 __ set(_thread_in_native, G3_scratch);
2229 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2231 // We flushed the windows ages ago; now mark them as flushed
2234 __ set(JavaFrameAnchor::flushed, G3_scratch);
2236 Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2238 #ifdef _LP64
2239 AddressLiteral dest(method->native_function());
2240 __ relocate(relocInfo::runtime_call_type);
2241 __ jumpl_to(dest, O7, O7);
2242 #else
2243 __ call(method->native_function(), relocInfo::runtime_call_type);
2244 #endif
2245 __ delayed()->st(G3_scratch, flags);
2247 __ restore_thread(L7_thread_cache); // restore G2_thread
2249 // Unpack native results. For int-types, we do any needed sign-extension
2250 // and move things into I0. The return value there will survive any VM
2251 // calls for blocking or unlocking. An FP or OOP result (handle) is done
2252 // specially in the slow-path code.
2253 switch (ret_type) {
2254 case T_VOID: break; // Nothing to do!
2255 case T_FLOAT: break; // Got it where we want it (unless slow-path)
2256 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
2257 // In a 64-bit build the result is in O0; in a 32-bit build it is in O0, O1
2258 case T_LONG:
2259 #ifndef _LP64
2260 __ mov(O1, I1);
2261 #endif
2262 // Fall thru
2263 case T_OBJECT: // Really a handle
2264 case T_ARRAY:
2265 case T_INT:
2266 __ mov(O0, I0);
2267 break;
2268 case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2269 case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break;
2270 case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value!
2271 case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break;
2272 break; // Cannot de-handlize until after reclaiming jvm_lock
2273 default:
2274 ShouldNotReachHere();
2275 }
2277 // must we block?
2279 // Block, if necessary, before resuming in _thread_in_Java state.
2280 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2281 { Label no_block;
2282 AddressLiteral sync_state(SafepointSynchronize::address_of_state());
2284 // Switch thread to "native transition" state before reading the synchronization state.
2285 // This additional state is necessary because reading and testing the synchronization
2286 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2287 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2288 // VM thread changes sync state to synchronizing and suspends threads for GC.
2289 // Thread A is resumed to finish this native method, but doesn't block here since it
2290 // didn't see any synchronization in progress, and escapes.
2291 __ set(_thread_in_native_trans, G3_scratch);
2292 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2293 if(os::is_MP()) {
2294 if (UseMembar) {
2295 // Force this write out before the read below
2296 __ membar(Assembler::StoreLoad);
2297 } else {
2298 // Write serialization page so VM thread can do a pseudo remote membar.
2299 // We use the current thread pointer to calculate a thread specific
2300 // offset to write to within the page. This minimizes bus traffic
2301 // due to cache line collision.
2302 __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
2303 }
2304 }
2305 __ load_contents(sync_state, G3_scratch);
2306 __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
2308 Label L;
2309 Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
2310 __ br(Assembler::notEqual, false, Assembler::pn, L);
2311 __ delayed()->ld(suspend_state, G3_scratch);
2312 __ cmp(G3_scratch, 0);
2313 __ br(Assembler::equal, false, Assembler::pt, no_block);
2314 __ delayed()->nop();
2315 __ bind(L);
2317 // Block. Save any potential method result value before the operation and
2318 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2319 // lets us share the oopMap we used when we went native rather than create
2320 // a distinct one for this pc.
2321 //
2322 save_native_result(masm, ret_type, stack_slots);
2323 __ call_VM_leaf(L7_thread_cache,
2324 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
2325 G2_thread);
2327 // Restore any method result value
2328 restore_native_result(masm, ret_type, stack_slots);
2329 __ bind(no_block);
2330 }
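// In outline, the transition protocol above is (illustrative only):
//   thread->set_state(_thread_in_native_trans);
//   membar or serialize_memory();   // make the state store visible first
//   if (safepoint_in_progress || thread->suspend_flags() != 0)
//     check_special_condition_for_native_trans(thread);   // may block
// Only after this is it safe to flip the state back to _thread_in_Java.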
2332 // thread state is thread_in_native_trans. Any safepoint blocking has already
2333 // happened so we can now change state to _thread_in_Java.
2336 __ set(_thread_in_Java, G3_scratch);
2337 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2340 Label no_reguard;
2341 __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
2342 __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
2343 __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
2344 __ delayed()->nop();
2346 save_native_result(masm, ret_type, stack_slots);
2347 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2348 __ delayed()->nop();
2350 __ restore_thread(L7_thread_cache); // restore G2_thread
2351 restore_native_result(masm, ret_type, stack_slots);
2353 __ bind(no_reguard);
2355 // Handle possible exception (will unlock if necessary)
2357 // native result, if any, is live in freg or I0 (and I1 if long and 32bit vm)
2359 // Unlock
2360 if (method->is_synchronized()) {
2361 Label done;
2362 Register I2_ex_oop = I2;
2363 const Register L3_box = L3;
2364 // Get locked oop from the handle we passed to jni
2365 __ ld_ptr(L6_handle, 0, L4);
2366 __ add(SP, lock_offset+STACK_BIAS, L3_box);
2367 // Must save pending exception around the slow-path VM call. Since it's a
2368 // leaf call, the pending exception (if any) can be kept in a register.
2369 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
2370 // Now unlock
2371 // (Roop, Rmark, Rbox, Rscratch)
2372 __ compiler_unlock_object(L4, L1, L3_box, L2);
2373 __ br(Assembler::equal, false, Assembler::pt, done);
2374 __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
2376 // save and restore any potential method result value around the unlocking
2377 // operation. Will save in I0 (or stack for FP returns).
2378 save_native_result(masm, ret_type, stack_slots);
2380 // Must clear pending-exception before re-entering the VM. Since this is
2381 // a leaf call, pending-exception-oop can be safely kept in a register.
2382 __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
2384 // slow case of monitor exit. Inline a special case of call_VM that
2385 // disallows any pending_exception.
2386 __ mov(L3_box, O1);
2388 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
2389 __ delayed()->mov(L4, O0); // Need oop in O0
2391 __ restore_thread(L7_thread_cache); // restore G2_thread
2393 #ifdef ASSERT
2394 { Label L;
2395 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
2396 __ br_null(O0, false, Assembler::pt, L);
2397 __ delayed()->nop();
2398 __ stop("no pending exception allowed on exit from IR::monitorexit");
2399 __ bind(L);
2400 }
2401 #endif
2402 restore_native_result(masm, ret_type, stack_slots);
2403 // check_forward_pending_exception jumps to forward_exception if any pending
2404 // exception is set. The forward_exception routine expects to see the
2405 // exception in pending_exception and not in a register. Kind of clumsy,
2406 // since all folks who branch to forward_exception must have tested
2407 // pending_exception first and hence have it in a register already.
2408 __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
2409 __ bind(done);
2410 }
2412 // Tell dtrace about this method exit
2413 {
2414 SkipIfEqual skip_if(
2415 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
2416 save_native_result(masm, ret_type, stack_slots);
2417 __ set_oop_constant(JNIHandles::make_local(method()), O1);
2418 __ call_VM_leaf(L7_thread_cache,
2419 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2420 G2_thread, O1);
2421 restore_native_result(masm, ret_type, stack_slots);
2422 }
2424 // Clear "last Java frame" SP and PC.
2425 __ verify_thread(); // G2_thread must be correct
2426 __ reset_last_Java_frame();
2428 // Unpack oop result
2429 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2430 Label L;
2431 __ addcc(G0, I0, G0);
2432 __ brx(Assembler::notZero, true, Assembler::pt, L);
2433 __ delayed()->ld_ptr(I0, 0, I0);
2434 __ mov(G0, I0);
2435 __ bind(L);
2436 __ verify_oop(I0);
2437 }
2439 // reset handle block
2440 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
2441 __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
2443 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
2444 check_forward_pending_exception(masm, G3_scratch);
2447 // Return
2449 #ifndef _LP64
2450 if (ret_type == T_LONG) {
2452 // Must leave proper result in O0,O1 and G1 (c2/tiered only)
2453 __ sllx(I0, 32, G1); // Shift bits into high G1
2454 __ srl (I1, 0, I1); // Zero extend O1 (harmless?)
2455 __ or3 (I1, G1, G1); // OR 64 bits into G1
2456 }
2457 #endif
2459 __ ret();
2460 __ delayed()->restore();
2462 __ flush();
2464 nmethod *nm = nmethod::new_native_nmethod(method,
2465 masm->code(),
2466 vep_offset,
2467 frame_complete,
2468 stack_slots / VMRegImpl::slots_per_word,
2469 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2470 in_ByteSize(lock_offset),
2471 oop_maps);
2472 return nm;
2474 }
2476 #ifdef HAVE_DTRACE_H
2477 // ---------------------------------------------------------------------------
2478 // Generate a dtrace nmethod for a given signature. The method takes arguments
2479 // in the Java compiled code convention, marshals them to the native
2480 // abi and then leaves nops at the position you would expect to call a native
2481 // function. When the probe is enabled the nops are replaced with a trap
2482 // instruction that dtrace inserts and the trap will cause a notification
2483 // to dtrace.
2484 //
2485 // The probes are only able to take primitive types and java/lang/String as
2486 // arguments. No other java types are allowed. Strings are converted to utf8
2487 // strings so that from dtrace's point of view java strings are converted to C
2488 // strings. There is an arbitrary fixed limit on the total space that a method
2489 // can use for converting the strings (256 chars per string in the signature),
2490 // so any java string larger than this is truncated.
2492 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
2493 static bool offsets_initialized = false;
2495 static VMRegPair reg64_to_VMRegPair(Register r) {
2496 VMRegPair ret;
2497 if (wordSize == 8) {
2498 ret.set2(r->as_VMReg());
2499 } else {
2500 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
2501 }
2502 return ret;
2503 }
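// Usage sketch (illustrative): on a 32-bit build, reg64_to_VMRegPair(L2)
// yields the pair (hi = L3, lo = L2), matching how ld_long leaves a 64-bit
// value in an even/odd register pair; on a 64-bit build it is simply L2.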
2506 nmethod *SharedRuntime::generate_dtrace_nmethod(
2507 MacroAssembler *masm, methodHandle method) {
2510 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
2511 // be single threaded in this method.
2512 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
2514 // Fill in the signature array, for the calling-convention call.
2515 int total_args_passed = method->size_of_parameters();
2517 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
2518 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
2520 // The signature we are going to use for the trap that dtrace will see:
2521 // java/lang/String is converted, we drop "this", and any other object
2522 // is converted to NULL. (A one-slot java/lang/Long object reference
2523 // is converted to a two-slot long, which is why we double the allocation.)
2524 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
2525 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
2527 int i=0;
2528 int total_strings = 0;
2529 int first_arg_to_pass = 0;
2530 int total_c_args = 0;
2532 // Skip the receiver as dtrace doesn't want to see it
2533 if( !method->is_static() ) {
2534 in_sig_bt[i++] = T_OBJECT;
2535 first_arg_to_pass = 1;
2536 }
2538 SignatureStream ss(method->signature());
2539 for ( ; !ss.at_return_type(); ss.next()) {
2540 BasicType bt = ss.type();
2541 in_sig_bt[i++] = bt; // Collect remaining bits of signature
2542 out_sig_bt[total_c_args++] = bt;
2543 if( bt == T_OBJECT) {
2544 Symbol* s = ss.as_symbol_or_null();
2545 if (s == vmSymbols::java_lang_String()) {
2546 total_strings++;
2547 out_sig_bt[total_c_args-1] = T_ADDRESS;
2548 } else if (s == vmSymbols::java_lang_Boolean() ||
2549 s == vmSymbols::java_lang_Byte()) {
2550 out_sig_bt[total_c_args-1] = T_BYTE;
2551 } else if (s == vmSymbols::java_lang_Character() ||
2552 s == vmSymbols::java_lang_Short()) {
2553 out_sig_bt[total_c_args-1] = T_SHORT;
2554 } else if (s == vmSymbols::java_lang_Integer() ||
2555 s == vmSymbols::java_lang_Float()) {
2556 out_sig_bt[total_c_args-1] = T_INT;
2557 } else if (s == vmSymbols::java_lang_Long() ||
2558 s == vmSymbols::java_lang_Double()) {
2559 out_sig_bt[total_c_args-1] = T_LONG;
2560 out_sig_bt[total_c_args++] = T_VOID;
2561 }
2562 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
2563 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
2564 // We convert double to long
2565 out_sig_bt[total_c_args-1] = T_LONG;
2566 out_sig_bt[total_c_args++] = T_VOID;
2567 } else if ( bt == T_FLOAT) {
2568 // We convert float to int
2569 out_sig_bt[total_c_args-1] = T_INT;
2570 }
2571 }
2573 assert(i==total_args_passed, "validly parsed signature");
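// Worked example (illustrative): a non-static method with Java signature
// (Ljava/lang/String;JF)V produces
//   in_sig_bt  = { T_OBJECT, T_OBJECT, T_LONG, T_VOID, T_FLOAT }  // receiver first
//   out_sig_bt = { T_ADDRESS, T_LONG, T_VOID, T_INT }
// i.e. the String becomes a char* slot, the long keeps its two slots, and
// the float is handed over as its raw bits in an int slot.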
2575 // Now get the compiled-Java layout as input arguments
2576 int comp_args_on_stack;
2577 comp_args_on_stack = SharedRuntime::java_calling_convention(
2578 in_sig_bt, in_regs, total_args_passed, false);
2580 // We have received a description of where all the java args are located
2581 // on entry to the wrapper. We need to convert these args to where
2582 // a native (non-jni) function would expect them. To figure out
2583 // where they go we convert the java signature to a C signature and remove
2584 // T_VOID for any long/double we might have received.
2587 // Now figure out where the args must be stored and how much stack space
2588 // they require (neglecting out_preserve_stack_slots but including space for
2589 // storing the 1st six register arguments). It's weird; see int_stk_helper.
2590 //
2591 int out_arg_slots;
2592 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2594 // Calculate the total number of stack slots we will need.
2596 // First count the abi requirement plus all of the outgoing args
2597 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2599 // Plus a temp for possible conversion of float/double/long register args
2601 int conversion_temp = stack_slots;
2602 stack_slots += 2;
2605 // Now space for the string(s) we must convert
2607 int string_locs = stack_slots;
2608 stack_slots += total_strings *
2609 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
2611 // Ok The space we have allocated will look like:
2612 //
2613 //
2614 // FP-> | |
2615 // |---------------------|
2616 // | string[n] |
2617 // |---------------------| <- string_locs[n]
2618 // | string[n-1] |
2619 // |---------------------| <- string_locs[n-1]
2620 // | ... |
2621 // | ... |
2622 // |---------------------| <- string_locs[1]
2623 // | string[0] |
2624 // |---------------------| <- string_locs[0]
2625 // | temp |
2626 // |---------------------| <- conversion_temp
2627 // | outbound memory |
2628 // | based arguments |
2629 // | |
2630 // |---------------------|
2631 // | |
2632 // SP-> | out_preserved_slots |
2633 //
2634 //
2636 // Now compute the actual number of stack words we need, rounding to keep
2637 // the stack properly aligned.
2638 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
2640 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2642 intptr_t start = (intptr_t)__ pc();
2644 // First thing make an ic check to see if we should even be here
2646 {
2647 Label L;
2648 const Register temp_reg = G3_scratch;
2649 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
2650 __ verify_oop(O0);
2651 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
2652 __ cmp(temp_reg, G5_inline_cache_reg);
2653 __ brx(Assembler::equal, true, Assembler::pt, L);
2654 __ delayed()->nop();
2656 __ jump_to(ic_miss, temp_reg);
2657 __ delayed()->nop();
2658 __ align(CodeEntryAlignment);
2659 __ bind(L);
2660 }
2662 int vep_offset = ((intptr_t)__ pc()) - start;
2665 // The instruction at the verified entry point must be 5 bytes or longer
2666 // because it can be patched on the fly by make_non_entrant. The stack bang
2667 // instruction fits that requirement.
2669 // Generate stack overflow check before creating frame
2670 __ generate_stack_overflow_check(stack_size);
2672 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
2673 "valid size for make_non_entrant");
2675 // Generate a new frame for the wrapper.
2676 __ save(SP, -stack_size, SP);
2678 // Frame is now completed as far as size and linkage.
2680 int frame_complete = ((intptr_t)__ pc()) - start;
2682 #ifdef ASSERT
2683 bool reg_destroyed[RegisterImpl::number_of_registers];
2684 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2685 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
2686 reg_destroyed[r] = false;
2687 }
2688 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
2689 freg_destroyed[f] = false;
2690 }
2692 #endif /* ASSERT */
2694 VMRegPair zero;
2695 const Register g0 = G0; // without this we get a compiler warning (why??)
2696 zero.set2(g0->as_VMReg());
2698 int c_arg, j_arg;
2700 Register conversion_off = noreg;
2702 for (j_arg = first_arg_to_pass, c_arg = 0 ;
2703 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2705 VMRegPair src = in_regs[j_arg];
2706 VMRegPair dst = out_regs[c_arg];
2708 #ifdef ASSERT
2709 if (src.first()->is_Register()) {
2710 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
2711 } else if (src.first()->is_FloatRegister()) {
2712 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
2713 FloatRegisterImpl::S)], "ack!");
2714 }
2715 if (dst.first()->is_Register()) {
2716 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
2717 } else if (dst.first()->is_FloatRegister()) {
2718 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
2719 FloatRegisterImpl::S)] = true;
2720 }
2721 #endif /* ASSERT */
2723 switch (in_sig_bt[j_arg]) {
2724 case T_ARRAY:
2725 case T_OBJECT:
2726 {
2727 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
2728 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2729 // need to unbox a one-slot value
2730 Register in_reg = L0;
2731 Register tmp = L2;
2732 if ( src.first()->is_reg() ) {
2733 in_reg = src.first()->as_Register();
2734 } else {
2735 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
2736 "must be");
2737 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
2738 }
2739 // If the final destination is an acceptable register
2740 if ( dst.first()->is_reg() ) {
2741 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
2742 tmp = dst.first()->as_Register();
2743 }
2744 }
2746 Label skipUnbox;
2747 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
2748 __ mov(G0, tmp->successor());
2749 }
2750 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
2751 __ delayed()->mov(G0, tmp);
2753 BasicType bt = out_sig_bt[c_arg];
2754 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2755 switch (bt) {
2756 case T_BYTE:
2757 __ ldub(in_reg, box_offset, tmp); break;
2758 case T_SHORT:
2759 __ lduh(in_reg, box_offset, tmp); break;
2760 case T_INT:
2761 __ ld(in_reg, box_offset, tmp); break;
2762 case T_LONG:
2763 __ ld_long(in_reg, box_offset, tmp); break;
2764 default: ShouldNotReachHere();
2765 }
2767 __ bind(skipUnbox);
2768 // If tmp wasn't the final destination, copy to the final destination
2769 if (tmp == L2) {
2770 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
2771 if (out_sig_bt[c_arg] == T_LONG) {
2772 long_move(masm, tmp_as_VM, dst);
2773 } else {
2774 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
2775 }
2776 }
2777 if (out_sig_bt[c_arg] == T_LONG) {
2778 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2779 ++c_arg; // move over the T_VOID to keep the loop indices in sync
2780 }
2781 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
2782 Register s =
2783 src.first()->is_reg() ? src.first()->as_Register() : L2;
2784 Register d =
2785 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2787 // We store the oop now so that the conversion pass can reach it
2788 // while in the inner frame. This will be the only store if
2789 // the oop is NULL.
2790 if (s != L2) {
2791 // src is register
2792 if (d != L2) {
2793 // dst is register
2794 __ mov(s, d);
2795 } else {
2796 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2797 STACK_BIAS), "must be");
2798 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
2799 }
2800 } else {
2801 // src not a register
2802 assert(Assembler::is_simm13(reg2offset(src.first()) +
2803 STACK_BIAS), "must be");
2804 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
2805 if (d == L2) {
2806 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2807 STACK_BIAS), "must be");
2808 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
2809 }
2810 }
2811 } else if (out_sig_bt[c_arg] != T_VOID) {
2812 // Convert the arg to NULL
2813 if (dst.first()->is_reg()) {
2814 __ mov(G0, dst.first()->as_Register());
2815 } else {
2816 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2817 STACK_BIAS), "must be");
2818 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
2819 }
2820 }
2821 }
2822 break;
2823 case T_VOID:
2824 break;
2826 case T_FLOAT:
2827 if (src.first()->is_stack()) {
2828 // Stack to stack/reg is simple
2829 move32_64(masm, src, dst);
2830 } else {
2831 if (dst.first()->is_reg()) {
2832 // freg -> reg
2833 int off =
2834 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2835 Register d = dst.first()->as_Register();
2836 if (Assembler::is_simm13(off)) {
2837 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2838 SP, off);
2839 __ ld(SP, off, d);
2840 } else {
2841 if (conversion_off == noreg) {
2842 __ set(off, L6);
2843 conversion_off = L6;
2844 }
2845 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2846 SP, conversion_off);
2847 __ ld(SP, conversion_off , d);
2848 }
2849 } else {
2850 // freg -> mem
2851 int off = STACK_BIAS + reg2offset(dst.first());
2852 if (Assembler::is_simm13(off)) {
2853 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2854 SP, off);
2855 } else {
2856 if (conversion_off == noreg) {
2857 __ set(off, L6);
2858 conversion_off = L6;
2859 }
2860 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2861 SP, conversion_off);
2862 }
2863 }
2864 }
2865 break;
2867 case T_DOUBLE:
2868 assert( j_arg + 1 < total_args_passed &&
2869 in_sig_bt[j_arg + 1] == T_VOID &&
2870 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2871 if (src.first()->is_stack()) {
2872 // Stack to stack/reg is simple
2873 long_move(masm, src, dst);
2874 } else {
2875 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2877 // Destination could be an odd reg on 32bit, in which case
2878 // we can't load directly to the destination.
2880 if (!d->is_even() && wordSize == 4) {
2881 d = L2;
2882 }
2883 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2884 if (Assembler::is_simm13(off)) {
2885 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2886 SP, off);
2887 __ ld_long(SP, off, d);
2888 } else {
2889 if (conversion_off == noreg) {
2890 __ set(off, L6);
2891 conversion_off = L6;
2892 }
2893 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2894 SP, conversion_off);
2895 __ ld_long(SP, conversion_off, d);
2896 }
2897 if (d == L2) {
2898 long_move(masm, reg64_to_VMRegPair(L2), dst);
2899 }
2900 }
2901 break;
2903 case T_LONG :
2904 // 32bit can't do a split move of something like g1 -> O0, O1
2905 // so use a memory temp
2906 if (src.is_single_phys_reg() && wordSize == 4) {
2907 Register tmp = L2;
2908 if (dst.first()->is_reg() &&
2909 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
2910 tmp = dst.first()->as_Register();
2911 }
2913 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2914 if (Assembler::is_simm13(off)) {
2915 __ stx(src.first()->as_Register(), SP, off);
2916 __ ld_long(SP, off, tmp);
2917 } else {
2918 if (conversion_off == noreg) {
2919 __ set(off, L6);
2920 conversion_off = L6;
2921 }
2922 __ stx(src.first()->as_Register(), SP, conversion_off);
2923 __ ld_long(SP, conversion_off, tmp);
2924 }
2926 if (tmp == L2) {
2927 long_move(masm, reg64_to_VMRegPair(L2), dst);
2928 }
2929 } else {
2930 long_move(masm, src, dst);
2931 }
2932 break;
2934 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2936 default:
2937 move32_64(masm, src, dst);
2938 }
2939 }
2942 // If we have any strings we must store any register-based arg to the stack.
2943 // This includes any still-live floating point registers too.
2945 if (total_strings > 0 ) {
2947 // protect all the arg registers
2948 __ save_frame(0);
2949 __ mov(G2_thread, L7_thread_cache);
2950 const Register L2_string_off = L2;
2952 // Get first string offset
2953 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
2955 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
2956 if (out_sig_bt[c_arg] == T_ADDRESS) {
2958 VMRegPair dst = out_regs[c_arg];
2959 const Register d = dst.first()->is_reg() ?
2960 dst.first()->as_Register()->after_save() : noreg;
2962 // It's a string oop and it was already copied to the out arg
2963 // position.
2964 if (d != noreg) {
2965 __ mov(d, O0);
2966 } else {
2967 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
2968 "must be");
2969 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
2970 }
2971 Label skip;
2973 __ br_null(O0, false, Assembler::pn, skip);
2974 __ delayed()->add(FP, L2_string_off, O1);
2976 if (d != noreg) {
2977 __ mov(O1, d);
2978 } else {
2979 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
2980 "must be");
2981 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
2982 }
2984 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
2985 relocInfo::runtime_call_type);
2986 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
2988 __ bind(skip);
2990 }
2992 }
2993 __ mov(L7_thread_cache, G2_thread);
2994 __ restore();
2996 }
2999 // Ok, now we are done. Need to place the nop that dtrace wants in order to
3000 // patch in the trap.
3002 int patch_offset = ((intptr_t)__ pc()) - start;
3004 __ nop();
3007 // Return
3009 __ ret();
3010 __ delayed()->restore();
3012 __ flush();
3014 nmethod *nm = nmethod::new_dtrace_nmethod(
3015 method, masm->code(), vep_offset, patch_offset, frame_complete,
3016 stack_slots / VMRegImpl::slots_per_word);
3017 return nm;
3019 }
3021 #endif // HAVE_DTRACE_H
3023 // this function returns the adjustment size (in number of words) to a c2i adapter
3024 // activation for use during deoptimization
3025 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
3026 assert(callee_locals >= callee_parameters,
3027 "test and remove; got more parms than locals");
3028 if (callee_locals < callee_parameters)
3029 return 0; // No adjustment when there are fewer locals than parameters
3030 int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
3031 return round_to(diff, WordsPerLong);
3032 }
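// Worked example (editorial): with callee_parameters == 2 and callee_locals == 5,
// diff is 3 stack elements; assuming Interpreter::stackElementWords == 1 and
// WordsPerLong == 2, round_to(3, 2) yields an adjustment of 4 words, keeping
// long/double locals doubleword aligned.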
3034 // "Top of Stack" slots that may be unused by the calling convention but must
3035 // otherwise be preserved.
3036 // On Intel these are not necessary and the value can be zero.
3037 // On Sparc this describes the words reserved for storing a register window
3038 // when an interrupt occurs.
3039 uint SharedRuntime::out_preserve_stack_slots() {
3040 return frame::register_save_words * VMRegImpl::slots_per_word;
3041 }
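// Sample arithmetic (editorial): assuming frame::register_save_words == 16
// (the 8 local + 8 in registers of one SPARC register window) and
// VMRegImpl::slots_per_word == 2 in the LP64 build, this reserves
// 16 * 2 == 32 stack slots.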
3043 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
3044 //
3045 // Common out the new frame generation for deopt and uncommon trap
3046 //
3047 Register G3pcs = G3_scratch; // Array of new pcs (input)
3048 Register Oreturn0 = O0;
3049 Register Oreturn1 = O1;
3050 Register O2UnrollBlock = O2;
3051 Register O3array = O3; // Array of frame sizes (input)
3052 Register O4array_size = O4; // number of frames (input)
3053 Register O7frame_size = O7; // size of current frame (scratch)
3055 __ ld_ptr(O3array, 0, O7frame_size); // load this frame's size
3056 __ sub(G0, O7frame_size, O7frame_size); // negate it for the save below
3057 __ save(SP, O7frame_size, SP); // push the new frame
3058 __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
3060 #ifdef ASSERT
3061 // make sure that the frames are aligned properly
3062 #ifndef _LP64
3063 __ btst(wordSize*2-1, SP);
3064 __ breakpoint_trap(Assembler::notZero);
3065 #endif
3066 #endif
3068 // Deopt needs to pass some extra live values from frame to frame
3070 if (deopt) {
3071 __ mov(Oreturn0->after_save(), Oreturn0);
3072 __ mov(Oreturn1->after_save(), Oreturn1);
3073 }
3075 __ mov(O4array_size->after_save(), O4array_size);
3076 __ sub(O4array_size, 1, O4array_size);
3077 __ mov(O3array->after_save(), O3array);
3078 __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
3079 __ add(G3pcs, wordSize, G3pcs); // point to next pc value
3081 #ifdef ASSERT
3082 // trash registers to show a clear pattern in backtraces
3083 __ set(0xDEAD0000, I0);
3084 __ add(I0, 2, I1);
3085 __ add(I0, 4, I2);
3086 __ add(I0, 6, I3);
3087 __ add(I0, 8, I4);
3088 // Don't touch I5; it could hold a valuable savedSP
3089 __ set(0xDEADBEEF, L0);
3090 __ mov(L0, L1);
3091 __ mov(L0, L2);
3092 __ mov(L0, L3);
3093 __ mov(L0, L4);
3094 __ mov(L0, L5);
3096 // trash the return value as there is nothing to return yet
3097 __ set(0xDEAD0001, O7);
3098 #endif
3100 __ mov(SP, O5_savedSP);
3101 }
3104 static void make_new_frames(MacroAssembler* masm, bool deopt) {
3105 //
3106 // loop through the UnrollBlock info and create new frames
3107 //
3108 Register G3pcs = G3_scratch;
3109 Register Oreturn0 = O0;
3110 Register Oreturn1 = O1;
3111 Register O2UnrollBlock = O2;
3112 Register O3array = O3;
3113 Register O4array_size = O4;
3114 Label loop;
3116 // Before we make new frames, check to see if stack is available.
3117 // Do this after the caller's return address is on top of stack
3118 if (UseStackBanging) {
3119 // Get total frame size for interpreted frames
3120 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
3121 __ bang_stack_size(O4, O3, G3_scratch);
3122 }
3124 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
3125 __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
3126 __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
3128 // Adjust old interpreter frame to make space for new frame's extra java locals
3129 //
3130 // We capture the original sp for the transition frame only because it is needed in
3131 // order to properly calculate interpreter_sp_adjustment. Even though in real life
3132 // every interpreter frame captures a savedSP, it is only needed at the transition
3133 // (fortunately). If we had to have it correct everywhere then we would need to
3134 // be told the sp_adjustment for each frame we create. If the frame size array
3135 // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
3136 // for each frame we create and keep up the illusion everywhere.
3137 //
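// Purely hypothetical layout for the scheme sketched above (editorial; not
// what the code does): the frame size array would carry pairs,
//   { sp_adjustment_0, frame_size_0, sp_adjustment_1, frame_size_1, ... }
// and gen_new_frame would apply sp_adjustment_i before each save, keeping a
// correct savedSP in every frame instead of only at the transition.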
3139 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
3140 __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment
3141 __ sub(SP, O7, SP);
3143 #ifdef ASSERT
3144 // make sure that there is at least one entry in the array
3145 __ tst(O4array_size);
3146 __ breakpoint_trap(Assembler::zero);
3147 #endif
3149 // Now push the new interpreter frames
3150 __ bind(loop);
3152 // allocate a new frame, filling the registers
3154 gen_new_frame(masm, deopt); // allocate an interpreter frame
3156 __ tst(O4array_size);
3157 __ br(Assembler::notZero, false, Assembler::pn, loop);
3158 __ delayed()->add(O3array, wordSize, O3array);
3159 __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
3161 }
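// Editorial sketch of the UnrollBlock fields consumed above, in rough C terms
// (field names follow the *_offset_in_bytes accessors used in make_new_frames;
// see deoptimization.hpp for the authoritative layout):
//   struct UnrollBlock {
//     int       number_of_frames;   // -> O4array_size, the loop counter
//     intptr_t* frame_sizes;        // -> O3array, one size per new frame
//     address*  frame_pcs;          // -> G3pcs, one pc per new frame
//     int       caller_adjustment;  // extra words for callee locals
//     int       total_frame_sizes;  // used above for the stack bang
//   };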
3163 //------------------------------generate_deopt_blob----------------------------
3164 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3165 // instead.
3166 void SharedRuntime::generate_deopt_blob() {
3167 // allocate space for the code
3168 ResourceMark rm;
3169 // setup code generation tools
3170 int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
3171 #ifdef _LP64
3172 CodeBuffer buffer("deopt_blob", 2100+pad, 512);
3173 #else
3174 // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
3175 // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
3176 CodeBuffer buffer("deopt_blob", 1600+pad, 512);
3177 #endif /* _LP64 */
3178 MacroAssembler* masm = new MacroAssembler(&buffer);
3179 FloatRegister Freturn0 = F0;
3180 Register Greturn1 = G1;
3181 Register Oreturn0 = O0;
3182 Register Oreturn1 = O1;
3183 Register O2UnrollBlock = O2;
3184 Register L0deopt_mode = L0;
3185 Register G4deopt_mode = G4_scratch;
3186 int frame_size_words;
3187 Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
3188 #if !defined(_LP64) && defined(COMPILER2)
3189 Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
3190 #endif
3191 Label cont;
3193 OopMapSet *oop_maps = new OopMapSet();
3195 //
3196 // This is the entry point for code which is returning to a de-optimized
3197 // frame.
3198 // The steps taken by this frame are as follows:
3199 // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
3200 // and all potentially live registers (at a pollpoint many registers can be live).
3201 //
3202 // - call the C routine: Deoptimization::fetch_unroll_info (this function
3203 // returns information about the number and size of interpreter frames
3204 // which are equivalent to the frame which is being deoptimized)
3205 // - deallocate the unpack frame, restoring only result values. Other
3206 // volatile registers will now be captured in the vframeArray as needed.
3207 // - deallocate the deoptimization frame
3208 // - in a loop using the information returned in the previous step
3209 // push new interpreter frames (take care to propagate the return
3210 // values through each new frame pushed)
3211 // - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
3212 // - call the C routine: Deoptimization::unpack_frames (this function
3213 // lays out values on the interpreter frame which was just created)
3214 // - deallocate the dummy unpack_frame
3215 // - ensure that all the return values are correctly set and then do
3216 // a return to the interpreter entry point
3217 //
3218 // Refer to the following methods for more information:
3219 // - Deoptimization::fetch_unroll_info
3220 // - Deoptimization::unpack_frames
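// Condensed control flow (editorial sketch; the C entry points are the real
// ones named above, everything else is shorthand):
//   map = save_live_registers();                   // dummy register_save frame
//   info = Deoptimization::fetch_unroll_info(thread);
//   restore_result_registers(); restore();         // back in the deoptee frame
//   make_new_frames(info);                          // one frame per virtual frame
//   save(); Deoptimization::unpack_frames(thread, mode); restore();
//   ret();                                          // resume in the interpreter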
3222 OopMap* map = NULL;
3224 int start = __ offset();
3226 // restore G2, the trampoline destroyed it
3227 __ get_thread();
3229 // On entry we have been called by the deoptimized nmethod with a call that
3230 // replaced the original call (or safepoint polling location), so the deoptimizing
3231 // pc is now in O7. Return values are still in the expected places.
3233 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3234 __ ba(false, cont);
3235 __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
3237 int exception_offset = __ offset() - start;
3239 // restore G2, the trampoline destroyed it
3240 __ get_thread();
3242 // On entry we have been jumped to by the exception handler (or exception_blob
3243 // for server). O0 contains the exception oop and O7 contains the original
3244 // exception pc. So if we push a frame here it will look to the
3245 // stack walking code (fetch_unroll_info) just like a normal call, so
3246 // state will be extracted normally.
3248 // save exception oop in JavaThread and fall through into the
3249 // exception_in_tls case since they are handled in the same way except
3250 // for where the pending exception is kept.
3251 __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());
3253 //
3254 // Vanilla deoptimization with an exception pending in exception_oop
3255 //
3256 int exception_in_tls_offset = __ offset() - start;
3258 // No need to update oop_map as each call to save_live_registers will produce identical oopmap
3259 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3261 // Restore G2_thread
3262 __ get_thread();
3264 #ifdef ASSERT
3265 {
3266 // verify that there is really an exception oop in exception_oop
3267 Label has_exception;
3268 __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
3269 __ br_notnull(Oexception, false, Assembler::pt, has_exception);
3270 __ delayed()->nop();
3271 __ stop("no exception in thread");
3272 __ bind(has_exception);
3274 // verify that there is no pending exception
3275 Label no_pending_exception;
3276 Address exception_addr(G2_thread, Thread::pending_exception_offset());
3277 __ ld_ptr(exception_addr, Oexception);
3278 __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
3279 __ delayed()->nop();
3280 __ stop("must not have pending exception here");
3281 __ bind(no_pending_exception);
3282 }
3283 #endif
3285 __ ba(false, cont);
3286 __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
3288 //
3289 // Reexecute entry, similar to c2 uncommon trap
3290 //
3291 int reexecute_offset = __ offset() - start;
3293 // No need to update oop_map as each call to save_live_registers will produce identical oopmap
3294 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3296 __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);
3298 __ bind(cont);
3300 __ set_last_Java_frame(SP, noreg);
3302 // do the call by hand so we can get the oopmap
3304 __ mov(G2_thread, L7_thread_cache);
3305 __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
3306 __ delayed()->mov(G2_thread, O0);
3308 // Set an oopmap for the call site; this describes all our saved volatile registers
3310 oop_maps->add_gc_map( __ offset()-start, map);
3312 __ mov(L7_thread_cache, G2_thread);
3314 __ reset_last_Java_frame();
3316 // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
3317 // so this move will survive
3319 __ mov(L0deopt_mode, G4deopt_mode);
3321 __ mov(O0, O2UnrollBlock->after_save());
3323 RegisterSaver::restore_result_registers(masm);
3325 Label noException;
3326 __ cmp(G4deopt_mode, Deoptimization::Unpack_exception); // Was exception pending?
3327 __ br(Assembler::notEqual, false, Assembler::pt, noException);
3328 __ delayed()->nop();
3330 // Move the pending exception from exception_oop to Oexception so
3331 // the pending exception will be picked up by the interpreter.
3332 __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
3333 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
3334 __ bind(noException);
3336 // deallocate the deoptimization frame taking care to preserve the return values
3337 __ mov(Oreturn0, Oreturn0->after_save());
3338 __ mov(Oreturn1, Oreturn1->after_save());
3339 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
3340 __ restore();
3342 // Allocate new interpreter frame(s) and possible c2i adapter frame
3344 make_new_frames(masm, true);
3346 // push a dummy "unpack_frame" taking care of float return values and
3347 // call Deoptimization::unpack_frames to have the unpacker lay out
3348 // information in the interpreter frames just created and then return
3349 // to the interpreter entry point
3350 __ save(SP, -frame_size_words*wordSize, SP);
3351 __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
3352 #if !defined(_LP64)
3353 #if defined(COMPILER2)
3354 // In 32-bit builds C2 returns longs in the single register G1
3355 __ stx(Greturn1, saved_Greturn1_addr);
3356 #endif
3357 __ set_last_Java_frame(SP, noreg);
3358 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
3359 #else
3360 // LP64 uses g4 in set_last_Java_frame
3361 __ mov(G4deopt_mode, O1);
3362 __ set_last_Java_frame(SP, G0);
3363 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
3364 #endif
3365 __ reset_last_Java_frame();
3366 __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
3368 #if !defined(_LP64) && defined(COMPILER2)
3369 // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
3370 // I0/I1 if the return value is long.
3371 Label not_long;
3372 __ cmp(O0, T_LONG);
3373 __ br(Assembler::notEqual, false, Assembler::pt, not_long);
3374 __ delayed()->nop();
3375 __ ldd(saved_Greturn1_addr, I0);
3376 __ bind(not_long);
3377 #endif
3378 __ ret();
3379 __ delayed()->restore();
3381 masm->flush();
3382 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
3383 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3384 }
3386 #ifdef COMPILER2
3388 //------------------------------generate_uncommon_trap_blob--------------------
3389 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3390 // instead.
3391 void SharedRuntime::generate_uncommon_trap_blob() {
3392 // allocate space for the code
3393 ResourceMark rm;
3394 // setup code generation tools
3395 int pad = VerifyThread ? 512 : 0;
3396 #ifdef _LP64
3397 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
3398 #else
3399 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
3400 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
3401 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
3402 #endif
3403 MacroAssembler* masm = new MacroAssembler(&buffer);
3404 Register O2UnrollBlock = O2;
3405 Register O2klass_index = O2;
3407 //
3408 // This is the entry point for all traps the compiler takes when it thinks
3409 // it cannot handle further execution of compiled code. The frame is
3410 // deoptimized in these cases and converted into interpreter frames for
3411 // execution.
3412 // The steps taken by this frame are as follows:
3413 // - push a fake "unpack_frame"
3414 // - call the C routine Deoptimization::uncommon_trap (this function
3415 // packs the current compiled frame into vframe arrays and returns
3416 // information about the number and size of interpreter frames which
3417 // are equivalent to the frame which is being deoptimized)
3418 // - deallocate the "unpack_frame"
3419 // - deallocate the deoptimization frame
3420 // - in a loop using the information returned in the previous step
3421 // push interpreter frames;
3422 // - create a dummy "unpack_frame"
3423 // - call the C routine: Deoptimization::unpack_frames (this function
3424 // lays out values on the interpreter frame which was just created)
3425 // - deallocate the dummy unpack_frame
3426 // - return to the interpreter entry point
3427 //
3428 // Refer to the following methods for more information:
3429 // - Deoptimization::uncommon_trap
3430 // - Deoptimization::unpack_frames
3432 // the unloaded class index is in O0 (first parameter to this blob)
3434 // push a dummy "unpack_frame"
3435 // and call Deoptimization::uncommon_trap to pack the compiled frame into
3436 // vframe array and return the UnrollBlock information
3437 __ save_frame(0);
3438 __ set_last_Java_frame(SP, noreg);
3439 __ mov(I0, O2klass_index);
3440 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
3441 __ reset_last_Java_frame();
3442 __ mov(O0, O2UnrollBlock->after_save());
3443 __ restore();
3445 // deallocate the deoptimized frame taking care to preserve the return values
3446 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
3447 __ restore();
3449 // Allocate new interpreter frame(s) and possible c2i adapter frame
3451 make_new_frames(masm, false);
3453 // push a dummy "unpack_frame" (no return values to preserve here) and
3454 // call Deoptimization::unpack_frames to have the unpacker lay out
3455 // information in the interpreter frames just created and then return
3456 // to the interpreter entry point
3457 __ save_frame(0);
3458 __ set_last_Java_frame(SP, noreg);
3459 __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
3460 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
3461 __ reset_last_Java_frame();
3462 __ ret();
3463 __ delayed()->restore();
3465 masm->flush();
3466 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
3467 }
3469 #endif // COMPILER2
3471 //------------------------------generate_handler_blob-------------------
3472 //
3473 // Generate a special Compile2Runtime blob that saves all registers, and sets
3474 // up an OopMap.
3475 //
3476 // This blob is jumped to (via a breakpoint and the signal handler) from a
3477 // safepoint in compiled code. On entry to this blob, O7 contains the
3478 // address in the original nmethod at which we should resume normal execution.
3479 // Thus, this blob looks like a subroutine which must preserve lots of
3480 // registers and return normally. Note that O7 is never register-allocated,
3481 // so it is guaranteed to be free here.
3482 //
3484 // The hardest part of what this blob must do is to save the 64-bit %o
3485 // registers in the 32-bit build. A simple 'save' turns the %o's to %i's and
3486 // an interrupt will chop off their heads. Making space in the caller's frame
3487 // first will let us save the 64-bit %o's before save'ing, but we cannot hand
3488 // the adjusted FP off to the GC stack-crawler: this will modify the caller's
3489 // SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save
3490 // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
3491 // Tricky, tricky, tricky...
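// The dance above, in outline (editorial sketch inferred from the prose;
// RegisterSaver::save_live_registers has the real sequence):
//   1. sub  %sp, extra, %sp     ! pre-extend the caller's frame
//   2. stx  %o0..%o5 to stack   ! 64-bit %o's are now safe from window traps
//   3. save %sp, -frame, %sp    ! normal frame push; %o's become %i's
//   4. add  %fp, extra, %fp     ! give the stack crawler the caller's real SP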
3493 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
3494 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3496 // allocate space for the code
3497 ResourceMark rm;
3498 // setup code generation tools
3499 // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
3500 // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
3501 // even larger with TraceJumps
3502 int pad = TraceJumps ? 512 : 0;
3503 CodeBuffer buffer("handler_blob", 1600 + pad, 512);
3504 MacroAssembler* masm = new MacroAssembler(&buffer);
3505 int frame_size_words;
3506 OopMapSet *oop_maps = new OopMapSet();
3507 OopMap* map = NULL;
3509 int start = __ offset();
3511 // If this causes a return before the processing, then do a "restore"
3512 if (cause_return) {
3513 __ restore();
3514 } else {
3515 // Make it look like we were called via the poll
3516 // so that the frame constructor always sees a valid return address
3517 __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
3518 __ sub(O7, frame::pc_return_offset, O7);
3519 }
3521 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3523 // setup last_Java_sp (blows G4)
3524 __ set_last_Java_frame(SP, noreg);
3526 // call into the runtime to handle the safepoint polling page exception
3527 // Do not use call_VM_leaf, because we need to make a GC map at this call site.
3528 __ mov(G2_thread, O0);
3529 __ save_thread(L7_thread_cache);
3530 __ call(call_ptr);
3531 __ delayed()->nop();
3533 // Set an oopmap for the call site.
3534 // We need this not only for callee-saved registers, but also for volatile
3535 // registers that the compiler might be keeping live across a safepoint.
3537 oop_maps->add_gc_map( __ offset() - start, map);
3539 __ restore_thread(L7_thread_cache);
3540 // clear last_Java_sp
3541 __ reset_last_Java_frame();
3543 // Check for exceptions
3544 Label pending;
3546 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
3547 __ tst(O1);
3548 __ brx(Assembler::notEqual, true, Assembler::pn, pending);
3549 __ delayed()->nop();
3551 RegisterSaver::restore_live_registers(masm);
3553 // We are back to the original state on entry and ready to go.
3555 __ retl();
3556 __ delayed()->nop();
3558 // Pending exception after the safepoint
3560 __ bind(pending);
3562 RegisterSaver::restore_live_registers(masm);
3564 // We are back to the original state on entry.
3566 // Tail-call forward_exception_entry, with the issuing PC in O7,
3567 // so it looks like the original nmethod called forward_exception_entry.
3568 __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
3569 __ JMP(O0, 0);
3570 __ delayed()->nop();
3572 // -------------
3573 // make sure all code is generated
3574 masm->flush();
3576 // return exception blob
3577 return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
3578 }
3580 //
3581 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3582 //
3583 // Generate a stub that calls into the vm to find out the proper destination
3584 // of a java call. All the argument registers are live at this point
3585 // but since this is generic code we don't know what they are, and the caller
3586 // must do any gc of the args.
3587 //
3588 static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
3589 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3591 // allocate space for the code
3592 ResourceMark rm;
3593 // setup code generation tools
3594 // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
3595 // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
3596 // even larger with TraceJumps
3597 int pad = TraceJumps ? 512 : 0;
3598 CodeBuffer buffer(name, 1600 + pad, 512);
3599 MacroAssembler* masm = new MacroAssembler(&buffer);
3600 int frame_size_words;
3601 OopMapSet *oop_maps = new OopMapSet();
3602 OopMap* map = NULL;
3604 int start = __ offset();
3606 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3608 int frame_complete = __ offset();
3610 // setup last_Java_sp (blows G4)
3611 __ set_last_Java_frame(SP, noreg);
3613 // call into the runtime to find the proper destination of the call
3614 // Do not use call_VM_leaf, because we need to make a GC map at this call site.
3615 __ mov(G2_thread, O0);
3616 __ save_thread(L7_thread_cache);
3617 __ call(destination, relocInfo::runtime_call_type);
3618 __ delayed()->nop();
3620 // O0 contains the address we are going to jump to assuming no exception got installed
3622 // Set an oopmap for the call site.
3623 // We need this not only for callee-saved registers, but also for volatile
3624 // registers that the compiler might be keeping live across a safepoint.
3626 oop_maps->add_gc_map( __ offset() - start, map);
3628 __ restore_thread(L7_thread_cache);
3629 // clear last_Java_sp
3630 __ reset_last_Java_frame();
3632 // Check for exceptions
3633 Label pending;
3635 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
3636 __ tst(O1);
3637 __ brx(Assembler::notEqual, true, Assembler::pn, pending);
3638 __ delayed()->nop();
3640 // get the returned methodOop
3642 __ get_vm_result(G5_method);
3643 __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
3645 // O0 is where we want to jump; overwrite G3, which is saved and scratch
3647 __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
3649 RegisterSaver::restore_live_registers(masm);
3651 // We are back to the original state on entry and ready to go.
3653 __ JMP(G3, 0);
3654 __ delayed()->nop();
3656 // Pending exception after the safepoint
3658 __ bind(pending);
3660 RegisterSaver::restore_live_registers(masm);
3662 // We are back to the original state on entry.
3664 // Tail-call forward_exception_entry, with the issuing PC in O7,
3665 // so it looks like the original nmethod called forward_exception_entry.
3666 __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
3667 __ JMP(O0, 0);
3668 __ delayed()->nop();
3670 // -------------
3671 // make sure all code is generated
3672 masm->flush();
3674 // return the blob
3675 // frame_size_words or bytes??
3676 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3677 }
3679 void SharedRuntime::generate_stubs() {
3681 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
3682 "wrong_method_stub");
3684 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
3685 "ic_miss_stub");
3687 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
3688 "resolve_opt_virtual_call");
3690 _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
3691 "resolve_virtual_call");
3693 _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
3694 "resolve_static_call");
3696 _polling_page_safepoint_handler_blob =
3697 generate_handler_blob(CAST_FROM_FN_PTR(address,
3698 SafepointSynchronize::handle_polling_page_exception), false);
3700 _polling_page_return_handler_blob =
3701 generate_handler_blob(CAST_FROM_FN_PTR(address,
3702 SafepointSynchronize::handle_polling_page_exception), true);
3704 generate_deopt_blob();
3706 #ifdef COMPILER2
3707 generate_uncommon_trap_blob();
3708 #endif // COMPILER2
3709 }