Fri, 27 Sep 2019 11:31:13 +0800
#10071 MIPS Port of 8176100: [REDO][REDO] G1 Needs pre barrier on dereference of weak JNI handles
Summary: fix crashes in runtime/jni/CallWithJNIWeak/test.sh and runtime/jni/ReturnJNIWeak/test.sh
Reviewed-by: aoqi
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_mips.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#include <alloca.h>

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
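// For example, assuming the usual HotSpot values on this port
// (StackAlignmentInBytes == 16, VMRegImpl::stack_slot_size == 4),
// StackAlignmentInSlots works out to 16 / 4 == 4, i.e. every aligned
// frame boundary covers four 32-bit stack slots.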
class RegisterSaver {
  enum { FPU_regs_live = 32 };
  // Capture info about frame layout
  enum layout {
#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
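    // Each DEF_LAYOUT_OFFS(regname) expands to two consecutive enumerators,
    // regname_off and regnameH_off, i.e. two 32-bit stack slots per saved
    // 64-bit register. For example, DEF_LAYOUT_OFFS(v0) yields v0_off, v0H_off.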
    DEF_LAYOUT_OFFS(for_16_bytes_aligned)
    DEF_LAYOUT_OFFS(fpr0)
    DEF_LAYOUT_OFFS(fpr1)
    DEF_LAYOUT_OFFS(fpr2)
    DEF_LAYOUT_OFFS(fpr3)
    DEF_LAYOUT_OFFS(fpr4)
    DEF_LAYOUT_OFFS(fpr5)
    DEF_LAYOUT_OFFS(fpr6)
    DEF_LAYOUT_OFFS(fpr7)
    DEF_LAYOUT_OFFS(fpr8)
    DEF_LAYOUT_OFFS(fpr9)
    DEF_LAYOUT_OFFS(fpr10)
    DEF_LAYOUT_OFFS(fpr11)
    DEF_LAYOUT_OFFS(fpr12)
    DEF_LAYOUT_OFFS(fpr13)
    DEF_LAYOUT_OFFS(fpr14)
    DEF_LAYOUT_OFFS(fpr15)
    DEF_LAYOUT_OFFS(fpr16)
    DEF_LAYOUT_OFFS(fpr17)
    DEF_LAYOUT_OFFS(fpr18)
    DEF_LAYOUT_OFFS(fpr19)
    DEF_LAYOUT_OFFS(fpr20)
    DEF_LAYOUT_OFFS(fpr21)
    DEF_LAYOUT_OFFS(fpr22)
    DEF_LAYOUT_OFFS(fpr23)
    DEF_LAYOUT_OFFS(fpr24)
    DEF_LAYOUT_OFFS(fpr25)
    DEF_LAYOUT_OFFS(fpr26)
    DEF_LAYOUT_OFFS(fpr27)
    DEF_LAYOUT_OFFS(fpr28)
    DEF_LAYOUT_OFFS(fpr29)
    DEF_LAYOUT_OFFS(fpr30)
    DEF_LAYOUT_OFFS(fpr31)

    DEF_LAYOUT_OFFS(v0)
    DEF_LAYOUT_OFFS(v1)
    DEF_LAYOUT_OFFS(a0)
    DEF_LAYOUT_OFFS(a1)
    DEF_LAYOUT_OFFS(a2)
    DEF_LAYOUT_OFFS(a3)
    DEF_LAYOUT_OFFS(a4)
    DEF_LAYOUT_OFFS(a5)
    DEF_LAYOUT_OFFS(a6)
    DEF_LAYOUT_OFFS(a7)
    DEF_LAYOUT_OFFS(t0)
    DEF_LAYOUT_OFFS(t1)
    DEF_LAYOUT_OFFS(t2)
    DEF_LAYOUT_OFFS(t3)
    DEF_LAYOUT_OFFS(s0)
    DEF_LAYOUT_OFFS(s1)
    DEF_LAYOUT_OFFS(s2)
    DEF_LAYOUT_OFFS(s3)
    DEF_LAYOUT_OFFS(s4)
    DEF_LAYOUT_OFFS(s5)
    DEF_LAYOUT_OFFS(s6)
    DEF_LAYOUT_OFFS(s7)
    DEF_LAYOUT_OFFS(t8)
    DEF_LAYOUT_OFFS(t9)

    DEF_LAYOUT_OFFS(gp)
    DEF_LAYOUT_OFFS(fp)
    DEF_LAYOUT_OFFS(return)
    reg_save_size
  };
 public:

  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
  static int raOffset(void) { return return_off / 2; }
  // Rmethod
  static int methodOffset(void) { return s3_off / 2; }

  static int v0Offset(void) { return v0_off / 2; }
  static int v1Offset(void) { return v1_off / 2; }

  static int fpResultOffset(void) { return fpr0_off / 2; }
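  // The *_off enumerators count 32-bit slots, so dividing by 2 converts
  // them to 64-bit word offsets. For example, counting the enumerators
  // above gives v0_off == 66 (after 2 alignment slots and 64 FPR slots),
  // so v0Offset() == 33 words from SP once save_live_registers has run.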
  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {

  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;
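  // A worked example, assuming additional_frame_words == 0: the layout enum
  // defines 60 register entries of two 32-bit slots each, so reg_save_size
  // == 120 and frame_size_in_bytes == round_to(120 * 4, 16) == 480, i.e.
  // 120 slots and 60 words.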
  // save registers

  __ daddiu(SP, SP, -reg_save_size * jintSize);

  __ sdc1(F0, SP, fpr0_off * jintSize);   __ sdc1(F1, SP, fpr1_off * jintSize);
  __ sdc1(F2, SP, fpr2_off * jintSize);   __ sdc1(F3, SP, fpr3_off * jintSize);
  __ sdc1(F4, SP, fpr4_off * jintSize);   __ sdc1(F5, SP, fpr5_off * jintSize);
  __ sdc1(F6, SP, fpr6_off * jintSize);   __ sdc1(F7, SP, fpr7_off * jintSize);
  __ sdc1(F8, SP, fpr8_off * jintSize);   __ sdc1(F9, SP, fpr9_off * jintSize);
  __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize);
  __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize);
  __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize);
  __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize);
  __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize);
  __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize);
  __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize);
  __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize);
  __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize);
  __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize);
  __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize);
  __ sd(V0, SP, v0_off * jintSize);       __ sd(V1, SP, v1_off * jintSize);
  __ sd(A0, SP, a0_off * jintSize);       __ sd(A1, SP, a1_off * jintSize);
  __ sd(A2, SP, a2_off * jintSize);       __ sd(A3, SP, a3_off * jintSize);
  __ sd(A4, SP, a4_off * jintSize);       __ sd(A5, SP, a5_off * jintSize);
  __ sd(A6, SP, a6_off * jintSize);       __ sd(A7, SP, a7_off * jintSize);
  __ sd(T0, SP, t0_off * jintSize);
  __ sd(T1, SP, t1_off * jintSize);
  __ sd(T2, SP, t2_off * jintSize);
  __ sd(T3, SP, t3_off * jintSize);
  __ sd(S0, SP, s0_off * jintSize);
  __ sd(S1, SP, s1_off * jintSize);
  __ sd(S2, SP, s2_off * jintSize);
  __ sd(S3, SP, s3_off * jintSize);
  __ sd(S4, SP, s4_off * jintSize);
  __ sd(S5, SP, s5_off * jintSize);
  __ sd(S6, SP, s6_off * jintSize);
  __ sd(S7, SP, s7_off * jintSize);

  __ sd(T8, SP, t8_off * jintSize);
  __ sd(T9, SP, t9_off * jintSize);

  __ sd(GP, SP, gp_off * jintSize);
  __ sd(FP, SP, fp_off * jintSize);
  __ sd(RA, SP, return_off * jintSize);
  __ daddi(FP, SP, fp_off * jintSize);

  OopMapSet *oop_maps = new OopMapSet();
  //OopMap* map = new OopMap( frame_words, 0 );
  OopMap* map = new OopMap(frame_size_in_slots, 0);

//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
  map->set_callee_saved(STACK_OFFSET(v0_off), V0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(v1_off), V1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a0_off), A0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a1_off), A1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a2_off), A2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a3_off), A3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a4_off), A4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a5_off), A5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a6_off), A6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a7_off), A7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t0_off), T0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t1_off), T1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t2_off), T2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t3_off), T3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s0_off), S0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s1_off), S1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s2_off), S2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s3_off), S3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s4_off), S4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s5_off), S5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s6_off), S6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s7_off), S7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t8_off), T8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t9_off), T9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(gp_off), GP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fp_off), FP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(return_off), RA->as_VMReg());

  map->set_callee_saved(STACK_OFFSET(fpr0_off), F0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr1_off), F1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr2_off), F2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr3_off), F3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr4_off), F4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr5_off), F5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr6_off), F6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr7_off), F7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr8_off), F8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr9_off), F9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr10_off), F10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr11_off), F11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr12_off), F12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr13_off), F13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr14_off), F14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr15_off), F15->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr16_off), F16->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr17_off), F17->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr18_off), F18->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr19_off), F19->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr20_off), F20->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr21_off), F21->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr22_off), F22->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr23_off), F23->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr24_off), F24->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr25_off), F25->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr26_off), F26->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr27_off), F27->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr28_off), F28->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr29_off), F29->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr30_off), F30->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr31_off), F31->as_VMReg());

#undef STACK_OFFSET
  return map;
}
// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  __ ldc1(F0, SP, fpr0_off * jintSize);   __ ldc1(F1, SP, fpr1_off * jintSize);
  __ ldc1(F2, SP, fpr2_off * jintSize);   __ ldc1(F3, SP, fpr3_off * jintSize);
  __ ldc1(F4, SP, fpr4_off * jintSize);   __ ldc1(F5, SP, fpr5_off * jintSize);
  __ ldc1(F6, SP, fpr6_off * jintSize);   __ ldc1(F7, SP, fpr7_off * jintSize);
  __ ldc1(F8, SP, fpr8_off * jintSize);   __ ldc1(F9, SP, fpr9_off * jintSize);
  __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize);
  __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize);
  __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize);
  __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize);
  __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize);
  __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize);
  __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize);
  __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize);
  __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize);
  __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize);
  __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize);

  __ ld(V0, SP, v0_off * jintSize);       __ ld(V1, SP, v1_off * jintSize);
  __ ld(A0, SP, a0_off * jintSize);       __ ld(A1, SP, a1_off * jintSize);
  __ ld(A2, SP, a2_off * jintSize);       __ ld(A3, SP, a3_off * jintSize);
  __ ld(A4, SP, a4_off * jintSize);       __ ld(A5, SP, a5_off * jintSize);
  __ ld(A6, SP, a6_off * jintSize);       __ ld(A7, SP, a7_off * jintSize);
  __ ld(T0, SP, t0_off * jintSize);
  __ ld(T1, SP, t1_off * jintSize);
  __ ld(T2, SP, t2_off * jintSize);
  __ ld(T3, SP, t3_off * jintSize);
  __ ld(S0, SP, s0_off * jintSize);
  __ ld(S1, SP, s1_off * jintSize);
  __ ld(S2, SP, s2_off * jintSize);
  __ ld(S3, SP, s3_off * jintSize);
  __ ld(S4, SP, s4_off * jintSize);
  __ ld(S5, SP, s5_off * jintSize);
  __ ld(S6, SP, s6_off * jintSize);
  __ ld(S7, SP, s7_off * jintSize);

  __ ld(T8, SP, t8_off * jintSize);
  __ ld(T9, SP, t9_off * jintSize);

  __ ld(GP, SP, gp_off * jintSize);
  __ ld(FP, SP, fp_off * jintSize);
  __ ld(RA, SP, return_off * jintSize);

  __ addiu(SP, SP, reg_save_size * jintSize);
}
// Pop the current frame and restore the registers that might be holding
// a result.
// FIXME: what if the result is a float?
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore the result registers. Only used by deoptimization. By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  __ ld(V0, SP, v0_off * jintSize);
  __ ld(V1, SP, v1_off * jintSize);
  __ addiu(SP, SP, return_off * jintSize);
}
// Is the vector's size (in bytes) bigger than a size saved by default?
// On x86, 16-byte XMM registers are saved by default via fxsave/fxrstor;
// this port keeps the same 16-byte threshold.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}
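// For example, is_wide_vector(16) is false (covered by the default save
// path), while is_wide_vector(32) is true.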
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.

static int reg2offset_in(VMReg r) {
  // Account for saved fp and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size);
}
static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
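// A small worked example, assuming the usual LP64 values
// (slots_per_word == 2, stack_slot_size == 4) and
// out_preserve_stack_slots() == 0: an argument in stack slot 3 maps to
// reg2offset_in == (3 + 4) * 4 == 28 bytes past FP, and to
// reg2offset_out == 3 * 4 == 12 bytes past SP.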
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than SharedInfo::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack
// pointer as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher. Registers 0 up to
// RegisterImpl::number_of_registers are the 32-bit integer registers.

// Pass the first nine oop/int args in registers T0 and A0 - A7.
// Pass float/double args in registers F12 - F19.
// Doubles have precedence, so if you pass a mix of floats and doubles
// the doubles will grab the registers before the floats will.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.

// ---------------------------------------------------------------------------
// The compiled Java calling convention.
// Pass the first nine oop/int args in registers T0 and A0 - A7.
// Pass float/double args in registers F12 - F19.
// Doubles have precedence, so if you pass a mix of floats and doubles
// the doubles will grab the registers before the floats will.
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {

  // Create the mapping between argument positions and
  // registers.
  //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
  static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
    T0, A0, A1, A2, A3, A4, A5, A6, A7
  };
  //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };

  uint args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}
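// For illustration: for a Java signature (int, long, double) the loop above
// assigns the int to T0 (INT_ArgReg[0]), the long to A0 (INT_ArgReg[1],
// with its trailing T_VOID half marked bad), and the double to F14
// (FP_ArgReg[2]); the single shared 'args' counter means int and float
// arguments consume positions from both register tables.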
// Helper class mostly to avoid passing masm everywhere, and handle store
// displacement overflow logic for LP64
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  Register Rdisp;
  void set_Rdisp(Register r) { Rdisp = r; }
#endif // _LP64

  void patch_callers_callsite();

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
// Patch the caller's callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ verify_oop(Rmethod);
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // V0 isn't live, so capture the return address while we easily can.
  __ move(V0, RA);

  __ pushad();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif

  // VM needs caller's callsite
  // VM needs target method

  __ move(A0, Rmethod);
  __ move(A1, V0);
  // we should preserve the return address
  __ verify_oop(Rmethod);
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes)); // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);

  __ delayed()->nop();
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
#ifdef _LP64
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off) {
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif
  // Is this for natives?
  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ move(V0, RA);
  // set senderSP value
  // refer to interpreter_mips.cpp: generate_asm_entry
  __ move(Rsender, SP);
  __ addi(SP, SP, -extraspace);
  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
    // Say 4 args:
    //   i   st_off
    //   0   12      T_LONG
    //   1    8      T_VOID
    //   2    4      T_OBJECT
    //   3    0      T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use fpu stack top
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

      } else {

        int next_off = st_off - Interpreter::stackElementSize;
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        // See the is_Register case below
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT, SP, st_off - 8);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ sd(r, SP, st_off);
      } else {
        //FIXME, mips will not enter here
        // long/double in gpr
        __ sd(r, SP, st_off);
        // In [java/util/zip/ZipFile.java]
        //
        //    private static native long open(String name, int mode, long lastModified);
        //    private static native int getTotal(long jzfile);
        //
        // We need to transfer T_LONG parameters from a compiled method to a native method.
        // It's a complex process:
        //
        // Caller -> lir_static_call -> gen_resolve_stub
        //      -> -- resolve_static_call_C
        //         `- gen_c2i_adapter()  [*]
        //             |
        //             `- AdapterHandlerLibrary::get_create_adapter_index
        //      -> generate_native_entry
        //      -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]
        //
        // In [**], a T_LONG parameter is stored on the stack as:
        //
        //   (high)
        //    |         |
        //    -----------
        //    | 8 bytes |
        //    | (void)  |
        //    -----------
        //    | 8 bytes |
        //    | (long)  |
        //    -----------
        //    |         |
        //   (low)
        //
        // However, the sequence is reversed here:
        //
        //   (high)
        //    |         |
        //    -----------
        //    | 8 bytes |
        //    | (long)  |
        //    -----------
        //    | 8 bytes |
        //    | (void)  |
        //    -----------
        //    |         |
        //   (low)
        //
        // So I stored another 8 bytes in the T_VOID slot. It can then be accessed from generate_native_entry().
        //
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8);  // T_DOUBLE needs two slots
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr(AT);
  __ delayed()->nop();
}
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout. Lesp was saved by the calling I-frame and will be restored on
  // return. Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will. After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention. Finally, end in a jump to the compiled code. The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
  __ move(T9, SP);

  // Cut-out for having no stack args. Since up to nine int/oop args are
  // passed in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME aoqi
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
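    // For example, comp_args_on_stack == 5 gives round_to(20, 8) >> 3 == 3
    // words, rounded up to 4 words of outgoing space below.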
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }
  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register
  const Register saved_sp = V0;
  __ move(saved_sp, T9);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address)
      // NOTICE HERE!!!! I sub a wordSize here
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
      //+ wordSize;

      if (!r_2->is_valid()) {
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())

        // [./org/eclipse/swt/graphics/GC.java]
        // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
        //     int destX, int destY, int destWidth, int destHeight,
        //     boolean simple,
        //     int imgWidth, int imgHeight,
        //     long maskPixmap,    <-- Pass T_LONG in stack
        //     int maskType);
        // Before this modification, Eclipse displays icons with solid black background.
        //
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      if (r_2->is_valid()) {
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        assert(r_2->as_Register() == r_1->as_Register(), "");
        __ ld(r, saved_sp, ld_off);

        //
        // For T_LONG type, the real layout is as below:
        //
        //   (high)
        //    |         |
        //    -----------
        //    | 8 bytes |
        //    | (void)  |
        //    -----------
        //    | 8 bytes |
        //    | (long)  |
        //    -----------
        //    |         |
        //   (low)
        //
        // We should load the low 8 bytes.
        //
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) {  // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }
908 }
910 // 6243940 We might end up in handle_wrong_method if
911 // the callee is deoptimized as we race thru here. If that
912 // happens we don't want to take a safepoint because the
913 // caller frame will look interpreted and arguments are now
914 // "compiled" so it is much better to make this transition
915 // invisible to the stack walking code. Unfortunately if
916 // we try and find the callee by normal means a safepoint
917 // is possible. So we stash the desired callee in the thread
918 // and the vm will find there should this case occur.
919 __ get_thread(T8);
920 __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));
922 // move methodOop to V0 in case we end up in an c2i adapter.
923 // the c2i adapters expect methodOop in V0 (c2) because c2's
924 // resolve stubs return the result (the method) in V0.
925 // I'd love to fix this.
926 __ move(V0, Rmethod);
927 __ jr(T9);
928 __ delayed()->nop();
929 }
// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know Rmethod holds the Method*. The
  // args start out packed in the compiled layout. They need to be unpacked
  // into the interpreter layout. This will almost always require some stack
  // space. We grow the current (compiled) stack, then repack the args. We
  // finally end in a jump to the generic interpreter entry point. On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).
  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  {
    Register holder = T1;
    Register receiver = T0;
    Register temp = T8;
    address ic_miss = SharedRuntime::get_ic_miss_stub();

    Label missed;

    __ verify_oop(holder);
    // add for compressedoops
    __ load_klass(temp, receiver);
    __ verify_oop(temp);

    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset());
    __ bne(AT, temp, missed);
    __ delayed()->nop();
    // Method might have been compiled since the call site was patched to
    // interpreted if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
    __ beq(AT, R0, skip_fixup);
    __ delayed()->nop();
    __ bind(missed);

    __ jmp(ic_miss, relocInfo::runtime_call_type);
    __ delayed()->nop();
  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on MIPS");
  // Return the number of VMReg stack_slots needed for the args.
  // This value does not include an abi space (like register window
  // save area).

  // The native convention here is the MIPS n64 ABI: up to eight integer
  // args in A0 - A7 and up to eight float args in F12 - F19.

  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots. Since we always
  // have space for storing at least 6 registers to memory we start with that.
  // See int_stk_helper for a further discussion.
  static const Register INT_ArgReg[Argument::n_register_parameters] = {
    A0, A1, A2, A3, A4, A5, A6, A7
  };
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };
  uint args = 0;
  uint stk_args = 0; // inc by 2 each time
  // Example:
  //   n java.lang.UNIXProcess::forkAndExec
  //   private native int forkAndExec(byte[] prog,
  //                                  byte[] argBlock, int argc,
  //                                  byte[] envBlock, int envc,
  //                                  byte[] dir,
  //                                  boolean redirectErrorStream,
  //                                  FileDescriptor stdin_fd,
  //                                  FileDescriptor stdout_fd,
  //                                  FileDescriptor stderr_fd)
  //   JNIEXPORT jint JNICALL
  //   Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
  //                                          jobject process,
  //                                          jbyteArray prog,
  //                                          jbyteArray argBlock, jint argc,
  //                                          jbyteArray envBlock, jint envc,
  //                                          jbyteArray dir,
  //                                          jboolean redirectErrorStream,
  //                                          jobject stdin_fd,
  //                                          jobject stdout_fd,
  //                                          jobject stderr_fd)
  //
  // ::c_calling_convention
  //    0:    // env                  <-- a0
  //    1: L  // klass/obj            <-- t0 => a1
  //    2: [  // prog[]               <-- a0 => a2
  //    3: [  // argBlock[]           <-- a1 => a3
  //    4: I  // argc
  //    5: [  // envBlock[]           <-- a3 => a5
  //    6: I  // envc
  //    7: [  // dir[]                <-- a5 => a7
  //    8: Z  // redirectErrorStream      a6 => sp[0]
  //    9: L  // stdin                    a7 => sp[8]
  //   10: L  // stdout               fp[16] => sp[16]
  //   11: L  // stderr               fp[24] => sp[24]
  //
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}
// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below
  // the frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ swc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ sdc1(FSF, FP, -wordSize);
    break;
  case T_VOID: break;
  case T_LONG:
    __ sd(V0, FP, -wordSize);
    break;
  case T_OBJECT:
  case T_ARRAY:
    __ sd(V0, FP, -wordSize);
    break;
  default: {
      __ sw(V0, FP, -wordSize);
    }
  }
}
void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below
  // the frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ lwc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ ldc1(FSF, FP, -wordSize);
    break;
  case T_LONG:
    __ ld(V0, FP, -wordSize);
    break;
  case T_VOID: break;
  case T_OBJECT:
  case T_ARRAY:
    __ ld(V0, FP, -wordSize);
    break;
  default: {
      __ lw(V0, FP, -wordSize);
    }
  }
}
static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for (int i = first_arg; i < arg_count; i++) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ push(args[i].first()->as_FloatRegister());
    }
  }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for (int i = arg_count - 1; i >= first_arg; i--) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ pop(args[i].first()->as_FloatRegister());
    }
  }
}
// A simple move of an integer-like type
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lw(AT, FP, reg2offset_in(src.first()));
      __ sd(AT, SP, reg2offset_out(dst.first()));
    } else {
      // stack to reg
      __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  } else {
    if (dst.first() != src.first()) {
      __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error: dst.first()
    }
  }
}
// An oop arg. Must pass a handle, not the oop itself
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  //FIXME, for mips, dst can be register
  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    Register rHandle = V0;
    Label nil;
    __ xorr(rHandle, rHandle, rHandle);
    __ ld(AT, FP, reg2offset_in(src.first()));
    __ beq(AT, R0, nil);
    __ delayed()->nop();
    __ lea(rHandle, Address(FP, reg2offset_in(src.first())));
    __ bind(nil);
    if (dst.first()->is_stack())
      __ sd(rHandle, SP, reg2offset_out(dst.first()));
    else // dst is a register
      __ move(dst.first()->as_Register(), rHandle);
    //FIXME, do mips need out preserve stack slots?
    int offset_in_older_frame = src.first()->reg2stack()
                                + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame
                          + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }
  } else {
    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles
    const Register rOop = src.first()->as_Register();
    assert((rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()), "wrong register");
    const Register rHandle = V0;
    // Important: refer to java_calling_convention
    int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;
    __ sd(rOop, SP, offset);
    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    __ xorr(rHandle, rHandle, rHandle);
    __ beq(rOop, R0, skip);
    __ delayed()->nop();
    __ lea(rHandle, Address(SP, offset));
    __ bind(skip);
    // Store the handle parameter
    if (dst.first()->is_stack())
      __ sd(rHandle, SP, reg2offset_out(dst.first()));
    else // dst is a register
      __ move(dst.first()->as_Register(), rHandle);

    if (is_receiver) {
      *receiver_offset = offset;
    }
  }
}
// A float arg may have to do a float reg to int reg conversion
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      __ lwc1(F12, FP, reg2offset_in(src.first()));
      __ swc1(F12, SP, reg2offset_out(dst.first()));
    } else {
      __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
    }
  } else {
    // reg to stack
    if (dst.first()->is_stack())
      __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
    else
      __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}
// A long move
// On this 64-bit port a long_move VMRegPair is either a single 64-bit
// register or a stack slot; all four source/destination combinations
// are handled below.
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
    if (dst.first()->is_stack()) {
      __ ld(AT, FP, reg2offset_in(src.first()));
      __ sd(AT, SP, reg2offset_out(dst.first()));
    } else {
      __ ld(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else {
    if (dst.first()->is_stack()) {
      __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
    } else {
      __ move(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
// A double move
// Because of the calling convention, src is either a single FPU register
// or a pair of stack slots (possibly unaligned); the same holds for dst.
// All four combinations are handled below.
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    // source is all stack
    if (dst.first()->is_stack()) {
      __ ldc1(F12, FP, reg2offset_in(src.first()));
      __ sdc1(F12, SP, reg2offset_out(dst.first()));
    } else {
      __ ldc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
    }
  } else {
    // reg to stack
    // No worries about stack alignment
    if (dst.first()->is_stack()) {
      __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
    } else {
      __ mov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    }
  }
}
static void verify_oop_args(MacroAssembler* masm,
                            methodHandle method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = T9;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (sig_bt[i] == T_OBJECT ||
          sig_bt[i] == T_ARRAY) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
          __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ verify_oop(temp_reg);
        } else {
          __ verify_oop(r->as_Register());
        }
      }
    }
  }
}
static void gen_special_dispatch(MacroAssembler* masm,
                                 methodHandle method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
    member_reg = S3;  // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else {
    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      receiver_reg = SSR;  // known to be free at this point
      __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                methodHandle method,
                                                int compile_id,
                                                BasicType* in_sig_bt,
                                                VMRegPair* in_regs,
                                                BasicType ret_type) {
  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t)__ pc();
    int vep_offset = ((intptr_t)__ pc()) - start;
    gen_special_dispatch(masm,
                         method,
                         in_sig_bt,
                         in_regs);
    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
    __ flush();
    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
    return nmethod::new_native_nmethod(method,
                                       compile_id,
                                       masm->code(),
                                       vep_offset,
                                       frame_complete,
                                       stack_slots / VMRegImpl::slots_per_word,
                                       in_ByteSize(-1),
                                       in_ByteSize(-1),
                                       (OopMapSet*)NULL);
  }
  bool is_critical_native = true;
  address native_func = method->critical_native_function();
  if (native_func == NULL) {
    native_func = method->native_function();
    is_critical_native = false;
  }
  assert(native_func != NULL, "must have function");

  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will gc the arguments. The only thing we need an
  // oopMap for is if the call is static
  //
  // An OopMap for lock (and class if static), and one for the VM call itself
  OopMapSet *oop_maps = new OopMapSet();

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method)

  const int total_in_args = method->size_of_parameters();
  int total_c_args = total_in_args;
  if (!is_critical_native) {
    total_c_args += 1;
    if (method->is_static()) {
      total_c_args++;
    }
  } else {
    for (int i = 0; i < total_in_args; i++) {
      if (in_sig_bt[i] == T_ARRAY) {
        total_c_args++;
      }
    }
  }
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
  BasicType* in_elem_bt = NULL;

  int argc = 0;
  if (!is_critical_native) {
    out_sig_bt[argc++] = T_ADDRESS;
    if (method->is_static()) {
      out_sig_bt[argc++] = T_OBJECT;
    }

    for (int i = 0; i < total_in_args; i++) {
      out_sig_bt[argc++] = in_sig_bt[i];
    }
  } else {
    Thread* THREAD = Thread::current();
    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
    SignatureStream ss(method->signature());
    for (int i = 0; i < total_in_args; i++) {
      if (in_sig_bt[i] == T_ARRAY) {
        // Arrays are passed as int, elem* pair
        out_sig_bt[argc++] = T_INT;
        out_sig_bt[argc++] = T_ADDRESS;
        Symbol* atype = ss.as_symbol(CHECK_NULL);
        const char* at = atype->as_C_string();
        if (strlen(at) == 2) {
          assert(at[0] == '[', "must be");
          switch (at[1]) {
          case 'B': in_elem_bt[i] = T_BYTE; break;
          case 'C': in_elem_bt[i] = T_CHAR; break;
          case 'D': in_elem_bt[i] = T_DOUBLE; break;
          case 'F': in_elem_bt[i] = T_FLOAT; break;
          case 'I': in_elem_bt[i] = T_INT; break;
          case 'J': in_elem_bt[i] = T_LONG; break;
          case 'S': in_elem_bt[i] = T_SHORT; break;
          case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
          default: ShouldNotReachHere();
          }
        }
      } else {
        out_sig_bt[argc++] = in_sig_bt[i];
        in_elem_bt[i] = T_VOID;
      }
      if (in_sig_bt[i] != T_VOID) {
        assert(in_sig_bt[i] == ss.type(), "must match");
        ss.next();
      }
    }
  }
1546 // Now figure out where the args must be stored and how much stack space
1547 // they require (neglecting out_preserve_stack_slots but including space for
1548 // storing the 1st six register arguments). It's weird; see int_stk_helper.
1549 //
1550 int out_arg_slots;
1551 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1553 // Compute framesize for the wrapper. We need to handlize all oops in
1554 // registers. We must create space for them here that is disjoint from
1555 // the windowed save area because we have no control over when we might
1556 // flush the window again and overwrite values that gc has since modified.
1557 // (The live window race)
1558 //
1559 // We always just allocate 6 words for storing down these objects. This allows
1560 // us to simply record the base and use the Ireg number to decide which
1561 // slot to use. (Note that the reg number is the inbound number, not the
1562 // outbound number).
1563 // We must shuffle args to match the native convention, and include var-args space.
1565 // Calculate the total number of stack slots we will need.
1567 // First count the abi requirement plus all of the outgoing args
1568 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1570 // Now the space for the inbound oop handle area
1571 int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers
1572 if (is_critical_native) {
1573 // Critical natives may have to call out so they need a save area
1574 // for register arguments.
1575 int double_slots = 0;
1576 int single_slots = 0;
1577 for ( int i = 0; i < total_in_args; i++) {
1578 if (in_regs[i].first()->is_Register()) {
1579 const Register reg = in_regs[i].first()->as_Register();
1580 switch (in_sig_bt[i]) {
1581 case T_BOOLEAN:
1582 case T_BYTE:
1583 case T_SHORT:
1584 case T_CHAR:
1585 case T_INT: single_slots++; break;
1586 case T_ARRAY: // specific to LP64 (7145024)
1587 case T_LONG: double_slots++; break;
1588 default: ShouldNotReachHere();
1589 }
1590 } else if (in_regs[i].first()->is_FloatRegister()) {
1591 switch (in_sig_bt[i]) {
1592 case T_FLOAT: single_slots++; break;
1593 case T_DOUBLE: double_slots++; break;
1594 default: ShouldNotReachHere();
1595 }
1596 }
1597 }
1598 total_save_slots = double_slots * 2 + single_slots;
1599 // align the save area
1600 if (double_slots != 0) {
1601 stack_slots = round_to(stack_slots, 2);
1602 }
1603 }
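// A sketch of the sizing above (hypothetical signature): register args
// (int, long, float) give single_slots == 2 and double_slots == 1, so
// total_save_slots == 2 * 1 + 2 == 4.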
1605 int oop_handle_offset = stack_slots;
1606 stack_slots += total_save_slots;
1608 // Now any space we need for handlizing a klass if this is a static method
1610 int klass_slot_offset = 0;
1611 int klass_offset = -1;
1612 int lock_slot_offset = 0;
1613 bool is_static = false;
1615 if (method->is_static()) {
1616 klass_slot_offset = stack_slots;
1617 stack_slots += VMRegImpl::slots_per_word;
1618 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1619 is_static = true;
1620 }
1622 // Plus a lock if needed
1624 if (method->is_synchronized()) {
1625 lock_slot_offset = stack_slots;
1626 stack_slots += VMRegImpl::slots_per_word;
1627 }
1629 // Now a place to save return value or as a temporary for any gpr -> fpr moves
1630 // + 2 for return address (which we own) and saved fp
1631 stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7)
1633 // Ok The space we have allocated will look like:
1634 //
1635 //
1636 // FP-> | |
1637 // |---------------------|
1638 // | 2 slots for moves |
1639 // |---------------------|
1640 // | lock box (if sync) |
1641 // |---------------------| <- lock_slot_offset
1642 // | klass (if static) |
1643 // |---------------------| <- klass_slot_offset
1644 // | oopHandle area |
1645 // |---------------------| <- oop_handle_offset
1646 // | outbound memory |
1647 // | based arguments |
1648 // | |
1649 // |---------------------|
1650 // | vararg area |
1651 // |---------------------|
1652 // | |
1653 // SP-> | out_preserved_slots |
1654 //
1655 //
1658 // Now compute actual number of stack words we need rounding to make
1659 // stack properly aligned.
1660 stack_slots = round_to(stack_slots, StackAlignmentInSlots);
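// E.g. (a sketch, assuming StackAlignmentInBytes == 16 and 4-byte stack
// slots): StackAlignmentInSlots == 4, so 37 slots would round up to 40,
// i.e. a 160-byte contribution to the frame.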
1662 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1664 intptr_t start = (intptr_t)__ pc();
1668 // First thing, make an IC check to see if we should even be here
1669 address ic_miss = SharedRuntime::get_ic_miss_stub();
1671 // We are free to use all registers as temps without saving them and
1672 // restoring them except fp. fp is the only callee save register
1673 // as far as the interpreter and the compiler(s) are concerned.
1675 //refer to register_mips.hpp:IC_Klass
1676 const Register ic_reg = T1;
1677 const Register receiver = T0;
1679 Label hit;
1680 Label exception_pending;
1682 __ verify_oop(receiver);
1683 //add for compressedoops
1684 __ load_klass(T9, receiver);
1685 __ beq(T9, ic_reg, hit);
1686 __ delayed()->nop();
1687 __ jmp(ic_miss, relocInfo::runtime_call_type);
1688 __ delayed()->nop();
1689 // The verified entry must be aligned for code patching,
1690 // and the first 5 bytes must be in the same cache line.
1691 // If we align at 8 then we can be sure the 5 bytes are in the same line.
1692 __ align(8);
1694 __ bind(hit);
1697 int vep_offset = ((intptr_t)__ pc()) - start;
1698 #ifdef COMPILER1
1699 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1700 // Object.hashCode can pull the hashCode from the header word
1701 // instead of doing a full VM transition once it's been computed.
1702 // Since hashCode is usually polymorphic at call sites we can't do
1703 // this optimization at the call site without a lot of work.
1704 Label slowCase;
1705 Register receiver = T0;
1706 Register result = V0;
1707 __ ld ( result, receiver, oopDesc::mark_offset_in_bytes());
1708 // check if locked
1709 __ andi(AT, result, markOopDesc::unlocked_value);
1710 __ beq(AT, R0, slowCase);
1711 __ delayed()->nop();
1712 if (UseBiasedLocking) {
1713 // Check if biased and fall through to runtime if so
1714 __ andi (AT, result, markOopDesc::biased_lock_bit_in_place);
1715 __ bne(AT, R0, slowCase);
1716 __ delayed()->nop();
1717 }
1718 // get hash
1719 __ li(AT, markOopDesc::hash_mask_in_place);
1720 __ andr (AT, result, AT);
1721 // test if hashCode exists
1722 __ beq (AT, R0, slowCase);
1723 __ delayed()->nop();
1724 __ shr(result, markOopDesc::hash_shift);
1725 __ jr(RA);
1726 __ delayed()->nop();
1727 __ bind (slowCase);
1728 }
1729 #endif // COMPILER1
1731 // The instruction at the verified entry point must be 5 bytes or longer
1732 // because it can be patched on the fly by make_non_entrant. The stack bang
1733 // instruction fits that requirement.
1735 // Generate stack overflow check
1737 if (UseStackBanging) {
1738 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1739 } else {
1740 // need a 5 byte instruction to allow MT safe patching to non-entrant
1741 __ nop();
1742 __ nop();
1743 __ nop();
1744 __ nop();
1745 __ nop();
1746 }
1747 // Generate a new frame for the wrapper.
1748 // does MIPS need this?
1749 #ifndef OPT_THREAD
1750 __ get_thread(TREG);
1751 #endif
1752 __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
1753 __ move(AT, -(StackAlignmentInBytes));
1754 __ andr(SP, SP, AT);
1756 __ enter();
1757 // -2 because return address is already present and so is saved fp
1758 __ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
1760 // Frame is now completed as far as size and linkage.
1762 int frame_complete = ((intptr_t)__ pc()) - start;
1764 // Calculate the difference between sp and fp. We need to know it
1765 // after the native call because on windows Java Natives will pop
1766 // the arguments and it is painful to do sp relative addressing
1767 // in a platform independent way. So after the call we switch to
1768 // fp relative addressing.
1769 // FIXME: actually, the fp_adjustment may not be right, because andr(SP, SP, AT)
1770 // may change the SP
1771 int fp_adjustment = stack_size - 2*wordSize;
1773 #ifdef COMPILER2
1774 // C2 may leave the stack dirty if not in SSE2+ mode
1775 __ empty_FPU_stack();
1776 #endif
1778 // Compute the fp offset for any slots used after the jni call
1780 int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1781 // We use TREG as a thread pointer because it is callee save and
1782 // if we load it once it is usable through the entire wrapper
1783 const Register thread = TREG;
1785 // We use S4 as the oop handle for the receiver/klass
1786 // It is callee save so it survives the call to native
1788 const Register oop_handle_reg = S4;
1789 if (is_critical_native) {
1790 __ stop("generate_native_wrapper in sharedRuntime <2>");
1791 //TODO:Fu
1792 // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
1793 // oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1794 }
1796 #ifndef OPT_THREAD
1797 __ get_thread(thread);
1798 #endif
1800 //
1801 // We immediately shuffle the arguments so that any vm call we have to
1802 // make from here on out (sync slow path, jvmpi, etc.) we will have
1803 // captured the oops from our caller and have a valid oopMap for
1804 // them.
1806 // -----------------
1807 // The Grand Shuffle
1808 //
1809 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1810 // and, if static, the class mirror instead of a receiver. This pretty much
1811 // guarantees that register layout will not match (and mips doesn't use reg
1812 // parms though amd does). Since the native abi doesn't use register args
1813 // and the java convention does, we don't have to worry about collisions.
1814 // All of our moves are reg->stack or stack->stack.
1815 // We ignore the extra arguments during the shuffle and handle them at the
1816 // last moment. The shuffle is described by the two calling convention
1817 // vectors we have in our possession. We simply walk the java vector to
1818 // get the source locations and the c vector to get the destinations.
1820 int c_arg = method->is_static() ? 2 : 1 ;
1822 // Record sp-based slot for receiver on stack for non-static methods
1823 int receiver_offset = -1;
1825 // This is a trick. We double the stack slots so we can claim
1826 // the oops in the caller's frame. Since we are sure to have
1827 // more args than the caller doubling is enough to make
1828 // sure we can capture all the incoming oop args from the
1829 // caller.
1830 //
1831 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1833 // Mark location of fp (someday)
1834 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp));
1836 #ifdef ASSERT
1837 bool reg_destroyed[RegisterImpl::number_of_registers];
1838 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1839 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1840 reg_destroyed[r] = false;
1841 }
1842 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
1843 freg_destroyed[f] = false;
1844 }
1846 #endif /* ASSERT */
1848 // This may iterate in two different directions depending on the
1849 // kind of native it is. The reason is that for regular JNI natives
1850 // the incoming and outgoing registers are offset upwards and for
1851 // critical natives they are offset down.
1852 GrowableArray<int> arg_order(2 * total_in_args);
1853 VMRegPair tmp_vmreg;
1854 tmp_vmreg.set1(T8->as_VMReg());
1856 if (!is_critical_native) {
1857 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1858 arg_order.push(i);
1859 arg_order.push(c_arg);
1860 }
1861 } else {
1862 // Compute a valid move order, using tmp_vmreg to break any cycles
1863 __ stop("generate_native_wrapper in sharedRuntime <2>");
1864 //TODO:Fu
1865 // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
1866 }
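// A sketch of the order built above for a regular JNI native: with
// total_in_args == 2 and total_c_args == 3 the loop pushes the pairs
// (1, 2) then (0, 1), so the shuffle below walks from the last java arg
// down to the first.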
1868 int temploc = -1;
1869 for (int ai = 0; ai < arg_order.length(); ai += 2) {
1870 int i = arg_order.at(ai);
1871 int c_arg = arg_order.at(ai + 1);
1872 __ block_comment(err_msg("move %d -> %d", i, c_arg));
1873 if (c_arg == -1) {
1874 assert(is_critical_native, "should only be required for critical natives");
1875 // This arg needs to be moved to a temporary
1876 __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
1877 in_regs[i] = tmp_vmreg;
1878 temploc = i;
1879 continue;
1880 } else if (i == -1) {
1881 assert(is_critical_native, "should only be required for critical natives");
1882 // Read from the temporary location
1883 assert(temploc != -1, "must be valid");
1884 i = temploc;
1885 temploc = -1;
1886 }
1887 #ifdef ASSERT
1888 if (in_regs[i].first()->is_Register()) {
1889 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1890 } else if (in_regs[i].first()->is_FloatRegister()) {
1891 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1892 }
1893 if (out_regs[c_arg].first()->is_Register()) {
1894 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1895 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1896 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1897 }
1898 #endif /* ASSERT */
1899 switch (in_sig_bt[i]) {
1900 case T_ARRAY:
1901 if (is_critical_native) {
1902 __ stop("generate_native_wrapper in sharedRuntime <2>");
1903 //TODO:Fu
1904 // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1905 c_arg++;
1906 #ifdef ASSERT
1907 if (out_regs[c_arg].first()->is_Register()) {
1908 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1909 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1910 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1911 }
1912 #endif
1913 break;
1914 }
1915 case T_OBJECT:
1916 assert(!is_critical_native, "no oop arguments");
1917 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1918 ((i == 0) && (!is_static)),
1919 &receiver_offset);
1920 break;
1921 case T_VOID:
1922 break;
1924 case T_FLOAT:
1925 float_move(masm, in_regs[i], out_regs[c_arg]);
1926 break;
1928 case T_DOUBLE:
1929 assert( i + 1 < total_in_args &&
1930 in_sig_bt[i + 1] == T_VOID &&
1931 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1932 double_move(masm, in_regs[i], out_regs[c_arg]);
1933 break;
1935 case T_LONG :
1936 long_move(masm, in_regs[i], out_regs[c_arg]);
1937 break;
1939 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1941 default:
1942 simple_move32(masm, in_regs[i], out_regs[c_arg]);
1943 }
1944 }
1946 // point c_arg at the first arg that is already loaded in case we
1947 // need to spill before we call out
1948 c_arg = total_c_args - total_in_args;
1949 // Pre-load a static method's oop. Used both by locking code and
1950 // the normal JNI call code.
1952 __ move(oop_handle_reg, A1);
1954 if (method->is_static() && !is_critical_native) {
1956 // load oop into a register
1957 int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
1958 (method->method_holder())->java_mirror()));
1961 RelocationHolder rspec = oop_Relocation::spec(oop_index);
1962 __ relocate(rspec);
1963 __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
1964 // Now handlize the static class mirror; it's known not-null.
1965 __ sd( oop_handle_reg, SP, klass_offset);
1966 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1968 // Now get the handle
1969 __ lea(oop_handle_reg, Address(SP, klass_offset));
1970 // store the klass handle as second argument
1971 __ move(A1, oop_handle_reg);
1972 // and protect the arg if we must spill
1973 c_arg--;
1974 }
1976 // Change state to native (we save the return address in the thread, since it might not
1977 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1978 // points into the right code segment. It does not have to be the correct return pc.
1979 // We use the same pc/oopMap repeatedly when we call out
1981 intptr_t the_pc = (intptr_t) __ pc();
1982 oop_maps->add_gc_map(the_pc - start, map);
1984 __ set_last_Java_frame(SP, noreg, NULL);
1985 __ relocate(relocInfo::internal_pc_type);
1986 {
1987 intptr_t save_pc = (intptr_t)the_pc ;
1988 __ patchable_set48(AT, save_pc);
1989 }
1990 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
1993 // We have all of the arguments set up at this point. We must not touch any register
1994 // argument registers at this point (what if we save/restore them when there are no oops?).
1995 {
1996 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
1997 int metadata_index = __ oop_recorder()->find_index(method());
1998 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
1999 __ relocate(rspec);
2000 __ patchable_set48(AT, (long)(method()));
2002 __ call_VM_leaf(
2003 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2004 thread, AT);
2006 }
2008 // These are register definitions we need for locking/unlocking
2009 const Register swap_reg = T8; // Must use T8 for cmpxchg instruction
2010 const Register obj_reg = T9; // Will contain the oop
2011 //const Register lock_reg = T6; // Address of compiler lock object (BasicLock)
2012 const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock)
2016 Label slow_path_lock;
2017 Label lock_done;
2019 // Lock a synchronized method
2020 if (method->is_synchronized()) {
2021 assert(!is_critical_native, "unhandled");
2023 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2025 // Get the handle (the 2nd argument)
2026 __ move(oop_handle_reg, A1);
2028 // Get address of the box
2029 __ lea(lock_reg, Address(FP, lock_slot_fp_offset));
2031 // Load the oop from the handle
2032 __ ld(obj_reg, oop_handle_reg, 0);
2034 if (UseBiasedLocking) {
2035 // Note that oop_handle_reg is trashed during this call
2036 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock);
2037 }
2039 // Load immediate 1 into swap_reg %T8
2040 __ move(swap_reg, 1);
2042 __ ld(AT, obj_reg, 0);
2043 __ orr(swap_reg, swap_reg, AT);
2045 __ sd( swap_reg, lock_reg, mark_word_offset);
2046 __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
2047 __ bne(AT, R0, lock_done);
2048 __ delayed()->nop();
2049 // Test if the oopMark is an obvious stack pointer, i.e.,
2050 // 1) (mark & 3) == 0, and
2051 // 2) sp <= mark < sp + os::pagesize()
2052 // These 3 tests can be done by evaluating the following
2053 // expression: ((mark - sp) & (3 - os::vm_page_size())),
2054 // assuming both stack pointer and pagesize have their
2055 // least significant 2 bits clear.
2056 // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg
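// Worked example (assuming a 16K page): 3 - 0x4000 == ...FFFFC003, so the
// AND below is zero exactly when 0 <= mark - sp < 0x4000 and the low two
// bits of (mark - sp) are clear, i.e. the mark is a stack address in the
// current page.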
2058 __ dsub(swap_reg, swap_reg, SP);
2059 __ move(AT, 3 - os::vm_page_size());
2060 __ andr(swap_reg , swap_reg, AT);
2061 // Save the test result, for recursive case, the result is zero
2062 __ sd(swap_reg, lock_reg, mark_word_offset);
2063 __ bne(swap_reg, R0, slow_path_lock);
2064 __ delayed()->nop();
2065 // Slow path will re-enter here
2066 __ bind(lock_done);
2068 if (UseBiasedLocking) {
2069 // Re-fetch oop_handle_reg as we trashed it above
2070 __ move(A1, oop_handle_reg);
2071 }
2072 }
2075 // Finally just about ready to make the JNI call
2078 // get JNIEnv* which is first argument to native
2079 if (!is_critical_native) {
2080 __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
2081 }
2083 // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
2084 // Load the second argument into A1
2085 //__ ld(A1, SP , wordSize ); // klass
2087 // Now set thread in native
2088 __ addi(AT, R0, _thread_in_native);
2089 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2090 // do the call
2091 __ call(method->native_function(), relocInfo::runtime_call_type);
2092 __ delayed()->nop();
2093 // WARNING - on Windows Java Natives use pascal calling convention and pop the
2094 // arguments off of the stack. We could just re-adjust the stack pointer here
2095 // and continue to do SP relative addressing but we instead switch to FP
2096 // relative addressing.
2098 // Unpack native results.
2099 switch (ret_type) {
2100 case T_BOOLEAN: __ c2bool(V0); break;
2101 case T_CHAR : __ andi(V0, V0, 0xFFFF); break;
2102 case T_BYTE : __ sign_extend_byte (V0); break;
2103 case T_SHORT : __ sign_extend_short(V0); break;
2104 case T_INT    : break; // nothing to do
2105 case T_DOUBLE :
2106 case T_FLOAT :
2107 // Result is in the FP return register; we'll save as needed
2108 break;
2109 case T_ARRAY: // Really a handle
2110 case T_OBJECT: // Really a handle
2111 break; // can't de-handlize until after safepoint check
2112 case T_VOID: break;
2113 case T_LONG: break;
2114 default : ShouldNotReachHere();
2115 }
2116 // Switch thread to "native transition" state before reading the synchronization state.
2117 // This additional state is necessary because reading and testing the synchronization
2118 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2119 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2120 // VM thread changes sync state to synchronizing and suspends threads for GC.
2121 //     Thread A is resumed to finish this native method, but doesn't block here since it
2122 //     didn't see any synchronization in progress, and escapes.
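// The resulting sequence is: _thread_in_native -> _thread_in_native_trans
// -> (safepoint/suspend check) -> _thread_in_Java.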
2123 __ addi(AT, R0, _thread_in_native_trans);
2124 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2126 //if(os::is_MP()) {}
2128 Label after_transition;
2130 // check for safepoint operation in progress and/or pending suspend requests
2131 {
2132 Label Continue;
2133 __ li(AT, SafepointSynchronize::address_of_state());
2134 __ lw(A0, AT, 0);
2135 __ addi(AT, A0, -SafepointSynchronize::_not_synchronized);
2136 Label L;
2137 __ bne(AT, R0, L);
2138 __ delayed()->nop();
2139 __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset()));
2140 __ beq(AT, R0, Continue);
2141 __ delayed()->nop();
2142 __ bind(L);
2144 // Don't use call_VM as it will see a possible pending exception and forward it
2145 // and never return here preventing us from clearing _last_native_pc down below.
2146 //
2147 save_native_result(masm, ret_type, stack_slots);
2148 __ move(A0, thread);
2149 __ addi(SP, SP, -wordSize);
2150 __ push(S2);
2151 __ move(AT, -(StackAlignmentInBytes));
2152 __ move(S2, SP); // use S2 as a sender SP holder
2153 __ andr(SP, SP, AT); // align stack as required by ABI
2154 if (!is_critical_native) {
2155 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
2156 __ delayed()->nop();
2157 } else {
2158 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
2159 __ delayed()->nop();
2160 }
2161 __ move(SP, S2); // use S2 as a sender SP holder
2162 __ pop(S2);
2163 __ addi(SP, SP, wordSize);
2164 //add for compressedoops
2165 __ reinit_heapbase();
2166 // Restore any method result value
2167 restore_native_result(masm, ret_type, stack_slots);
2169 if (is_critical_native) {
2170 // The call above performed the transition to thread_in_Java so
2171 // skip the transition logic below.
2172 __ beq(R0, R0, after_transition);
2173 __ delayed()->nop();
2174 }
2176 __ bind(Continue);
2177 }
2179 // change thread state
2180 __ addi(AT, R0, _thread_in_Java);
2181 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2182 __ bind(after_transition);
2183 Label reguard;
2184 Label reguard_done;
2185 __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset()));
2186 __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled);
2187 __ beq(AT, R0, reguard);
2188 __ delayed()->nop();
2189 // slow path reguard re-enters here
2190 __ bind(reguard_done);
2192 // Handle possible exception (will unlock if necessary)
2194 // native result if any is live
2196 // Unlock
2197 Label slow_path_unlock;
2198 Label unlock_done;
2199 if (method->is_synchronized()) {
2201 Label done;
2203 // Get locked oop from the handle we passed to jni
2204 __ ld( obj_reg, oop_handle_reg, 0);
2205 if (UseBiasedLocking) {
2206 __ biased_locking_exit(obj_reg, T8, done);
2208 }
2210 // Simple recursive lock?
2212 __ ld(AT, FP, lock_slot_fp_offset);
2213 __ beq(AT, R0, done);
2214 __ delayed()->nop();
2215 // Must save FSF if it is live now because cmpxchg must use it
2216 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2217 save_native_result(masm, ret_type, stack_slots);
2218 }
2220 // get old displaced header
2221 __ ld (T8, FP, lock_slot_fp_offset);
2222 // get address of the stack lock
2223 __ addi (c_rarg0, FP, lock_slot_fp_offset);
2224 // Atomic swap old header if oop still contains the stack lock
2225 __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
2227 __ beq(AT, R0, slow_path_unlock);
2228 __ delayed()->nop();
2229 // slow path re-enters here
2230 __ bind(unlock_done);
2231 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2232 restore_native_result(masm, ret_type, stack_slots);
2233 }
2235 __ bind(done);
2237 }
2238 {
2239 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2240 // Tell dtrace about this method exit
2241 save_native_result(masm, ret_type, stack_slots);
2242 int metadata_index = __ oop_recorder()->find_index( (method()));
2243 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
2244 __ relocate(rspec);
2245 __ patchable_set48(AT, (long)(method()));
2247 __ call_VM_leaf(
2248 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2249 thread, AT);
2250 restore_native_result(masm, ret_type, stack_slots);
2251 }
2253 // We can finally stop using that last_Java_frame we setup ages ago
2255 __ reset_last_Java_frame(false);
2257 // Unpack oop result, e.g. JNIHandles::resolve value.
2258 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2259 __ resolve_jobject(V0, thread, T9);
2260 }
2262 if (!is_critical_native) {
2263 // reset handle block
2264 __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
2265 __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes());
2266 }
2268 if (!is_critical_native) {
2269 // Any exception pending?
2270 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2271 __ bne(AT, R0, exception_pending);
2272 __ delayed()->nop();
2273 }
2274 // no exception, we're almost done
2276 // check that only result value is on FPU stack
2277 __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
2279 // Return
2280 #ifndef OPT_THREAD
2281 __ get_thread(TREG);
2282 #endif
2283 //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
2284 __ leave();
2286 __ jr(RA);
2287 __ delayed()->nop();
2288 // Unexpected paths are out of line and go here
2289 // Slow path locking & unlocking
2290 if (method->is_synchronized()) {
2292 // BEGIN Slow path lock
2293 __ bind(slow_path_lock);
2295 // protect the args we've loaded
2296 save_args(masm, total_c_args, c_arg, out_regs);
2298 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2299 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2301 __ move(A0, obj_reg);
2302 __ move(A1, lock_reg);
2303 __ move(A2, thread);
2304 __ addi(SP, SP, - 3*wordSize);
2306 __ move(AT, -(StackAlignmentInBytes));
2307 __ move(S2, SP); // use S2 as a sender SP holder
2308 __ andr(SP, SP, AT); // align stack as required by ABI
2310 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
2311 __ delayed()->nop();
2312 __ move(SP, S2);
2313 __ addi(SP, SP, 3*wordSize);
2315 restore_args(masm, total_c_args, c_arg, out_regs);
2317 #ifdef ASSERT
2318 { Label L;
2319 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2320 __ beq(AT, R0, L);
2321 __ delayed()->nop();
2322 __ stop("no pending exception allowed on exit from monitorenter");
2323 __ bind(L);
2324 }
2325 #endif
2326 __ b(lock_done);
2327 __ delayed()->nop();
2328 // END Slow path lock
2330 // BEGIN Slow path unlock
2331 __ bind(slow_path_unlock);
2335 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2336 save_native_result(masm, ret_type, stack_slots);
2337 }
2338 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2340 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2341 __ push(AT);
2342 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
2344 __ move(AT, -(StackAlignmentInBytes));
2345 __ move(S2, SP); // use S2 as a sender SP holder
2346 __ andr(SP, SP, AT); // align stack as required by ABI
2348 // should be a peel
2349 // +wordSize because of the push above
2350 __ addi(A1, FP, lock_slot_fp_offset);
2352 __ move(A0, obj_reg);
2353 __ addi(SP,SP, -2*wordSize);
2354 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
2355 relocInfo::runtime_call_type);
2356 __ delayed()->nop();
2357 __ addi(SP, SP, 2*wordSize);
2358 __ move(SP, S2);
2359 //add for compressedoops
2360 __ reinit_heapbase();
2361 #ifdef ASSERT
2362 {
2363 Label L;
2364 __ lw( AT, thread, in_bytes(Thread::pending_exception_offset()));
2365 __ beq(AT, R0, L);
2366 __ delayed()->nop();
2367 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2368 __ bind(L);
2369 }
2370 #endif /* ASSERT */
2372 __ pop(AT);
2373 __ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
2374 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2375 restore_native_result(masm, ret_type, stack_slots);
2376 }
2377 __ b(unlock_done);
2378 __ delayed()->nop();
2379 // END Slow path unlock
2381 }
2383 // SLOW PATH Reguard the stack if needed
2385 __ bind(reguard);
2386 save_native_result(masm, ret_type, stack_slots);
2387 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages),
2388 relocInfo::runtime_call_type);
2389 __ delayed()->nop();
2390 //add for compressedoops
2391 __ reinit_heapbase();
2392 restore_native_result(masm, ret_type, stack_slots);
2393 __ b(reguard_done);
2394 __ delayed()->nop();
2396 // BEGIN EXCEPTION PROCESSING
2397 if (!is_critical_native) {
2398 // Forward the exception
2399 __ bind(exception_pending);
2401 // remove possible return value from FPU register stack
2402 __ empty_FPU_stack();
2404 // pop our frame
2405 // forward_exception_entry needs the return address on the stack
2406 __ addiu(SP, FP, wordSize);
2407 __ ld(FP, SP, (-1) * wordSize);
2409 // and forward the exception
2410 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
2411 __ delayed()->nop();
2412 }
2413 __ flush();
2415 nmethod *nm = nmethod::new_native_nmethod(method,
2416 compile_id,
2417 masm->code(),
2418 vep_offset,
2419 frame_complete,
2420 stack_slots / VMRegImpl::slots_per_word,
2421 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2422 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2423 oop_maps);
2425 if (is_critical_native) {
2426 nm->set_lazy_critical_native(true);
2427 }
2429 return nm;
2431 }
2433 #ifdef HAVE_DTRACE_H
2434 // ---------------------------------------------------------------------------
2435 // Generate a dtrace nmethod for a given signature. The method takes arguments
2436 // in the Java compiled code convention, marshals them to the native
2437 // abi and then leaves nops at the position you would expect to call a native
2438 // function. When the probe is enabled the nops are replaced with a trap
2439 // instruction that dtrace inserts and the trace will cause a notification
2440 // to dtrace.
2441 //
2442 // The probes are only able to take primitive types and java/lang/String as
2443 // arguments. No other java types are allowed. Strings are converted to utf8
2444 // strings so that from dtrace's point of view java strings are converted to C
2445 // strings. There is an arbitrary fixed limit on the total space that a method
2446 // can use for converting the strings (256 chars per string in the signature),
2447 // so any java string larger than this is truncated.
2449 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
2450 static bool offsets_initialized = false;
2452 static VMRegPair reg64_to_VMRegPair(Register r) {
2453 VMRegPair ret;
2454 if (wordSize == 8) {
2455 ret.set2(r->as_VMReg());
2456 } else {
2457 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
2458 }
2459 return ret;
2460 }
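// E.g. (a sketch): with wordSize == 8, reg64_to_VMRegPair(L2) uses set2 so
// the single 64-bit register covers both halves of the pair; on a 32-bit
// word size the register is paired with its successor instead.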
2463 nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
2464 methodHandle method) {
2467 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
2468 // be single threaded in this method.
2469 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
2471 // Fill in the signature array, for the calling-convention call.
2472 int total_args_passed = method->size_of_parameters();
2474 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
2475 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
2477 // The signature we are going to use for the trap that dtrace will see
2478 // java/lang/String is converted. We drop "this" and any other object
2479 // is converted to NULL. (A one-slot java/lang/Long object reference
2480 // is converted to a two-slot long, which is why we double the allocation).
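// E.g. (a sketch): for a signature (Ljava/lang/String;JLjava/lang/Integer;)V
// the out signature built below is { T_ADDRESS, T_LONG, T_VOID, T_INT }:
// the String becomes a C string pointer, the primitive long keeps its
// T_VOID second slot, and the boxed Integer is unboxed to an int.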
2481 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
2482 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
2484 int i=0;
2485 int total_strings = 0;
2486 int first_arg_to_pass = 0;
2487 int total_c_args = 0;
2489 // Skip the receiver as dtrace doesn't want to see it
2490 if( !method->is_static() ) {
2491 in_sig_bt[i++] = T_OBJECT;
2492 first_arg_to_pass = 1;
2493 }
2495 SignatureStream ss(method->signature());
2496 for ( ; !ss.at_return_type(); ss.next()) {
2497 BasicType bt = ss.type();
2498 in_sig_bt[i++] = bt; // Collect remaining bits of signature
2499 out_sig_bt[total_c_args++] = bt;
2500 if( bt == T_OBJECT) {
2501 symbolOop s = ss.as_symbol_or_null();
2502 if (s == vmSymbols::java_lang_String()) {
2503 total_strings++;
2504 out_sig_bt[total_c_args-1] = T_ADDRESS;
2505 } else if (s == vmSymbols::java_lang_Boolean() ||
2506 s == vmSymbols::java_lang_Byte()) {
2507 out_sig_bt[total_c_args-1] = T_BYTE;
2508 } else if (s == vmSymbols::java_lang_Character() ||
2509 s == vmSymbols::java_lang_Short()) {
2510 out_sig_bt[total_c_args-1] = T_SHORT;
2511 } else if (s == vmSymbols::java_lang_Integer() ||
2512 s == vmSymbols::java_lang_Float()) {
2513 out_sig_bt[total_c_args-1] = T_INT;
2514 } else if (s == vmSymbols::java_lang_Long() ||
2515 s == vmSymbols::java_lang_Double()) {
2516 out_sig_bt[total_c_args-1] = T_LONG;
2517 out_sig_bt[total_c_args++] = T_VOID;
2518 }
2519 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
2520 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
2521 // We convert double to long
2522 out_sig_bt[total_c_args-1] = T_LONG;
2523 out_sig_bt[total_c_args++] = T_VOID;
2524 } else if ( bt == T_FLOAT) {
2525 // We convert float to int
2526 out_sig_bt[total_c_args-1] = T_INT;
2527 }
2528 }
2530 assert(i==total_args_passed, "validly parsed signature");
2532 // Now get the compiled-Java layout as input arguments
2533 int comp_args_on_stack;
2534 comp_args_on_stack = SharedRuntime::java_calling_convention(
2535 in_sig_bt, in_regs, total_args_passed, false);
2537 // We have received a description of where all the java args are located
2538 // on entry to the wrapper. We need to convert these args to where
2539 // a native (non-jni) function would expect them. To figure out
2540 // where they go we convert the java signature to a C signature and remove
2541 // T_VOID for any long/double we might have received.
2544 // Now figure out where the args must be stored and how much stack space
2545 // they require (neglecting out_preserve_stack_slots but including space for
2546 // storing the 1st six register arguments). It's weird; see int_stk_helper.
2548 int out_arg_slots;
2549 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
2551 // Calculate the total number of stack slots we will need.
2553 // First count the abi requirement plus all of the outgoing args
2554 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2556 // Plus a temp for possible conversion of float/double/long register args
2558 int conversion_temp = stack_slots;
2559 stack_slots += 2;
2562 // Now space for the string(s) we must convert
2564 int string_locs = stack_slots;
2565 stack_slots += total_strings *
2566 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
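// A sketch of the sizing above (assuming max_dtrace_string_size == 256,
// the per-string limit noted earlier, and 4-byte stack slots): each string
// reserves 256 / 4 == 64 slots here.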
2568 // Ok The space we have allocated will look like:
2569 //
2570 //
2571 // FP-> | |
2572 // |---------------------|
2573 // | string[n] |
2574 // |---------------------| <- string_locs[n]
2575 // | string[n-1] |
2576 // |---------------------| <- string_locs[n-1]
2577 // | ... |
2578 // | ... |
2579 // |---------------------| <- string_locs[1]
2580 // | string[0] |
2581 // |---------------------| <- string_locs[0]
2582 // | temp |
2583 // |---------------------| <- conversion_temp
2584 // | outbound memory |
2585 // | based arguments |
2586 // | |
2587 // |---------------------|
2588 // | |
2589 // SP-> | out_preserved_slots |
2590 //
2591 //
2593 // Now compute actual number of stack words we need rounding to make
2594 // stack properly aligned.
2595 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
2597 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2599 intptr_t start = (intptr_t)__ pc();
2601 // First thing make an ic check to see if we should even be here
2603 {
2604 Label L;
2605 const Register temp_reg = G3_scratch;
2606 Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
2607 __ verify_oop(O0);
2608 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
2609 __ cmp(temp_reg, G5_inline_cache_reg);
2610 __ brx(Assembler::equal, true, Assembler::pt, L);
2611 __ delayed()->nop();
2613 __ jump_to(ic_miss, 0);
2614 __ delayed()->nop();
2615 __ align(CodeEntryAlignment);
2616 __ bind(L);
2617 }
2619 int vep_offset = ((intptr_t)__ pc()) - start;
2622 // The instruction at the verified entry point must be 5 bytes or longer
2623 // because it can be patched on the fly by make_non_entrant. The stack bang
2624 // instruction fits that requirement.
2626 // Generate stack overflow check before creating frame
2627 __ generate_stack_overflow_check(stack_size);
2629 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
2630 "valid size for make_non_entrant");
2632 // Generate a new frame for the wrapper.
2633 __ save(SP, -stack_size, SP);
2635 // Frame is now completed as far a size and linkage.
2637 int frame_complete = ((intptr_t)__ pc()) - start;
2639 #ifdef ASSERT
2640 bool reg_destroyed[RegisterImpl::number_of_registers];
2641 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2642 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
2643 reg_destroyed[r] = false;
2644 }
2645 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
2646 freg_destroyed[f] = false;
2647 }
2649 #endif /* ASSERT */
2651 VMRegPair zero;
2652 const Register g0 = G0; // without this we get a compiler warning (why??)
2653 zero.set2(g0->as_VMReg());
2655 int c_arg, j_arg;
2657 Register conversion_off = noreg;
2659 for (j_arg = first_arg_to_pass, c_arg = 0 ;
2660 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2662 VMRegPair src = in_regs[j_arg];
2663 VMRegPair dst = out_regs[c_arg];
2665 #ifdef ASSERT
2666 if (src.first()->is_Register()) {
2667 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
2668 } else if (src.first()->is_FloatRegister()) {
2669 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
2670 FloatRegisterImpl::S)], "ack!");
2671 }
2672 if (dst.first()->is_Register()) {
2673 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
2674 } else if (dst.first()->is_FloatRegister()) {
2675 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
2676 FloatRegisterImpl::S)] = true;
2677 }
2678 #endif /* ASSERT */
2680 switch (in_sig_bt[j_arg]) {
2681 case T_ARRAY:
2682 case T_OBJECT:
2683 {
2684 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
2685 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2686 // need to unbox a one-slot value
2687 Register in_reg = L0;
2688 Register tmp = L2;
2689 if ( src.first()->is_reg() ) {
2690 in_reg = src.first()->as_Register();
2691 } else {
2692 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
2693 "must be");
2694 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
2695 }
2696 // If the final destination is an acceptable register
2697 if ( dst.first()->is_reg() ) {
2698 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
2699 tmp = dst.first()->as_Register();
2700 }
2701 }
2703 Label skipUnbox;
2704 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
2705 __ mov(G0, tmp->successor());
2706 }
2707 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
2708 __ delayed()->mov(G0, tmp);
2710 BasicType bt = out_sig_bt[c_arg];
2711 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2712 switch (bt) {
2713 case T_BYTE:
2714 __ ldub(in_reg, box_offset, tmp); break;
2715 case T_SHORT:
2716 __ lduh(in_reg, box_offset, tmp); break;
2717 case T_INT:
2718 __ ld(in_reg, box_offset, tmp); break;
2719 case T_LONG:
2720 __ ld_long(in_reg, box_offset, tmp); break;
2721 default: ShouldNotReachHere();
2722 }
2724 __ bind(skipUnbox);
2725 // If tmp wasn't final destination copy to final destination
2726 if (tmp == L2) {
2727 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
2728 if (out_sig_bt[c_arg] == T_LONG) {
2729 long_move(masm, tmp_as_VM, dst);
2730 } else {
2731 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
2732 }
2733 }
2734 if (out_sig_bt[c_arg] == T_LONG) {
2735 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2736 ++c_arg; // move over the T_VOID to keep the loop indices in sync
2737 }
2738 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
2739 Register s =
2740 src.first()->is_reg() ? src.first()->as_Register() : L2;
2741 Register d =
2742 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2744 // We store the oop now so that the conversion pass can reach it
2745 // while in the inner frame. This will be the only store if
2746 // the oop is NULL.
2747 if (s != L2) {
2748 // src is register
2749 if (d != L2) {
2750 // dst is register
2751 __ mov(s, d);
2752 } else {
2753 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2754 STACK_BIAS), "must be");
2755 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
2756 }
2757 } else {
2758 // src not a register
2759 assert(Assembler::is_simm13(reg2offset(src.first()) +
2760 STACK_BIAS), "must be");
2761 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
2762 if (d == L2) {
2763 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2764 STACK_BIAS), "must be");
2765 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
2766 }
2767 }
2768 } else if (out_sig_bt[c_arg] != T_VOID) {
2769 // Convert the arg to NULL
2770 if (dst.first()->is_reg()) {
2771 __ mov(G0, dst.first()->as_Register());
2772 } else {
2773 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2774 STACK_BIAS), "must be");
2775 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
2776 }
2777 }
2778 }
2779 break;
2780 case T_VOID:
2781 break;
2783 case T_FLOAT:
2784 if (src.first()->is_stack()) {
2785 // Stack to stack/reg is simple
2786 move32_64(masm, src, dst);
2787 } else {
2788 if (dst.first()->is_reg()) {
2789 // freg -> reg
2790 int off =
2791 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2792 Register d = dst.first()->as_Register();
2793 if (Assembler::is_simm13(off)) {
2794 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2795 SP, off);
2796 __ ld(SP, off, d);
2797 } else {
2798 if (conversion_off == noreg) {
2799 __ set(off, L6);
2800 conversion_off = L6;
2801 }
2802 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2803 SP, conversion_off);
2804 __ ld(SP, conversion_off , d);
2805 }
2806 } else {
2807 // freg -> mem
2808 int off = STACK_BIAS + reg2offset(dst.first());
2809 if (Assembler::is_simm13(off)) {
2810 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2811 SP, off);
2812 } else {
2813 if (conversion_off == noreg) {
2814 __ set(off, L6);
2815 conversion_off = L6;
2816 }
2817 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2818 SP, conversion_off);
2819 }
2820 }
2821 }
2822 break;
2824 case T_DOUBLE:
2825 assert( j_arg + 1 < total_args_passed &&
2826 in_sig_bt[j_arg + 1] == T_VOID &&
2827 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2828 if (src.first()->is_stack()) {
2829 // Stack to stack/reg is simple
2830 long_move(masm, src, dst);
2831 } else {
2832 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2834 // Destination could be an odd reg on 32bit, in which case
2835 // we can't load directly to the destination.
2837 if (!d->is_even() && wordSize == 4) {
2838 d = L2;
2839 }
2840 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2841 if (Assembler::is_simm13(off)) {
2842 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2843 SP, off);
2844 __ ld_long(SP, off, d);
2845 } else {
2846 if (conversion_off == noreg) {
2847 __ set(off, L6);
2848 conversion_off = L6;
2849 }
2850 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2851 SP, conversion_off);
2852 __ ld_long(SP, conversion_off, d);
2853 }
2854 if (d == L2) {
2855 long_move(masm, reg64_to_VMRegPair(L2), dst);
2856 }
2857 }
2858 break;
2860 case T_LONG :
2861 // 32bit can't do a split move of something like g1 -> O0, O1
2862 // so use a memory temp
2863 if (src.is_single_phys_reg() && wordSize == 4) {
2864 Register tmp = L2;
2865 if (dst.first()->is_reg() &&
2866 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
2867 tmp = dst.first()->as_Register();
2868 }
2870 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2871 if (Assembler::is_simm13(off)) {
2872 __ stx(src.first()->as_Register(), SP, off);
2873 __ ld_long(SP, off, tmp);
2874 } else {
2875 if (conversion_off == noreg) {
2876 __ set(off, L6);
2877 conversion_off = L6;
2878 }
2879 __ stx(src.first()->as_Register(), SP, conversion_off);
2880 __ ld_long(SP, conversion_off, tmp);
2881 }
2883 if (tmp == L2) {
2884 long_move(masm, reg64_to_VMRegPair(L2), dst);
2885 }
2886 } else {
2887 long_move(masm, src, dst);
2888 }
2889 break;
2891 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2893 default:
2894 move32_64(masm, src, dst);
2895 }
2896 }
2899 // If we have any strings we must store any register based arg to the stack
2900 // This includes any still live xmm registers too.
2902 if (total_strings > 0 ) {
2904 // protect all the arg registers
2905 __ save_frame(0);
2906 __ mov(G2_thread, L7_thread_cache);
2907 const Register L2_string_off = L2;
2909 // Get first string offset
2910 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
2912 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
2913 if (out_sig_bt[c_arg] == T_ADDRESS) {
2915 VMRegPair dst = out_regs[c_arg];
2916 const Register d = dst.first()->is_reg() ?
2917 dst.first()->as_Register()->after_save() : noreg;
2919 // It's a string; the oop was already copied to the out arg
2920 // position
2921 if (d != noreg) {
2922 __ mov(d, O0);
2923 } else {
2924 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
2925 "must be");
2926 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
2927 }
2928 Label skip;
2930 __ br_null(O0, false, Assembler::pn, skip);
2931 __ delayed()->add(FP, L2_string_off, O1);
2933 if (d != noreg) {
2934 __ mov(O1, d);
2935 } else {
2936 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
2937 "must be");
2938 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
2939 }
2941 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
2942 relocInfo::runtime_call_type);
2943 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
2945 __ bind(skip);
2947 }
2949 }
2950 __ mov(L7_thread_cache, G2_thread);
2951 __ restore();
2953 }
2956 // Ok now we are done. Need to place the nop that dtrace wants in order to
2957 // patch in the trap
2959 int patch_offset = ((intptr_t)__ pc()) - start;
2961 __ nop();
2964 // Return
2966 __ ret();
2967 __ delayed()->restore();
2969 __ flush();
2971 nmethod *nm = nmethod::new_dtrace_nmethod(
2972 method, masm->code(), vep_offset, patch_offset, frame_complete,
2973 stack_slots / VMRegImpl::slots_per_word);
2974 return nm;
2976 }
2978 #endif // HAVE_DTRACE_H
2980 // This function returns the adjustment (in number of words) to a c2i adapter
2981 // activation for use during deoptimization
2982 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2983 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2984 }
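// E.g. (a sketch, assuming Interpreter::stackElementWords == 1 on this
// 64-bit port): a callee with 5 locals and 2 parameters yields an adjust
// of (5 - 2) * 1 == 3 words.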
2986 // "Top of Stack" slots that may be unused by the calling convention but must
2987 // otherwise be preserved.
2988 // On Intel these are not necessary and the value can be zero.
2989 // On Sparc this describes the words reserved for storing a register window
2990 // when an interrupt occurs.
2991 uint SharedRuntime::out_preserve_stack_slots() {
2992 return 0;
2993 }
2995 //------------------------------generate_deopt_blob----------------------------
2996 // Ought to generate an ideal graph & compile, but here's some MIPS ASM
2997 // instead.
2998 void SharedRuntime::generate_deopt_blob() {
2999 // allocate space for the code
3000 ResourceMark rm;
3001 // setup code generation tools
3002 //CodeBuffer buffer ("deopt_blob", 4000, 2048);
3003 CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
3004 MacroAssembler* masm = new MacroAssembler( & buffer);
3005 int frame_size_in_words;
3006 OopMap* map = NULL;
3007 // Account for the extra args we place on the stack
3008 // by the time we call fetch_unroll_info
3009 const int additional_words = 2; // deopt kind, thread
3011 OopMapSet *oop_maps = new OopMapSet();
3013 address start = __ pc();
3014 Label cont;
3015 // we use S3 for DeOpt reason register
3016 Register reason = S3;
3017 // use S6 for thread register
3018 Register thread = TREG;
3019 // use S7 for fetch_unroll_info returned UnrollBlock
3020 Register unroll = S7;
3021 // Prolog for non exception case!
3022 // Correct the return address we were given.
3023 // FIXME: is the return address on the tos or in RA?
3024 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
3025 // Save everything in sight.
3026 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3027 // Normal deoptimization
3028 __ move(reason, Deoptimization::Unpack_deopt);
3029 __ b(cont);
3030 __ delayed()->nop();
3032 int reexecute_offset = __ pc() - start;
3034 // Reexecute case
3035 // the return address is the pc that describes what bci to re-execute at
3037 // No need to update map as each call to save_live_registers will produce identical oopmap
3038 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3039 __ move(reason, Deoptimization::Unpack_reexecute);
3040 __ b(cont);
3041 __ delayed()->nop();
3043 int exception_offset = __ pc() - start;
3044 // Prolog for exception case
3046 // all registers are dead at this entry point, except for V0 and
3047 // V1 which contain the exception oop and exception pc
3048 // respectively. Set them in TLS and fall thru to the
3049 // unpack_with_exception_in_tls entry point.
3051 __ get_thread(thread);
3052 __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3053 __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3054 int exception_in_tls_offset = __ pc() - start;
3055 // new implementation because exception oop is now passed in JavaThread
3057 // Prolog for exception case
3058 // All registers must be preserved because they might be used by LinearScan
3059 // Exception oop and throwing PC are passed in JavaThread
3060 // tos: stack at point of call to method that threw the exception (i.e. only
3061 // args are on the stack, no return address)
3063 // Return address will be patched later with the throwing pc. The correct value is not
3064 // available now because loading it from memory would destroy registers.
3065 // Save everything in sight.
3066 // No need to update map as each call to save_live_registers will produce identical oopmap
3067 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
3068 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3070 // Now it is safe to overwrite any register
3071 // store the correct deoptimization type
3072 __ move(reason, Deoptimization::Unpack_exception);
3073 // load throwing pc from JavaThread and patch it as the return address
3074 // of the current frame. Then clear the field in JavaThread
3075 __ get_thread(thread);
3076 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3077 __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
3078 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3081 #ifdef ASSERT
3082 // verify that there is really an exception oop in JavaThread
3083 __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
3084 __ verify_oop(AT);
3085 // verify that there is no pending exception
3086 Label no_pending_exception;
3087 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3088 __ beq(AT, R0, no_pending_exception);
3089 __ delayed()->nop();
3090 __ stop("must not have pending exception here");
3091 __ bind(no_pending_exception);
3092 #endif
3093 __ bind(cont);
3094 // Compiled code leaves the floating point stack dirty, empty it.
3095 __ empty_FPU_stack();
3098 // Call C code. Need thread and this frame, but NOT official VM entry
3099 // crud. We cannot block on this call, no GC can happen.
3100 #ifndef OPT_THREAD
3101 __ get_thread(thread);
3102 #endif
3104 __ move(A0, thread);
3105 __ addi(SP, SP, -additional_words * wordSize);
3107 __ set_last_Java_frame(NOREG, NOREG, NULL);
3109 // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on
3110 // this call, no GC can happen. Call should capture return values.
3112 __ relocate(relocInfo::internal_pc_type);
3113 {
3114 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
3115 __ patchable_set48(AT, save_pc);
3116 }
3117 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3119 __ call((address)Deoptimization::fetch_unroll_info);
3120 //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
3121 __ delayed()->nop();
3122 oop_maps->add_gc_map(__ pc() - start, map);
3123 __ addiu(SP, SP, additional_words * wordSize);
3124 __ get_thread(thread);
3125 __ reset_last_Java_frame(false);
3127 // Load UnrollBlock into S7
3128 __ move(unroll, V0);
3131 // Move the unpack kind to a safe place in the UnrollBlock because
3132 // we are very short of registers
3134 Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
3135 __ sw(reason, unpack_kind);
3136 // save the unpack_kind value
3137 // Retrieve the possible live values (return values)
3138 // All callee save registers representing jvm state
3139 // are now in the vframeArray.
3141 Label noException;
3142 __ move(AT, Deoptimization::Unpack_exception);
3143 __ bne(AT, reason, noException);// Was exception pending?
3144 __ delayed()->nop();
3145 __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3146 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3147 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3148 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
3150 __ verify_oop(V0);
3152 // Overwrite the result registers with the exception results.
3153 __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize);
3154 __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
3156 __ bind(noException);
3159 // Stack is back to only having register save data on the stack.
3160 // Now restore the result registers. Everything else is either dead or captured
3161 // in the vframeArray.
3163 RegisterSaver::restore_result_registers(masm);
3164 // All of the register save area has been popped off the stack. Only the
3165 // return address remains.
3166 // Pop all the frames we must move/replace.
3167 // Frame picture (youngest to oldest)
3168 // 1: self-frame (no frame link)
3169 // 2: deopting frame (no frame link)
3170 // 3: caller of deopting frame (could be compiled/interpreted).
3171 //
3172 // Note: by leaving the return address of self-frame on the stack
3173 // and using the size of frame 2 to adjust the stack
3174 // when we are done the return to frame 3 will still be on the stack.
3176 // register for the sender's sp
3177 Register sender_sp = Rsender;
3178 // register for frame pcs
3179 Register pcs = T0;
3180 // register for frame sizes
3181 Register sizes = T1;
3182 // register for frame count
3183 Register count = T3;
3185 // Pop deoptimized frame
3186 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
3187 __ add(SP, SP, AT);
3188 // sp should be pointing at the return address to the caller (3)
3190 // Load array of frame pcs into pcs
3191 __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
3192 __ addi(SP, SP, wordSize); // trash the old pc
3193 // Load array of frame sizes into sizes (T1)
3194 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
3198 // Load count of frames into count (T3)
3199 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
3200 // Pick up the initial fp we should save
3201 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
3202 // Now adjust the caller's stack to make up for the extra locals
3203 // but record the original sp so that we can save it in the skeletal interpreter
3204 // frame and the stack walking of interpreter_sender will get the unextended sp
3205 // value and not the "real" sp value.
3206 __ move(sender_sp, SP);
3207 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
3208 __ sub(SP, SP, AT);
3210 // Push interpreter frames in a loop
3211 //
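// The disassembly below documents an earlier, buggy version of this loop;
// the two instructions annotated "error" were wrong (the frame size must be
// loaded with ld, not lw, and the pcs pointer must advance by 8 bytes, not
// 4). It is kept as a reference for the corrected code that follows.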
3212 //Loop:
3213 // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld
3214 // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i]
3215 // 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16
3216 // 0x000000555bd82d24: daddi sp, sp, 0xfffffff0
3217 // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp
3218 // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at
3219 // 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp
3220 // 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2
3221 // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3222 // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3223 // 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP);
3224 // 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
3225 // 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4
3226 // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
3227 // 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8
3228 //
3229 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc();
3230 Label loop;
3231 __ bind(loop);
3232 __ ld(T2, sizes, 0); // Load frame size
3233 __ ld_ptr(AT, pcs, 0); // load return address
3234 __ addi(T2, T2, -2*wordSize); // we'll push pc and fp, by hand
3235 __ push2(AT, FP);
3236 __ move(FP, SP);
3237 __ sub(SP, SP, T2); // Prolog!
3238 // This value is corrected by layout_activation_impl
3239 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3240 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
3241 __ move(sender_sp, SP); // pass to next frame
3242 __ addi(count, count, -1); // decrement counter
3243 __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes)
3244 __ bne(count, R0, loop);
3245 __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
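// The loop above laid out one skeletal interpreter frame per entry of the
// frame_sizes array; pcs now points at frame_pcs[number_of_frames], the
// interpreter deopt entry used as the return address of the re-pushed
// self-frame below.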
3246 __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
3247 // Re-push self-frame
3248 __ push2(AT, FP);
3249 __ move(FP, SP);
3250 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3251 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3252 __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
3254 // Restore frame locals after moving the frame
3255 __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
3256 __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
3257 __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize); // save the float result
3258 __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
3261 // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
3262 // this call, no GC can happen.
3263 __ move(A1, reason); // exec_mode
3264 __ get_thread(thread);
3265 __ move(A0, thread); // thread
3266 __ addi(SP, SP, (-additional_words) *wordSize);
3268 // set last_Java_sp, last_Java_fp
3269 __ set_last_Java_frame(NOREG, FP, NULL);
3271 __ move(AT, -(StackAlignmentInBytes));
3272 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
3274 __ relocate(relocInfo::internal_pc_type);
3275 {
3276 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
3277 __ patchable_set48(AT, save_pc);
3278 }
3279 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3281 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
3282 __ delayed()->nop();
3283 // Revert SP alignment after call since we're going to do some SP relative addressing below
3284 __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
3285 // Set an oopmap for the call site
3286 oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
3288 __ push(V0);
3290 __ get_thread(thread);
3291 __ reset_last_Java_frame(true);
3293 // Collect return values
3294 __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
3295 __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
3296 __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize); // restore the float result
3297 __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
3299 // Clear the floating point stack before returning to the interpreter
3300 __ empty_FPU_stack();
3301 // FIXME: we should handle float and double return values here
3302 // Push a float or double return value if necessary.
3303 __ leave();
3305 // Jump to interpreter
3306 __ jr(RA);
3307 __ delayed()->nop();
3309 masm->flush();
3310 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3311 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3312 }
3314 #ifdef COMPILER2
3316 //------------------------------generate_uncommon_trap_blob--------------------
3317 // Ought to generate an ideal graph & compile, but here's some MIPS assembly
3318 // instead.
3319 void SharedRuntime::generate_uncommon_trap_blob() {
3320 // allocate space for the code
3321 ResourceMark rm;
3322 // setup code generation tools
3323 CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 );
3324 MacroAssembler* masm = new MacroAssembler(&buffer);
3326 enum frame_layout {
3327 s0_off, s0_off2,
3328 s1_off, s1_off2,
3329 s2_off, s2_off2,
3330 s3_off, s3_off2,
3331 s4_off, s4_off2,
3332 s5_off, s5_off2,
3333 s6_off, s6_off2,
3334 s7_off, s7_off2,
3335 fp_off, fp_off2,
3336 return_off, return_off2, // slot for return address sp + 9
3337 framesize
3338 };
3339 assert(framesize % 4 == 0, "sp not 16-byte aligned");
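// Each enum slot above is one 4-byte (BytesPerInt) stack slot, so keeping
// framesize a multiple of four slots keeps SP 16-byte aligned when the
// frame is allocated below.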
3341 address start = __ pc();
3343 // Push self-frame.
3344 __ daddiu(SP, SP, -framesize * BytesPerInt);
3346 __ sd(RA, SP, return_off * BytesPerInt);
3347 __ sd(FP, SP, fp_off * BytesPerInt);
3349 // Save callee-saved registers (the x86 UseSSE distinction does not apply on MIPS).
3351 __ sd(S0, SP, s0_off * BytesPerInt);
3352 __ sd(S1, SP, s1_off * BytesPerInt);
3353 __ sd(S2, SP, s2_off * BytesPerInt);
3354 __ sd(S3, SP, s3_off * BytesPerInt);
3355 __ sd(S4, SP, s4_off * BytesPerInt);
3356 __ sd(S5, SP, s5_off * BytesPerInt);
3357 __ sd(S6, SP, s6_off * BytesPerInt);
3358 __ sd(S7, SP, s7_off * BytesPerInt);
3360 __ daddi(FP, SP, fp_off * BytesPerInt);
3362 // Clear the floating point exception stack
3363 __ empty_FPU_stack();
3365 Register thread = TREG;
3367 #ifndef OPT_THREAD
3368 __ get_thread(thread);
3369 #endif
3370 // set last_Java_sp
3371 __ set_last_Java_frame(NOREG, FP, NULL);
3372 __ relocate(relocInfo::internal_pc_type);
3373 {
3374 long save_pc = (long)__ pc() + 52;
3375 __ patchable_set48(AT, (long)save_pc);
3376 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3377 }
3378 // Call C code. Need thread but NOT official VM entry
3379 // crud. We cannot block on this call, no GC can happen. Call should
3380 // capture callee-saved registers as well as return values.
3381 __ move(A0, thread);
3382 // argument already in T0
3383 __ move(A1, T0);
3384 __ patchable_call((address)Deoptimization::uncommon_trap);
3386 // Set an oopmap for the call site
3387 OopMapSet *oop_maps = new OopMapSet();
3388 OopMap* map = new OopMap( framesize, 0 );
3390 map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() );
3391 map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() );
3392 map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() );
3393 map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() );
3394 map->set_callee_saved( VMRegImpl::stack2reg(s4_off ), S4->as_VMReg() );
3395 map->set_callee_saved( VMRegImpl::stack2reg(s5_off ), S5->as_VMReg() );
3396 map->set_callee_saved( VMRegImpl::stack2reg(s6_off ), S6->as_VMReg() );
3397 map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() );
3399 //oop_maps->add_gc_map( __ offset(), true, map);
3400 oop_maps->add_gc_map( __ offset(), map);
3402 #ifndef OPT_THREAD
3403 __ get_thread(thread);
3404 #endif
3405 __ reset_last_Java_frame(false);
3407 // Load UnrollBlock into S7
3408 Register unroll = S7;
3409 __ move(unroll, V0);
3411 // Pop all the frames we must move/replace.
3412 //
3413 // Frame picture (youngest to oldest)
3414 // 1: self-frame (no frame link)
3415 // 2: deopting frame (no frame link)
3416 // 3: possible-i2c-adapter-frame
3417 // 4: caller of deopting frame (could be compiled/interpreted; if
3418 //    interpreted we will create a c2i adapter here)
3420 __ daddiu(SP, SP, framesize * BytesPerInt);
3422 // Pop deoptimized frame
3423 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
3424 __ dadd(SP, SP, AT);
3426 // register for frame pcs
3427 Register pcs = T8;
3428 // register for frame sizes
3429 Register sizes = T9;
3430 // register for frame count
3431 Register count = T3;
3432 // register for the sender's sp
3433 Register sender_sp = T1;
3435 // sp should be pointing at the return address to the caller (4)
3436 // Load array of frame pcs
3437 __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
3439 // Load array of frame sizes
3440 __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
3441 __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
3443 // Pick up the initial fp we should save
3444 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
3445 // Now adjust the caller's stack to make up for the extra locals
3446 // but record the original sp so that we can save it in the skeletal interpreter
3447 // frame and the stack walking of interpreter_sender will get the unextended sp
3448 // value and not the "real" sp value.
3450 __ move(sender_sp, SP);
3451 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
3452 __ dsub(SP, SP, AT);
3453 // Push interpreter frames in a loop
3454 Label loop;
3455 __ bind(loop);
3456 __ ld(T2, sizes, 0); // Load frame size
3457 __ ld(AT, pcs, 0); // load return address
3458 __ daddi(T2, T2, -2*wordSize); // we'll push pc and fp, by hand
3459 __ push2(AT, FP);
3460 __ move(FP, SP);
3461 __ dsub(SP, SP, T2); // Prolog!
3462 // This value is corrected by layout_activation_impl
3463 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3464 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
3465 __ move(sender_sp, SP); // pass to next frame
3466 __ daddi(count, count, -1); // decrement counter
3467 __ daddi(sizes, sizes, wordSize); // Bump array pointer (sizes)
3468 __ addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
3469 __ bne(count, R0, loop);
3470 __ delayed()->nop();
3472 __ ld(RA, pcs, 0);
3474 // Re-push self-frame
3475 __ daddi(SP, SP, -2 * wordSize); // make room for FP and RA
3476 __ sd(FP, SP, 0 * wordSize); // save old FP
3477 __ sd(RA, SP, 1 * wordSize); // save final return address
3478 __ move(FP, SP); // set new FP
3479 __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
3481 // set last_Java_sp, last_Java_fp
3482 __ set_last_Java_frame(NOREG, FP, NULL);
3484 __ move(AT, -(StackAlignmentInBytes));
3485 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
3487 __ relocate(relocInfo::internal_pc_type);
3488 {
3489 long save_pc = (long)__ pc() + 52;
3490 __ patchable_set48(AT, (long)save_pc);
3491 }
3492 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3494 // Call C code. Need thread but NOT official VM entry
3495 // crud. We cannot block on this call, no GC can happen. Call should
3496 // restore return values to their stack-slots with the new SP.
3497 __ move(A0, thread);
3498 __ move(A1, Deoptimization::Unpack_uncommon_trap);
3499 __ patchable_call((address)Deoptimization::unpack_frames);
3500 // Set an oopmap for the call site
3501 oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );
3503 __ reset_last_Java_frame(true);
3505 // Pop self-frame.
3506 __ leave(); // Epilog!
3508 // Jump to interpreter
3509 __ jr(RA);
3510 __ delayed()->nop();
3511 // -------------
3512 // make sure all code is generated
3513 masm->flush();
3515 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
3516 }
3518 #endif // COMPILER2
3520 //------------------------------generate_handler_blob-------------------
3521 //
3522 // Generate a special Compile2Runtime blob that saves all registers, and sets
3523 // up an OopMap and calls safepoint code to stop the compiled code for
3524 // a safepoint.
3525 //
3526 // This blob is jumped to (via a breakpoint and the signal handler) from a
3527 // safepoint in compiled code.
3529 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {
3531 // Account for thread arg in our frame
3532 const int additional_words = 0;
3533 int frame_size_in_words;
3535 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3537 ResourceMark rm;
3538 OopMapSet *oop_maps = new OopMapSet();
3539 OopMap* map;
3541 // allocate space for the code
3542 // setup code generation tools
3543 CodeBuffer buffer ("handler_blob", 2048, 512);
3544 MacroAssembler* masm = new MacroAssembler( &buffer);
3546 const Register thread = TREG;
3547 address start = __ pc();
3548 address call_pc = NULL;
3549 bool cause_return = (pool_type == POLL_AT_RETURN);
3550 bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
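// Presumably, as in the shared jdk8u code: POLL_AT_RETURN marks a poll at a
// method return, POLL_AT_LOOP a poll at a loop back edge, and
// POLL_AT_VECTOR_LOOP a loop poll at which live vector registers must also
// be preserved, hence save_vectors.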
3552 // If cause_return is true we are at a poll_return and the return
3553 // address in RA points into the nmethod that hit the safepoint; we can
3554 // leave this return address in RA and effectively complete the return
3555 // and safepoint in the caller.
3556 // Otherwise we load the saved exception pc into RA.
3557 __ push(thread);
3558 #ifndef OPT_THREAD
3559 __ get_thread(thread);
3560 #endif
3562 if(!cause_return) {
3563 __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
3564 }
3566 __ pop(thread);
3567 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);
3569 #ifndef OPT_THREAD
3570 __ get_thread(thread);
3571 #endif
3572 // The following is basically a call_VM. However, we need the precise
3573 // address of the call in order to generate an oopmap. Hence, we do all the
3574 // work ourselves.
3576 __ move(A0, thread);
3577 __ set_last_Java_frame(NOREG, NOREG, NULL);
3580 // do the call
3581 __ call(call_ptr);
3582 __ delayed()->nop();
3584 // Set an oopmap for the call site. This oopmap will map all
3585 // oop-registers and debug-info registers as callee-saved. This
3586 // will allow deoptimization at this safepoint to find all possible
3587 // debug-info recordings, as well as let GC find all oops.
3588 oop_maps->add_gc_map(__ offset(), map);
3590 Label noException;
3592 // Clear last_Java_sp again
3593 __ reset_last_Java_frame(false);
3595 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3596 __ beq(AT, R0, noException);
3597 __ delayed()->nop();
3599 // Exception pending
3601 RegisterSaver::restore_live_registers(masm, save_vectors);
3602 // forward_exception_entry needs a return address on the stack
3603 __ push(RA);
3604 __ patchable_jump((address)StubRoutines::forward_exception_entry());
3606 // No exception case
3607 __ bind(noException);
3608 // Normal exit, register restoring and exit
3609 RegisterSaver::restore_live_registers(masm, save_vectors);
3610 __ jr(RA);
3611 __ delayed()->nop();
3613 masm->flush();
3615 // Fill-out other meta info
3616 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3617 }
3619 //
3620 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3621 //
3622 // Generate a stub that calls into vm to find out the proper destination
3623 // of a java call. All the argument registers are live at this point
3624 // but since this is generic code we don't know what they are and the caller
3625 // must do any gc of the args.
3626 //
3627 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3628 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3630 // allocate space for the code
3631 ResourceMark rm;
3633 //CodeBuffer buffer(name, 1000, 512);
3634 // FIXME (aoqi): revisit code size
3635 CodeBuffer buffer(name, 2000, 2048);
3636 MacroAssembler* masm = new MacroAssembler(&buffer);
3638 int frame_size_words;
3639 //we put the thread in A0
3641 OopMapSet *oop_maps = new OopMapSet();
3642 OopMap* map = NULL;
3644 int start = __ offset();
3645 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3648 int frame_complete = __ offset();
3650 const Register thread = T8;
3651 __ get_thread(thread);
3653 __ move(A0, thread);
3654 __ set_last_Java_frame(noreg, FP, NULL);
3655 // align the stack before invoking the native call
3656 __ move(AT, -(StackAlignmentInBytes));
3657 __ andr(SP, SP, AT);
3658 __ relocate(relocInfo::internal_pc_type);
3659 {
3660 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord;
3661 __ patchable_set48(AT, save_pc);
3662 }
3663 __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
3665 __ call(destination);
3666 __ delayed()->nop();
3668 // Set an oopmap for the call site.
3669 // We need this not only for callee-saved registers, but also for volatile
3670 // registers that the compiler might be keeping live across a safepoint.
3671 oop_maps->add_gc_map( __ offset() - start, map);
3672 // V0 contains the address we are going to jump to assuming no exception got installed
3673 __ get_thread(thread);
3674 __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
3675 // clear last_Java_sp
3676 __ reset_last_Java_frame(true);
3677 // check for pending exceptions
3678 Label pending;
3679 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3680 __ bne(AT, R0, pending);
3681 __ delayed()->nop();
3682 // get the returned Method*
3683 // FIXME: does MIPS need this?
3684 __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
3685 __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
3686 __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
3687 RegisterSaver::restore_live_registers(masm);
3689 // We are back to the original state on entry and ready to call the callee method.
3690 __ jr(V0);
3691 __ delayed()->nop();
3692 // Pending exception after the safepoint
3694 __ bind(pending);
3696 RegisterSaver::restore_live_registers(masm);
3698 // exception pending => remove activation and forward to exception handler
3699 // forward_exception_entry needs a return address on the stack
3700 __ push(RA);
3701 __ get_thread(thread);
3702 __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
3703 __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
3704 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3705 __ delayed()->nop();
3706 //
3707 // make sure all code is generated
3708 masm->flush();
3710 RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3711 return tmp;
3712 }
3714 extern "C" int SpinPause() {return 0;}
3717 //------------------------------Montgomery multiplication------------------------
3718 //
3720 // Subtract 0:b from carry:a. Return carry.
3721 static unsigned long
3722 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3723 long borrow = 0, t = 0;
3724 unsigned long tmp0, tmp1;
3725 __asm__ __volatile__ (
3726 "0: \n"
3727 "ld %[tmp0], 0(%[a]) \n"
3728 "ld %[tmp1], 0(%[b]) \n"
3729 "sltu %[t], %[tmp0], %[borrow] \n"
3730 "dsubu %[tmp0], %[tmp0], %[borrow] \n"
3731 "sltu %[borrow], %[tmp0], %[tmp1] \n"
3732 "or %[borrow], %[borrow], %[t] \n"
3733 "dsubu %[tmp0], %[tmp0], %[tmp1] \n"
3734 "sd %[tmp0], 0(%[a]) \n"
3735 "daddiu %[a], %[a], 8 \n"
3736 "daddiu %[b], %[b], 8 \n"
3737 "daddiu %[len], %[len], -1 \n"
3738 "bgtz %[len], 0b \n"
3739 "dsubu %[tmp0], %[carry], %[borrow] \n"
3740 : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t)
3741 : [carry]"r"(carry)
3742 : "memory"
3743 );
3744 return tmp0;
3745 }
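// For reference, a portable C sketch of what the inline assembly above
// computes (hypothetical helper, not used by the build): subtract the
// multi-word value b from carry:a in place, propagating the borrow word by
// word, and return the final carry minus the outgoing borrow.
static unsigned long
sub_reference(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  unsigned long borrow = 0;
  for (long i = 0; i < len; i++) {
    unsigned long t = (a[i] < borrow);   // borrow out of a[i] -= borrow
    unsigned long d = a[i] - borrow;
    borrow = (d < b[i]) | t;             // borrow out of d -= b[i]
    a[i] = d - b[i];
  }
  return carry - borrow;
}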
3747 // Multiply (unsigned) Long A by Long B, accumulating the double-
3748 // length result into the accumulator formed of t0, t1, and t2.
3749 inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) {
3750 unsigned long hi, lo, carry = 0, t = 0;
3751 __asm__ __volatile__(
3752 "dmultu %[A], %[B] \n"
3753 "mfhi %[hi] \n"
3754 "mflo %[lo] \n"
3755 "daddu %[t0], %[t0], %[lo] \n"
3756 "sltu %[carry], %[t0], %[lo] \n"
3757 "daddu %[t1], %[t1], %[carry] \n"
3758 "sltu %[t], %[t1], %[carry] \n"
3759 "daddu %[t1], %[t1], %[hi] \n"
3760 "sltu %[carry], %[t1], %[hi] \n"
3761 "or %[carry], %[carry], %[t] \n"
3762 "daddu %[t2], %[t2], %[carry] \n"
3763 : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t)
3764 : [A]"r"(A), [B]"r"(B)
3765 :
3766 );
3767 }
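// An equivalent portable sketch of MACC above (illustrative only, assuming
// the GCC/Clang unsigned __int128 extension): the triple-word accumulator
// t2:t1:t0 grows by the full 128-bit product A*B, with carries rippling up.
inline void MACC_reference(unsigned long A, unsigned long B,
                           unsigned long &t0, unsigned long &t1, unsigned long &t2) {
  unsigned __int128 p = (unsigned __int128)A * B;
  unsigned __int128 s = (unsigned __int128)t0 + (unsigned long)p;  // add low product word
  t0 = (unsigned long)s;
  s = (unsigned __int128)t1 + (unsigned long)(p >> 64) + (unsigned long)(s >> 64); // high word + carry
  t1 = (unsigned long)s;
  t2 += (unsigned long)(s >> 64);                                  // final carry
}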
3769 // As MACC above, but add twice the double-length result into the
3770 // accumulator.
3771 inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) {
3772 unsigned long hi, lo, carry = 0, t = 0;
3773 __asm__ __volatile__(
3774 "dmultu %[A], %[B] \n"
3775 "mfhi %[hi] \n"
3776 "mflo %[lo] \n"
3777 "daddu %[t0], %[t0], %[lo] \n"
3778 "sltu %[carry], %[t0], %[lo] \n"
3779 "daddu %[t1], %[t1], %[carry] \n"
3780 "sltu %[t], %[t1], %[carry] \n"
3781 "daddu %[t1], %[t1], %[hi] \n"
3782 "sltu %[carry], %[t1], %[hi] \n"
3783 "or %[carry], %[carry], %[t] \n"
3784 "daddu %[t2], %[t2], %[carry] \n"
3785 "daddu %[t0], %[t0], %[lo] \n"
3786 "sltu %[carry], %[t0], %[lo] \n"
3787 "daddu %[t1], %[t1], %[carry] \n"
3788 "sltu %[t], %[t1], %[carry] \n"
3789 "daddu %[t1], %[t1], %[hi] \n"
3790 "sltu %[carry], %[t1], %[hi] \n"
3791 "or %[carry], %[carry], %[t] \n"
3792 "daddu %[t2], %[t2], %[carry] \n"
3793 : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t)
3794 : [A]"r"(A), [B]"r"(B)
3795 :
3796 );
3797 }
3799 // Fast Montgomery multiplication. The derivation of the algorithm is
3800 // in A Cryptographic Library for the Motorola DSP56000,
3801 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
3803 static void __attribute__((noinline))
3804 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3805 unsigned long m[], unsigned long inv, int len) {
3806 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3807 int i;
3809 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3811 for (i = 0; i < len; i++) {
3812 int j;
3813 for (j = 0; j < i; j++) {
3814 MACC(a[j], b[i-j], t0, t1, t2);
3815 MACC(m[j], n[i-j], t0, t1, t2);
3816 }
3817 MACC(a[i], b[0], t0, t1, t2);
3818 m[i] = t0 * inv;
3819 MACC(m[i], n[0], t0, t1, t2);
3821 assert(t0 == 0, "broken Montgomery multiply");
3823 t0 = t1; t1 = t2; t2 = 0;
3824 }
3826 for (i = len; i < 2*len; i++) {
3827 int j;
3828 for (j = i-len+1; j < len; j++) {
3829 MACC(a[j], b[i-j], t0, t1, t2);
3830 MACC(m[j], n[i-j], t0, t1, t2);
3831 }
3832 m[i-len] = t0;
3833 t0 = t1; t1 = t2; t2 = 0;
3834 }
3836 while (t0)
3837 t0 = sub(m, n, t0, len);
3838 }
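// The asserts above require inv == -n[0]^-1 (mod 2^64). For illustration, a
// hypothetical caller could derive inv by Newton iteration on the odd
// modulus word (this helper is not part of this file; it is only a sketch):
static unsigned long montgomery_inverse_sketch(unsigned long n0) {
  unsigned long x = n0;          // correct to 3 bits: n0*n0 == 1 (mod 8) for odd n0
  for (int i = 0; i < 5; i++)    // precision doubles: 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits
    x *= 2 - n0 * x;             // Newton step for the inverse mod 2^64
  return -x;                     // now n0 * (-x) == -1 (mod 2^64)
}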
3840 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3841 // multiplies so it should be up to 25% faster than Montgomery
3842 // multiplication. However, its loop control is more complex and it
3843 // may actually run slower on some machines.
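// The savings come from symmetry: each cross term a[j]*a[i-j] with j != i-j
// appears twice in the square, so MACC2 accumulates the pair with a single
// dmultu, and only the diagonal terms a[j]*a[j] need a plain MACC.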
3845 static void __attribute__((noinline))
3846 montgomery_square(unsigned long a[], unsigned long n[],
3847 unsigned long m[], unsigned long inv, int len) {
3848 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3849 int i;
3851 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3853 for (i = 0; i < len; i++) {
3854 int j;
3855 int end = (i+1)/2;
3856 for (j = 0; j < end; j++) {
3857 MACC2(a[j], a[i-j], t0, t1, t2);
3858 MACC(m[j], n[i-j], t0, t1, t2);
3859 }
3860 if ((i & 1) == 0) {
3861 MACC(a[j], a[j], t0, t1, t2);
3862 }
3863 for (; j < i; j++) {
3864 MACC(m[j], n[i-j], t0, t1, t2);
3865 }
3866 m[i] = t0 * inv;
3867 MACC(m[i], n[0], t0, t1, t2);
3869 assert(t0 == 0, "broken Montgomery square");
3871 t0 = t1; t1 = t2; t2 = 0;
3872 }
3874 for (i = len; i < 2*len; i++) {
3875 int start = i-len+1;
3876 int end = start + (len - start)/2;
3877 int j;
3878 for (j = start; j < end; j++) {
3879 MACC2(a[j], a[i-j], t0, t1, t2);
3880 MACC(m[j], n[i-j], t0, t1, t2);
3881 }
3882 if ((i & 1) == 0) {
3883 MACC(a[j], a[j], t0, t1, t2);
3884 }
3885 for (; j < len; j++) {
3886 MACC(m[j], n[i-j], t0, t1, t2);
3887 }
3888 m[i-len] = t0;
3889 t0 = t1; t1 = t2; t2 = 0;
3890 }
3892 while (t0)
3893 t0 = sub(m, n, t0, len);
3894 }
3896 // Swap words in a longword.
3897 static unsigned long swap(unsigned long x) {
3898 return (x << 32) | (x >> 32);
3899 }
3901 // Copy len longwords from s to d, word-swapping as we go. The
3902 // destination array is reversed.
3903 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3904 d += len;
3905 while(len-- > 0) {
3906 d--;
3907 *d = swap(*s);
3908 s++;
3909 }
3910 }
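// Example: with s = { 0x0000000100000002, 0x0000000300000004 } and len = 2,
// the result is d = { 0x0000000400000003, 0x0000000200000001 }: the
// most-significant-first jint magnitude coming from Java becomes a
// least-significant-first array of longwords for the C code above.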
3912 // The threshold at which squaring is advantageous was determined
3913 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3914 // Doesn't seem to be relevant for MIPS64 so we use the same value.
3915 #define MONTGOMERY_SQUARING_THRESHOLD 64
3917 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3918 jint len, jlong inv,
3919 jint *m_ints) {
3920 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3921 int longwords = len/2;
3923 // Make very sure we don't use so much space that the stack might
3924 // overflow. 512 jints corresponds to a 16384-bit integer and
3925 // will use here a total of 8k bytes of stack space.
3926 int total_allocation = longwords * sizeof (unsigned long) * 4;
3927 guarantee(total_allocation <= 8192, "must be");
3928 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3930 // Local scratch arrays
3931 unsigned long
3932 *a = scratch + 0 * longwords,
3933 *b = scratch + 1 * longwords,
3934 *n = scratch + 2 * longwords,
3935 *m = scratch + 3 * longwords;
3937 reverse_words((unsigned long *)a_ints, a, longwords);
3938 reverse_words((unsigned long *)b_ints, b, longwords);
3939 reverse_words((unsigned long *)n_ints, n, longwords);
3941 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3943 reverse_words(m, (unsigned long *)m_ints, longwords);
3944 }
3946 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3947 jint len, jlong inv,
3948 jint *m_ints) {
3949 assert(len % 2 == 0, "array length in montgomery_square must be even");
3950 int longwords = len/2;
3952 // Make very sure we don't use so much space that the stack might
3953 // overflow. 512 jints corresponds to a 16384-bit integer and
3954 // will use here a total of 6k bytes of stack space.
3955 int total_allocation = longwords * sizeof (unsigned long) * 3;
3956 guarantee(total_allocation <= 8192, "must be");
3957 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3959 // Local scratch arrays
3960 unsigned long
3961 *a = scratch + 0 * longwords,
3962 *n = scratch + 1 * longwords,
3963 *m = scratch + 2 * longwords;
3965 reverse_words((unsigned long *)a_ints, a, longwords);
3966 reverse_words((unsigned long *)n_ints, n, longwords);
3968 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3969 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3970 } else {
3971 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3972 }
3974 reverse_words(m, (unsigned long *)m_ints, longwords);
3975 }