Mon, 06 Nov 2017 16:51:47 +0800
[Code Reorganization] remove trailing whitespace to pass jcheck test
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_mips.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
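// A quick sanity check on the constant above (a sketch, assuming the usual
// MIPS64 values StackAlignmentInBytes == 16 and stack_slot_size == 4):
//   StackAlignmentInSlots = 16 / 4 = 4
// i.e. frame sizes expressed in 32-bit slots must be multiples of 4 to keep
// the stack 16-byte aligned.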
class RegisterSaver {
  enum { FPU_regs_live = 32 };
  // Capture info about frame layout
  enum layout {
#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
    DEF_LAYOUT_OFFS(for_16_bytes_aligned)
    DEF_LAYOUT_OFFS(fpr0)
    DEF_LAYOUT_OFFS(fpr1)
    DEF_LAYOUT_OFFS(fpr2)
    DEF_LAYOUT_OFFS(fpr3)
    DEF_LAYOUT_OFFS(fpr4)
    DEF_LAYOUT_OFFS(fpr5)
    DEF_LAYOUT_OFFS(fpr6)
    DEF_LAYOUT_OFFS(fpr7)
    DEF_LAYOUT_OFFS(fpr8)
    DEF_LAYOUT_OFFS(fpr9)
    DEF_LAYOUT_OFFS(fpr10)
    DEF_LAYOUT_OFFS(fpr11)
    DEF_LAYOUT_OFFS(fpr12)
    DEF_LAYOUT_OFFS(fpr13)
    DEF_LAYOUT_OFFS(fpr14)
    DEF_LAYOUT_OFFS(fpr15)
    DEF_LAYOUT_OFFS(fpr16)
    DEF_LAYOUT_OFFS(fpr17)
    DEF_LAYOUT_OFFS(fpr18)
    DEF_LAYOUT_OFFS(fpr19)
    DEF_LAYOUT_OFFS(fpr20)
    DEF_LAYOUT_OFFS(fpr21)
    DEF_LAYOUT_OFFS(fpr22)
    DEF_LAYOUT_OFFS(fpr23)
    DEF_LAYOUT_OFFS(fpr24)
    DEF_LAYOUT_OFFS(fpr25)
    DEF_LAYOUT_OFFS(fpr26)
    DEF_LAYOUT_OFFS(fpr27)
    DEF_LAYOUT_OFFS(fpr28)
    DEF_LAYOUT_OFFS(fpr29)
    DEF_LAYOUT_OFFS(fpr30)
    DEF_LAYOUT_OFFS(fpr31)

    DEF_LAYOUT_OFFS(v0)
    DEF_LAYOUT_OFFS(v1)
    DEF_LAYOUT_OFFS(a0)
    DEF_LAYOUT_OFFS(a1)
    DEF_LAYOUT_OFFS(a2)
    DEF_LAYOUT_OFFS(a3)
    DEF_LAYOUT_OFFS(a4)
    DEF_LAYOUT_OFFS(a5)
    DEF_LAYOUT_OFFS(a6)
    DEF_LAYOUT_OFFS(a7)
    DEF_LAYOUT_OFFS(t0)
    DEF_LAYOUT_OFFS(t1)
    DEF_LAYOUT_OFFS(t2)
    DEF_LAYOUT_OFFS(t3)
    DEF_LAYOUT_OFFS(s0)
    DEF_LAYOUT_OFFS(s1)
    DEF_LAYOUT_OFFS(s2)
    DEF_LAYOUT_OFFS(s3)
    DEF_LAYOUT_OFFS(s4)
    DEF_LAYOUT_OFFS(s5)
    DEF_LAYOUT_OFFS(s6)
    DEF_LAYOUT_OFFS(s7)
    DEF_LAYOUT_OFFS(t8)
    DEF_LAYOUT_OFFS(t9)

    DEF_LAYOUT_OFFS(gp)
    DEF_LAYOUT_OFFS(fp)
    DEF_LAYOUT_OFFS(return)
    reg_save_size
  };
 public:

  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
  static int raOffset(void) { return return_off / 2; }
  // Rmethod
  static int methodOffset(void) { return s3_off / 2; }

  static int v0Offset(void) { return v0_off / 2; }
  static int v1Offset(void) { return v1_off / 2; }

  static int fpResultOffset(void) { return fpr0_off / 2; }

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
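// Each DEF_LAYOUT_OFFS(regname) above expands to two consecutive 32-bit slot
// indices, regname_off and regnameH_off, so every saved register occupies one
// 64-bit word. A sketch of the expansion:
//   for_16_bytes_aligned_off = 0, for_16_bytes_alignedH_off = 1,
//   fpr0_off = 2, fpr0H_off = 3, fpr1_off = 4, ...
// That is why the accessors divide by 2: return_off / 2 converts a 32-bit
// slot index into the word index used with wordSize-based loads and stores.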
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
  /*
  int frame_words = reg_save_size + additional_frame_words;
  int frame_size_in_bytes = frame_words * wordSize;
  *total_frame_words = frame_words;
  */
  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words * wordSize +
                                     reg_save_size * BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers and FPU state. We assume the caller has already
  // reserved the return address slot on the stack. We grow the frame
  // once and store each register by hand rather than using a push
  // sequence.
  __ daddiu(SP, SP, -reg_save_size * jintSize);

  __ sdc1(F0, SP, fpr0_off * jintSize);    __ sdc1(F1, SP, fpr1_off * jintSize);
  __ sdc1(F2, SP, fpr2_off * jintSize);    __ sdc1(F3, SP, fpr3_off * jintSize);
  __ sdc1(F4, SP, fpr4_off * jintSize);    __ sdc1(F5, SP, fpr5_off * jintSize);
  __ sdc1(F6, SP, fpr6_off * jintSize);    __ sdc1(F7, SP, fpr7_off * jintSize);
  __ sdc1(F8, SP, fpr8_off * jintSize);    __ sdc1(F9, SP, fpr9_off * jintSize);
  __ sdc1(F10, SP, fpr10_off * jintSize);  __ sdc1(F11, SP, fpr11_off * jintSize);
  __ sdc1(F12, SP, fpr12_off * jintSize);  __ sdc1(F13, SP, fpr13_off * jintSize);
  __ sdc1(F14, SP, fpr14_off * jintSize);  __ sdc1(F15, SP, fpr15_off * jintSize);
  __ sdc1(F16, SP, fpr16_off * jintSize);  __ sdc1(F17, SP, fpr17_off * jintSize);
  __ sdc1(F18, SP, fpr18_off * jintSize);  __ sdc1(F19, SP, fpr19_off * jintSize);
  __ sdc1(F20, SP, fpr20_off * jintSize);  __ sdc1(F21, SP, fpr21_off * jintSize);
  __ sdc1(F22, SP, fpr22_off * jintSize);  __ sdc1(F23, SP, fpr23_off * jintSize);
  __ sdc1(F24, SP, fpr24_off * jintSize);  __ sdc1(F25, SP, fpr25_off * jintSize);
  __ sdc1(F26, SP, fpr26_off * jintSize);  __ sdc1(F27, SP, fpr27_off * jintSize);
  __ sdc1(F28, SP, fpr28_off * jintSize);  __ sdc1(F29, SP, fpr29_off * jintSize);
  __ sdc1(F30, SP, fpr30_off * jintSize);  __ sdc1(F31, SP, fpr31_off * jintSize);
  __ sd(V0, SP, v0_off * jintSize);        __ sd(V1, SP, v1_off * jintSize);
  __ sd(A0, SP, a0_off * jintSize);        __ sd(A1, SP, a1_off * jintSize);
  __ sd(A2, SP, a2_off * jintSize);        __ sd(A3, SP, a3_off * jintSize);
  __ sd(A4, SP, a4_off * jintSize);        __ sd(A5, SP, a5_off * jintSize);
  __ sd(A6, SP, a6_off * jintSize);        __ sd(A7, SP, a7_off * jintSize);
  __ sd(T0, SP, t0_off * jintSize);
  __ sd(T1, SP, t1_off * jintSize);
  __ sd(T2, SP, t2_off * jintSize);
  __ sd(T3, SP, t3_off * jintSize);
  __ sd(S0, SP, s0_off * jintSize);
  __ sd(S1, SP, s1_off * jintSize);
  __ sd(S2, SP, s2_off * jintSize);
  __ sd(S3, SP, s3_off * jintSize);
  __ sd(S4, SP, s4_off * jintSize);
  __ sd(S5, SP, s5_off * jintSize);
  __ sd(S6, SP, s6_off * jintSize);
  __ sd(S7, SP, s7_off * jintSize);

  __ sd(T8, SP, t8_off * jintSize);
  __ sd(T9, SP, t9_off * jintSize);

  __ sd(GP, SP, gp_off * jintSize);
  __ sd(FP, SP, fp_off * jintSize);
  __ sd(RA, SP, return_off * jintSize);
  __ daddi(FP, SP, fp_off * jintSize);
  OopMapSet *oop_maps = new OopMapSet();
  //OopMap* map = new OopMap(frame_words, 0);
  OopMap* map = new OopMap(frame_size_in_slots, 0);

//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
  map->set_callee_saved(STACK_OFFSET(v0_off), V0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(v1_off), V1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a0_off), A0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a1_off), A1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a2_off), A2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a3_off), A3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a4_off), A4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a5_off), A5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a6_off), A6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a7_off), A7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t0_off), T0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t1_off), T1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t2_off), T2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t3_off), T3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s0_off), S0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s1_off), S1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s2_off), S2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s3_off), S3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s4_off), S4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s5_off), S5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s6_off), S6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s7_off), S7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t8_off), T8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t9_off), T9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(gp_off), GP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fp_off), FP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(return_off), RA->as_VMReg());

  map->set_callee_saved(STACK_OFFSET(fpr0_off), F0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr1_off), F1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr2_off), F2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr3_off), F3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr4_off), F4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr5_off), F5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr6_off), F6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr7_off), F7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr8_off), F8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr9_off), F9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr10_off), F10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr11_off), F11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr12_off), F12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr13_off), F13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr14_off), F14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr15_off), F15->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr16_off), F16->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr17_off), F17->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr18_off), F18->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr19_off), F19->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr20_off), F20->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr21_off), F21->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr22_off), F22->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr23_off), F23->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr24_off), F24->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr25_off), F25->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr26_off), F26->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr27_off), F27->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr28_off), F28->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr29_off), F29->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr30_off), F30->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr31_off), F31->as_VMReg());

#undef STACK_OFFSET
  return map;
}
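// Typical use of the pair above (a sketch; the exact call sites live in the
// blob generators elsewhere in this port):
//   int frame_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_words);
//   // ... call into the VM, registering `map` at the call's PC ...
//   RegisterSaver::restore_live_registers(masm);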
// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  __ ldc1(F0, SP, fpr0_off * jintSize);    __ ldc1(F1, SP, fpr1_off * jintSize);
  __ ldc1(F2, SP, fpr2_off * jintSize);    __ ldc1(F3, SP, fpr3_off * jintSize);
  __ ldc1(F4, SP, fpr4_off * jintSize);    __ ldc1(F5, SP, fpr5_off * jintSize);
  __ ldc1(F6, SP, fpr6_off * jintSize);    __ ldc1(F7, SP, fpr7_off * jintSize);
  __ ldc1(F8, SP, fpr8_off * jintSize);    __ ldc1(F9, SP, fpr9_off * jintSize);
  __ ldc1(F10, SP, fpr10_off * jintSize);  __ ldc1(F11, SP, fpr11_off * jintSize);
  __ ldc1(F12, SP, fpr12_off * jintSize);  __ ldc1(F13, SP, fpr13_off * jintSize);
  __ ldc1(F14, SP, fpr14_off * jintSize);  __ ldc1(F15, SP, fpr15_off * jintSize);
  __ ldc1(F16, SP, fpr16_off * jintSize);  __ ldc1(F17, SP, fpr17_off * jintSize);
  __ ldc1(F18, SP, fpr18_off * jintSize);  __ ldc1(F19, SP, fpr19_off * jintSize);
  __ ldc1(F20, SP, fpr20_off * jintSize);  __ ldc1(F21, SP, fpr21_off * jintSize);
  __ ldc1(F22, SP, fpr22_off * jintSize);  __ ldc1(F23, SP, fpr23_off * jintSize);
  __ ldc1(F24, SP, fpr24_off * jintSize);  __ ldc1(F25, SP, fpr25_off * jintSize);
  __ ldc1(F26, SP, fpr26_off * jintSize);  __ ldc1(F27, SP, fpr27_off * jintSize);
  __ ldc1(F28, SP, fpr28_off * jintSize);  __ ldc1(F29, SP, fpr29_off * jintSize);
  __ ldc1(F30, SP, fpr30_off * jintSize);  __ ldc1(F31, SP, fpr31_off * jintSize);

  __ ld(V0, SP, v0_off * jintSize);        __ ld(V1, SP, v1_off * jintSize);
  __ ld(A0, SP, a0_off * jintSize);        __ ld(A1, SP, a1_off * jintSize);
  __ ld(A2, SP, a2_off * jintSize);        __ ld(A3, SP, a3_off * jintSize);
  __ ld(A4, SP, a4_off * jintSize);        __ ld(A5, SP, a5_off * jintSize);
  __ ld(A6, SP, a6_off * jintSize);        __ ld(A7, SP, a7_off * jintSize);
  __ ld(T0, SP, t0_off * jintSize);
  __ ld(T1, SP, t1_off * jintSize);
  __ ld(T2, SP, t2_off * jintSize);
  __ ld(T3, SP, t3_off * jintSize);
  __ ld(S0, SP, s0_off * jintSize);
  __ ld(S1, SP, s1_off * jintSize);
  __ ld(S2, SP, s2_off * jintSize);
  __ ld(S3, SP, s3_off * jintSize);
  __ ld(S4, SP, s4_off * jintSize);
  __ ld(S5, SP, s5_off * jintSize);
  __ ld(S6, SP, s6_off * jintSize);
  __ ld(S7, SP, s7_off * jintSize);

  __ ld(T8, SP, t8_off * jintSize);
  __ ld(T9, SP, t9_off * jintSize);

  __ ld(GP, SP, gp_off * jintSize);
  __ ld(FP, SP, fp_off * jintSize);
  __ ld(RA, SP, return_off * jintSize);

  __ addiu(SP, SP, reg_save_size * jintSize);
}
// Pop the current frame and restore the registers that might be holding
// a result.
// FIXME: what if the result is a float?
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result registers. Only used by deoptimization. By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  __ ld(V0, SP, v0_off * jintSize);
  __ ld(V1, SP, v1_off * jintSize);
  __ addiu(SP, SP, return_off * jintSize);
}
// Is the vector's size (in bytes) bigger than a size saved by default?
// Vectors up to 16 bytes are covered by the default save/restore.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.

static int reg2offset_in(VMReg r) {
  // Account for saved FP and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
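// A worked example of the bias in reg2offset_in (a sketch, assuming the LP64
// values slots_per_word == 2 and stack_slot_size == 4): an incoming arg in
// stack slot 3 lands at (3 + 2*2) * 4 = 28 bytes off FP, i.e. past the two
// words reserved for the saved FP and the return address.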
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than SharedInfo::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
// 0 up to RegisterImpl::number_of_registers are the 32-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.

// ---------------------------------------------------------------------------
// The compiled Java calling convention.
// Pass oop/int args in registers T0 and A0 - A7; pass float/double args in
// registers F12 - F19; remaining args go on the stack.
// Doubles have precedence, so if you pass a mix of floats and doubles
// the doubles will grab the registers before the floats will.
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {

  // Create the mapping between argument positions and registers.
  //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
  static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
    T0, A0, A1, A2, A3, A4, A5, A6, A7
  };
  //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };
  uint args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}
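// A worked example of the mapping above (a sketch): note that `args` is a
// single counter shared by the integer and float register files, so for a
// signature (int, float, int) the assignment is
//   i=0 T_INT   -> INT_ArgReg[0] = T0
//   i=1 T_FLOAT -> FP_ArgReg[1]  = F13
//   i=2 T_INT   -> INT_ArgReg[2] = A1
// i.e. a float argument also consumes an integer-register position.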
// Helper class mostly to avoid passing masm everywhere, and handle store
// displacement overflow logic for LP64
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  Register Rdisp;
  void set_Rdisp(Register r) { Rdisp = r; }
#endif // _LP64

  void patch_callers_callsite();

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
// Patch the caller's callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ verify_oop(Rmethod);
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // V0 isn't live so capture the return address while we easily can.
  __ move(V0, RA);

  __ pushad();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // VM needs caller's callsite
  // VM needs target method

  __ move(A0, Rmethod);
  __ move(A1, V0);
  // we should preserve the return address
  __ verify_oop(Rmethod);
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes)); // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);

  __ delayed()->nop();
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
#ifdef _LP64
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off) {
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}
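// Note: the store_c2i_* helpers above are all Unimplemented() stubs on this
// port; gen_c2i_adapter below emits its stores inline with sd/swc1/sdc1
// instead of calling them.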
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.
  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // Since all args are passed on the stack,
  // total_args_passed * Interpreter::stackElementSize is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2 * wordSize);
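  // Worked example (a sketch, assuming the 64-bit values
  // Interpreter::stackElementSize == 8 and wordSize == 8): for
  // total_args_passed == 3, extraspace = 3 * 8 = 24, rounded up to 32,
  // which keeps the interpreter arg area 16-byte aligned.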
  // Get return address
  __ move(V0, RA);
  // set senderSP value
  // refer to interpreter_mips.cpp: generate_asm_entry
  __ move(Rsender, SP);
  __ addi(SP, SP, -extraspace);
  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
    // Say 4 args:
    //   i   st_off
    //   0   12      T_LONG
    //   1    8      T_VOID
    //   2    4      T_OBJECT
    //   3    0      T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use fpu stack top
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

      } else {

        int next_off = st_off - Interpreter::stackElementSize;
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        // Ref to is_Register condition
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT, SP, st_off - 8);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ sd(r, SP, st_off); // aoqi_test FIXME
      } else {
        // FIXME: mips will not enter here
        // long/double in gpr
        __ sd(r, SP, st_off); // aoqi_test FIXME
        /* Jin: In [java/util/zip/ZipFile.java]

             private static native long open(String name, int mode, long lastModified);
             private static native int getTotal(long jzfile);
         *
         * We need to transfer T_LONG parameters from a compiled method to a native method.
         * It's a complex process:
         *
         * Caller -> lir_static_call -> gen_resolve_stub
              -> -- resolve_static_call_C
                 `- gen_c2i_adapter()  [*]
                    |
                    `- AdapterHandlerLibrary::get_create_apapter_index
              -> generate_native_entry
                 -> InterpreterRuntime::SignatureHandlerGenerator::pass_long  [**]

         * In [**], a T_LONG parameter is stored in the stack as:

              (high)
              |         |
              -----------
              | 8 bytes |
              | (void)  |
              -----------
              | 8 bytes |
              | (long)  |
              -----------
              |         |
              (low)
         *
         * However, the sequence is reversed here:
         *
              (high)
              |         |
              -----------
              | 8 bytes |
              | (long)  |
              -----------
              | 8 bytes |
              | (void)  |
              -----------
              |         |
              (low)
         *
         * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry().
         */
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8);  // T_DOUBLE needs two slots
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr(AT);
  __ delayed()->nop();
}
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout. Lesp was saved by the calling I-frame and will be restored on
  // return. Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will. After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention. Finally, end in a jump to the compiled code. The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
  __ move(T9, SP);

  // Cut-out for having no stack args. Since up to 9 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME aoqi
    comp_words_on_stack = round_to(comp_args_on_stack * 4, wordSize) >> LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
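    // Worked example (a sketch, assuming wordSize == 8): for
    // comp_args_on_stack == 5, round_to(5*4, 8) >> 3 == 3 words, then
    // round_to(3, 2) == 4 words, keeping the outgoing area 16-byte aligned.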
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }

  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register
  // const Register saved_sp = eax;
  const Register saved_sp = V0;
  __ move(saved_sp, T9);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));
  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address)
      // NOTICE HERE!!!! I sub a wordSize here
      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
      //+ wordSize;

      // We can use AT as a temp here because compiled code doesn't
      // need it as an input, and if we end up going thru a c2i
      // because of a miss a reasonable value will be generated.
      if (!r_2->is_valid()) {
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())
        /*
        __ ld(AT, saved_sp, next_off);
        __ sd(AT, SP, st_off);
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off + wordSize);
        */

        /* 2012/4/9 Jin
         * [./org/eclipse/swt/graphics/GC.java]
         * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
             int destX, int destY, int destWidth, int destHeight,
             boolean simple,
             int imgWidth, int imgHeight,
             long maskPixmap,    <-- Pass T_LONG in stack
             int maskType);
         * Before this modification, Eclipse displays icons with solid black background.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      // assert(r != eax, "must be different");
      if (r_2->is_valid()) {
        // assert(r_2->as_Register() != eax, "need another temporary register");
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        // aoqi
        assert(r_2->as_Register() == r_1->as_Register(), "");
        //__ ld(r_2->as_Register(), saved_sp, ld_off);
        //__ ld(r, saved_sp, next_off);
        __ ld(r, saved_sp, ld_off);

        /* Jin:
         *
         * For T_LONG type, the real layout is as below:

             (high)
             |         |
             -----------
             | 8 bytes |
             | (void)  |
             -----------
             | 8 bytes |
             | (long)  |
             -----------
             |         |
             (low)
         *
         * We should load the low 8 bytes.
         */
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) {  // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }
937 }
939 // 6243940 We might end up in handle_wrong_method if
940 // the callee is deoptimized as we race thru here. If that
941 // happens we don't want to take a safepoint because the
942 // caller frame will look interpreted and arguments are now
943 // "compiled" so it is much better to make this transition
944 // invisible to the stack walking code. Unfortunately if
945 // we try and find the callee by normal means a safepoint
946 // is possible. So we stash the desired callee in the thread
947 // and the vm will find there should this case occur.
948 __ get_thread(T8);
949 __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));
951 // move methodOop to eax in case we end up in an c2i adapter.
952 // the c2i adapters expect methodOop in eax (c2) because c2's
953 // resolve stubs return the result (the method) in eax.
954 // I'd love to fix this.
955 __ move(V0, Rmethod);
956 __ jr(T9);
957 __ delayed()->nop();
958 }
// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know Rmethod holds the methodOop. The
  // args start out packed in the compiled layout. They need to be unpacked
  // into the interpreter layout. This will almost always require some stack
  // space. We grow the current (compiled) stack, then repack the args. We
  // finally end in a jump to the generic interpreter entry point. On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).
985 Label skip_fixup;
986 {
987 Register holder = T1;
988 Register receiver = T0;
989 Register temp = T8;
990 address ic_miss = SharedRuntime::get_ic_miss_stub();
992 Label missed;
994 __ verify_oop(holder);
995 //add for compressedoops
996 __ load_klass(temp, receiver);
997 __ verify_oop(temp);
999 __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
1000 __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
1001 __ bne(AT, temp, missed);
1002 __ delayed()->nop();
1003 // Method might have been compiled since the call site was patched to
1004 // interpreted if that is the case treat it as a miss so we can get
1005 // the call site corrected.
1006 __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
1007 __ beq(AT, R0, skip_fixup);
1008 __ delayed()->nop();
1009 __ bind(missed);
1011 __ jmp(ic_miss, relocInfo::runtime_call_type);
1012 __ delayed()->nop();
1013 }
1015 address c2i_entry = __ pc();
1017 agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1019 __ flush();
1020 return AdapterHandlerLibrary::new_entry(fingerprint,i2c_entry, c2i_entry, c2i_unverified_entry);
1021 }
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on MIPS");
  // Return the number of VMReg stack slots needed for the args.
  // This value does not include an abi space (like register window
  // save area).

  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots. Since we always
  // have space for storing at least 6 registers to memory we start with that.
  // See int_stk_helper for a further discussion.
  static const Register INT_ArgReg[Argument::n_register_parameters] = {
    A0, A1, A2, A3, A4, A5, A6, A7
  };
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };
  uint args = 0;
  uint stk_args = 0; // inc by 2 each time
  /* Example:
  ---   n   java.lang.UNIXProcess::forkAndExec
  private native int forkAndExec(byte[] prog,
                                 byte[] argBlock, int argc,
                                 byte[] envBlock, int envc,
                                 byte[] dir,
                                 boolean redirectErrorStream,
                                 FileDescriptor stdin_fd,
                                 FileDescriptor stdout_fd,
                                 FileDescriptor stderr_fd)

  JNIEXPORT jint JNICALL
  Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
                                         jobject process,
                                         jbyteArray prog,
                                         jbyteArray argBlock, jint argc,
                                         jbyteArray envBlock, jint envc,
                                         jbyteArray dir,
                                         jboolean redirectErrorStream,
                                         jobject stdin_fd,
                                         jobject stdout_fd,
                                         jobject stderr_fd)

  ::c_calling_convention
   0:      // env                  <-- a0
   1: L    // klass/obj            <-- t0 => a1
   2: [    // prog[]               <-- a0 => a2
   3: [    // argBlock[]           <-- a1 => a3
   4: I    // argc
   5: [    // envBlock[]           <-- a3 => a5
   6: I    // envc
   7: [    // dir[]                <-- a5 => a7
   8: Z    // redirectErrorStream   a6 => sp[0]
   9: L    // stdin                 a7 => sp[8]
  10: L    // stdout                fp[16] => sp[16]
  11: L    // stderr                fp[24] => sp[24]
  */
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}
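// A sketch of the resulting C mapping for a hypothetical static JNI method
// `static native void m(long x)` (with out_sig_bt built the way
// generate_native_wrapper below builds it):
//   out_sig_bt = { T_ADDRESS (JNIEnv*), T_OBJECT (klass), T_LONG, T_VOID }
//   env -> A0, klass -> A1, x -> A2
// because T_ADDRESS, T_OBJECT and T_LONG all take the set2 path and share
// the single `args` counter.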
// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ swc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ sdc1(FSF, FP, -wordSize);
    break;
  case T_VOID:
    break;
  case T_LONG:
    __ sd(V0, FP, -wordSize);
    break;
  case T_OBJECT:
  case T_ARRAY:
    __ sd(V0, FP, -wordSize);
    break;
  default: {
    __ sw(V0, FP, -wordSize);
    }
  }
}
void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ lwc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ ldc1(FSF, FP, -wordSize);
    break;
  case T_LONG:
    __ ld(V0, FP, -wordSize);
    break;
  case T_VOID:
    break;
  case T_OBJECT:
  case T_ARRAY:
    __ ld(V0, FP, -wordSize);
    break;
  default: {
    __ lw(V0, FP, -wordSize);
    }
  }
}
static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for (int i = first_arg; i < arg_count; i++) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ push(args[i].first()->as_FloatRegister());
    }
  }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for (int i = arg_count - 1; i >= first_arg; i--) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ pop(args[i].first()->as_FloatRegister());
    }
  }
}
// A simple move of integer-like type
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lw(AT, FP, reg2offset_in(src.first()));
      __ sd(AT, SP, reg2offset_out(dst.first()));
    } else {
      // stack to reg
      __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  } else {
    if (dst.first() != src.first()) {
      __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error: dst.first()
    }
  }
}
// An oop arg. Must pass a handle not the oop itself
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  //FIXME, for mips, dst can be register
  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    Register rHandle = V0;
    Label nil;
    __ xorr(rHandle, rHandle, rHandle);
    __ ld(AT, FP, reg2offset_in(src.first()));
    __ beq(AT, R0, nil);
    __ delayed()->nop();
    __ lea(rHandle, Address(FP, reg2offset_in(src.first())));
    __ bind(nil);
    if (dst.first()->is_stack())
      __ sd(rHandle, SP, reg2offset_out(dst.first()));
    else
      __ move(dst.first()->as_Register(), rHandle);
    // if dst is register
    //FIXME, do mips need out preserve stack slots?
    int offset_in_older_frame = src.first()->reg2stack()
                                + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame
                          + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }
  } else {
    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles
    const Register rOop = src.first()->as_Register();
    assert((rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()), "wrong register");
    const Register rHandle = V0;
    // Important: refer to java_calling_convention
    int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;
    __ sd(rOop, SP, offset);
    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    __ xorr(rHandle, rHandle, rHandle);
    __ beq(rOop, R0, skip);
    __ delayed()->nop();
    __ lea(rHandle, Address(SP, offset));
    __ bind(skip);
    // Store the handle parameter
    if (dst.first()->is_stack())
      __ sd(rHandle, SP, reg2offset_out(dst.first()));
    else
      __ move(dst.first()->as_Register(), rHandle);
    // if dst is register

    if (is_receiver) {
      *receiver_offset = offset;
    }
  }
}
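// In short: a NULL oop is passed to the native code as a NULL handle
// (rHandle stays zero past the beq), while a non-NULL oop is passed as the
// address of the stack slot that holds it, and that slot is recorded in the
// OopMap so the GC can update the oop across a safepoint.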
// A float arg may have to do float reg int reg conversion
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      __ lwc1(F12, FP, reg2offset_in(src.first()));
      __ swc1(F12, SP, reg2offset_out(dst.first()));
    } else {
      __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
    }
  } else {
    // reg to stack
    if (dst.first()->is_stack()) {
      __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
    } else {
      __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    }
  }
}
// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // A long_move VMRegPair can be two stack slots (possibly unaligned) or,
  // since this is a 64-bit port, a single integer register; handle each
  // stack/register combination below.

  if (src.first()->is_stack()) {
    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
    if (dst.first()->is_stack()) {
      __ ld(AT, FP, reg2offset_in(src.first()));
      __ sd(AT, SP, reg2offset_out(dst.first()));
    } else {
      __ ld(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else {
    if (dst.first()->is_stack()) {
      __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
    } else {
      __ move(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
// A double move
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // Because of the calling convention, src is either
  //   1: a single FPU register, or
  //   2: two stack slots (possibly unaligned);
  // dst can likewise be a register or a pair of stack slots, and each
  // combination is handled below.

  if (src.first()->is_stack()) {
    // source is all stack
    if (dst.first()->is_stack()) {
      __ ldc1(F12, FP, reg2offset_in(src.first()));

      __ sdc1(F12, SP, reg2offset_out(dst.first()));
    } else {
      __ ldc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
    }

  } else {
    // reg to stack
    // No worries about stack alignment
    if (dst.first()->is_stack()) {
      __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
    } else {
      __ mov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
    }
  }
}
static void verify_oop_args(MacroAssembler* masm,
                            methodHandle method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = T9;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (sig_bt[i] == T_OBJECT ||
          sig_bt[i] == T_ARRAY) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
          __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ verify_oop(temp_reg);
        } else {
          __ verify_oop(r->as_Register());
        }
      }
    }
  }
}
static void gen_special_dispatch(MacroAssembler* masm,
                                 methodHandle method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool has_receiver = false;
  Register receiver_reg = noreg;
  int member_arg_pos = -1;
  Register member_reg = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
    member_reg = S3;  // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else {
    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      receiver_reg = SSR;  // known to be free at this point
      __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method. The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                methodHandle method,
                                                int compile_id,
                                                BasicType* in_sig_bt,
                                                VMRegPair* in_regs,
                                                BasicType ret_type) {
  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t)__ pc();
    int vep_offset = ((intptr_t)__ pc()) - start;
    gen_special_dispatch(masm,
                         method,
                         in_sig_bt,
                         in_regs);
    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
    __ flush();
    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
    return nmethod::new_native_nmethod(method,
                                       compile_id,
                                       masm->code(),
                                       vep_offset,
                                       frame_complete,
                                       stack_slots / VMRegImpl::slots_per_word,
                                       in_ByteSize(-1),
                                       in_ByteSize(-1),
                                       (OopMapSet*)NULL);
  }
  bool is_critical_native = true;
  address native_func = method->critical_native_function();
  if (native_func == NULL) {
    native_func = method->native_function();
    is_critical_native = false;
  }
  assert(native_func != NULL, "must have function");

  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will gc the arguments. The only thing we need an
  // oopMap for is if the call is static
  //
  // An OopMap for lock (and class if static), and one for the VM call itself
  OopMapSet *oop_maps = new OopMapSet();

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method)

  const int total_in_args = method->size_of_parameters();
  int total_c_args = total_in_args;
  if (!is_critical_native) {
    total_c_args += 1;
    if (method->is_static()) {
      total_c_args++;
    }
  } else {
    for (int i = 0; i < total_in_args; i++) {
      if (in_sig_bt[i] == T_ARRAY) {
        total_c_args++;
      }
    }
  }
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
  BasicType* in_elem_bt = NULL;

  int argc = 0;
  if (!is_critical_native) {
    out_sig_bt[argc++] = T_ADDRESS;
    if (method->is_static()) {
      out_sig_bt[argc++] = T_OBJECT;
    }

    for (int i = 0; i < total_in_args; i++) {
      out_sig_bt[argc++] = in_sig_bt[i];
    }
  } else {
    Thread* THREAD = Thread::current();
    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
    SignatureStream ss(method->signature());
    for (int i = 0; i < total_in_args; i++) {
      if (in_sig_bt[i] == T_ARRAY) {
        // Arrays are passed as int, elem* pair
        out_sig_bt[argc++] = T_INT;
        out_sig_bt[argc++] = T_ADDRESS;
        Symbol* atype = ss.as_symbol(CHECK_NULL);
        const char* at = atype->as_C_string();
        if (strlen(at) == 2) {
          assert(at[0] == '[', "must be");
          switch (at[1]) {
          case 'B': in_elem_bt[i] = T_BYTE;    break;
          case 'C': in_elem_bt[i] = T_CHAR;    break;
          case 'D': in_elem_bt[i] = T_DOUBLE;  break;
          case 'F': in_elem_bt[i] = T_FLOAT;   break;
          case 'I': in_elem_bt[i] = T_INT;     break;
          case 'J': in_elem_bt[i] = T_LONG;    break;
          case 'S': in_elem_bt[i] = T_SHORT;   break;
          case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
          default:  ShouldNotReachHere();
          }
        }
      } else {
        out_sig_bt[argc++] = in_sig_bt[i];
        in_elem_bt[i] = T_VOID;
      }
      if (in_sig_bt[i] != T_VOID) {
        assert(in_sig_bt[i] == ss.type(), "must match");
        ss.next();
      }
    }
  }
1575 // Now figure out where the args must be stored and how much stack space
1576 // they require (neglecting out_preserve_stack_slots but space for storing
1577 // the 1st six register arguments). It's weird; see int_stk_helper.
1578 //
1579 int out_arg_slots;
1580 //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
1581 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1583 // Compute framesize for the wrapper. We need to handlize all oops in
1584 // registers. We must create space for them here that is disjoint from
1585 // the windowed save area because we have no control over when we might
1586 // flush the window again and overwrite values that gc has since modified.
1587 // (The live window race)
1588 //
1589 // We always just allocate 6 words for storing down these objects. This allows
1590 // us to simply record the base and use the Ireg number to decide which
1591 // slot to use. (Note that the reg number is the inbound number, not the
1592 // outbound number).
1593 // We must shuffle args to match the native convention, and include var-args space.
1595 // Calculate the total number of stack slots we will need.
1597 // First count the abi requirement plus all of the outgoing args
1598 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1600 // Now the space for the inbound oop handle area
1601 int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers
1602 if (is_critical_native) {
1603 // Critical natives may have to call out so they need a save area
1604 // for register arguments.
1605 int double_slots = 0;
1606 int single_slots = 0;
1607 for ( int i = 0; i < total_in_args; i++) {
1608 if (in_regs[i].first()->is_Register()) {
1609 const Register reg = in_regs[i].first()->as_Register();
1610 switch (in_sig_bt[i]) {
1611 case T_BOOLEAN:
1612 case T_BYTE:
1613 case T_SHORT:
1614 case T_CHAR:
1615 case T_INT: single_slots++; break;
1616 case T_ARRAY: // specific to LP64 (7145024)
1617 case T_LONG: double_slots++; break;
1618 default: ShouldNotReachHere();
1619 }
1620 } else if (in_regs[i].first()->is_FloatRegister()) {
1621 switch (in_sig_bt[i]) {
1622 case T_FLOAT: single_slots++; break;
1623 case T_DOUBLE: double_slots++; break;
1624 default: ShouldNotReachHere();
1625 }
1626 }
1627 }
1628 total_save_slots = double_slots * 2 + single_slots;
1629 // align the save area
1630 if (double_slots != 0) {
1631 stack_slots = round_to(stack_slots, 2);
1632 }
1633 }
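// Worked example (illustrative): a critical native taking (jint, jlong,
// jfloat) entirely in registers yields single_slots == 2 and
// double_slots == 1, so total_save_slots == 2 * 1 + 2 == 4.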
1635 int oop_handle_offset = stack_slots;
1636 stack_slots += total_save_slots;
1638 // Now any space we need for handlizing a klass if static method
1640 int klass_slot_offset = 0;
1641 int klass_offset = -1;
1642 int lock_slot_offset = 0;
1643 bool is_static = false;
1645 if (method->is_static()) {
1646 klass_slot_offset = stack_slots;
1647 stack_slots += VMRegImpl::slots_per_word;
1648 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1649 is_static = true;
1650 }
1652 // Plus a lock if needed
1654 if (method->is_synchronized()) {
1655 lock_slot_offset = stack_slots;
1656 stack_slots += VMRegImpl::slots_per_word;
1657 }
1659 // Now a place to save return value or as a temporary for any gpr -> fpr moves
1660 // + 2 for return address (which we own) and saved ebp
1661 stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7)
1663 // OK, the space we have allocated will look like:
1664 //
1665 //
1666 // FP-> | |
1667 // |---------------------|
1668 // | 2 slots for moves |
1669 // |---------------------|
1670 // | lock box (if sync) |
1671 // |---------------------| <- lock_slot_offset
1672 // | klass (if static) |
1673 // |---------------------| <- klass_slot_offset
1674 // | oopHandle area |
1675 // |---------------------| <- oop_handle_offset
1676 // | outbound memory |
1677 // | based arguments |
1678 // | |
1679 // |---------------------|
1680 // | vararg area |
1681 // |---------------------|
1682 // | |
1683 // SP-> | out_preserved_slots |
1684 //
1685 //
1688 // Now compute the actual number of stack words we need, rounding to keep
1689 // the stack properly aligned.
1690 stack_slots = round_to(stack_slots, StackAlignmentInSlots);
1692 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
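// Worked example (illustrative): with StackAlignmentInBytes == 16 and
// 4-byte stack slots, StackAlignmentInSlots == 4, so e.g. 45 raw slots
// round up to 48 and stack_size == 48 * 4 == 192 bytes.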
1694 intptr_t start = (intptr_t)__ pc();
1698 // First thing make an ic check to see if we should even be here
1699 address ic_miss = SharedRuntime::get_ic_miss_stub();
1701 // We are free to use all registers as temps without saving them and
1702 // restoring them except ebp. ebp is the only callee save register
1703 // as far as the interpreter and the compiler(s) are concerned.
1705 //refer to register_mips.hpp:IC_Klass
1706 const Register ic_reg = T1;
1707 const Register receiver = T0;
1709 Label hit;
1710 Label exception_pending;
1712 __ verify_oop(receiver);
1713 //add for compressedoops
1714 __ load_klass(T9, receiver);
1715 __ beq(T9, ic_reg, hit);
1716 __ delayed()->nop();
1717 __ jmp(ic_miss, relocInfo::runtime_call_type);
1718 __ delayed()->nop();
1719 // The verified entry must be aligned for code patching,
1720 // and the first 5 bytes must be in the same cache line;
1721 // if we align at 8 we can be sure the 5 bytes are in the same line.
1722 __ align(8);
1724 __ bind(hit);
1727 int vep_offset = ((intptr_t)__ pc()) - start;
1728 #ifdef COMPILER1
1729 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1730 // Object.hashCode can pull the hashCode from the header word
1731 // instead of doing a full VM transition once it's been computed.
1732 // Since hashCode is usually polymorphic at call sites we can't do
1733 // this optimization at the call site without a lot of work.
1734 Label slowCase;
1735 Register receiver = T0;
1736 Register result = V0;
1737 __ ld(result, receiver, oopDesc::mark_offset_in_bytes());
1738 // check if locked
1739 __ andi(AT, result, markOopDesc::unlocked_value);
1740 __ beq(AT, R0, slowCase);
1741 __ delayed()->nop();
1742 if (UseBiasedLocking) {
1743 // Check if biased and fall through to runtime if so
1744 __ andi(AT, result, markOopDesc::biased_lock_bit_in_place);
1745 __ bne(AT, R0, slowCase);
1746 __ delayed()->nop();
1747 }
1748 // get hash
1749 __ li(AT, markOopDesc::hash_mask_in_place);
1750 __ andr(AT, result, AT);
1751 // test if hashCode exists
1752 __ beq (AT, R0, slowCase);
1753 __ delayed()->nop();
1754 __ shr(result, markOopDesc::hash_shift);
1755 __ jr(RA);
1756 __ delayed()->nop();
1757 __ bind (slowCase);
1758 }
1759 #endif // COMPILER1
1761 // The instruction at the verified entry point must be 5 bytes or longer
1762 // because it can be patched on the fly by make_non_entrant. The stack bang
1763 // instruction fits that requirement.
1765 // Generate stack overflow check
1767 if (UseStackBanging) {
1768 //this function will modify the value in A0
1769 __ push(A0);
1770 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1771 __ pop(A0);
1772 } else {
1773 // need a 5 byte instruction to allow MT safe patching to non-entrant
1774 __ nop();
1775 __ nop();
1776 __ nop();
1777 __ nop();
1778 __ nop();
1779 }
1780 // Generate a new frame for the wrapper.
1781 // does mips need this?
1782 #ifndef OPT_THREAD
1783 __ get_thread(TREG);
1784 #endif
1785 //FIXME here
1786 __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
1787 // -2 because return address is already present and so is saved ebp
1788 __ move(AT, -(StackAlignmentInBytes));
1789 __ andr(SP, SP, AT);
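// Illustrative: with 16-byte alignment, AT == -16 == ~0xF, so the andr
// above rounds SP down to the next 16-byte boundary.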
1791 __ enter();
1792 __ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
1794 // Frame is now completed as far as size and linkage.
1796 int frame_complete = ((intptr_t)__ pc()) - start;
1798 // Calculate the difference between esp and ebp. We need to know it
1799 // after the native call because on windows Java Natives will pop
1800 // the arguments and it is painful to do esp relative addressing
1801 // in a platform independent way. So after the call we switch to
1802 // ebp relative addressing.
1803 //FIXME actually, the fp_adjustment may not be right, because andr(SP, SP, AT) may change
1804 //the SP
1805 int fp_adjustment = stack_size - 2*wordSize;
1807 #ifdef COMPILER2
1808 // C2 may leave the stack dirty if not in SSE2+ mode
1809 __ empty_FPU_stack();
1810 #endif /* COMPILER2 */
1812 // Compute the ebp offset for any slots used after the jni call
1814 int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
1815 // We use edi as a thread pointer because it is callee save and
1816 // if we load it once it is usable through the entire wrapper
1817 // const Register thread = edi;
1818 const Register thread = TREG;
1820 // We use esi as the oop handle for the receiver/klass
1821 // It is callee save so it survives the call to native
1823 // const Register oop_handle_reg = esi;
1824 const Register oop_handle_reg = S4;
1825 if (is_critical_native) {
1826 __ stop("generate_native_wrapper in sharedRuntime <2>");
1827 //TODO:Fu
1828 /*
1829 check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
1830 oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1831 */
1832 }
1834 #ifndef OPT_THREAD
1835 __ get_thread(thread);
1836 #endif
1838 //
1839 // We immediately shuffle the arguments so that any vm call we have to
1840 // make from here on out (sync slow path, jvmpi, etc.) we will have
1841 // captured the oops from our caller and have a valid oopMap for
1842 // them.
1844 // -----------------
1845 // The Grand Shuffle
1846 //
1847 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1848 // and, if static, the class mirror instead of a receiver. This pretty much
1849 // guarantees that register layout will not match (and mips doesn't use reg
1850 // parms though amd does). Since the native abi doesn't use register args
1851 // and the java convention does, we don't have to worry about collisions.
1852 // All of our moves are reg->stack or stack->stack.
1853 // We ignore the extra arguments during the shuffle and handle them at the
1854 // last moment. The shuffle is described by the two calling convention
1855 // vectors we have in our possession. We simply walk the java vector to
1856 // get the source locations and the c vector to get the destinations.
1858 int c_arg = method->is_static() ? 2 : 1 ;
1860 // Record esp-based slot for receiver on stack for non-static methods
1861 int receiver_offset = -1;
1863 // This is a trick. We double the stack slots so we can claim
1864 // the oops in the caller's frame. Since we are sure to have
1865 // more args than the caller, doubling is enough to make
1866 // sure we can capture all the incoming oop args from the
1867 // caller.
1868 //
1869 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1871 // Mark location of rbp (someday)
1872 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
1874 // Use eax, ebx as temporaries during any memory-memory moves we have to do
1875 // All inbound args are referenced based on rbp and all outbound args via rsp.
1879 #ifdef ASSERT
1880 bool reg_destroyed[RegisterImpl::number_of_registers];
1881 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1882 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1883 reg_destroyed[r] = false;
1884 }
1885 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
1886 freg_destroyed[f] = false;
1887 }
1889 #endif /* ASSERT */
1891 // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx
1892 // are free as temporaries if we have to do stack to stack moves.
1893 // All inbound args are referenced based on ebp and all outbound args via esp.
1895 // This may iterate in two different directions depending on the
1896 // kind of native it is. The reason is that for regular JNI natives
1897 // the incoming and outgoing registers are offset upwards and for
1898 // critical natives they are offset down.
1899 GrowableArray<int> arg_order(2 * total_in_args);
1900 VMRegPair tmp_vmreg;
1901 tmp_vmreg.set1(T8->as_VMReg());
1903 if (!is_critical_native) {
1904 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1905 arg_order.push(i);
1906 arg_order.push(c_arg);
1907 }
1908 } else {
1909 // Compute a valid move order, using tmp_vmreg to break any cycles
1910 __ stop("generate_native_wrapper in sharedRuntime <2>");
1911 //TODO:Fu
1912 // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
1913 }
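// Move-order example (illustrative): a non-static JNI native with two Java
// args has total_c_args == 3 (JNIEnv* goes first), so arg_order holds the
// pairs (1 -> 2) then (0 -> 1). Because the outgoing slots are offset
// upwards from the incoming ones, shuffling from the last arg downwards
// never overwrites a source that has not been read yet.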
1915 int temploc = -1;
1916 for (int ai = 0; ai < arg_order.length(); ai += 2) {
1917 int i = arg_order.at(ai);
1918 int c_arg = arg_order.at(ai + 1);
1919 __ block_comment(err_msg("move %d -> %d", i, c_arg));
1920 if (c_arg == -1) {
1921 assert(is_critical_native, "should only be required for critical natives");
1922 // This arg needs to be moved to a temporary
1923 __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
1924 in_regs[i] = tmp_vmreg;
1925 temploc = i;
1926 continue;
1927 } else if (i == -1) {
1928 assert(is_critical_native, "should only be required for critical natives");
1929 // Read from the temporary location
1930 assert(temploc != -1, "must be valid");
1931 i = temploc;
1932 temploc = -1;
1933 }
1934 #ifdef ASSERT
1935 if (in_regs[i].first()->is_Register()) {
1936 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1937 } else if (in_regs[i].first()->is_FloatRegister()) {
1938 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1939 }
1940 if (out_regs[c_arg].first()->is_Register()) {
1941 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1942 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1943 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1944 }
1945 #endif /* ASSERT */
1946 switch (in_sig_bt[i]) {
1947 case T_ARRAY:
1948 if (is_critical_native) {
1949 __ stop("generate_native_wrapper in sharedRuntime <2>");
1950 //TODO:Fu
1951 // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1952 c_arg++;
1953 #ifdef ASSERT
1954 if (out_regs[c_arg].first()->is_Register()) {
1955 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1956 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1957 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1958 }
1959 #endif
1960 break;
1961 }
1962 case T_OBJECT:
1963 assert(!is_critical_native, "no oop arguments");
1964 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1965 ((i == 0) && (!is_static)),
1966 &receiver_offset);
1967 break;
1968 case T_VOID:
1969 break;
1971 case T_FLOAT:
1972 float_move(masm, in_regs[i], out_regs[c_arg]);
1973 break;
1975 case T_DOUBLE:
1976 assert( i + 1 < total_in_args &&
1977 in_sig_bt[i + 1] == T_VOID &&
1978 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1979 double_move(masm, in_regs[i], out_regs[c_arg]);
1980 break;
1982 case T_LONG :
1983 long_move(masm, in_regs[i], out_regs[c_arg]);
1984 break;
1986 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1988 default:
1989 simple_move32(masm, in_regs[i], out_regs[c_arg]);
1990 }
1991 }
1993 // point c_arg at the first arg that is already loaded in case we
1994 // need to spill before we call out
1995 c_arg = total_c_args - total_in_args;
1996 // Pre-load a static method's oop into esi. Used both by locking code and
1997 // the normal JNI call code.
1999 __ move(oop_handle_reg, A1);
2001 if (method->is_static() && !is_critical_native) {
2003 // load oop into a register
2004 int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
2005 (method->method_holder())->java_mirror()));
2008 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2009 __ relocate(rspec);
2010 __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
2011 // Now handlize the static class mirror; it's known not-null.
2012 __ sd( oop_handle_reg, SP, klass_offset);
2013 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2015 // Now get the handle
2016 __ lea(oop_handle_reg, Address(SP, klass_offset));
2017 // store the klass handle as second argument
2018 __ move(A1, oop_handle_reg);
2019 // and protect the arg if we must spill
2020 c_arg--;
2021 }
2023 // Change state to native (we save the return address in the thread, since it might not
2024 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
2025 // points into the right code segment. It does not have to be the correct return pc.
2026 // We use the same pc/oopMap repeatedly when we call out
2028 intptr_t the_pc = (intptr_t) __ pc();
2029 oop_maps->add_gc_map(the_pc - start, map);
2031 __ set_last_Java_frame(SP, noreg, NULL);
2032 __ relocate(relocInfo::internal_pc_type);
2033 {
2034 intptr_t save_pc = (intptr_t)the_pc ;
2035 __ patchable_set48(AT, save_pc);
2036 }
2037 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
2040 // We have all of the arguments set up at this point. We must not touch any register
2041 // argument registers at this point (what if we save/restore them and there are no oops?)
2042 {
2043 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2044 int metadata_index = __ oop_recorder()->find_index(method());
2045 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
2046 __ relocate(rspec);
2047 __ patchable_set48(AT, (long)(method()));
2049 __ call_VM_leaf(
2050 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2051 thread, AT);
2053 }
2055 // These are register definitions we need for locking/unlocking
2056 const Register swap_reg = T8; // Must use eax for cmpxchg instruction
2057 const Register obj_reg = T9; // Will contain the oop
2058 //const Register lock_reg = T6; // Address of compiler lock object (BasicLock)
2059 const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock)
2063 Label slow_path_lock;
2064 Label lock_done;
2066 // Lock a synchronized method
2067 if (method->is_synchronized()) {
2068 assert(!is_critical_native, "unhandled");
2070 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2072 // Get the handle (the 2nd argument)
2073 __ move(oop_handle_reg, A1);
2075 // Get address of the box
2076 __ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
2078 // Load the oop from the handle
2079 __ ld(obj_reg, oop_handle_reg, 0);
2081 if (UseBiasedLocking) {
2082 // Note that oop_handle_reg is trashed during this call
2083 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock);
2084 }
2086 // Load immediate 1 into swap_reg %eax
2087 __ move(swap_reg, 1);
2089 __ ld(AT, obj_reg, 0);
2090 __ orr(swap_reg, swap_reg, AT);
2092 __ sd( swap_reg, lock_reg, mark_word_offset);
2093 __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
2094 __ bne(AT, R0, lock_done);
2095 __ delayed()->nop();
2096 // Test if the oopMark is an obvious stack pointer, i.e.,
2097 // 1) (mark & 3) == 0, and
2098 // 2) esp <= mark < mark + os::pagesize()
2099 // These 3 tests can be done by evaluating the following
2100 // expression: ((mark - esp) & (3 - os::vm_page_size())),
2101 // assuming both stack pointer and pagesize have their
2102 // least significant 2 bits clear.
2103 // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
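// Worked example (illustrative): with a 4K page, 3 - 4096 == ...F003, so
// the and-result below is zero exactly when (mark - SP) is 4-byte aligned
// and 0 <= mark - SP < 4096, i.e. the displaced mark is a stack lock in
// our own frame (the recursive case).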
2105 __ dsub(swap_reg, swap_reg, SP);
2106 __ move(AT, 3 - os::vm_page_size());
2107 __ andr(swap_reg, swap_reg, AT);
2108 // Save the test result, for recursive case, the result is zero
2109 __ sd(swap_reg, lock_reg, mark_word_offset);
2110 //FIXME here, Why notEqual?
2111 __ bne(swap_reg, R0, slow_path_lock);
2112 __ delayed()->nop();
2113 // Slow path will re-enter here
2114 __ bind(lock_done);
2116 if (UseBiasedLocking) {
2117 // Re-fetch oop_handle_reg as we trashed it above
2118 __ move(A1, oop_handle_reg);
2119 }
2120 }
2123 // Finally just about ready to make the JNI call
2126 // get JNIEnv* which is first argument to native
2127 if (!is_critical_native) {
2128 __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
2129 }
2131 // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
2132 /* Load the second arguments into A1 */
2133 //__ ld(A1, SP , wordSize ); // klass
2135 // Now set thread in native
2136 __ addi(AT, R0, _thread_in_native);
2137 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2138 /* Jin: do the call */
2139 __ call(method->native_function(), relocInfo::runtime_call_type);
2140 __ delayed()->nop();
2141 // WARNING - on Windows Java Natives use pascal calling convention and pop the
2142 // arguments off of the stack. We could just re-adjust the stack pointer here
2143 // and continue to do SP relative addressing but we instead switch to FP
2144 // relative addressing.
2146 // Unpack native results.
2147 switch (ret_type) {
2148 case T_BOOLEAN: __ c2bool(V0); break;
2149 case T_CHAR   : __ andi(V0, V0, 0xFFFF); break;
2150 case T_BYTE   : __ sign_extend_byte(V0); break;
2151 case T_SHORT  : __ sign_extend_short(V0); break;
2152 case T_INT    : break; // nothing to do
2153 case T_DOUBLE :
2154 case T_FLOAT :
2155 // Result is in st0; we'll save as needed
2156 break;
2157 case T_ARRAY: // Really a handle
2158 case T_OBJECT: // Really a handle
2159 break; // can't de-handlize until after safepoint check
2160 case T_VOID: break;
2161 case T_LONG: break;
2162 default : ShouldNotReachHere();
2163 }
2164 // Switch thread to "native transition" state before reading the synchronization state.
2165 // This additional state is necessary because reading and testing the synchronization
2166 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2167 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2168 // VM thread changes sync state to synchronizing and suspends threads for GC.
2169 // Thread A is resumed to finish this native method, but doesn't block here since it
2170 // didn't see any synchronization in progress, and escapes.
2171 // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2172 //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset());
2173 // __ move(AT, (int)_thread_in_native_trans);
2174 __ addi(AT, R0, _thread_in_native_trans);
2175 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2177 Label after_transition;
2179 // check for safepoint operation in progress and/or pending suspend requests
2180 { Label Continue;
2181 //FIXME here, which register should we use?
2182 // SafepointSynchronize::_not_synchronized);
2183 __ li(AT, SafepointSynchronize::address_of_state());
2184 __ lw(A0, AT, 0);
2185 __ addi(AT, A0, -SafepointSynchronize::_not_synchronized);
2186 Label L;
2187 __ bne(AT,R0, L);
2188 __ delayed()->nop();
2189 __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset()));
2190 __ beq(AT, R0, Continue);
2191 __ delayed()->nop();
2192 __ bind(L);
2194 // Don't use call_VM as it will see a possible pending exception and forward it
2195 // and never return here preventing us from clearing _last_native_pc down below.
2196 // Also can't use call_VM_leaf either as it will check to see if esi & edi are
2197 // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2198 // by hand.
2199 //
2200 save_native_result(masm, ret_type, stack_slots);
2201 __ move(A0, thread);
2202 __ addi(SP, SP, -wordSize);
2203 __ push(S2);
2204 __ move(AT, -(StackAlignmentInBytes));
2205 __ move(S2, SP); // use S2 as a sender SP holder
2206 __ andr(SP, SP, AT); // align stack as required by ABI
2207 if (!is_critical_native) {
2208 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
2209 __ delayed()->nop();
2210 } else {
2211 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
2212 __ delayed()->nop();
2213 }
2214 __ move(SP, S2); // use S2 as a sender SP holder
2215 __ pop(S2);
2216 __ addi(SP, SP, wordSize);
2217 //add for compressedoops
2218 __ reinit_heapbase();
2219 // Restore any method result value
2220 restore_native_result(masm, ret_type, stack_slots);
2222 if (is_critical_native) {
2223 // The call above performed the transition to thread_in_Java so
2224 // skip the transition logic below.
2225 __ beq(R0, R0, after_transition);
2226 __ delayed()->nop();
2227 }
2229 __ bind(Continue);
2230 }
2232 // change thread state
2233 __ addi(AT, R0, _thread_in_Java);
2234 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2235 __ bind(after_transition);
2236 Label reguard;
2237 Label reguard_done;
2238 __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset()));
2239 __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled);
2240 __ beq(AT, R0, reguard);
2241 __ delayed()->nop();
2242 // slow path reguard re-enters here
2243 __ bind(reguard_done);
2245 // Handle possible exception (will unlock if necessary)
2247 // native result if any is live
2249 // Unlock
2250 Label slow_path_unlock;
2251 Label unlock_done;
2252 if (method->is_synchronized()) {
2254 Label done;
2256 // Get locked oop from the handle we passed to jni
2257 __ ld( obj_reg, oop_handle_reg, 0);
2258 //FIXME
2259 if (UseBiasedLocking) {
2260 __ biased_locking_exit(obj_reg, T8, done);
2262 }
2264 // Simple recursive lock?
2266 __ ld(AT, FP, lock_slot_ebp_offset);
2267 __ beq(AT, R0, done);
2268 __ delayed()->nop();
2269 // Must save eax if it is live now because cmpxchg must use it
2270 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2271 save_native_result(masm, ret_type, stack_slots);
2272 }
2274 // get old displaced header
2275 __ ld(T8, FP, lock_slot_ebp_offset);
2276 // get address of the stack lock
2277 __ addi(c_rarg0, FP, lock_slot_ebp_offset);
2278 // Atomic swap old header if oop still contains the stack lock
2279 __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
2281 __ beq(AT, R0, slow_path_unlock);
2282 __ delayed()->nop();
2283 // slow path re-enters here
2284 __ bind(unlock_done);
2285 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2286 restore_native_result(masm, ret_type, stack_slots);
2287 }
2289 __ bind(done);
2291 }
2292 {
2293 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2294 // Tell dtrace about this method exit
2295 save_native_result(masm, ret_type, stack_slots);
2296 int metadata_index = __ oop_recorder()->find_index( (method()));
2297 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
2298 __ relocate(rspec);
2299 __ patchable_set48(AT, (long)(method()));
2301 __ call_VM_leaf(
2302 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2303 thread, AT);
2304 restore_native_result(masm, ret_type, stack_slots);
2305 }
2307 // We can finally stop using that last_Java_frame we set up ages ago
2309 __ reset_last_Java_frame(false, true);
2311 // Unpack oop result
2312 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2313 Label L;
2314 __ beq(V0, R0, L);
2315 __ delayed()->nop();
2316 __ ld(V0, V0, 0);
2317 __ bind(L);
2318 __ verify_oop(V0);
2319 }
2321 if (!is_critical_native) {
2322 // reset handle block
2323 __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
2324 __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes());
2325 }
2327 if (!is_critical_native) {
2328 // Any exception pending?
2329 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2331 __ bne(AT, R0, exception_pending);
2332 __ delayed()->nop();
2333 }
2334 // no exception, we're almost done
2336 // check that only result value is on FPU stack
2337 __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
2339 // Return
2340 #ifndef OPT_THREAD
2341 __ get_thread(TREG);
2342 #endif
2343 __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
2344 __ leave();
2346 __ jr(RA);
2347 __ delayed()->nop();
2348 // Unexpected paths are out of line and go here
2349 /*
2350 if (!is_critical_native) {
2351 // forward the exception
2352 __ bind(exception_pending);
2354 // and forward the exception
2355 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2356 }
2357 */
2358 // Slow path locking & unlocking
2359 if (method->is_synchronized()) {
2361 // BEGIN Slow path lock
2362 __ bind(slow_path_lock);
2364 // protect the args we've loaded
2365 save_args(masm, total_c_args, c_arg, out_regs);
2367 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2368 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2370 __ move(A0, obj_reg);
2371 __ move(A1, lock_reg);
2372 __ move(A2, thread);
2373 __ addi(SP, SP, - 3*wordSize);
2375 __ move(AT, -(StackAlignmentInBytes));
2376 __ move(S2, SP); // use S2 as a sender SP holder
2377 __ andr(SP, SP, AT); // align stack as required by ABI
2379 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
2380 __ delayed()->nop();
2381 __ move(SP, S2);
2382 __ addi(SP, SP, 3*wordSize);
2384 restore_args(masm, total_c_args, c_arg, out_regs);
2386 #ifdef ASSERT
2387 { Label L;
2388 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2389 __ beq(AT, R0, L);
2390 __ delayed()->nop();
2391 __ stop("no pending exception allowed on exit from monitorenter");
2392 __ bind(L);
2393 }
2394 #endif
2395 __ b(lock_done);
2396 __ delayed()->nop();
2397 // END Slow path lock
2399 // BEGIN Slow path unlock
2400 __ bind(slow_path_unlock);
2402 // Slow path unlock
2404 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2405 save_native_result(masm, ret_type, stack_slots);
2406 }
2407 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2409 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2410 __ push(AT);
2411 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
2413 __ move(AT, -(StackAlignmentInBytes));
2414 __ move(S2, SP); // use S2 as a sender SP holder
2415 __ andr(SP, SP, AT); // align stack as required by ABI
2417 // should be a peal
2418 // +wordSize because of the push above
2419 __ addi(A1, FP, lock_slot_ebp_offset);
2421 __ move(A0, obj_reg);
2422 __ addi(SP,SP, -2*wordSize);
2423 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
2424 relocInfo::runtime_call_type);
2425 __ delayed()->nop();
2426 __ addi(SP,SP, 2*wordSize);
2427 __ move(SP, S2);
2428 //add for compressedoops
2429 __ reinit_heapbase();
2430 #ifdef ASSERT
2431 {
2432 Label L;
2433 __ lw( AT, thread, in_bytes(Thread::pending_exception_offset()));
2434 __ beq(AT, R0, L);
2435 __ delayed()->nop();
2436 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2437 __ bind(L);
2438 }
2439 #endif /* ASSERT */
2441 __ pop(AT);
2442 __ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
2443 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2444 restore_native_result(masm, ret_type, stack_slots);
2445 }
2446 __ b(unlock_done);
2447 __ delayed()->nop();
2448 // END Slow path unlock
2450 }
2452 // SLOW PATH Reguard the stack if needed
2454 __ bind(reguard);
2455 save_native_result(masm, ret_type, stack_slots);
2456 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages),
2457 relocInfo::runtime_call_type);
2458 __ delayed()->nop();
2459 //add for compressedoops
2460 __ reinit_heapbase();
2461 restore_native_result(masm, ret_type, stack_slots);
2462 __ b(reguard_done);
2463 __ delayed()->nop();
2465 // BEGIN EXCEPTION PROCESSING
2466 if (!is_critical_native) {
2467 // Forward the exception
2468 __ bind(exception_pending);
2470 // remove possible return value from FPU register stack
2471 __ empty_FPU_stack();
2473 // pop our frame
2474 //forward_exception_entry needs the return address on the stack
2475 __ addiu(SP, FP, wordSize);
2476 __ ld(FP, SP, (-1) * wordSize);
2478 // and forward the exception
2479 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
2480 __ delayed()->nop();
2481 }
2482 __ flush();
2484 nmethod *nm = nmethod::new_native_nmethod(method,
2485 compile_id,
2486 masm->code(),
2487 vep_offset,
2488 frame_complete,
2489 stack_slots / VMRegImpl::slots_per_word,
2490 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2491 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2492 oop_maps);
2494 if (is_critical_native) {
2495 nm->set_lazy_critical_native(true);
2496 }
2498 return nm;
2500 }
2502 #ifdef HAVE_DTRACE_H
2503 // ---------------------------------------------------------------------------
2504 // Generate a dtrace nmethod for a given signature. The method takes arguments
2505 // in the Java compiled code convention, marshals them to the native
2506 // abi and then leaves nops at the position you would expect to call a native
2507 // function. When the probe is enabled the nops are replaced with a trap
2508 // instruction that dtrace inserts and the trace will cause a notification
2509 // to dtrace.
2510 //
2511 // The probes are only able to take primitive types and java/lang/String as
2512 // arguments. No other java types are allowed. Strings are converted to utf8
2513 // strings so that from dtrace point of view java strings are converted to C
2514 // strings. There is an arbitrary fixed limit on the total space that a method
2515 // can use for converting the strings. (256 chars per string in the signature).
2516 // So any java string larger than this is truncated.
2518 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
2519 static bool offsets_initialized = false;
2521 static VMRegPair reg64_to_VMRegPair(Register r) {
2522 VMRegPair ret;
2523 if (wordSize == 8) {
2524 ret.set2(r->as_VMReg());
2525 } else {
2526 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
2527 }
2528 return ret;
2529 }
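// Note: on LP64 (wordSize == 8) the whole 64-bit register forms a single
// VMRegPair via set2(); on 32-bit the pair (r->successor(), r) carries the
// high and low halves via set_pair().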
2532 nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
2533 methodHandle method) {
2536 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
2537 // be single threaded in this method.
2538 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
2540 // Fill in the signature array, for the calling-convention call.
2541 int total_args_passed = method->size_of_parameters();
2543 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
2544 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
2546 // The signature we are going to use for the trap that dtrace will see:
2547 // java/lang/String is converted, "this" is dropped, and any other object
2548 // is converted to NULL. (A one-slot java/lang/Long object reference
2549 // is converted to a two-slot long, which is why we double the allocation).
2550 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
2551 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
2553 int i=0;
2554 int total_strings = 0;
2555 int first_arg_to_pass = 0;
2556 int total_c_args = 0;
2558 // Skip the receiver as dtrace doesn't want to see it
2559 if( !method->is_static() ) {
2560 in_sig_bt[i++] = T_OBJECT;
2561 first_arg_to_pass = 1;
2562 }
2564 SignatureStream ss(method->signature());
2565 for ( ; !ss.at_return_type(); ss.next()) {
2566 BasicType bt = ss.type();
2567 in_sig_bt[i++] = bt; // Collect remaining bits of signature
2568 out_sig_bt[total_c_args++] = bt;
2569 if( bt == T_OBJECT) {
2570 Symbol* s = ss.as_symbol_or_null();
2571 if (s == vmSymbols::java_lang_String()) {
2572 total_strings++;
2573 out_sig_bt[total_c_args-1] = T_ADDRESS;
2574 } else if (s == vmSymbols::java_lang_Boolean() ||
2575 s == vmSymbols::java_lang_Byte()) {
2576 out_sig_bt[total_c_args-1] = T_BYTE;
2577 } else if (s == vmSymbols::java_lang_Character() ||
2578 s == vmSymbols::java_lang_Short()) {
2579 out_sig_bt[total_c_args-1] = T_SHORT;
2580 } else if (s == vmSymbols::java_lang_Integer() ||
2581 s == vmSymbols::java_lang_Float()) {
2582 out_sig_bt[total_c_args-1] = T_INT;
2583 } else if (s == vmSymbols::java_lang_Long() ||
2584 s == vmSymbols::java_lang_Double()) {
2585 out_sig_bt[total_c_args-1] = T_LONG;
2586 out_sig_bt[total_c_args++] = T_VOID;
2587 }
2588 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
2589 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
2590 // We convert double to long
2591 out_sig_bt[total_c_args-1] = T_LONG;
2592 out_sig_bt[total_c_args++] = T_VOID;
2593 } else if ( bt == T_FLOAT) {
2594 // We convert float to int
2595 out_sig_bt[total_c_args-1] = T_INT;
2596 }
2597 }
2599 assert(i == total_args_passed, "validly parsed signature");
2601 // Now get the compiled-Java layout as input arguments
2602 int comp_args_on_stack;
2603 comp_args_on_stack = SharedRuntime::java_calling_convention(
2604 in_sig_bt, in_regs, total_args_passed, false);
2606 // We have received a description of where all the java args are located
2607 // on entry to the wrapper. We need to convert these args to where
2608 // a native (non-jni) function would expect them. To figure out
2609 // where they go we convert the java signature to a C signature and remove
2610 // T_VOID for any long/double we might have received.
2613 // Now figure out where the args must be stored and how much stack space
2614 // they require (neglecting out_preserve_stack_slots but space for storing
2615 // the 1st six register arguments). It's weird; see int_stk_helper.
2617 int out_arg_slots;
2618 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
2620 // Calculate the total number of stack slots we will need.
2622 // First count the abi requirement plus all of the outgoing args
2623 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2625 // Plus a temp for possible conversion of float/double/long register args
2627 int conversion_temp = stack_slots;
2628 stack_slots += 2;
2631 // Now space for the string(s) we must convert
2633 int string_locs = stack_slots;
2634 stack_slots += total_strings *
2635 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
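// Worked example (illustrative, assuming max_dtrace_string_size == 256 to
// match the 256-chars-per-string comment above): each converted string
// consumes 256 / 4 == 64 stack slots, so two string args add 128 slots.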
2637 // OK, the space we have allocated will look like:
2638 //
2639 //
2640 // FP-> | |
2641 // |---------------------|
2642 // | string[n] |
2643 // |---------------------| <- string_locs[n]
2644 // | string[n-1] |
2645 // |---------------------| <- string_locs[n-1]
2646 // | ... |
2647 // | ... |
2648 // |---------------------| <- string_locs[1]
2649 // | string[0] |
2650 // |---------------------| <- string_locs[0]
2651 // | temp |
2652 // |---------------------| <- conversion_temp
2653 // | outbound memory |
2654 // | based arguments |
2655 // | |
2656 // |---------------------|
2657 // | |
2658 // SP-> | out_preserved_slots |
2659 //
2660 //
2662 // Now compute the actual number of stack words we need, rounding to keep
2663 // the stack properly aligned.
2664 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
2666 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2668 intptr_t start = (intptr_t)__ pc();
2670 // First thing make an ic check to see if we should even be here
2672 {
2673 Label L;
2674 const Register temp_reg = G3_scratch;
2675 Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
2676 __ verify_oop(O0);
2677 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
2678 __ cmp(temp_reg, G5_inline_cache_reg);
2679 __ brx(Assembler::equal, true, Assembler::pt, L);
2680 __ delayed()->nop();
2682 __ jump_to(ic_miss, 0);
2683 __ delayed()->nop();
2684 __ align(CodeEntryAlignment);
2685 __ bind(L);
2686 }
2688 int vep_offset = ((intptr_t)__ pc()) - start;
2691 // The instruction at the verified entry point must be 5 bytes or longer
2692 // because it can be patched on the fly by make_non_entrant. The stack bang
2693 // instruction fits that requirement.
2695 // Generate stack overflow check before creating frame
2696 __ generate_stack_overflow_check(stack_size);
2698 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
2699 "valid size for make_non_entrant");
2701 // Generate a new frame for the wrapper.
2702 __ save(SP, -stack_size, SP);
2704 // Frame is now completed as far as size and linkage.
2706 int frame_complete = ((intptr_t)__ pc()) - start;
2708 #ifdef ASSERT
2709 bool reg_destroyed[RegisterImpl::number_of_registers];
2710 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2711 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
2712 reg_destroyed[r] = false;
2713 }
2714 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
2715 freg_destroyed[f] = false;
2716 }
2718 #endif /* ASSERT */
2720 VMRegPair zero;
2721 const Register g0 = G0; // without this we get a compiler warning (why??)
2722 zero.set2(g0->as_VMReg());
2724 int c_arg, j_arg;
2726 Register conversion_off = noreg;
2728 for (j_arg = first_arg_to_pass, c_arg = 0 ;
2729 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2731 VMRegPair src = in_regs[j_arg];
2732 VMRegPair dst = out_regs[c_arg];
2734 #ifdef ASSERT
2735 if (src.first()->is_Register()) {
2736 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
2737 } else if (src.first()->is_FloatRegister()) {
2738 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
2739 FloatRegisterImpl::S)], "ack!");
2740 }
2741 if (dst.first()->is_Register()) {
2742 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
2743 } else if (dst.first()->is_FloatRegister()) {
2744 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
2745 FloatRegisterImpl::S)] = true;
2746 }
2747 #endif /* ASSERT */
2749 switch (in_sig_bt[j_arg]) {
2750 case T_ARRAY:
2751 case T_OBJECT:
2752 {
2753 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
2754 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2755 // need to unbox a one-slot value
2756 Register in_reg = L0;
2757 Register tmp = L2;
2758 if ( src.first()->is_reg() ) {
2759 in_reg = src.first()->as_Register();
2760 } else {
2761 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
2762 "must be");
2763 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
2764 }
2765 // If the final destination is an acceptable register
2766 if ( dst.first()->is_reg() ) {
2767 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
2768 tmp = dst.first()->as_Register();
2769 }
2770 }
2772 Label skipUnbox;
2773 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
2774 __ mov(G0, tmp->successor());
2775 }
2776 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
2777 __ delayed()->mov(G0, tmp);
2779 BasicType bt = out_sig_bt[c_arg];
2780 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2781 switch (bt) {
2782 case T_BYTE:
2783 __ ldub(in_reg, box_offset, tmp); break;
2784 case T_SHORT:
2785 __ lduh(in_reg, box_offset, tmp); break;
2786 case T_INT:
2787 __ ld(in_reg, box_offset, tmp); break;
2788 case T_LONG:
2789 __ ld_long(in_reg, box_offset, tmp); break;
2790 default: ShouldNotReachHere();
2791 }
2793 __ bind(skipUnbox);
2794 // If tmp wasn't final destination copy to final destination
2795 if (tmp == L2) {
2796 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
2797 if (out_sig_bt[c_arg] == T_LONG) {
2798 long_move(masm, tmp_as_VM, dst);
2799 } else {
2800 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
2801 }
2802 }
2803 if (out_sig_bt[c_arg] == T_LONG) {
2804 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2805 ++c_arg; // move over the T_VOID to keep the loop indices in sync
2806 }
2807 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
2808 Register s =
2809 src.first()->is_reg() ? src.first()->as_Register() : L2;
2810 Register d =
2811 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2813 // We store the oop now so that the conversion pass can reach it
2814 // while in the inner frame. This will be the only store if
2815 // the oop is NULL.
2816 if (s != L2) {
2817 // src is register
2818 if (d != L2) {
2819 // dst is register
2820 __ mov(s, d);
2821 } else {
2822 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2823 STACK_BIAS), "must be");
2824 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
2825 }
2826 } else {
2827 // src not a register
2828 assert(Assembler::is_simm13(reg2offset(src.first()) +
2829 STACK_BIAS), "must be");
2830 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
2831 if (d == L2) {
2832 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2833 STACK_BIAS), "must be");
2834 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
2835 }
2836 }
2837 } else if (out_sig_bt[c_arg] != T_VOID) {
2838 // Convert the arg to NULL
2839 if (dst.first()->is_reg()) {
2840 __ mov(G0, dst.first()->as_Register());
2841 } else {
2842 assert(Assembler::is_simm13(reg2offset(dst.first()) +
2843 STACK_BIAS), "must be");
2844 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
2845 }
2846 }
2847 }
2848 break;
2849 case T_VOID:
2850 break;
2852 case T_FLOAT:
2853 if (src.first()->is_stack()) {
2854 // Stack to stack/reg is simple
2855 move32_64(masm, src, dst);
2856 } else {
2857 if (dst.first()->is_reg()) {
2858 // freg -> reg
2859 int off =
2860 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2861 Register d = dst.first()->as_Register();
2862 if (Assembler::is_simm13(off)) {
2863 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2864 SP, off);
2865 __ ld(SP, off, d);
2866 } else {
2867 if (conversion_off == noreg) {
2868 __ set(off, L6);
2869 conversion_off = L6;
2870 }
2871 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2872 SP, conversion_off);
2873 __ ld(SP, conversion_off , d);
2874 }
2875 } else {
2876 // freg -> mem
2877 int off = STACK_BIAS + reg2offset(dst.first());
2878 if (Assembler::is_simm13(off)) {
2879 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2880 SP, off);
2881 } else {
2882 if (conversion_off == noreg) {
2883 __ set(off, L6);
2884 conversion_off = L6;
2885 }
2886 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
2887 SP, conversion_off);
2888 }
2889 }
2890 }
2891 break;
2893 case T_DOUBLE:
2894 assert( j_arg + 1 < total_args_passed &&
2895 in_sig_bt[j_arg + 1] == T_VOID &&
2896 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2897 if (src.first()->is_stack()) {
2898 // Stack to stack/reg is simple
2899 long_move(masm, src, dst);
2900 } else {
2901 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
2903 // Destination could be an odd reg on 32bit in which case
2904 // we can't load direct to the destination.
2906 if (!d->is_even() && wordSize == 4) {
2907 d = L2;
2908 }
2909 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2910 if (Assembler::is_simm13(off)) {
2911 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2912 SP, off);
2913 __ ld_long(SP, off, d);
2914 } else {
2915 if (conversion_off == noreg) {
2916 __ set(off, L6);
2917 conversion_off = L6;
2918 }
2919 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
2920 SP, conversion_off);
2921 __ ld_long(SP, conversion_off, d);
2922 }
2923 if (d == L2) {
2924 long_move(masm, reg64_to_VMRegPair(L2), dst);
2925 }
2926 }
2927 break;
2929 case T_LONG :
2930 // 32bit can't do a split move of something like g1 -> O0, O1
2931 // so use a memory temp
2932 if (src.is_single_phys_reg() && wordSize == 4) {
2933 Register tmp = L2;
2934 if (dst.first()->is_reg() &&
2935 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
2936 tmp = dst.first()->as_Register();
2937 }
2939 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
2940 if (Assembler::is_simm13(off)) {
2941 __ stx(src.first()->as_Register(), SP, off);
2942 __ ld_long(SP, off, tmp);
2943 } else {
2944 if (conversion_off == noreg) {
2945 __ set(off, L6);
2946 conversion_off = L6;
2947 }
2948 __ stx(src.first()->as_Register(), SP, conversion_off);
2949 __ ld_long(SP, conversion_off, tmp);
2950 }
2952 if (tmp == L2) {
2953 long_move(masm, reg64_to_VMRegPair(L2), dst);
2954 }
2955 } else {
2956 long_move(masm, src, dst);
2957 }
2958 break;
2960 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2962 default:
2963 move32_64(masm, src, dst);
2964 }
2965 }
2968 // If we have any strings we must store any register based arg to the stack
2969 // This includes any still live xmm registers too.
2971 if (total_strings > 0 ) {
2973 // protect all the arg registers
2974 __ save_frame(0);
2975 __ mov(G2_thread, L7_thread_cache);
2976 const Register L2_string_off = L2;
2978 // Get first string offset
2979 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
2981 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
2982 if (out_sig_bt[c_arg] == T_ADDRESS) {
2984 VMRegPair dst = out_regs[c_arg];
2985 const Register d = dst.first()->is_reg() ?
2986 dst.first()->as_Register()->after_save() : noreg;
2988 // It's a string; the oop was already copied to the out arg
2989 // position
2990 if (d != noreg) {
2991 __ mov(d, O0);
2992 } else {
2993 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
2994 "must be");
2995 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
2996 }
2997 Label skip;
2999 __ br_null(O0, false, Assembler::pn, skip);
3000 __ delayed()->add(FP, L2_string_off, O1);
3002 if (d != noreg) {
3003 __ mov(O1, d);
3004 } else {
3005 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
3006 "must be");
3007 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
3008 }
3010 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
3011 relocInfo::runtime_call_type);
3012 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
3014 __ bind(skip);
3016 }
3018 }
3019 __ mov(L7_thread_cache, G2_thread);
3020 __ restore();
3022 }
3025 // Ok now we are done. Need to place the nop that dtrace wants in order to
3026 // patch in the trap
3028 int patch_offset = ((intptr_t)__ pc()) - start;
3030 __ nop();
3033 // Return
3035 __ ret();
3036 __ delayed()->restore();
3038 __ flush();
3040 nmethod *nm = nmethod::new_dtrace_nmethod(
3041 method, masm->code(), vep_offset, patch_offset, frame_complete,
3042 stack_slots / VMRegImpl::slots_per_word);
3043 return nm;
3045 }
3047 #endif // HAVE_DTRACE_H
3049 // this function returns the adjustment size (in number of words) to a c2i adapter
3050 // activation for use during deoptimization
3051 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
3052 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
3053 }
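// Worked example (illustrative): a callee with 2 parameters and 5 locals
// needs (5 - 2) * Interpreter::stackElementWords extra words when its
// interpreter frame is rebuilt during deoptimization.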
3055 // "Top of Stack" slots that may be unused by the calling convention but must
3056 // otherwise be preserved.
3057 // On Intel these are not necessary and the value can be zero.
3058 // On Sparc this describes the words reserved for storing a register window
3059 // when an interrupt occurs.
3060 uint SharedRuntime::out_preserve_stack_slots() {
3061 //return frame::register_save_words * VMRegImpl::slots_per_word;
3062 return 0;
3063 }
3065 //------------------------------generate_deopt_blob----------------------------
3066 // Ought to generate an ideal graph & compile, but here's some hand-written
3067 // assembly instead.
3068 void SharedRuntime::generate_deopt_blob() {
3069 // allocate space for the code
3070 ResourceMark rm;
3071 // setup code generation tools
3072 //CodeBuffer buffer ("deopt_blob", 4000, 2048);
3073 CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
3074 MacroAssembler* masm = new MacroAssembler( & buffer);
3075 int frame_size_in_words;
3076 OopMap* map = NULL;
3077 // Account for the extra args we place on the stack
3078 // by the time we call fetch_unroll_info
3079 const int additional_words = 2; // deopt kind, thread
3081 OopMapSet *oop_maps = new OopMapSet();
3083 address start = __ pc();
3084 Label cont;
3085 // we use S3 for DeOpt reason register
3086 Register reason = S3;
3087 // use S6 for thread register
3088 Register thread = TREG;
3089 // use S7 for fetch_unroll_info returned UnrollBlock
3090 Register unroll = S7;
3091 // Prolog for non exception case!
3092 // Correct the return address we were given.
3093 //FIXME, return address is on the tos or Ra?
3094 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
3095 // Save everything in sight.
3096 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3097 // Normal deoptimization
3098 __ move(reason, Deoptimization::Unpack_deopt);
3099 __ b(cont);
3100 __ delayed()->nop();
3102 int reexecute_offset = __ pc() - start;
3104 // Reexecute case
3105 // the return address is the pc that describes what bci to re-execute at
3107 // No need to update map as each call to save_live_registers will produce identical oopmap
3108 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3109 __ move(reason, Deoptimization::Unpack_reexecute);
3110 __ b(cont);
3111 __ delayed()->nop();
3113 int exception_offset = __ pc() - start;
3114 // Prolog for exception case
3116 // all registers are dead at this entry point, except for V0 and
3117 // V1 which contain the exception oop and exception pc
3118 // respectively. Set them in TLS and fall thru to the
3119 // unpack_with_exception_in_tls entry point.
3121 __ get_thread(thread);
3122 __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3123 __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3124 int exception_in_tls_offset = __ pc() - start;
3125 // new implementation because exception oop is now passed in JavaThread
3127 // Prolog for exception case
3128 // All registers must be preserved because they might be used by LinearScan
3129 // Exception oop and throwing PC are passed in JavaThread
3130 // tos: stack at point of call to method that threw the exception (i.e. only
3131 // args are on the stack, no return address)
3133 // Return address will be patched later with the throwing pc. The correct value is not
3134 // available now because loading it from memory would destroy registers.
3135 // Save everything in sight.
3136 // No need to update map as each call to save_live_registers will produce identical oopmap
3137 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
3138 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3140 // Now it is safe to overwrite any register
3141 // store the correct deoptimization type
3142 __ move(reason, Deoptimization::Unpack_exception);
3143 // load throwing pc from JavaThread and patch it as the return address
3144 // of the current frame. Then clear the field in JavaThread
3145 __ get_thread(thread);
3146 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3147 __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
3148 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3151 #ifdef ASSERT
3152 // verify that there is really an exception oop in JavaThread
3153 __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
3154 __ verify_oop(AT);
3155 // verify that there is no pending exception
3156 Label no_pending_exception;
3157 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3158 __ beq(AT, R0, no_pending_exception);
3159 __ delayed()->nop();
3160 __ stop("must not have pending exception here");
3161 __ bind(no_pending_exception);
3162 #endif
3163 __ bind(cont);
3164 // Compiled code leaves the floating point stack dirty, empty it.
3165 __ empty_FPU_stack();
3168 // Call C code. Need thread and this frame, but NOT official VM entry
3169 // crud. We cannot block on this call, no GC can happen.
3170 #ifndef OPT_THREAD
3171 __ get_thread(thread);
3172 #endif
3174 __ move(A0, thread);
3175 __ addi(SP, SP, -additional_words * wordSize);
3177 __ set_last_Java_frame(NOREG, NOREG, NULL);
3179 // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on
3180 // this call, no GC can happen. Call should capture return values.
3182 __ relocate(relocInfo::internal_pc_type);
3183 {
3184 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
3185 __ patchable_set48(AT, save_pc);
3186 }
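// Note (assumption): the "+ 28" presumably makes save_pc equal to the
// address of the instruction following the call's delay slot, so the
// recorded last_Java_pc matches the return address the stack walker will
// see. The constant is tied to the exact instruction sequence emitted
// between here and the call; changing that sequence would break it.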
3187 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3189 __ call((address)Deoptimization::fetch_unroll_info);
3191 __ delayed()->nop();
3192 oop_maps->add_gc_map(__ pc() - start, map);
3193 __ addiu(SP, SP, additional_words * wordSize);
3194 __ get_thread(thread);
3195 __ reset_last_Java_frame(false, true);
3197 // Load UnrollBlock into S7
3198 __ move(unroll, V0);
3201 // Move the unpack kind to a safe place in the UnrollBlock because
3202 // we are very short of registers
3204 Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
3205 __ sw(reason, unpack_kind);
3206 // save the unpack_kind value
3207 // Retrieve the possible live values (return values)
3208 // All callee save registers representing jvm state
3209 // are now in the vframeArray.
3211 Label noException;
3212 __ move(AT, Deoptimization::Unpack_exception);
3213 __ bne(AT, reason, noException); // Was exception pending?
3214 __ delayed()->nop();
3215 __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3216 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3217 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3218 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
3220 __ verify_oop(V0);
3222 // Overwrite the result registers with the exception results.
3223 __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize);
3224 __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
3226 __ bind(noException);
3229 // Stack is back to only having register save data on the stack.
3230 // Now restore the result registers. Everything else is either dead or captured
3231 // in the vframeArray.
3233 RegisterSaver::restore_result_registers(masm);
3234 // All of the register save area has been popped off the stack. Only the
3235 // return address remains.
3236 // Pop all the frames we must move/replace.
3237 // Frame picture (youngest to oldest)
3238 // 1: self-frame (no frame link)
3239 // 2: deopting frame (no frame link)
3240 // 3: caller of deopting frame (could be compiled/interpreted).
3241 //
3242 // Note: by leaving the return address of self-frame on the stack
3243 // and using the size of frame 2 to adjust the stack
3244 // when we are done the return to frame 3 will still be on the stack.
3246 // register for the sender's sp
3247 Register sender_sp = Rsender;
3248 // register for frame pcs
3249 Register pcs = T0;
3250 // register for frame sizes
3251 Register sizes = T1;
3252 // register for frame count
3253 Register count = T3;
3255 // Pop deoptimized frame
3256 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
3257 __ add(SP, SP, AT);
3258 // sp should be pointing at the return address to the caller (3)
3260 // Load array of frame pcs into pcs
3261 __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
3262 __ addi(SP, SP, wordSize); // trash the old pc
3263 // Load array of frame sizes into sizes (T1)
3264 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
3268 // Load count of frames into count (T3)
3269 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
3270 // Pick up the initial fp we should save
3271 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
3272 // Now adjust the caller's stack to make up for the extra locals
3273 // but record the original sp so that we can save it in the skeletal interpreter
3274 // frame and the stack walking of interpreter_sender will get the unextended sp
3275 // value and not the "real" sp value.
3276 __ move(sender_sp, SP);
3277 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
3278 __ sub(SP, SP, AT);
3280 // Push interpreter frames in a loop
3281 /*
3282 * Historical disassembly of an earlier, buggy version of this loop; the "error" annotations mark fixes already applied below: lw -> ld for the 64-bit frame sizes, and pcs must advance by 8 (wordSize), not 4.
3283 Loop:
3284 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld
3285 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i]
3286 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16
3287 0x000000555bd82d24: daddi sp, sp, 0xfffffff0
3288 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp
3289 0x000000555bd82d2c: sd at, 0x8(sp) ; push at
3290 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp
3291 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2
3292 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3293 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3294 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP);
3295 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
3296 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4
3297 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
3298 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8
3299 */
3301 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc();
3302 Label loop;
3303 __ bind(loop);
3304 __ ld(T2, sizes, 0); // Load frame size
3305 __ ld_ptr(AT, pcs, 0); // save return address
3306 __ addi(T2, T2, -2 * wordSize); // we'll push pc and fp by hand
3307 __ push2(AT, FP);
3308 __ move(FP, SP);
3309 __ sub(SP, SP, T2); // Prolog!
3310 // This value is corrected by layout_activation_impl
3311 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3312 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
3313 __ move(sender_sp, SP); // pass to next frame
3314 __ addi(count, count, -1); // decrement counter
3315 __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes)
3316 __ bne(count, R0, loop);
3317 __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
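// Note: the pcs bump sits in the branch delay slot, so it executes on every
// iteration (including the last); after the loop, pcs therefore points at
// frame_pcs[number_of_frames], the self-frame's return address loaded next.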
3318 __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
3319 // Re-push self-frame
3320 __ push2(AT, FP);
3321 __ move(FP, SP);
3322 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3323 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3324 __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
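// Note: the two words already pushed (pc and fp) and the additional_words of
// outgoing-argument space are subtracted from frame_size_in_words here, so
// the re-pushed self-frame lines up with the RegisterSaver layout; that is
// why the v0/v1/fpResult offsets below can be reused against SP.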
3326 // Restore frame locals after moving the frame
3327 __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
3328 __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
3329 __ sdc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize); // store the float/double result
3330 __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
3333 // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
3334 // this call, no GC can happen.
3335 __ move(A1, reason); // exec_mode
3336 __ get_thread(thread);
3337 __ move(A0, thread); // thread
3338 __ addi(SP, SP, (-additional_words) * wordSize);
3340 // set last_Java_sp, last_Java_fp
3341 __ set_last_Java_frame(NOREG, FP, NULL);
3343 __ move(AT, -(StackAlignmentInBytes));
3344 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
3346 __ relocate(relocInfo::internal_pc_type);
3347 {
3348 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
3349 __ patchable_set48(AT, save_pc);
3350 }
3351 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3353 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
3354 __ delayed()->nop();
3355 // Revert SP alignment after call since we're going to do some SP relative addressing below
3356 __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
3357 // Set an oopmap for the call site
3358 oop_maps->add_gc_map(__ offset(), new OopMap(frame_size_in_words, 0));
3360 __ push(V0);
3362 __ get_thread(thread);
3363 __ reset_last_Java_frame(true, true);
3365 // Collect return values
3366 __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize);
3367 __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize);
3368 __ ldc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize); // reload the float/double result
3369 __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
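// Note: the "+ additional_words + 1" accounts for the outgoing-argument
// words reserved before the call and for the extra word pushed by
// __ push(V0) above, both of which sit below the register save area at
// this point.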
3370 //FIXME,
3371 // Clear floating point stack before returning to interpreter
3372 __ empty_FPU_stack();
3373 // FIXME: we should think about float and double results.
3374 // Push a float or double return value if necessary.
3375 __ leave();
3377 // Jump to interpreter
3378 __ jr(RA);
3379 __ delayed()->nop();
3381 masm->flush();
3382 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3383 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3384 }
3386 #ifdef COMPILER2
3388 //------------------------------generate_uncommon_trap_blob--------------------
3389 // Ought to generate an ideal graph & compile, but here's some hand-written
3390 // MIPS assembly instead.
3391 void SharedRuntime::generate_uncommon_trap_blob() {
3392 // allocate space for the code
3393 ResourceMark rm;
3394 // setup code generation tools
3395 CodeBuffer buffer("uncommon_trap_blob", 512*80, 512*40);
3396 MacroAssembler* masm = new MacroAssembler(&buffer);
3398 enum frame_layout {
3399 s0_off, s0_off2,
3400 s1_off, s1_off2,
3401 s2_off, s2_off2,
3402 s3_off, s3_off2,
3403 s4_off, s4_off2,
3404 s5_off, s5_off2,
3405 s6_off, s6_off2,
3406 s7_off, s7_off2,
3407 fp_off, fp_off2,
3408 return_off, return_off2, // slot for return address sp + 9
3409 framesize
3410 };
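// Note: each saved 64-bit register occupies two 32-bit stack slots (the
// _off/_off2 pairs), and framesize counts those 32-bit slots; hence the
// BytesPerInt scaling below and the framesize % 4 == 0 assert for 16-byte
// stack alignment.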
3411 assert(framesize % 4 == 0, "sp not 16-byte aligned");
3413 address start = __ pc();
3415 // Push self-frame.
3416 __ daddiu(SP, SP, -framesize * BytesPerInt);
3418 __ sd(RA, SP, return_off * BytesPerInt);
3419 __ sd(FP, SP, fp_off * BytesPerInt);
3421 // Save callee-saved registers.
3423 __ sd(S0, SP, s0_off * BytesPerInt);
3424 __ sd(S1, SP, s1_off * BytesPerInt);
3425 __ sd(S2, SP, s2_off * BytesPerInt);
3426 __ sd(S3, SP, s3_off * BytesPerInt);
3427 __ sd(S4, SP, s4_off * BytesPerInt);
3428 __ sd(S5, SP, s5_off * BytesPerInt);
3429 __ sd(S6, SP, s6_off * BytesPerInt);
3430 __ sd(S7, SP, s7_off * BytesPerInt);
3432 __ daddi(FP, SP, fp_off * BytesPerInt);
3434 // Clear the floating point exception stack
3435 __ empty_FPU_stack();
3437 Register thread = TREG;
3439 #ifndef OPT_THREAD
3440 __ get_thread(thread);
3441 #endif
3442 // set last_Java_sp
3443 __ set_last_Java_frame(NOREG, FP, NULL);
3444 __ relocate(relocInfo::internal_pc_type);
3445 {
3446 long save_pc = (long)__ pc() + 52;
3447 __ patchable_set48(AT, (long)save_pc);
3448 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3449 }
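// Note (assumption): the "+ 52" is presumably the byte distance from this
// point to the instruction after the runtime call below, i.e. save_pc is
// hand-computed to match the return address of patchable_call; it must be
// kept in sync with the exact instructions emitted in between.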
3450 // Call C code. Need thread but NOT official VM entry
3451 // crud. We cannot block on this call, no GC can happen. Call should
3452 // capture callee-saved registers as well as return values.
3453 __ move(A0, thread);
3454 // argument already in T0
3455 __ move(A1, T0);
3456 __ patchable_call((address)Deoptimization::uncommon_trap);
3458 // Set an oopmap for the call site
3459 OopMapSet *oop_maps = new OopMapSet();
3460 OopMap* map = new OopMap( framesize, 0 );
3462 map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() );
3463 map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() );
3464 map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() );
3465 map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() );
3466 map->set_callee_saved( VMRegImpl::stack2reg(s4_off ), S4->as_VMReg() );
3467 map->set_callee_saved( VMRegImpl::stack2reg(s5_off ), S5->as_VMReg() );
3468 map->set_callee_saved( VMRegImpl::stack2reg(s6_off ), S6->as_VMReg() );
3469 map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() );
3472 oop_maps->add_gc_map(__ offset(), map);
3474 #ifndef OPT_THREAD
3475 __ get_thread(thread);
3476 #endif
3477 __ reset_last_Java_frame(false, false);
3479 // Load UnrollBlock into S7
3480 Register unroll = S7;
3481 __ move(unroll, V0);
3483 // Pop all the frames we must move/replace.
3484 //
3485 // Frame picture (youngest to oldest)
3486 // 1: self-frame (no frame link)
3487 // 2: deopting frame (no frame link)
3488 // 3: possible-i2c-adapter-frame
3489 // 4: caller of deopting frame (could be compiled/interpreted; if interpreted
3490 // we will create a c2i here)
3492 // Pop self-frame. We have no frame, and must rely only on the unroll block register (S7) and SP.
3493 __ daddiu(SP, SP, framesize * BytesPerInt);
3495 // Pop deoptimized frame
3496 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
3497 __ dadd(SP, SP, AT);
3499 // register for frame pcs
3500 Register pcs = T8;
3501 // register for frame sizes
3502 Register sizes = T9;
3503 // register for frame count
3504 Register count = T3;
3505 // register for the sender's sp
3506 Register sender_sp = T1;
3508 // sp should be pointing at the return address to the caller (4)
3509 // Load array of frame pcs into pcs (T8)
3510 __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
3512 // Load array of frame sizes into sizes (T9)
3513 __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
3514 __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
3516 // Pick up the initial fp we should save
3517 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
3518 // Now adjust the caller's stack to make up for the extra locals
3519 // but record the original sp so that we can save it in the skeletal interpreter
3520 // frame and the stack walking of interpreter_sender will get the unextended sp
3521 // value and not the "real" sp value.
3523 __ move(sender_sp, SP);
3524 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
3525 __ dsub(SP, SP, AT);
3526 // Push interpreter frames in a loop
3527 Label loop;
3528 __ bind(loop);
3529 __ ld(T2, sizes, 0); // Load frame size
3530 __ ld(AT, pcs, 0); // save return address
3531 __ daddi(T2, T2, -2 * wordSize); // we'll push pc and fp by hand
3532 __ push2(AT, FP);
3533 __ move(FP, SP);
3534 __ dsub(SP, SP, T2); // Prolog!
3535 // This value is corrected by layout_activation_impl
3536 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3537 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
3538 __ move(sender_sp, SP); // pass to next frame
3539 __ daddi(count, count, -1); // decrement counter
3540 __ daddi(sizes, sizes, wordSize); // Bump array pointer (sizes)
3541 __ addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
3542 __ bne(count, R0, loop);
3543 __ delayed()->nop();
3545 __ ld(RA, pcs, 0);
3547 // Re-push self-frame
3548 __ daddi(SP, SP, -2 * wordSize); // make room for old FP and return address
3549 __ sd(FP, SP, 0 * wordSize); // save old FP
3550 __ sd(RA, SP, 1 * wordSize); // save final return address
3551 __ move(FP, SP);
3552 __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
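// Note: framesize is in 32-bit slots, so framesize / 2 is the frame size in
// words; the "- 2" skips the two words (FP and RA) just pushed above.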
3554 // set last_Java_sp, last_Java_fp
3555 __ set_last_Java_frame(NOREG, FP, NULL);
3557 __ move(AT, -(StackAlignmentInBytes));
3558 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
3560 __ relocate(relocInfo::internal_pc_type);
3561 {
3562 long save_pc = (long)__ pc() + 52;
3563 __ patchable_set48(AT, (long)save_pc);
3564 }
3565 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3567 // Call C code. Need thread but NOT official VM entry
3568 // crud. We cannot block on this call, no GC can happen. Call should
3569 // restore return values to their stack-slots with the new SP.
3570 __ move(A0, thread);
3571 __ move(A1, Deoptimization::Unpack_uncommon_trap);
3572 __ patchable_call((address)Deoptimization::unpack_frames);
3573 // Set an oopmap for the call site
3575 oop_maps->add_gc_map(__ offset(), new OopMap(framesize, 0));
3577 __ reset_last_Java_frame(true, true);
3579 // Pop self-frame.
3580 __ leave(); // Epilog!
3582 // Jump to interpreter
3583 __ jr(RA);
3584 __ delayed()->nop();
3585 // -------------
3586 // make sure all code is generated
3587 masm->flush();
3589 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
3590 }
3592 #endif // COMPILER2
3594 //------------------------------generate_handler_blob-------------------
3595 //
3596 // Generate a special Compile2Runtime blob that saves all registers, and sets
3597 // up an OopMap and calls safepoint code to stop the compiled code for
3598 // a safepoint.
3599 //
3600 // This blob is jumped to (via a breakpoint and the signal handler) from a
3601 // safepoint in compiled code.
3603 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {
3605 // Account for thread arg in our frame
3606 const int additional_words = 0;
3607 int frame_size_in_words;
3609 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3611 ResourceMark rm;
3612 OopMapSet *oop_maps = new OopMapSet();
3613 OopMap* map;
3615 // allocate space for the code
3616 // setup code generation tools
3617 CodeBuffer buffer("handler_blob", 2048, 512);
3618 MacroAssembler* masm = new MacroAssembler(&buffer);
3620 const Register thread = TREG;
3621 address start = __ pc();
3622 address call_pc = NULL;
3623 bool cause_return = (pool_type == POLL_AT_RETURN);
3624 bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
3626 // If cause_return is true we are at a poll_return, and RA holds
3627 // the return address to the caller of the nmethod that hit the
3628 // safepoint. We can leave this return address in RA and
3629 // effectively complete the return and safepoint in the caller.
3630 // Otherwise we load the saved exception pc into RA.
3631 __ push(thread);
3632 #ifndef OPT_THREAD
3633 __ get_thread(thread);
3634 #endif
3636 if (!cause_return) {
3637 __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
3638 }
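// Note: for a poll inside the method (not at a return), the signal handler
// has stashed the pc of the faulting instruction in saved_exception_pc;
// loading it into RA here lets save_live_registers record it as the frame's
// return address, so the oopmap lookup resolves to the right safepoint.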
3640 __ pop(thread);
3641 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);
3643 #ifndef OPT_THREAD
3644 __ get_thread(thread);
3645 #endif
3646 // The following is basically a call_VM. However, we need the precise
3647 // address of the call in order to generate an oopmap. Hence, we do all the
3648 // work ourselves.
3650 __ move(A0, thread);
3651 __ set_last_Java_frame(NOREG, NOREG, NULL);
3654 // do the call
3657 __ call(call_ptr);
3658 __ delayed()->nop();
3660 // Set an oopmap for the call site. This oopmap will map all
3661 // oop-registers and debug-info registers as callee-saved. This
3662 // will allow deoptimization at this safepoint to find all possible
3663 // debug-info recordings, as well as let GC find all oops.
3664 oop_maps->add_gc_map(__ offset(), map);
3666 Label noException;
3668 // Clear last_Java_sp again
3669 __ reset_last_Java_frame(false, false);
3671 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3672 __ beq(AT, R0, noException);
3673 __ delayed()->nop();
3675 // Exception pending
3677 RegisterSaver::restore_live_registers(masm, save_vectors);
3678 // forward_exception_entry needs the return address on the stack
3679 __ push(RA);
3680 __ patchable_jump((address)StubRoutines::forward_exception_entry());
3682 // No exception case
3683 __ bind(noException);
3684 // Normal exit, register restoring and exit
3685 RegisterSaver::restore_live_registers(masm, save_vectors);
3686 __ jr(RA);
3687 __ delayed()->nop();
3689 masm->flush();
3691 // Fill-out other meta info
3692 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3693 }
3695 //
3696 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
3697 //
3698 // Generate a stub that calls into vm to find out the proper destination
3699 // of a java call. All the argument registers are live at this point
3700 // but since this is generic code we don't know what they are and the caller
3701 // must do any gc of the args.
3702 //
3703 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3704 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3706 // allocate space for the code
3707 ResourceMark rm;
3710 // FIXME (aoqi): code_size; the buffer is enlarged from the original 1000/512.
3711 CodeBuffer buffer(name, 20000, 2048);
3712 MacroAssembler* masm = new MacroAssembler(&buffer);
3714 int frame_size_words;
3715 // We pass the thread in A0.
3717 OopMapSet *oop_maps = new OopMapSet();
3718 OopMap* map = NULL;
3720 int start = __ offset();
3721 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
3724 int frame_complete = __ offset();
3726 const Register thread = T8;
3727 __ get_thread(thread);
3729 __ move(A0, thread);
3730 __ set_last_Java_frame(noreg, FP, NULL);
3731 // align the stack before the native call
3732 __ move(AT, -(StackAlignmentInBytes));
3733 __ andr(SP, SP, AT);
3734 __ relocate(relocInfo::internal_pc_type);
3735 {
3736 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord;
3737 __ patchable_set48(AT, save_pc);
3738 }
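// Note (assumption): instruction_size + 24 + 1 * BytesPerInstWord is
// presumably the distance from here to the instruction after the call's
// delay slot, so the stored last_Java_pc matches the call's return address;
// like the other hand-computed pc offsets in this file, it depends on the
// exact code emitted below.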
3739 __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
3741 __ call(destination);
3742 __ delayed()->nop();
3744 // Set an oopmap for the call site.
3745 // We need this not only for callee-saved registers, but also for volatile
3746 // registers that the compiler might be keeping live across a safepoint.
3747 oop_maps->add_gc_map( __ offset() - start, map);
3748 // V0 contains the address we are going to jump to assuming no exception got installed
3749 __ get_thread(thread);
3750 __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
3751 // clear last_Java_sp
3752 __ reset_last_Java_frame(true, true);
3753 // check for pending exceptions
3754 Label pending;
3755 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3756 __ bne(AT, R0, pending);
3757 __ delayed()->nop();
3758 // get the returned Method*
3759 // FIXME: does MIPS need this?
3760 __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
3761 __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
3762 __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
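// Note: the resolve call leaves the callee's Method* in vm_result_2 and its
// entry address in V0; stashing both into the register save area means
// restore_live_registers reloads every argument register while Rmethod and
// V0 come back holding the resolved method and target entry point.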
3763 RegisterSaver::restore_live_registers(masm);
3765 // We are back to the original state on entry and ready to go to the callee method.
3766 __ jr(V0);
3767 __ delayed()->nop();
3768 // Pending exception after the safepoint
3770 __ bind(pending);
3772 RegisterSaver::restore_live_registers(masm);
3774 // exception pending => remove activation and forward to exception handler
3775 // forward_exception_entry needs the return address on the stack
3776 __ push(RA);
3777 __ get_thread(thread);
3778 __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
3779 __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
3780 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3781 __ delayed()->nop();
3782 // -------------
3783 // make sure all code is generated
3784 masm->flush();
3786 RuntimeStub* tmp = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3787 return tmp;
3788 }
3790 extern "C" int SpinPause() {return 0;}
3791 // extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
3792 // extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;