src/cpu/mips/vm/sharedRuntime_mips_64.cpp

changeset 1:2d8a650513c2, child 5:7e3d3484b275 (diff against 0:f90c822e73f8)
1 /*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/debugInfoRec.hpp"
30 #include "code/icBuffer.hpp"
31 #include "code/vtableStubs.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "oops/compiledICHolder.hpp"
34 #include "prims/jvmtiRedefineClassesTrace.hpp"
35 #include "runtime/sharedRuntime.hpp"
36 #include "runtime/vframeArray.hpp"
37 #include "vmreg_mips.inline.hpp"
38 #ifdef COMPILER1
39 #include "c1/c1_Runtime1.hpp"
40 #endif
41 #ifdef COMPILER2
42 #include "opto/runtime.hpp"
43 #endif
44
45 #define __ masm->
46 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
47
48 class RegisterSaver {
49 enum { FPU_regs_live = 32 };
50 // Capture info about frame layout
51 enum layout {
52 #define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
53 DEF_LAYOUT_OFFS(for_16_bytes_aligned)
54 DEF_LAYOUT_OFFS(fpr0)
55 DEF_LAYOUT_OFFS(fpr1)
56 DEF_LAYOUT_OFFS(fpr2)
57 DEF_LAYOUT_OFFS(fpr3)
58 DEF_LAYOUT_OFFS(fpr4)
59 DEF_LAYOUT_OFFS(fpr5)
60 DEF_LAYOUT_OFFS(fpr6)
61 DEF_LAYOUT_OFFS(fpr7)
62 DEF_LAYOUT_OFFS(fpr8)
63 DEF_LAYOUT_OFFS(fpr9)
64 DEF_LAYOUT_OFFS(fpr10)
65 DEF_LAYOUT_OFFS(fpr11)
66 DEF_LAYOUT_OFFS(fpr12)
67 DEF_LAYOUT_OFFS(fpr13)
68 DEF_LAYOUT_OFFS(fpr14)
69 DEF_LAYOUT_OFFS(fpr15)
70 DEF_LAYOUT_OFFS(fpr16)
71 DEF_LAYOUT_OFFS(fpr17)
72 DEF_LAYOUT_OFFS(fpr18)
73 DEF_LAYOUT_OFFS(fpr19)
74 DEF_LAYOUT_OFFS(fpr20)
75 DEF_LAYOUT_OFFS(fpr21)
76 DEF_LAYOUT_OFFS(fpr22)
77 DEF_LAYOUT_OFFS(fpr23)
78 DEF_LAYOUT_OFFS(fpr24)
79 DEF_LAYOUT_OFFS(fpr25)
80 DEF_LAYOUT_OFFS(fpr26)
81 DEF_LAYOUT_OFFS(fpr27)
82 DEF_LAYOUT_OFFS(fpr28)
83 DEF_LAYOUT_OFFS(fpr29)
84 DEF_LAYOUT_OFFS(fpr30)
85 DEF_LAYOUT_OFFS(fpr31)
86
87 DEF_LAYOUT_OFFS(v0)
88 DEF_LAYOUT_OFFS(v1)
89 DEF_LAYOUT_OFFS(a0)
90 DEF_LAYOUT_OFFS(a1)
91 DEF_LAYOUT_OFFS(a2)
92 DEF_LAYOUT_OFFS(a3)
93 DEF_LAYOUT_OFFS(a4)
94 DEF_LAYOUT_OFFS(a5)
95 DEF_LAYOUT_OFFS(a6)
96 DEF_LAYOUT_OFFS(a7)
97 DEF_LAYOUT_OFFS(t0)
98 DEF_LAYOUT_OFFS(t1)
99 DEF_LAYOUT_OFFS(t2)
100 DEF_LAYOUT_OFFS(t3)
101 DEF_LAYOUT_OFFS(s0)
102 DEF_LAYOUT_OFFS(s1)
103 DEF_LAYOUT_OFFS(s2)
104 DEF_LAYOUT_OFFS(s3)
105 DEF_LAYOUT_OFFS(s4)
106 DEF_LAYOUT_OFFS(s5)
107 DEF_LAYOUT_OFFS(s6)
108 DEF_LAYOUT_OFFS(s7)
109 DEF_LAYOUT_OFFS(t8)
110 DEF_LAYOUT_OFFS(t9)
111
112 DEF_LAYOUT_OFFS(gp)
113 DEF_LAYOUT_OFFS(fp)
114 DEF_LAYOUT_OFFS(return)
115 /*
116 fpr0_off, fpr1_off,
117 fpr2_off, fpr3_off,
118 fpr4_off, fpr5_off,
119 fpr6_off, fpr7_off,
120 fpr8_off, fpr9_off,
121 fpr10_off, fpr11_off,
122 fpr12_off, fpr13_off,
123 fpr14_off, fpr15_off,
124 fpr16_off, fpr17_off,
125 fpr18_off, fpr19_off,
126 fpr20_off, fpr21_off,
127 fpr22_off, fpr23_off,
128 fpr24_off, fpr25_off,
129 fpr26_off, fpr27_off,
130 fpr28_off, fpr29_off,
131 fpr30_off, fpr31_off,
132
133 v0_off, v1_off,
134 a0_off, a1_off,
135 a2_off, a3_off,
136 a4_off, a5_off,
137 a6_off, a7_off,
138 t0_off, t1_off, t2_off, t3_off,
139 s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off,
140 t8_off, t9_off,
141
142 gp_off, fp_off,
143 return_off,
144 */
145 reg_save_size
146 };
147
148 public:
149
150 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
151 static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
152 //FIXME, I have no idea which register to use
153 static int raOffset(void) { return return_off / 2; }
154 //Rmethod
155 static int methodOffset(void) { return s3_off / 2; }
156
157 static int v0Offset(void) { return v0_off / 2; }
158 static int v1Offset(void) { return v1_off / 2; }
159
160 static int fpResultOffset(void) { return fpr0_off / 2; }
161
162 // During deoptimization only the result registers need to be restored;
163 // all the other values have already been extracted.
164
165 static void restore_result_registers(MacroAssembler* masm);
166 };
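// Note on the layout enum in RegisterSaver (illustrative): DEF_LAYOUT_OFFS expands
// each register name into a pair of 4-byte (jint) slots, e.g.
//
//   DEF_LAYOUT_OFFS(fpr0)   =>   fpr0_off, fpr0H_off,
//
// so every saved 64-bit register occupies two consecutive slots, and fpr0_off == 2
// (the first two slots are the 16-byte alignment pad). A register's byte offset
// within the save area is <name>_off * jintSize, and the word offsets returned by
// raOffset()/v0Offset()/etc. are simply <name>_off / 2.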
167
168 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) {
169
170 /*
171 int frame_words = reg_save_size + additional_frame_words;
172 int frame_size_in_bytes = frame_words * wordSize;
173 *total_frame_words = frame_words;
174 */
175 // Always make the frame size 16-byte aligned
176 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
177 reg_save_size*BytesPerInt, 16);
178 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
179 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
180 // The caller will allocate additional_frame_words
181 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
182 // CodeBlob frame size is in words.
183 int frame_size_in_words = frame_size_in_bytes / wordSize;
184 *total_frame_words = frame_size_in_words;
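  // Worked example of the sizing above (assuming additional_frame_words == 0):
  // the layout enum yields reg_save_size == 120 jint slots, so
  //   frame_size_in_bytes = round_to(120 * 4, 16) = 480
  //   frame_size_in_slots = 480 / 4 = 120
  //   frame_size_in_words = 480 / 8 = 60
  // Any additional_frame_words requested by the caller are added (in bytes)
  // before the 16-byte rounding.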
185
186 // save registers, fpu state, and flags
187 // We assume the caller already has a return address slot on the stack.
188 // We push ebp twice in this sequence because we want the real ebp
189 // to be under the return address like a normal enter, and we want to use pushad.
190 // We push by hand instead of using push.
191
192 __ daddiu(SP, SP, - reg_save_size * jintSize);
193
194 __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize);
195 __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize);
196 __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize);
197 __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize);
198 __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize);
199 __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize);
200 __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize);
201 __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize);
202 __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize);
203 __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize);
204 __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize);
205 __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize);
206 __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize);
207 __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize);
208 __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize);
209 __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize);
210 __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize);
211 __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize);
212 __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize);
213 __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize);
214 __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize);
215 __ sd(T0, SP, t0_off * jintSize);
216 __ sd(T1, SP, t1_off * jintSize);
217 __ sd(T2, SP, t2_off * jintSize);
218 __ sd(T3, SP, t3_off * jintSize);
219 __ sd(S0, SP, s0_off * jintSize);
220 __ sd(S1, SP, s1_off * jintSize);
221 __ sd(S2, SP, s2_off * jintSize);
222 __ sd(S3, SP, s3_off * jintSize);
223 __ sd(S4, SP, s4_off * jintSize);
224 __ sd(S5, SP, s5_off * jintSize);
225 __ sd(S6, SP, s6_off * jintSize);
226 __ sd(S7, SP, s7_off * jintSize);
227
228 __ sd(T8, SP, t8_off * jintSize);
229 __ sd(T9, SP, t9_off * jintSize);
230
231 __ sd(GP, SP, gp_off * jintSize);
232 __ sd(FP, SP, fp_off * jintSize);
233 __ sd(RA, SP, return_off * jintSize);
234 __ daddi(FP, SP, fp_off * jintSize);
235
236 OopMapSet *oop_maps = new OopMapSet();
237 //OopMap* map = new OopMap( frame_words, 0 );
238 OopMap* map = new OopMap( frame_size_in_slots, 0 );
239
240
241 //#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
242 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
243 map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
244 map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
245 map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
246 map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
247 map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
248 map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
249 map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg());
250 map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg());
251 map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg());
252 map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg());
253 map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
254 map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
255 map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
256 map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
257 map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
258 map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
259 map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
260 map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
261 map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
262 map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
263 map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
264 map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
265 map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
266 map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
267 map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
268 map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
269 map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());
270
271 map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
272 map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg());
273 map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
274 map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg());
275 map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
276 map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg());
277 map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
278 map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg());
279 map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
280 map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg());
281 map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
282 map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg());
283 map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
284 map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg());
285 map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
286 map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg());
287 map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
288 map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg());
289 map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
290 map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg());
291 map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
292 map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg());
293 map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
294 map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg());
295 map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
296 map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg());
297 map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
298 map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg());
299 map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
300 map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg());
301 map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
302 map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg());
303
304 /*
305 if (true) {
306 map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next());
307 map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next());
308 map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next());
309 map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next());
310 map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next());
311 map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next());
312 map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next());
313 map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next());
314 map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next());
315 map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next());
316 map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next());
317 map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next());
318 map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next());
319 map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next());
320 map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next());
321 map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next());
322 map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next());
323 map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next());
324 map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next());
325 map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next());
326 map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next());
327 map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next());
328 map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next());
329 map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next());
330 map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next());
331 map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next());
332 map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next());
333
334 map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next());
335 map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next());
336 map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next());
337 map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next());
338 map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next());
339 map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next());
340 map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next());
341 map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next());
342 map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next());
343 map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next());
344 map->set_callee_saved(STACK_OFFSET( fpr20H_off), F20->as_VMReg()->next());
345 map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next());
346 map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next());
347 map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next());
348 map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next());
349 map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next());
350 }
351 */
352 #undef STACK_OFFSET
353 return map;
354 }
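// Example of the slot bookkeeping above (assuming additional_frame_words == 0, so
// additional_frame_slots == 0): V0 is stored at SP + v0_off * jintSize and its
// OopMap entry is VMRegImpl::stack2reg(v0_off), i.e. the stores and the map use
// the same 4-byte slot indices relative to the post-decrement SP, which is what
// keeps them consistent.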
355
356
357 // Pop the current frame and restore all the registers that we
358 // saved.
359 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
360 __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize);
361 __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize);
362 __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize);
363 __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize);
364 __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize);
365 __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize);
366 __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize);
367 __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize);
368 __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize);
369 __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize);
370 __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize);
371 __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize);
372 __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize);
373 __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize);
374 __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize);
375 __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize);
376
377 __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize);
378 __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize);
379 __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize);
380 __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize);
381 __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize);
382 __ ld(T0, SP, t0_off * jintSize);
383 __ ld(T1, SP, t1_off * jintSize);
384 __ ld(T2, SP, t2_off * jintSize);
385 __ ld(T3, SP, t3_off * jintSize);
386 __ ld(S0, SP, s0_off * jintSize);
387 __ ld(S1, SP, s1_off * jintSize);
388 __ ld(S2, SP, s2_off * jintSize);
389 __ ld(S3, SP, s3_off * jintSize);
390 __ ld(S4, SP, s4_off * jintSize);
391 __ ld(S5, SP, s5_off * jintSize);
392 __ ld(S6, SP, s6_off * jintSize);
393 __ ld(S7, SP, s7_off * jintSize);
394
395 __ ld(T8, SP, t8_off * jintSize);
396 __ ld(T9, SP, t9_off * jintSize);
397
398 __ ld(GP, SP, gp_off * jintSize);
399 __ ld(FP, SP, fp_off * jintSize);
400 __ ld(RA, SP, return_off * jintSize);
401
402 __ addiu(SP, SP, reg_save_size * jintSize);
403 }
404
405 // Pop the current frame and restore the registers that might be holding
406 // a result.
407 // FIXME: what if the result is a float?
408 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
409 // Just restore the result register. Only used by deoptimization. By
410 // now any callee save register that needs to be restored to a c2
411 // caller of the deoptee has been extracted into the vframeArray
412 // and will be stuffed into the c2i adapter we create for later
413 // restoration, so only result registers need to be restored here.
414 //
415 __ ld(V0, SP, v0_off * jintSize);
416 __ ld(V1, SP, v1_off * jintSize);
417 __ addiu(SP, SP, return_off * jintSize);
418 }
419
420 // Is the vector's size (in bytes) bigger than the size saved by default?
421 // 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
422 bool SharedRuntime::is_wide_vector(int size) {
423 return size > 16;
424 }
425
426 // The java_calling_convention describes stack locations as ideal slots on
427 // a frame with no abi restrictions. Since we must observe abi restrictions
428 // (like the placement of the register window) the slots must be biased by
429 // the following value.
430
431 static int reg2offset_in(VMReg r) {
432 // Account for saved ebp and return address
433 // This should really be in_preserve_stack_slots
434 return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size);
435 }
436
437 static int reg2offset_out(VMReg r) {
438 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
439 }
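// Example (64-bit build, so VMRegImpl::slots_per_word == 2 and
// VMRegImpl::stack_slot_size == 4): an incoming stack argument with
// reg2stack() == 0 maps to byte offset (0 + 2 * 2) * 4 == 16, i.e. it skips the
// two saved words (fp and return address) at the bottom of the caller's frame.
// reg2offset_out() applies the same idea to outgoing args, biased by whatever
// out_preserve_stack_slots() is defined to be for this port.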
440
441 // ---------------------------------------------------------------------------
442 // Read the array of BasicTypes from a signature, and compute where the
443 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
444 // quantities. Values less than SharedInfo::stack0 are registers, those above
445 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
446 // as framesizes are fixed.
447 // VMRegImpl::stack0 refers to the first slot, 0(sp),
448 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
449 // up to RegisterImpl::number_of_registers are the 32-bit
450 // integer registers.
451
452 // Pass oop/int args in registers T0, A0 - A7.
453 // Pass float/double args in registers F12 - F19.
454 // Arguments that do not fit in registers are passed on the stack
455 // (two 4-byte VMReg slots per argument).
456
457 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
458 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
459 // units regardless of build. Of course for i486 there is no 64 bit build
460
461
462 // ---------------------------------------------------------------------------
463 // The compiled Java calling convention.
464 // Pass oop/int args in registers T0, A0 - A7.
465 // Pass float/double args in registers F12 - F19.
466 // Arguments that do not fit in registers are passed on the stack
467 // (two 4-byte VMReg slots per argument).
468
469 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
470 VMRegPair *regs,
471 int total_args_passed,
472 int is_outgoing) {
473 //#define aoqi_test
474 #ifdef aoqi_test
475 tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed);
476 #endif
477
478 // Create the mapping between argument positions and
479 // registers.
480 //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
481 static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
482 T0, A0, A1, A2, A3, A4, A5, A6, A7
483 };
484 //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
485 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
486 F12, F13, F14, F15, F16, F17, F18, F19
487 };
488
489
490 uint args = 0;
491 uint stk_args = 0; // inc by 2 each time
492
493 for (int i = 0; i < total_args_passed; i++) {
494 switch (sig_bt[i]) {
495 case T_VOID:
496 // halves of T_LONG or T_DOUBLE
497 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
498 regs[i].set_bad();
499 break;
500 case T_BOOLEAN:
501 case T_CHAR:
502 case T_BYTE:
503 case T_SHORT:
504 case T_INT:
505 if (args < Argument::n_register_parameters) {
506 regs[i].set1(INT_ArgReg[args++]->as_VMReg());
507 } else {
508 regs[i].set1(VMRegImpl::stack2reg(stk_args));
509 stk_args += 2;
510 }
511 break;
512 case T_LONG:
513 assert(sig_bt[i + 1] == T_VOID, "expecting half");
514 // fall through
515 case T_OBJECT:
516 case T_ARRAY:
517 case T_ADDRESS:
518 if (args < Argument::n_register_parameters) {
519 regs[i].set2(INT_ArgReg[args++]->as_VMReg());
520 } else {
521 regs[i].set2(VMRegImpl::stack2reg(stk_args));
522 stk_args += 2;
523 }
524 break;
525 case T_FLOAT:
526 if (args < Argument::n_float_register_parameters) {
527 regs[i].set1(FP_ArgReg[args++]->as_VMReg());
528 } else {
529 regs[i].set1(VMRegImpl::stack2reg(stk_args));
530 stk_args += 2;
531 }
532 break;
533 case T_DOUBLE:
534 assert(sig_bt[i + 1] == T_VOID, "expecting half");
535 if (args < Argument::n_float_register_parameters) {
536 regs[i].set2(FP_ArgReg[args++]->as_VMReg());
537 } else {
538 regs[i].set2(VMRegImpl::stack2reg(stk_args));
539 stk_args += 2;
540 }
541 break;
542 default:
543 ShouldNotReachHere();
544 break;
545 }
546 #ifdef aoqi_test
547 tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args);
548 #endif
549 }
550
551 return round_to(stk_args, 2);
552 /*
553 // Starting stack position for args on stack
554 uint stack = 0;
555
556 // Pass first five oop/int args in registers T0, A0 - A3.
557 uint reg_arg0 = 9999;
558 uint reg_arg1 = 9999;
559 uint reg_arg2 = 9999;
560 uint reg_arg3 = 9999;
561 uint reg_arg4 = 9999;
562
563
564 // Pass doubles & longs & floats aligned on the stack. First count stack slots for doubles
565 int i;
566 for( i = 0; i < total_args_passed; i++) {
567 if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
568 stack += 2;
569 }
570 }
571 int dstack = 0; // Separate counter for placing doubles
572 for( i = 0; i < total_args_passed; i++) {
573 // From the type and the argument number (count) compute the location
574 switch( sig_bt[i] ) {
575 case T_SHORT:
576 case T_CHAR:
577 case T_BYTE:
578 case T_BOOLEAN:
579 case T_INT:
580 case T_ARRAY:
581 case T_OBJECT:
582 case T_ADDRESS:
583 if( reg_arg0 == 9999 ) {
584 reg_arg0 = i;
585 regs[i].set1(T0->as_VMReg());
586 } else if( reg_arg1 == 9999 ) {
587 reg_arg1 = i;
588 regs[i].set1(A0->as_VMReg());
589 } else if( reg_arg2 == 9999 ) {
590 reg_arg2 = i;
591 regs[i].set1(A1->as_VMReg());
592 }else if( reg_arg3 == 9999 ) {
593 reg_arg3 = i;
594 regs[i].set1(A2->as_VMReg());
595 }else if( reg_arg4 == 9999 ) {
596 reg_arg4 = i;
597 regs[i].set1(A3->as_VMReg());
598 } else {
599 regs[i].set1(VMRegImpl::stack2reg(stack++));
600 }
601 break;
602 case T_FLOAT:
603 regs[i].set1(VMRegImpl::stack2reg(stack++));
604 break;
605 case T_LONG:
606 assert(sig_bt[i+1] == T_VOID, "missing Half" );
607 regs[i].set2(VMRegImpl::stack2reg(dstack));
608 dstack += 2;
609 break;
610 case T_DOUBLE:
611 assert(sig_bt[i+1] == T_VOID, "missing Half" );
612 regs[i].set2(VMRegImpl::stack2reg(dstack));
613 dstack += 2;
614 break;
615 case T_VOID: regs[i].set_bad(); break;
616 break;
617 default:
618 ShouldNotReachHere();
619 break;
620 }
621 }
622 // return value can be odd number of VMRegImpl stack slots make multiple of 2
623 return round_to(stack, 2);
624 */
625 }
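// Worked example of the mapping above (a sketch for a static method (ILFD)V, so
// sig_bt == { T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID }): the single
// positional 'args' counter is shared between the int and float banks, giving
//   T_INT    -> INT_ArgReg[0] = T0
//   T_LONG   -> INT_ArgReg[1] = A0    (its T_VOID half is set_bad())
//   T_FLOAT  -> FP_ArgReg[2]  = F14
//   T_DOUBLE -> FP_ArgReg[3]  = F15   (its T_VOID half is set_bad())
// and the return value is round_to(0, 2) == 0 because no stack slots were needed.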
626
627 // Helper class mostly to avoid passing masm everywhere, and handle store
628 // displacement overflow logic for LP64
629 class AdapterGenerator {
630 MacroAssembler *masm;
631 #ifdef _LP64
632 Register Rdisp;
633 void set_Rdisp(Register r) { Rdisp = r; }
634 #endif // _LP64
635
636 void patch_callers_callsite();
637 // void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);
638
639 // base+st_off points to top of argument
640 int arg_offset(const int st_off) { return st_off; }
641 int next_arg_offset(const int st_off) {
642 return st_off - Interpreter::stackElementSize;
643 }
644
645 #ifdef _LP64
646 // On _LP64 argument slot values are loaded first into a register
647 // because they might not fit into displacement.
648 Register arg_slot(const int st_off);
649 Register next_arg_slot(const int st_off);
650 #else
651 int arg_slot(const int st_off) { return arg_offset(st_off); }
652 int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
653 #endif // _LP64
654
655 // Stores long into offset pointed to by base
656 void store_c2i_long(Register r, Register base,
657 const int st_off, bool is_stack);
658 void store_c2i_object(Register r, Register base,
659 const int st_off);
660 void store_c2i_int(Register r, Register base,
661 const int st_off);
662 void store_c2i_double(VMReg r_2,
663 VMReg r_1, Register base, const int st_off);
664 void store_c2i_float(FloatRegister f, Register base,
665 const int st_off);
666
667 public:
668 //void tag_stack(const BasicType sig, int st_off);
669 void gen_c2i_adapter(int total_args_passed,
670 // VMReg max_arg,
671 int comp_args_on_stack, // VMRegStackSlots
672 const BasicType *sig_bt,
673 const VMRegPair *regs,
674 Label& skip_fixup);
675 void gen_i2c_adapter(int total_args_passed,
676 // VMReg max_arg,
677 int comp_args_on_stack, // VMRegStackSlots
678 const BasicType *sig_bt,
679 const VMRegPair *regs);
680
681 AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
682 };
683
684
685 // Patch the caller's callsite with the entry to compiled code if it exists.
686 void AdapterGenerator::patch_callers_callsite() {
687 Label L;
688 //FIXME , what is stored in eax?
689 //__ verify_oop(ebx);
690 __ verify_oop(Rmethod);
691 // __ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
692 __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
693 //__ jcc(Assembler::equal, L);
694 __ beq(AT,R0,L);
695 __ delayed()->nop();
696 // Schedule the branch target address early.
697 // Call into the VM to patch the caller, then jump to compiled callee
698 // eax isn't live so capture return address while we easily can
699 // __ movl(eax, Address(esp, 0));
700 // __ lw(T5,SP,0);
701 __ move(V0, RA);
702
703 __ pushad();
704 //jerome_for_debug
705 // __ pushad();
706 // __ pushfd();
707 #ifdef COMPILER2
708 // C2 may leave the stack dirty if not in SSE2+ mode
709 __ empty_FPU_stack();
710 #endif /* COMPILER2 */
711
712 // VM needs caller's callsite
713 // __ pushl(eax);
714
715 // VM needs target method
716 // __ pushl(ebx);
717 // __ push(Rmethod);
718 // __ verify_oop(ebx);
719
720 __ move(A0, Rmethod);
721 __ move(A1, V0);
722 // __ addi(SP, SP, -8);
723 //we should preserve the return address
724 __ verify_oop(Rmethod);
725 __ move(S0, SP);
726 __ move(AT, -(StackAlignmentInBytes)); // align the stack
727 __ andr(SP, SP, AT);
728 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
729 relocInfo::runtime_call_type);
730 //__ addl(esp, 2*wordSize);
731
732 __ delayed()->nop();
733 // __ addi(SP, SP, 8);
734 // __ popfd();
735 __ move(SP, S0);
736 __ popad();
737 __ bind(L);
738 }
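// In short: if the word loaded at Method::code_offset() is non-NULL (i.e. Rmethod
// already has compiled code), the runtime call above re-patches the caller's call
// site to point at it; all registers are saved with pushad and SP is re-aligned
// around the call, so this is safe to run on entry to the c2i adapter.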
739 /*
740 void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
741 Register scratch) {
742 Unimplemented();
743 }*/
744
745 #ifdef _LP64
746 Register AdapterGenerator::arg_slot(const int st_off) {
747 Unimplemented();
748 }
749
750 Register AdapterGenerator::next_arg_slot(const int st_off){
751 Unimplemented();
752 }
753 #endif // _LP64
754
755 // Stores long into offset pointed to by base
756 void AdapterGenerator::store_c2i_long(Register r, Register base,
757 const int st_off, bool is_stack) {
758 Unimplemented();
759 }
760
761 void AdapterGenerator::store_c2i_object(Register r, Register base,
762 const int st_off) {
763 Unimplemented();
764 }
765
766 void AdapterGenerator::store_c2i_int(Register r, Register base,
767 const int st_off) {
768 Unimplemented();
769 }
770
771 // Stores into offset pointed to by base
772 void AdapterGenerator::store_c2i_double(VMReg r_2,
773 VMReg r_1, Register base, const int st_off) {
774 Unimplemented();
775 }
776
777 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
778 const int st_off) {
779 Unimplemented();
780 }
781 /*
782 void AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
783 if (TaggedStackInterpreter) {
784 int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
785 if (sig == T_OBJECT || sig == T_ARRAY) {
786 // __ movl(Address(esp, tag_offset), frame::TagReference);
787 // __ addi(AT,R0, frame::TagReference);
788
789 __ move(AT, frame::TagReference);
790 __ sw (AT, SP, tag_offset);
791 } else if (sig == T_LONG || sig == T_DOUBLE) {
792 int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
793 // __ movl(Address(esp, next_tag_offset), frame::TagValue);
794 // __ addi(AT,R0, frame::TagValue);
795 __ move(AT, frame::TagValue);
796 __ sw (AT, SP, next_tag_offset);
797 //__ movl(Address(esp, tag_offset), frame::TagValue);
798 // __ addi(AT,R0, frame::TagValue);
799 __ move(AT, frame::TagValue);
800 __ sw (AT, SP, tag_offset);
801
802 } else {
803 // __ movl(Address(esp, tag_offset), frame::TagValue);
804 //__ addi(AT,R0, frame::TagValue);
805 __ move(AT, frame::TagValue);
806 __ sw (AT, SP, tag_offset);
807
808 }
809 }
810 }*/
811
812 void AdapterGenerator::gen_c2i_adapter(
813 int total_args_passed,
814 // VMReg max_arg,
815 int comp_args_on_stack, // VMRegStackSlots
816 const BasicType *sig_bt,
817 const VMRegPair *regs,
818 Label& skip_fixup) {
819
820 // Before we get into the guts of the C2I adapter, see if we should be here
821 // at all. We've come from compiled code and are attempting to jump to the
822 // interpreter, which means the caller made a static call to get here
823 // (vcalls always get a compiled target if there is one). Check for a
824 // compiled target. If there is one, we need to patch the caller's call.
825 // However we will run interpreted if we come thru here. The next pass
826 // thru the call site will run compiled. If we ran compiled here then
827 // we can (theoretically) do endless i2c->c2i->i2c transitions during
828 // deopt/uncommon trap cycles. If we always go interpreted here then
829 // we can have at most one and don't need to play any tricks to keep
830 // from endlessly growing the stack.
831 //
832 // Actually if we detected that we had an i2c->c2i transition here we
833 // ought to be able to reset the world back to the state of the interpreted
834 // call and not bother building another interpreter arg area. We don't
835 // do that at this point.
836
837 patch_callers_callsite();
838
839 __ bind(skip_fixup);
840
841 #ifdef COMPILER2
842 __ empty_FPU_stack();
843 #endif /* COMPILER2 */
844 //this is for native ?
845 // Since all args are passed on the stack,
846 // total_args_passed * Interpreter::stackElementSize is the
847 // space we need.
848 int extraspace = total_args_passed * Interpreter::stackElementSize;
849
850 // stack is aligned, keep it that way
851 extraspace = round_to(extraspace, 2*wordSize);
852
853 // Get return address
854 // __ popl(eax);
855 //__ pop(T4);
856 __ move(V0, RA);
857 // set senderSP value
858 // __ movl(esi, esp);
859 //refer to interpreter_mips.cpp:generate_asm_entry
860 __ move(Rsender, SP);
861 //__ subl(esp, extraspace);
862 __ addi(SP, SP, -extraspace);
863
864 // Now write the args into the outgoing interpreter space
865 for (int i = 0; i < total_args_passed; i++) {
866 if (sig_bt[i] == T_VOID) {
867 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
868 "missing half");
869 continue;
870 }
871
872 // st_off points to lowest address on stack.
873 int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
874 #ifdef aoqi_test
875 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
876 #endif
877 // Say 4 args:
878 // i st_off
879 // 0 12 T_LONG
880 // 1 8 T_VOID
881 // 2 4 T_OBJECT
882 // 3 0 T_BOOL
883 VMReg r_1 = regs[i].first();
884 VMReg r_2 = regs[i].second();
885 if (!r_1->is_valid()) {
886 assert(!r_2->is_valid(), "");
887 continue;
888 }
889
890 if (r_1->is_stack()) {
891 // memory to memory use fpu stack top
892 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
893 #ifdef aoqi_test
894 tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off);
895 #endif
896
897 if (!r_2->is_valid()) {
898 #ifdef aoqi_test
899 tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
900 #endif
901 __ ld_ptr(AT, SP, ld_off);
902 __ st_ptr(AT, SP, st_off);
903 //tag_stack(sig_bt[i], st_off);
904 } else {
905 #ifdef aoqi_test
906 tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
907 #endif
908
909 // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
910 // st_off == MSW, st_off-wordSize == LSW
911
912 int next_off = st_off - Interpreter::stackElementSize;
913 /*
914 __ lw(AT, SP, ld_off);
915 __ sw(AT, SP, next_off);
916 __ lw(AT, SP, ld_off + wordSize);
917 __ sw(AT, SP, st_off);
918 */
919 __ ld_ptr(AT, SP, ld_off);
920 __ st_ptr(AT, SP, st_off);
921
922 /* Ref to is_Register condition */
923 if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
924 __ st_ptr(AT,SP,st_off - 8);
925 //tag_stack(sig_bt[i], next_off);
926 }
927 } else if (r_1->is_Register()) {
928 Register r = r_1->as_Register();
929 if (!r_2->is_valid()) {
930 #ifdef aoqi_test
931 tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
932 #endif
933 // __ movl(Address(esp, st_off), r);
934 __ sd(r,SP, st_off); //aoqi_test FIXME
935 //tag_stack(sig_bt[i], st_off);
936 } else {
937 #ifdef aoqi_test
938 tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
939 #endif
940 //FIXME, mips will not enter here
941 // long/double in gpr
942 __ sd(r,SP, st_off); //aoqi_test FIXME
943 /* Jin: In [java/util/zip/ZipFile.java]
944
945 private static native long open(String name, int mode, long lastModified);
946 private static native int getTotal(long jzfile);
947 *
948 * We need to transfer T_LONG parameters from a compiled method to a native method.
949 * It's a complex process:
950 *
951 * Caller -> lir_static_call -> gen_resolve_stub
952 -> -- resolve_static_call_C
953 `- gen_c2i_adapter() [*]
954 |
955 `- AdapterHandlerLibrary::get_create_adapter_index
956 -> generate_native_entry
957 -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]
958
959 * In [**], T_Long parameter is stored in stack as:
960
961 (high)
962 | |
963 -----------
964 | 8 bytes |
965 | (void) |
966 -----------
967 | 8 bytes |
968 | (long) |
969 -----------
970 | |
971 (low)
972 *
973 * However, the sequence is reversed here:
974 *
975 (high)
976 | |
977 -----------
978 | 8 bytes |
979 | (long) |
980 -----------
981 | 8 bytes |
982 | (void) |
983 -----------
984 | |
985 (low)
986 *
987 * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry().
988 */
989 if (sig_bt[i] == T_LONG)
990 __ sd(r,SP, st_off - 8);
991 // ShouldNotReachHere();
992 // int next_off = st_off - Interpreter::stackElementSize;
993 // __ sw(r_2->as_Register(),SP, st_off);
994 // __ sw(r,SP, next_off);
995 // tag_stack(masm, sig_bt[i], next_off);
996 }
997 } else if (r_1->is_FloatRegister()) {
998 assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");
999
1000 FloatRegister fr = r_1->as_FloatRegister();
1001 if (sig_bt[i] == T_FLOAT)
1002 __ swc1(fr,SP, st_off);
1003 else
1004 {
1005 __ sdc1(fr,SP, st_off);
1006 __ sdc1(fr,SP, st_off - 8); /* T_DOUBLE needs two slots */
1007 }
1008 }
1009 }
1010
1011 // Schedule the branch target address early.
1012 __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) );
1013 // And repush original return address
1014 __ move(RA, V0);
1015 __ jr (AT);
1016 __ delayed()->nop();
1017 }
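// Worked example of the layout built above (a sketch; Interpreter::stackElementSize
// is 8 on this 64-bit port): for an instance method (JI)V, sig_bt is
// { T_OBJECT, T_LONG, T_VOID, T_INT }, total_args_passed == 4 and
// extraspace == round_to(4 * 8, 16) == 32. With the compiled args arriving in
// T0 (receiver), A0 (long) and A1 (int), the stores above leave:
//   SP + 24 : receiver           (st_off for i == 0)
//   SP + 16 : long value         (st_off for i == 1)
//   SP +  8 : long value again   (st_off - 8, the T_VOID slot; see Jin's note)
//   SP +  0 : int value          (st_off for i == 3)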
1018
1019 void AdapterGenerator::gen_i2c_adapter(
1020 int total_args_passed,
1021 // VMReg max_arg,
1022 int comp_args_on_stack, // VMRegStackSlots
1023 const BasicType *sig_bt,
1024 const VMRegPair *regs) {
1025
1026 // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
1027 // layout. Lesp was saved by the calling I-frame and will be restored on
1028 // return. Meanwhile, outgoing arg space is all owned by the callee
1029 // C-frame, so we can mangle it at will. After adjusting the frame size,
1030 // hoist register arguments and repack other args according to the compiled
1031 // code convention. Finally, end in a jump to the compiled code. The entry
1032 // point address is the start of the buffer.
1033
1034 // We will only enter here from an interpreted frame and never from after
1035 // passing thru a c2i. Azul allowed this but we do not. If we lose the
1036 // race and use a c2i we will remain interpreted for the race loser(s).
1037 // This removes all sorts of headaches on the mips side and also eliminates
1038 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
1039
1040
1041 __ move(T9, SP);
1042
1043 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
1044 // in registers, we will occasionally have no stack args.
1045 int comp_words_on_stack = 0;
1046 if (comp_args_on_stack) {
1047 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
1048 // registers are below. By subtracting stack0, we either get a negative
1049 // number (all values in registers) or the maximum stack slot accessed.
1050 // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
1051 // Convert 4-byte stack slots to words.
1052 // does mips need rounding? FIXME aoqi
1053 comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
1054 // Round up to minimum stack alignment, in wordSize
1055 comp_words_on_stack = round_to(comp_words_on_stack, 2);
1056 __ daddi(SP, SP, -comp_words_on_stack * wordSize);
1057 }
1058
1059 // Align the outgoing SP
1060 __ move(AT, -(StackAlignmentInBytes));
1061 __ andr(SP, SP, AT);
1062 // push the return address on the stack (note that pushing, rather
1063 // than storing it, yields the correct frame alignment for the callee)
1064 // Put saved SP in another register
1065 // const Register saved_sp = eax;
1066 const Register saved_sp = V0;
1067 __ move(saved_sp, T9);
1068
1069
1070 // Will jump to the compiled code just as if compiled code was doing it.
1071 // Pre-load the register-jump target early, to schedule it better.
1072 __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));
1073
1074 // Now generate the shuffle code. Pick up all register args and move the
1075 // rest through the floating point stack top.
1076 for (int i = 0; i < total_args_passed; i++) {
1077 if (sig_bt[i] == T_VOID) {
1078 // Longs and doubles are passed in native word order, but misaligned
1079 // in the 32-bit build.
1080 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
1081 continue;
1082 }
1083
1084 // Pick up 0, 1 or 2 words from SP+offset.
1085
1086 //FIXME. aoqi. just delete the assert
1087 //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
1088 // Load in argument order going down.
1089 int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize;
1090 // Point to interpreter value (vs. tag)
1091 int next_off = ld_off - Interpreter::stackElementSize;
1092 //
1093 //
1094 //
1095 VMReg r_1 = regs[i].first();
1096 VMReg r_2 = regs[i].second();
1097 if (!r_1->is_valid()) {
1098 assert(!r_2->is_valid(), "");
1099 continue;
1100 }
1101 #ifdef aoqi_test
1102 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
1103 #endif
1104 if (r_1->is_stack()) {
1105 // Convert stack slot to an SP offset (+ wordSize to
1106 // account for return address )
1107 //NOTICE HERE!!!! I sub a wordSize here
1108 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
1109 //+ wordSize;
1110
1111 // We can use esi as a temp here because compiled code doesn't
1112 // need esi as an input,
1113 // and if we end up going thru a c2i because of a miss, a reasonable
1114 // value of esi
1115 // will be generated.
1116 if (!r_2->is_valid()) {
1117 #ifdef aoqi_test
1118 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
1119 #endif
1120 __ ld(AT, saved_sp, ld_off);
1121 __ sd(AT, SP, st_off);
1122 } else {
1123 #ifdef aoqi_test
1124 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
1125 #endif
1126 // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
1127 // are accessed with negative offsets, so the LSW is at the LOW address
1128
1129 // ld_off is MSW so get LSW
1130 // st_off is LSW (i.e. reg.first())
1131 /*
1132 __ ld(AT, saved_sp, next_off);
1133 __ sd(AT, SP, st_off);
1134 __ ld(AT, saved_sp, ld_off);
1135 __ sd(AT, SP, st_off + wordSize);
1136 */
1137
1138 /* 2012/4/9 Jin
1139 * [./org/eclipse/swt/graphics/GC.java]
1140 * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
1141 int destX, int destY, int destWidth, int destHeight,
1142 boolean simple,
1143 int imgWidth, int imgHeight,
1144 long maskPixmap, <-- Pass T_LONG in stack
1145 int maskType);
1146 * Before this modification, Eclipse displays icons with solid black background.
1147 */
1148 __ ld(AT, saved_sp, ld_off);
1149 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
1150 __ ld(AT, saved_sp, ld_off - 8);
1151 __ sd(AT, SP, st_off);
1152 //__ ld(AT, saved_sp, next_off);
1153 //__ sd(AT, SP, st_off + wordSize);
1154 }
1155 } else if (r_1->is_Register()) { // Register argument
1156 Register r = r_1->as_Register();
1157 // assert(r != eax, "must be different");
1158 if (r_2->is_valid()) {
1159 #ifdef aoqi_test
1160 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
1161 #endif
1162 // assert(r_2->as_Register() != eax, "need another temporary register");
1163 // Remember r_1 is low address (and LSB on mips)
1164 // So r_2 gets loaded from high address regardless of the platform
1165 //aoqi
1166 assert(r_2->as_Register() == r_1->as_Register(), "");
1167 //__ ld(r_2->as_Register(), saved_sp, ld_off);
1168 //__ ld(r, saved_sp, next_off);
1169 __ ld(r, saved_sp, ld_off);
1170
1171 /* Jin:
1172 *
1173 * For T_LONG type, the real layout is as below:
1174
1175 (high)
1176 | |
1177 -----------
1178 | 8 bytes |
1179 | (void) |
1180 -----------
1181 | 8 bytes |
1182 | (long) |
1183 -----------
1184 | |
1185 (low)
1186 *
1187 * We should load the low-8 bytes.
1188 */
1189 if (sig_bt[i] == T_LONG)
1190 __ ld(r, saved_sp, ld_off - 8);
1191 } else {
1192 #ifdef aoqi_test
1193 tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
1194 #endif
1195 __ lw(r, saved_sp, ld_off);
1196 }
1197 } else if (r_1->is_FloatRegister()) { // Float Register
1198 assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");
1199
1200 FloatRegister fr = r_1->as_FloatRegister();
1201 if (sig_bt[i] == T_FLOAT)
1202 __ lwc1(fr, saved_sp, ld_off);
1203 else
1204 {
1205 __ ldc1(fr, saved_sp, ld_off);
1206 __ ldc1(fr, saved_sp, ld_off - 8);
1207 }
1208 }
1209 }
1210
1211 // 6243940 We might end up in handle_wrong_method if
1212 // the callee is deoptimized as we race thru here. If that
1213 // happens we don't want to take a safepoint because the
1214 // caller frame will look interpreted and arguments are now
1215 // "compiled" so it is much better to make this transition
1216 // invisible to the stack walking code. Unfortunately if
1217 // we try and find the callee by normal means a safepoint
1218 // is possible. So we stash the desired callee in the thread
1219 // and the vm will find it there should this case occur.
1220 __ get_thread(T8);
1221 __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));
1222
1223 // move methodOop to eax in case we end up in a c2i adapter.
1224 // the c2i adapters expect methodOop in eax (c2) because c2's
1225 // resolve stubs return the result (the method) in eax.
1226 // I'd love to fix this.
1227 __ move(V0, Rmethod);
1228 __ jr(T9);
1229 __ delayed()->nop();
1230 }
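// Continuing the (JI)V sketch from gen_c2i_adapter above, but in the opposite
// direction: arguments are picked up from saved_sp at
// ld_off = (total_args_passed - 1 - i) * Interpreter::stackElementSize, so the
// receiver is loaded into T0 from saved_sp + 24, the long into A0 from
// saved_sp + 8 (ld_off - 8, below the T_VOID padding), and the int into A1 from
// saved_sp + 0, before the code jumps to the compiled entry cached in T9.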
1231
1232 // ---------------------------------------------------------------
1233 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
1234 int total_args_passed,
1235 // VMReg max_arg,
1236 int comp_args_on_stack, // VMRegStackSlots
1237 const BasicType *sig_bt,
1238 const VMRegPair *regs,
1239 AdapterFingerPrint* fingerprint) {
1240 address i2c_entry = __ pc();
1241
1242 AdapterGenerator agen(masm);
1243
1244 agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
1245
1246
1247 // -------------------------------------------------------------------------
1248 // Generate a C2I adapter. On entry we know G5 holds the methodOop. The
1249 // args start out packed in the compiled layout. They need to be unpacked
1250 // into the interpreter layout. This will almost always require some stack
1251 // space. We grow the current (compiled) stack, then repack the args. We
1252 // finally end in a jump to the generic interpreter entry point. On exit
1253 // from the interpreter, the interpreter will restore our SP (lest the
1254 // compiled code, which relies solely on SP and not FP, get sick).
1255
1256 address c2i_unverified_entry = __ pc();
1257 Label skip_fixup;
1258 {
1259 Register holder = T1;
1260 Register receiver = T0;
1261 Register temp = T8;
1262 address ic_miss = SharedRuntime::get_ic_miss_stub();
1263
1264 Label missed;
1265
1266 __ verify_oop(holder);
1267 // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
1268 //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes());
1269 //add for compressedoops
1270 __ load_klass(temp, receiver);
1271 __ verify_oop(temp);
1272
1273 // __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
1274 __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
1275 //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset()));
1276 __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
1277 //__ jcc(Assembler::notEqual, missed);
1278 __ bne(AT, temp, missed);
1279 __ delayed()->nop();
1280 // Method might have been compiled since the call site was patched to
1281 // interpreted; if that is the case, treat it as a miss so we can get
1282 // the call site corrected.
1283 //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
1284 //__ jcc(Assembler::equal, skip_fixup);
1285 __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
1286 __ beq(AT, R0, skip_fixup);
1287 __ delayed()->nop();
1288 __ bind(missed);
1289 // __ move(AT, (int)&jerome7);
1290 // __ sw(RA, AT, 0);
1291
1292 __ jmp(ic_miss, relocInfo::runtime_call_type);
1293 __ delayed()->nop();
1294 }
1295
1296 address c2i_entry = __ pc();
1297
1298 agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1299
1300 __ flush();
1301 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1302
1303 }
1304 /*
1305 // Helper function for native calling conventions
1306 static VMReg int_stk_helper( int i ) {
1307 // Bias any stack based VMReg we get by ignoring the window area
1308 // but not the register parameter save area.
1309 //
1310 // This is strange for the following reasons. We'd normally expect
1311 // the calling convention to return an VMReg for a stack slot
1312 // completely ignoring any abi reserved area. C2 thinks of that
1313 // abi area as only out_preserve_stack_slots. This does not include
1314 // the area allocated by the C abi to store down integer arguments
1315 // because the java calling convention does not use it. So
1316 // since c2 assumes that there are only out_preserve_stack_slots
1317 // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
1318 // location the c calling convention must add in this bias amount
1319 // to make up for the fact that the out_preserve_stack_slots is
1320 // insufficient for C calls. What a mess. I sure hope those 6
1321 // stack words were worth it on every java call!
1322
1323 // Another way of cleaning this up would be for out_preserve_stack_slots
1324 // to take a parameter to say whether it was C or java calling conventions.
1325 // Then things might look a little better (but not much).
1326
1327 int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
1328 if( mem_parm_offset < 0 ) {
1329 return as_oRegister(i)->as_VMReg();
1330 } else {
1331 int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
1332 // Now return a biased offset that will be correct when out_preserve_slots is added back in
1333 return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
1334 }
1335 }
1336 */
1337
1338
1339 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1340 VMRegPair *regs,
1341 VMRegPair *regs2,
1342 int total_args_passed) {
1343 assert(regs2 == NULL, "not needed on MIPS");
1344 #ifdef aoqi_test
1345 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
1346 #endif
1347 // Return the number of VMReg stack_slots needed for the args.
1348 // This value does not include an abi space (like register window
1349 // save area).
1350
1351 // The native convention is V8 if !LP64
1352 // The LP64 convention is the V9 convention which is slightly more sane.
1353
1354 // We return the amount of VMRegImpl stack slots we need to reserve for all
1355 // the arguments NOT counting out_preserve_stack_slots. Since we always
1356 // have space for storing at least 6 registers to memory we start with that.
1357 // See int_stk_helper for a further discussion.
1360 static const Register INT_ArgReg[Argument::n_register_parameters] = {
1361 A0, A1, A2, A3, A4, A5, A6, A7
1362 };
1363 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
1364 F12, F13, F14, F15, F16, F17, F18, F19
1365 };
1366 uint args = 0;
1367 uint stk_args = 0; // inc by 2 each time
1368
1369 /* Example:
1370 --- n java.lang.UNIXProcess::forkAndExec
1371 private native int forkAndExec(byte[] prog,
1372 byte[] argBlock, int argc,
1373 byte[] envBlock, int envc,
1374 byte[] dir,
1375 boolean redirectErrorStream,
1376 FileDescriptor stdin_fd,
1377 FileDescriptor stdout_fd,
1378 FileDescriptor stderr_fd)
1379 JNIEXPORT jint JNICALL
1380 Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
1381 jobject process,
1382 jbyteArray prog,
1383 jbyteArray argBlock, jint argc,
1384 jbyteArray envBlock, jint envc,
1385 jbyteArray dir,
1386 jboolean redirectErrorStream,
1387 jobject stdin_fd,
1388 jobject stdout_fd,
1389 jobject stderr_fd)
1390
1391 ::c_calling_convention
1392 0: // env <-- a0
1393 1: L // klass/obj <-- t0 => a1
1394 2: [ // prog[] <-- a0 => a2
1395 3: [ // argBlock[] <-- a1 => a3
1396 4: I // argc
1397 5: [ // envBlock[] <-- a3 => a5
1398 6: I // envc
1399 7: [ // dir[] <-- a5 => a7
1400 8: Z // redirectErrorStream a6 => sp[0]
1401 9: L // stdin a7 => sp[8]
1402 10: L // stdout fp[16] => sp[16]
1403 11: L // stderr fp[24] => sp[24]
1404 */
1405 for (int i = 0; i < total_args_passed; i++) {
1406 switch (sig_bt[i]) {
1407 case T_VOID: // Halves of longs and doubles
1408 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
1409 regs[i].set_bad();
1410 break;
1411 case T_BOOLEAN:
1412 case T_CHAR:
1413 case T_BYTE:
1414 case T_SHORT:
1415 case T_INT:
1416 if (args < Argument::n_register_parameters) {
1417 regs[i].set1(INT_ArgReg[args++]->as_VMReg());
1418 } else {
1419 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1420 stk_args += 2;
1421 }
1422 break;
1423 case T_LONG:
1424 assert(sig_bt[i + 1] == T_VOID, "expecting half");
1425 // fall through
1426 case T_OBJECT:
1427 case T_ARRAY:
1428 case T_ADDRESS:
1429 case T_METADATA:
1430 if (args < Argument::n_register_parameters) {
1431 regs[i].set2(INT_ArgReg[args++]->as_VMReg());
1432 } else {
1433 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1434 stk_args += 2;
1435 }
1436 break;
1437 case T_FLOAT:
1438 if (args < Argument::n_float_register_parameters) {
1439 regs[i].set1(FP_ArgReg[args++]->as_VMReg());
1440 } else {
1441 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1442 stk_args += 2;
1443 }
1444 break;
1445 case T_DOUBLE:
1446 assert(sig_bt[i + 1] == T_VOID, "expecting half");
1447 if (args < Argument::n_float_register_parameters) {
1448 regs[i].set2(FP_ArgReg[args++]->as_VMReg());
1449 } else {
1450 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1451 stk_args += 2;
1452 }
1453 break;
1454 default:
1455 ShouldNotReachHere();
1456 break;
1457 }
1458 }
1459
1460 return round_to(stk_args, 2);
1461 }
1462 /*
1463 int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt,
1464 VMRegPair *regs,
1465 int total_args_passed) {
1466 // We return the amount of VMRegImpl stack slots we need to reserve for all
1467 // the arguments NOT counting out_preserve_stack_slots.
1468 bool unalign = 0;
1469 uint stack = 0; // All arguments on stack
1470 #ifdef aoqi_test
1471 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
1472 #endif
1473
1474 for( int i = 0; i < total_args_passed; i++) {
1475 // From the type and the argument number (count) compute the location
1476 switch( sig_bt[i] ) {
1477 case T_BOOLEAN:
1478 case T_CHAR:
1479 case T_FLOAT:
1480 case T_BYTE:
1481 case T_SHORT:
1482 case T_INT:
1483 case T_OBJECT:
1484 case T_ARRAY:
1485 case T_ADDRESS:
1486 regs[i].set1(VMRegImpl::stack2reg(stack++));
1487 unalign = !unalign;
1488 break;
1489 case T_LONG:
1490 case T_DOUBLE: // The stack numbering is reversed from Java
1491 // Since C arguments do not get reversed, the ordering for
1492 // doubles on the stack must be opposite the Java convention
1493 assert(sig_bt[i+1] == T_VOID, "missing Half" );
1494 if(unalign){
1495 stack += 1;
1496 unalign = ! unalign;
1497 }
1498 regs[i].set2(VMRegImpl::stack2reg(stack));
1499 stack += 2;
1500 break;
1501 case T_VOID: regs[i].set_bad(); break;
1502 default:
1503 ShouldNotReachHere();
1504 break;
1505 }
1506 }
1507 return stack;
1508 }
1509 */
1510
1511 // ---------------------------------------------------------------------------
1512 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1513 // We always ignore the frame_slots arg and just use the space just below frame pointer
1514 // which by this time is free to use
1515 switch (ret_type) {
1516 case T_FLOAT:
1517 __ swc1(FSF, FP, -wordSize);
1518 break;
1519 case T_DOUBLE:
1520 __ sdc1(FSF, FP, -wordSize );
1521 break;
1522 case T_VOID: break;
1523 case T_LONG:
1524 __ sd(V0, FP, -wordSize);
1525 break;
1526 case T_OBJECT:
1527 case T_ARRAY:
1528 __ sd(V0, FP, -wordSize);
1529 break;
1530 default: {
1531 __ sw(V0, FP, -wordSize);
1532 }
1533 }
1534 }
1535
1536 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1537 // We always ignore the frame_slots arg and just use the space just below frame pointer
1538 // which by this time is free to use
1539 switch (ret_type) {
1540 case T_FLOAT:
1541 __ lwc1(FSF, FP, -wordSize);
1542 break;
1543 case T_DOUBLE:
1544 __ ldc1(FSF, FP, -wordSize );
1545 break;
1546 case T_LONG:
1547 __ ld(V0, FP, -wordSize);
1548 break;
1549 case T_VOID: break;
1550 case T_OBJECT:
1551 case T_ARRAY:
1552 __ ld(V0, FP, -wordSize);
1553 break;
1554 default: {
1555 __ lw(V0, FP, -wordSize);
1556 }
1557 }
1558 }
1559
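// save_args/restore_args push and pop any outgoing argument values that
// live in registers, so that they survive a runtime call made on a slow
// path before the native call itself is reached.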
1560 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1561 for ( int i = first_arg ; i < arg_count ; i++ ) {
1562 if (args[i].first()->is_Register()) {
1563 __ push(args[i].first()->as_Register());
1564 } else if (args[i].first()->is_FloatRegister()) {
1565 __ push(args[i].first()->as_FloatRegister());
1566 }
1567 }
1568 }
1569
1570 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1571 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1572 if (args[i].first()->is_Register()) {
1573 __ pop(args[i].first()->as_Register());
1574 } else if (args[i].first()->is_FloatRegister()) {
1575 __ pop(args[i].first()->as_FloatRegister());
1576 }
1577 }
1578 }
1579
1580 // A simple move of an integer-like value
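// When the source is on the stack the 32-bit value is loaded with lw (which
// sign-extends on MIPS64); stores to the outgoing area use sd, so every
// outgoing stack argument occupies a full 64-bit slot.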
1581 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1582 if (src.first()->is_stack()) {
1583 if (dst.first()->is_stack()) {
1584 // stack to stack
1585 __ lw(AT, FP, reg2offset_in(src.first()));
1586 __ sd(AT,SP, reg2offset_out(dst.first()));
1587 } else {
1588 // stack to reg
1589 //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1590 __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
1591 }
1592 } else if (dst.first()->is_stack()) {
1593 // reg to stack
1594 __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
1595 } else {
1596 //__ mov(src.first()->as_Register(), dst.first()->as_Register());
1597 if (dst.first() != src.first()){
1598 __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first()
1599 }
1600 }
1601 }
1602 /*
1603 // On 64 bit we will store integer like items to the stack as
1604 // 64 bits items (sparc abi) even though java would only store
1605 // 32bits for a parameter. On 32bit it will simply be 32 bits
1606 // So this routine will do 32->32 on 32bit and 32->64 on 64bit
1607 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1608 if (src.first()->is_stack()) {
1609 if (dst.first()->is_stack()) {
1610 // stack to stack
1611 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1612 __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1613 } else {
1614 // stack to reg
1615 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1616 }
1617 } else if (dst.first()->is_stack()) {
1618 // reg to stack
1619 __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1620 } else {
1621 __ mov(src.first()->as_Register(), dst.first()->as_Register());
1622 }
1623 }
1624 */
1625
1626 // An oop arg. Must pass a handle not the oop itself
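// A NULL oop must become a NULL handle; otherwise the handle is the address
// of the stack slot (or of the reserved oop-handle slot) holding the oop,
// and that slot is recorded in the OopMap so GC can find and update it.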
1627 static void object_move(MacroAssembler* masm,
1628 OopMap* map,
1629 int oop_handle_offset,
1630 int framesize_in_slots,
1631 VMRegPair src,
1632 VMRegPair dst,
1633 bool is_receiver,
1634 int* receiver_offset) {
1635
1636 // must pass a handle. First figure out the location we use as a handle
1637
1638 //FIXME, for mips, dst can be register
1639 if (src.first()->is_stack()) {
1640 // Oop is already on the stack as an argument
1641 Register rHandle = V0;
1642 Label nil;
1643 //__ xorl(rHandle, rHandle);
1644 __ xorr(rHandle, rHandle, rHandle);
1645 //__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD);
1646 __ ld(AT, FP, reg2offset_in(src.first()));
1647 //__ jcc(Assembler::equal, nil);
1648 __ beq(AT,R0, nil);
1649 __ delayed()->nop();
1650 // __ leal(rHandle, Address(ebp, reg2offset_in(src.first())));
1651 __ lea(rHandle, Address(FP, reg2offset_in(src.first())));
1652 __ bind(nil);
1653 //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
1654 if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
1655 else __ move( (dst.first())->as_Register(),rHandle);
1656 //if dst is register
1657 //FIXME, do mips need out preserve stack slots?
1658 int offset_in_older_frame = src.first()->reg2stack()
1659 + SharedRuntime::out_preserve_stack_slots();
1660 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1661 if (is_receiver) {
1662 *receiver_offset = (offset_in_older_frame
1663 + framesize_in_slots) * VMRegImpl::stack_slot_size;
1664 }
1665 } else {
1666 // Oop is in a register; we must store it to the space we reserve
1667 // on the stack for oop_handles
1668 const Register rOop = src.first()->as_Register();
1669 assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
1670 // const Register rHandle = eax;
1671 const Register rHandle = V0;
1672 // Important: refer to java_calling_convention
1673 int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1674 int offset = oop_slot*VMRegImpl::stack_slot_size;
1675 Label skip;
1676 // __ movl(Address(esp, offset), rOop);
1677 __ sd( rOop , SP, offset );
1678 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1679 // __ xorl(rHandle, rHandle);
1680 __ xorr( rHandle, rHandle, rHandle);
1681 //__ cmpl(rOop, NULL_WORD);
1682 // __ jcc(Assembler::equal, skip);
1683 __ beq(rOop, R0, skip);
1684 __ delayed()->nop();
1685 // __ leal(rHandle, Address(esp, offset));
1686 __ lea(rHandle, Address(SP, offset));
1687 __ bind(skip);
1688 // Store the handle parameter
1689 //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
1690 if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
1691 else __ move((dst.first())->as_Register(), rHandle);
1692 //if dst is register
1693
1694 if (is_receiver) {
1695 *receiver_offset = offset;
1696 }
1697 }
1698 }
1699
1700 // A float arg may have to do float reg int reg conversion
1701 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1702 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1703
1704 if (src.first()->is_stack()) {
1705 if(dst.first()->is_stack()){
1706 // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
1707 __ lwc1(F12 , FP, reg2offset_in(src.first()));
1708 // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
1709 __ swc1(F12 ,SP, reg2offset_out(dst.first()));
1710 }
1711 else
1712 __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
1713 } else {
1714 // reg to stack
1715 // __ movss(Address(esp, reg2offset_out(dst.first())),
1716 // src.first()->as_XMMRegister());
1717 // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
1718 if(dst.first()->is_stack())
1719 __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
1720 else
1721 __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1722 }
1723 }
1724 /*
1725 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1726 VMRegPair src_lo(src.first());
1727 VMRegPair src_hi(src.second());
1728 VMRegPair dst_lo(dst.first());
1729 VMRegPair dst_hi(dst.second());
1730 simple_move32(masm, src_lo, dst_lo);
1731 simple_move32(masm, src_hi, dst_hi);
1732 }
1733 */
1734 // A long move
1735 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1736
1737 // The legal possibilities for a long_move VMRegPair are:
1738 // 1: a single 64-bit integer register, or
1739 // 2: two stack slots (possibly unaligned),
1740 // and the code below handles src and dst in either form.
1741
1742 if (src.first()->is_stack()) {
1743 assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
1744 // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
1745 if( dst.first()->is_stack()){
1746 __ ld(AT, FP, reg2offset_in(src.first()));
1747 // __ movl(ebx, address(ebp, reg2offset_in(src.second())));
1748 //__ lw(V0, FP, reg2offset_in(src.second()));
1749 // __ movl(address(esp, reg2offset_out(dst.first())), eax);
1750 __ sd(AT, SP, reg2offset_out(dst.first()));
1751 // __ movl(address(esp, reg2offset_out(dst.second())), ebx);
1752 //__ sw(V0, SP, reg2offset_out(dst.second()));
1753 } else{
1754 __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
1755 //__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second()));
1756 }
1757 } else {
1758 if( dst.first()->is_stack()){
1759 __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
1760 //__ sw( (src.second())->as_Register(), SP, reg2offset_out(dst.second()));
1761 } else{
1762 __ move( (dst.first())->as_Register() , (src.first())->as_Register());
1763 //__ move( (dst.second())->as_Register(), (src.second())->as_Register());
1764 }
1765 }
1766 }
1767
1768 // A double move
1769 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1770
1771 // The legal possibilities for a double_move VMRegPair are:
1772 //
1773 // Because of the calling convention we know that src and dst are each
1774 // either
1775 // 1: a single floating point register, or
1776 // 2: two stack slots (possibly unaligned),
1777 // and the code below handles every combination of the two.
1778
1779 // assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() ||
1780 // src.first()->is_stack()), "bad args");
1781 // assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");
1782
1783 if (src.first()->is_stack()) {
1784 // source is all stack
1785 // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
1786 if( dst.first()->is_stack()){
1787 __ ldc1(F12, FP, reg2offset_in(src.first()));
1788 //__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
1789 //__ lwc1(F14, FP, reg2offset_in(src.second()));
1790
1791 // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
1792 __ sdc1(F12, SP, reg2offset_out(dst.first()));
1793 // __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
1794 //__ swc1(F14, SP, reg2offset_out(dst.second()));
1795 } else{
1796 __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
1797 //__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
1798 }
1799
1800 } else {
1801 // reg to stack
1802 // No worries about stack alignment
1803 // __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1804 if( dst.first()->is_stack()){
1805 __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
1806 //__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
1807 }
1808 else
1809 __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1810 //__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister());
1811
1812 }
1813 }
1814
1815 static void verify_oop_args(MacroAssembler* masm,
1816 methodHandle method,
1817 const BasicType* sig_bt,
1818 const VMRegPair* regs) {
1819 Register temp_reg = T9; // not part of any compiled calling seq
1820 if (VerifyOops) {
1821 for (int i = 0; i < method->size_of_parameters(); i++) {
1822 if (sig_bt[i] == T_OBJECT ||
1823 sig_bt[i] == T_ARRAY) {
1824 VMReg r = regs[i].first();
1825 assert(r->is_valid(), "bad oop arg");
1826 if (r->is_stack()) {
1827 // __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1828 __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1829 __ verify_oop(temp_reg);
1830 } else {
1831 __ verify_oop(r->as_Register());
1832 }
1833 }
1834 }
1835 }
1836 }
1837
1838 static void gen_special_dispatch(MacroAssembler* masm,
1839 methodHandle method,
1840 const BasicType* sig_bt,
1841 const VMRegPair* regs) {
1842 verify_oop_args(masm, method, sig_bt, regs);
1843 vmIntrinsics::ID iid = method->intrinsic_id();
1844
1845 // Now write the args into the outgoing interpreter space
1846 bool has_receiver = false;
1847 Register receiver_reg = noreg;
1848 int member_arg_pos = -1;
1849 Register member_reg = noreg;
1850 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1851 if (ref_kind != 0) {
1852 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1853 // member_reg = rbx; // known to be free at this point
1854 member_reg = S3; // known to be free at this point
1855 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1856 } else if (iid == vmIntrinsics::_invokeBasic) {
1857 has_receiver = true;
1858 } else {
1859 fatal(err_msg_res("unexpected intrinsic id %d", iid));
1860 }
1861
1862 if (member_reg != noreg) {
1863 // Load the member_arg into register, if necessary.
1864 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1865 VMReg r = regs[member_arg_pos].first();
1866 if (r->is_stack()) {
1867 // __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1868 __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1869 } else {
1870 // no data motion is needed
1871 member_reg = r->as_Register();
1872 }
1873 }
1874
1875 if (has_receiver) {
1876 // Make sure the receiver is loaded into a register.
1877 assert(method->size_of_parameters() > 0, "oob");
1878 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1879 VMReg r = regs[0].first();
1880 assert(r->is_valid(), "bad receiver arg");
1881 if (r->is_stack()) {
1882 // Porting note: This assumes that compiled calling conventions always
1883 // pass the receiver oop in a register. If this is not true on some
1884 // platform, pick a temp and load the receiver from stack.
1885 fatal("receiver always in a register");
1886 // receiver_reg = j_rarg0; // known to be free at this point
1887 receiver_reg = SSR; // known to be free at this point
1888 // __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1889 __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1890 } else {
1891 // no data motion is needed
1892 receiver_reg = r->as_Register();
1893 }
1894 }
1895
1896 // Figure out which address we are really jumping to:
1897 MethodHandles::generate_method_handle_dispatch(masm, iid,
1898 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1899 }
1900
1901 // ---------------------------------------------------------------------------
1902 // Generate a native wrapper for a given method. The method takes arguments
1903 // in the Java compiled code convention, marshals them to the native
1904 // convention (handlizes oops, etc), transitions to native, makes the call,
1905 // returns to java state (possibly blocking), unhandlizes any result and
1906 // returns.
1907 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1908 methodHandle method,
1909 int compile_id,
1910 BasicType *in_sig_bt,
1911 VMRegPair *in_regs,
1912 BasicType ret_type) {
1913
1914 if (method->is_method_handle_intrinsic()) {
1915 vmIntrinsics::ID iid = method->intrinsic_id();
1916 intptr_t start = (intptr_t)__ pc();
1917 int vep_offset = ((intptr_t)__ pc()) - start;
1918
1919 gen_special_dispatch(masm,
1920 method,
1921 in_sig_bt,
1922 in_regs);
1923
1924 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
1925 __ flush();
1926 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
1927 return nmethod::new_native_nmethod(method,
1928 compile_id,
1929 masm->code(),
1930 vep_offset,
1931 frame_complete,
1932 stack_slots / VMRegImpl::slots_per_word,
1933 in_ByteSize(-1),
1934 in_ByteSize(-1),
1935 (OopMapSet*)NULL);
1936 }
1937 bool is_critical_native = true;
1938 address native_func = method->critical_native_function();
1939 if (native_func == NULL) {
1940 native_func = method->native_function();
1941 is_critical_native = false;
1942 }
1943 assert(native_func != NULL, "must have function");
1944
1945 // Native nmethod wrappers never take possession of the oop arguments.
1946 // So the caller will gc the arguments. The only thing we need an
1947 // oopMap for is if the call is static
1948 //
1949 // An OopMap for lock (and class if static), and one for the VM call itself
1950 OopMapSet *oop_maps = new OopMapSet();
1951
1952 // We have received a description of where all the java args are located
1953 // on entry to the wrapper. We need to convert these args to where
1954 // the jni function will expect them. To figure out where they go
1955 // we convert the java signature to a C signature by inserting
1956 // the hidden arguments as arg[0] and possibly arg[1] (static method)
1957
1958 const int total_in_args = method->size_of_parameters();
1959 int total_c_args = total_in_args;
1960 if (!is_critical_native) {
1961 total_c_args += 1;
1962 if (method->is_static()) {
1963 total_c_args++;
1964 }
1965 } else {
1966 for (int i = 0; i < total_in_args; i++) {
1967 if (in_sig_bt[i] == T_ARRAY) {
1968 total_c_args++;
1969 }
1970 }
1971 }
1972
1973 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1974 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1975 BasicType* in_elem_bt = NULL;
1976
1977 int argc = 0;
1978 if (!is_critical_native) {
1979 out_sig_bt[argc++] = T_ADDRESS;
1980 if (method->is_static()) {
1981 out_sig_bt[argc++] = T_OBJECT;
1982 }
1983
1984 for (int i = 0; i < total_in_args ; i++ ) {
1985 out_sig_bt[argc++] = in_sig_bt[i];
1986 }
1987 } else {
1988 Thread* THREAD = Thread::current();
1989 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1990 SignatureStream ss(method->signature());
1991 for (int i = 0; i < total_in_args ; i++ ) {
1992 if (in_sig_bt[i] == T_ARRAY) {
1993 // Arrays are passed as int, elem* pair
1994 out_sig_bt[argc++] = T_INT;
1995 out_sig_bt[argc++] = T_ADDRESS;
1996 Symbol* atype = ss.as_symbol(CHECK_NULL);
1997 const char* at = atype->as_C_string();
1998 if (strlen(at) == 2) {
1999 assert(at[0] == '[', "must be");
2000 switch (at[1]) {
2001 case 'B': in_elem_bt[i] = T_BYTE; break;
2002 case 'C': in_elem_bt[i] = T_CHAR; break;
2003 case 'D': in_elem_bt[i] = T_DOUBLE; break;
2004 case 'F': in_elem_bt[i] = T_FLOAT; break;
2005 case 'I': in_elem_bt[i] = T_INT; break;
2006 case 'J': in_elem_bt[i] = T_LONG; break;
2007 case 'S': in_elem_bt[i] = T_SHORT; break;
2008 case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
2009 default: ShouldNotReachHere();
2010 }
2011 }
2012 } else {
2013 out_sig_bt[argc++] = in_sig_bt[i];
2014 in_elem_bt[i] = T_VOID;
2015 }
2016 if (in_sig_bt[i] != T_VOID) {
2017 assert(in_sig_bt[i] == ss.type(), "must match");
2018 ss.next();
2019 }
2020 }
2021 }
2022
2023 // Now figure out where the args must be stored and how much stack space
2024 // they require (neglecting out_preserve_stack_slots but including space
2025 // for storing the register arguments to memory if needed).
2026 //
2027 int out_arg_slots;
2028 //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2029 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
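// out_regs now describes where the C convention expects each outgoing
// argument, and out_arg_slots is the stack space (in VMReg slots) those
// arguments need.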
2030
2031 // Compute framesize for the wrapper. We need to handlize all oops in
2032 // registers. We must create space for them here that is disjoint from
2033 // the windowed save area because we have no control over when we might
2034 // flush the window again and overwrite values that gc has since modified.
2035 // (The live window race)
2036 //
2037 // We allocate one word per incoming register argument for storing down
2038 // these objects. This allows us to simply record the base and use the
2039 // incoming register number to decide which slot to use. (Note that the
2040 // reg number is the inbound number, not the outbound number).
2041 // We must shuffle args to match the native convention, and include var-args space.
2042
2043 // Calculate the total number of stack slots we will need.
2044
2045 // First count the abi requirement plus all of the outgoing args
2046 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2047
2048 // Now the space for the inbound oop handle area
2049 int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers
2050 if (is_critical_native) {
2051 // Critical natives may have to call out so they need a save area
2052 // for register arguments.
2053 int double_slots = 0;
2054 int single_slots = 0;
2055 for ( int i = 0; i < total_in_args; i++) {
2056 if (in_regs[i].first()->is_Register()) {
2057 const Register reg = in_regs[i].first()->as_Register();
2058 switch (in_sig_bt[i]) {
2059 case T_BOOLEAN:
2060 case T_BYTE:
2061 case T_SHORT:
2062 case T_CHAR:
2063 case T_INT: single_slots++; break;
2064 case T_ARRAY: // specific to LP64 (7145024)
2065 case T_LONG: double_slots++; break;
2066 default: ShouldNotReachHere();
2067 }
2068 } else if (in_regs[i].first()->is_FloatRegister()) {
2069 switch (in_sig_bt[i]) {
2070 case T_FLOAT: single_slots++; break;
2071 case T_DOUBLE: double_slots++; break;
2072 default: ShouldNotReachHere();
2073 }
2074 }
2075 }
2076 total_save_slots = double_slots * 2 + single_slots;
2077 // align the save area
2078 if (double_slots != 0) {
2079 stack_slots = round_to(stack_slots, 2);
2080 }
2081 }
2082
2083 int oop_handle_offset = stack_slots;
2084 // stack_slots += 9*VMRegImpl::slots_per_word; // T0, A0 ~ A7
2085 stack_slots += total_save_slots;
2086
2087 // Now any space we need for handlizing a klass if static method
2088
2089 int klass_slot_offset = 0;
2090 int klass_offset = -1;
2091 int lock_slot_offset = 0;
2092 bool is_static = false;
2093 //int oop_temp_slot_offset = 0;
2094
2095 if (method->is_static()) {
2096 klass_slot_offset = stack_slots;
2097 stack_slots += VMRegImpl::slots_per_word;
2098 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
2099 is_static = true;
2100 }
2101
2102 // Plus a lock if needed
2103
2104 if (method->is_synchronized()) {
2105 lock_slot_offset = stack_slots;
2106 stack_slots += VMRegImpl::slots_per_word;
2107 }
2108
2109 // Now a place to save return value or as a temporary for any gpr -> fpr moves
2110 // + 2 for return address (which we own) and saved ebp
2111 //stack_slots += 2;
2112 stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7)
2113
2114 // Ok The space we have allocated will look like:
2115 //
2116 //
2117 // FP-> | |
2118 // |---------------------|
2119 // | 2 slots for moves |
2120 // |---------------------|
2121 // | lock box (if sync) |
2122 // |---------------------| <- lock_slot_offset
2123 // | klass (if static) |
2124 // |---------------------| <- klass_slot_offset
2125 // | oopHandle area |
2126 // |---------------------| <- oop_handle_offset
2127 // | outbound memory |
2128 // | based arguments |
2129 // | |
2130 // |---------------------|
2131 // | vararg area |
2132 // |---------------------|
2133 // | |
2134 // SP-> | out_preserved_slots |
2135 //
2136 //
2137
2138
2139 // Now compute actual number of stack words we need rounding to make
2140 // stack properly aligned.
2141 stack_slots = round_to(stack_slots, StackAlignmentInSlots);
2142
2143 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2144
2145 intptr_t start = (intptr_t)__ pc();
2146
2147
2148
2149 // First thing make an ic check to see if we should even be here
2150 address ic_miss = SharedRuntime::get_ic_miss_stub();
2151
2152 // We are free to use all registers as temps without saving them and
2153 // restoring them, except FP. FP is the only callee save register
2154 // as far as the interpreter and the compiler(s) are concerned.
2155
2156 //refer to register_mips.hpp:IC_Klass
2157 const Register ic_reg = T1;
2158 const Register receiver = T0;
2159 Label hit;
2160 Label exception_pending;
2161
2162 __ verify_oop(receiver);
2163 //__ lw(AT, receiver, oopDesc::klass_offset_in_bytes());
2164 //add for compressedoops
2165 __ load_klass(AT, receiver);
2166 __ beq(AT, ic_reg, hit);
2167 __ delayed()->nop();
2168 __ jmp(ic_miss, relocInfo::runtime_call_type);
2169 __ delayed()->nop();
2170 // verified entry must be aligned for code patching.
2171 // and the first 5 bytes must be in the same cache line
2172 // if we align at 8 then we will be sure 5 bytes are in the same line
2173 __ align(8);
2174
2175 __ bind(hit);
2176
2177
2178 int vep_offset = ((intptr_t)__ pc()) - start;
2179 #ifdef COMPILER1
2180 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
2181 // Object.hashCode can pull the hashCode from the header word
2182 // instead of doing a full VM transition once it's been computed.
2183 // Since hashCode is usually polymorphic at call sites we can't do
2184 // this optimization at the call site without a lot of work.
2185 Label slowCase;
2186 Register receiver = T0;
2187 Register result = V0;
2188 __ ld ( result, receiver, oopDesc::mark_offset_in_bytes());
2189 // check if locked
2190 __ andi(AT, result, markOopDesc::unlocked_value);
2191 __ beq(AT, R0, slowCase);
2192 __ delayed()->nop();
2193 if (UseBiasedLocking) {
2194 // Check if biased and fall through to runtime if so
2195 __ andi (AT, result, markOopDesc::biased_lock_bit_in_place);
2196 __ bne(AT,R0, slowCase);
2197 __ delayed()->nop();
2198 }
2199 // get hash
2200 __ li(AT, markOopDesc::hash_mask_in_place);
2201 __ andr (AT, result, AT);
2202 // test if hashCode exists
2203 __ beq (AT, R0, slowCase);
2204 __ delayed()->nop();
2205 __ shr(result, markOopDesc::hash_shift);
2206 __ jr(RA);
2207 __ delayed()->nop();
2208 __ bind (slowCase);
2209 }
2210 #endif // COMPILER1
2211
2212 // The instruction at the verified entry point must be 5 bytes or longer
2213 // because it can be patched on the fly by make_non_entrant. The stack bang
2214 // instruction fits that requirement.
2215
2216 // Generate stack overflow check
2217
2218 if (UseStackBanging) {
2219 //this function will modify the value in A0
2220 __ push(A0);
2221 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
2222 __ pop(A0);
2223 } else {
2224 // need a 5 byte instruction to allow MT safe patching to non-entrant
2225 __ nop();
2226 __ nop();
2227 __ nop();
2228 __ nop();
2229 __ nop();
2230 }
2231 // Generate a new frame for the wrapper.
2232 // does mips need this?
2233 #ifndef OPT_THREAD
2234 __ get_thread(TREG);
2235 #endif
2236 //FIXME here
2237 __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
2238 // -2 because return address is already present and so is saved ebp
2239 __ move(AT, -(StackAlignmentInBytes));
2240 __ andr(SP, SP, AT);
2241
2242 __ enter();
2243 __ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
2244
2245 // Frame is now completed as far a size and linkage.
2246
2247 int frame_complete = ((intptr_t)__ pc()) - start;
2248
2249 // Calculate the difference between esp and ebp. We need to know it
2250 // after the native call because on windows Java Natives will pop
2251 // the arguments and it is painful to do esp relative addressing
2252 // in a platform independent way. So after the call we switch to
2253 // ebp relative addressing.
2254 //FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change
2255 //the SP
2256 int fp_adjustment = stack_size - 2*wordSize;
2257
2258 #ifdef COMPILER2
2259 // C2 may leave the stack dirty if not in SSE2+ mode
2260 // if (UseSSE >= 2) {
2261 // __ verify_FPU(0, "c2i transition should have clean FPU stack");
2262 //} else {
2263 __ empty_FPU_stack();
2264 //}
2265 #endif /* COMPILER2 */
2266
2267 // Compute the ebp offset for any slots used after the jni call
2268
2269 int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
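// lock_slot_ebp_offset is an FP-relative offset (negative: the lock box
// lives inside the frame, below FP); it is used by both the locking and
// unlocking code after the native call.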
2270 // We use edi as a thread pointer because it is callee save and
2271 // if we load it once it is usable thru the entire wrapper
2272 // const Register thread = edi;
2273 const Register thread = TREG;
2274
2275 // We use esi as the oop handle for the receiver/klass
2276 // It is callee save so it survives the call to native
2277
2278 // const Register oop_handle_reg = esi;
2279 const Register oop_handle_reg = S4;
2280 if (is_critical_native) {
2281 __ stop("generate_native_wrapper in sharedRuntime <2>");
2282 //TODO:Fu
2283 /*
2284 check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
2285 oop_handle_offset, oop_maps, in_regs, in_sig_bt);
2286 */
2287 }
2288
2289 #ifndef OPT_THREAD
2290 __ get_thread(thread);
2291 #endif
2292
2293 //
2294 // We immediately shuffle the arguments so that for any vm call we have to
2295 // make from here on out (sync slow path, jvmpi, etc.) we will have
2296 // captured the oops from our caller and have a valid oopMap for
2297 // them.
2298
2299 // -----------------
2300 // The Grand Shuffle
2301 //
2302 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2303 // and, if static, the class mirror instead of a receiver. This pretty much
2304 // guarantees that the register layout will not match: the extra arguments
2305 // shift every Java argument by one or two positions in the native
2306 // convention, so the moves below can be reg->reg, reg->stack,
2307 // stack->reg or stack->stack.
2308 // We ignore the extra arguments during the shuffle and handle them at the
2309 // last moment. The shuffle is described by the two calling convention
2310 // vectors we have in our possession. We simply walk the java vector to
2311 // get the source locations and the c vector to get the destinations.
2312
2313 int c_arg = method->is_static() ? 2 : 1 ;
2314
2315 // Record esp-based slot for receiver on stack for non-static methods
2316 int receiver_offset = -1;
2317
2318 // This is a trick. We double the stack slots so we can claim
2319 // the oops in the caller's frame. Since we are sure to have
2320 // more args than the caller doubling is enough to make
2321 // sure we can capture all the incoming oop args from the
2322 // caller.
2323 //
2324 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2325
2326 // Mark location of rbp (someday)
2327 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
2328
2329 // Use eax, ebx as temporaries during any memory-memory moves we have to do
2330 // All inbound args are referenced based on rbp and all outbound args via rsp.
2331
2332
2333
2334 #ifdef ASSERT
2335 bool reg_destroyed[RegisterImpl::number_of_registers];
2336 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2337 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
2338 reg_destroyed[r] = false;
2339 }
2340 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
2341 freg_destroyed[f] = false;
2342 }
2343
2344 #endif /* ASSERT */
2345
2346 // Scratch registers are free to use as temporaries if we have to do
2347 // stack to stack moves.
2348 // All inbound args are referenced based on FP and all outbound args via SP.
2349
2350 // This may iterate in two different directions depending on the
2351 // kind of native it is. The reason is that for regular JNI natives
2352 // the incoming and outgoing registers are offset upwards and for
2353 // critical natives they are offset down.
2354 GrowableArray<int> arg_order(2 * total_in_args);
2355 VMRegPair tmp_vmreg;
2356 // tmp_vmreg.set1(rbx->as_VMReg());
2357 tmp_vmreg.set1(T8->as_VMReg());
2358
2359 if (!is_critical_native) {
2360 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
2361 arg_order.push(i);
2362 arg_order.push(c_arg);
2363 }
2364 } else {
2365 // Compute a valid move order, using tmp_vmreg to break any cycles
2366 __ stop("generate_native_wrapper in sharedRuntime <2>");
2367 //TODO:Fu
2368 // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
2369 }
2370
2371 int temploc = -1;
2372 for (int ai = 0; ai < arg_order.length(); ai += 2) {
2373 int i = arg_order.at(ai);
2374 int c_arg = arg_order.at(ai + 1);
2375 __ block_comment(err_msg("move %d -> %d", i, c_arg));
2376 if (c_arg == -1) {
2377 assert(is_critical_native, "should only be required for critical natives");
2378 // This arg needs to be moved to a temporary
2379 __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
2380 in_regs[i] = tmp_vmreg;
2381 temploc = i;
2382 continue;
2383 } else if (i == -1) {
2384 assert(is_critical_native, "should only be required for critical natives");
2385 // Read from the temporary location
2386 assert(temploc != -1, "must be valid");
2387 i = temploc;
2388 temploc = -1;
2389 }
2390 #ifdef ASSERT
2391 if (in_regs[i].first()->is_Register()) {
2392 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
2393 } else if (in_regs[i].first()->is_FloatRegister()) {
2394 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
2395 }
2396 if (out_regs[c_arg].first()->is_Register()) {
2397 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2398 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
2399 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
2400 }
2401 #endif /* ASSERT */
2402 switch (in_sig_bt[i]) {
2403 case T_ARRAY:
2404 if (is_critical_native) {
2405 __ stop("generate_native_wrapper in sharedRuntime <2>");
2406 //TODO:Fu
2407 // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
2408 c_arg++;
2409 #ifdef ASSERT
2410 if (out_regs[c_arg].first()->is_Register()) {
2411 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
2412 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
2413 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
2414 }
2415 #endif
2416 break;
2417 }
2418 case T_OBJECT:
2419 assert(!is_critical_native, "no oop arguments");
2420 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
2421 ((i == 0) && (!is_static)),
2422 &receiver_offset);
2423 break;
2424 case T_VOID:
2425 break;
2426
2427 case T_FLOAT:
2428 float_move(masm, in_regs[i], out_regs[c_arg]);
2429 break;
2430
2431 case T_DOUBLE:
2432 assert( i + 1 < total_in_args &&
2433 in_sig_bt[i + 1] == T_VOID &&
2434 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
2435 double_move(masm, in_regs[i], out_regs[c_arg]);
2436 break;
2437
2438 case T_LONG :
2439 long_move(masm, in_regs[i], out_regs[c_arg]);
2440 break;
2441
2442 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2443
2444 default:
2445 // move32_64(masm, in_regs[i], out_regs[c_arg]);
2446 simple_move32(masm, in_regs[i], out_regs[c_arg]);
2447 }
2448 }
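// At this point every Java argument has been moved to its native location
// and the OopMap records each oop that ended up on the stack.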
2449
2450 // point c_arg at the first arg that is already loaded in case we
2451 // need to spill before we call out
2452 c_arg = total_c_args - total_in_args;
2453 // Pre-load a static method's oop into esi. Used both by locking code and
2454 // the normal JNI call code.
2455
2456 __ move(oop_handle_reg, A1);
2457
2458 if (method->is_static() && !is_critical_native) {
2459
2460 // load oop into a register
2461 int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
2462 (method->method_holder())->java_mirror()));
2463
2464
2465 RelocationHolder rspec = oop_Relocation::spec(oop_index);
2466 __ relocate(rspec);
2467 //__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local(
2468 // Klass::cast(method->method_holder())->java_mirror())));
2469 //__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int)
2470 // JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())));
2471 __ li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
2472 // __ verify_oop(oop_handle_reg);
2473 // Now handlize the static class mirror it's known not-null.
2474 __ sd( oop_handle_reg, SP, klass_offset);
2475 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2476
2477 // Now get the handle
2478 __ lea(oop_handle_reg, Address(SP, klass_offset));
2479 // store the klass handle as second argument
2480 __ move(A1, oop_handle_reg);
2481 // and protect the arg if we must spill
2482 c_arg--;
2483 }
2484 // Change state to native (we save the return address in the thread, since it might not
2485 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
2486 // points into the right code segment. It does not have to be the correct return pc.
2487 // We use the same pc/oopMap repeatedly when we call out
2488
2489 intptr_t the_pc = (intptr_t) __ pc();
2490
2491 oop_maps->add_gc_map(the_pc - start, map);
2492
2493 //__ set_last_Java_frame(thread, esp, noreg, (address)the_pc);
2494 __ set_last_Java_frame(SP, noreg, NULL);
2495 __ relocate(relocInfo::internal_pc_type);
2496 {
2497 intptr_t save_pc = (intptr_t)the_pc ;
2498 __ li48(AT, save_pc);
2499 }
2500 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
2501
2502
2503 // We have all of the arguments set up at this point. We must not touch any
2504 // of the argument registers from here on (if we saved/restored them, no oop map would describe them).
2505 {
2506 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2507 int metadata_index = __ oop_recorder()->find_index(method());
2508 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
2509 __ relocate(rspec);
2510 //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
2511 //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
2512 __ li48(AT, (long)(method()));
2513
2514 __ call_VM_leaf(
2515 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2516 thread, AT);
2517
2518 }
2519
2520 // These are register definitions we need for locking/unlocking
2521 // const Register swap_reg = eax; // Must use eax for cmpxchg instruction
2522 // const Register obj_reg = ecx; // Will contain the oop
2523 // const Register lock_reg = edx; // Address of compiler lock object (BasicLock)
2524 // FIXME, I have no idea which register to use
2525 const Register swap_reg = T8; // Must use eax for cmpxchg instruction
2526 const Register obj_reg = T9; // Will contain the oop
2527 //const Register lock_reg = T6; // Address of compiler lock object (BasicLock)
2528 const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock)
2529
2530
2531
2532 Label slow_path_lock;
2533 Label lock_done;
2534
2535 // Lock a synchronized method
2536 if (method->is_synchronized()) {
2537 assert(!is_critical_native, "unhandled");
2538
2539 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
2540
2541 // Get the handle (the 2nd argument)
2542 __ move(oop_handle_reg, A1);
2543
2544 // Get address of the box
2545 __ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
2546
2547 // Load the oop from the handle
2548 __ ld(obj_reg, oop_handle_reg, 0);
2549
2550 if (UseBiasedLocking) {
2551 // Note that oop_handle_reg is trashed during this call
2552 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1,
2553 false, lock_done, &slow_path_lock);
2554 }
2555
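// Fast-path lock: build an unlocked mark word (mark | 1), store it into the
// on-stack lock box as the displaced header, then try to CAS the box
// address into the object header; if the CAS succeeds we own the lock.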
2556 // Load immediate 1 into swap_reg %eax
2557 __ move(swap_reg, 1);
2558
2559 __ ld(AT, obj_reg, 0);
2560 __ orr(swap_reg, swap_reg, AT);
2561
2562 __ sd( swap_reg, lock_reg, mark_word_offset);
2563 __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
2564 __ bne(AT, R0, lock_done);
2565 __ delayed()->nop();
2566 // Test if the oopMark is an obvious stack pointer, i.e.,
2567 // 1) (mark & 3) == 0, and
2568 // 2) esp <= mark < mark + os::pagesize()
2569 // These 3 tests can be done by evaluating the following
2570 // expression: ((mark - esp) & (3 - os::vm_page_size())),
2571 // assuming both stack pointer and pagesize have their
2572 // least significant 2 bits clear.
2573 // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
2574
2575 __ dsub(swap_reg, swap_reg,SP);
2576 __ move(AT, 3 - os::vm_page_size());
2577 __ andr(swap_reg , swap_reg, AT);
2578 // Save the test result, for recursive case, the result is zero
2579 __ sd(swap_reg, lock_reg, mark_word_offset);
2580 //FIXME here, Why notEqual?
2581 __ bne(swap_reg,R0, slow_path_lock);
2582 __ delayed()->nop();
2583 // Slow path will re-enter here
2584 __ bind(lock_done);
2585
2586 if (UseBiasedLocking) {
2587 // Re-fetch oop_handle_reg as we trashed it above
2588 __ move(A1, oop_handle_reg);
2589 }
2590 }
2591
2592
2593 // Finally just about ready to make the JNI call
2594
2595
2596 // get JNIEnv* which is first argument to native
2597 if (!is_critical_native) {
2598 __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
2599 }
2600
2601 // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
2602 /* Load the second arguments into A1 */
2603 //__ ld(A1, SP , wordSize ); // klass
2604
2605 // Now set thread in native
2606 __ addi(AT, R0, _thread_in_native);
2607 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2608 /* Jin: do the call */
2609 __ call(method->native_function(), relocInfo::runtime_call_type);
2610 __ delayed()->nop();
2611 // WARNING - on Windows Java Natives use pascal calling convention and pop the
2612 // arguments off of the stack. We could just re-adjust the stack pointer here
2613 // and continue to do SP relative addressing but we instead switch to FP
2614 // relative addressing.
2615
2616 // Unpack native results.
2617 switch (ret_type) {
2618 case T_BOOLEAN: __ c2bool(V0); break;
2619 case T_CHAR : __ andi(V0,V0, 0xFFFF); break;
2620 case T_BYTE : __ sign_extend_byte (V0); break;
2621 case T_SHORT : __ sign_extend_short(V0); break;
2622 case T_INT : // nothing to do break;
2623 case T_DOUBLE :
2624 case T_FLOAT :
2625 // Result is in st0 we'll save as needed
2626 break;
2627 case T_ARRAY: // Really a handle
2628 case T_OBJECT: // Really a handle
2629 break; // can't de-handlize until after safepoint check
2630 case T_VOID: break;
2631 case T_LONG: break;
2632 default : ShouldNotReachHere();
2633 }
2634 // Switch thread to "native transition" state before reading the synchronization state.
2635 // This additional state is necessary because reading and testing the synchronization
2636 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2637 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2638 // VM thread changes sync state to synchronizing and suspends threads for GC.
2639 // Thread A is resumed to finish this native method, but doesn't block here since it
2640 // didn't see any synchronization in progress, and escapes.
2641 // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2642 //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset());
2643 // __ move(AT, (int)_thread_in_native_trans);
2644 __ addi(AT, R0, _thread_in_native_trans);
2645 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2646
2647 Label after_transition;
2648
2649 // check for safepoint operation in progress and/or pending suspend requests
2650 { Label Continue;
2651 // FIXME here, which register should we use?
2652 // SafepointSynchronize::_not_synchronized);
2653 __ li(AT, SafepointSynchronize::address_of_state());
2654 __ lw(A0, AT, 0);
2655 __ addi(AT, A0, -SafepointSynchronize::_not_synchronized);
2656 Label L;
2657 __ bne(AT,R0, L);
2658 __ delayed()->nop();
2659 __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset()));
2660 __ beq(AT, R0, Continue);
2661 __ delayed()->nop();
2662 __ bind(L);
2663
2664 // Don't use call_VM as it will see a possible pending exception and forward it
2665 // and never return here preventing us from clearing _last_native_pc down below.
2666 // Also can't use call_VM_leaf either as it will check to see if esi & edi are
2667 // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2668 // by hand.
2669 //
2670 save_native_result(masm, ret_type, stack_slots);
2671 __ move (A0, thread);
2672 __ addi(SP,SP, -wordSize);
2673 if (!is_critical_native) {
2674 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
2675 __ delayed()->nop();
2676 } else {
2677 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
2678 __ delayed()->nop();
2679 }
2680 // __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
2681 // __ delayed()->nop();
2682 __ addi(SP,SP, wordSize);
2683 //add for compressedoops
2684 __ reinit_heapbase();
2685 // Restore any method result value
2686 restore_native_result(masm, ret_type, stack_slots);
2687
2688 if (is_critical_native) {
2689 // The call above performed the transition to thread_in_Java so
2690 // skip the transition logic below.
2691 __ beq(R0, R0, after_transition);
2692 __ delayed()->nop();
2693 }
2694
2695 __ bind(Continue);
2696 }
2697
2698 // change thread state
2699 __ addi(AT, R0, _thread_in_Java);
2700 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
2701 __ bind(after_transition);
2702 Label reguard;
2703 Label reguard_done;
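// If the stack yellow zone was disabled while we were in native code
// (i.e. a stack overflow was detected there), take the out-of-line path
// that re-enables the guard pages before returning to Java.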
2704 __ ld(AT, thread, in_bytes(JavaThread::stack_guard_state_offset()));
2705 __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled);
2706 __ beq(AT, R0, reguard);
2707 __ delayed()->nop();
2708 // slow path reguard re-enters here
2709 __ bind(reguard_done);
2710
2711 // Handle possible exception (will unlock if necessary)
2712
2713 // native result if any is live
2714
2715 // Unlock
2716 Label slow_path_unlock;
2717 Label unlock_done;
2718 if (method->is_synchronized()) {
2719
2720 Label done;
2721
2722 // Get locked oop from the handle we passed to jni
2723 __ ld( obj_reg, oop_handle_reg, 0);
2724 //FIXME
2725 if (UseBiasedLocking) {
2726 __ biased_locking_exit(obj_reg, T8, done);
2727
2728 }
2729
2730 // Simple recursive lock?
2731
2732 __ ld(AT, FP, lock_slot_ebp_offset);
2733 __ beq(AT, R0, done);
2734 __ delayed()->nop();
2735 // Must save eax if it is live now because cmpxchg must use it
2736 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2737 save_native_result(masm, ret_type, stack_slots);
2738 }
2739
2740 // get old displaced header
2741 __ ld (T8, FP, lock_slot_ebp_offset);
2742 // get address of the stack lock
2743 //FIXME aoqi
2744 //__ addi (T6, FP, lock_slot_ebp_offset);
2745 __ addi (c_rarg0, FP, lock_slot_ebp_offset);
2746 // Atomic swap old header if oop still contains the stack lock
2747 //FIXME aoqi
2748 //__ cmpxchg(T8, Address(obj_reg, 0),T6 );
2749 __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
2750
2751 __ beq(AT, R0, slow_path_unlock);
2752 __ delayed()->nop();
2753 // slow path re-enters here
2754 __ bind(unlock_done);
2755 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2756 restore_native_result(masm, ret_type, stack_slots);
2757 }
2758
2759 __ bind(done);
2760
2761 }
2762 {
2763 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2764 // Tell dtrace about this method exit
2765 save_native_result(masm, ret_type, stack_slots);
2766 int metadata_index = __ oop_recorder()->find_index( (method()));
2767 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
2768 __ relocate(rspec);
2769 //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
2770 //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
2771 __ li48(AT, (long)(method()));
2772
2773 __ call_VM_leaf(
2774 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2775 thread, AT);
2776 restore_native_result(masm, ret_type, stack_slots);
2777 }
2778
2779 // We can finally stop using that last_Java_frame we setup ages ago
2780
2781 __ reset_last_Java_frame(false, true);
2782
2783 // Unpack oop result
2784 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2785 Label L;
2786 // __ cmpl(eax, NULL_WORD);
2787 // __ jcc(Assembler::equal, L);
2788 __ beq(V0, R0,L );
2789 __ delayed()->nop();
2790 // __ movl(eax, Address(eax));
2791 __ ld(V0, V0, 0);
2792 __ bind(L);
2793 // __ verify_oop(eax);
2794 __ verify_oop(V0);
2795 }
2796
2797 if (!is_critical_native) {
2798 // reset handle block
2799 __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
2800 __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes());
2801 }
2802
2803 if (!is_critical_native) {
2804 // Any exception pending?
2805 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2806
2807 __ bne(AT, R0, exception_pending);
2808 __ delayed()->nop();
2809 }
2810 // no exception, we're almost done
2811
2812 // check that only result value is on FPU stack
2813 __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
2814
2815 // Fixup floating point results so that result looks like a return from a compiled method
2816 /* if (ret_type == T_FLOAT) {
2817 if (UseSSE >= 1) {
2818 // Pop st0 and store as float and reload into xmm register
2819 __ fstp_s(Address(ebp, -4));
2820 __ movss(xmm0, Address(ebp, -4));
2821 }
2822 } else if (ret_type == T_DOUBLE) {
2823 if (UseSSE >= 2) {
2824 // Pop st0 and store as double and reload into xmm register
2825 __ fstp_d(Address(ebp, -8));
2826 __ movsd(xmm0, Address(ebp, -8));
2827 }
2828 }
2829 */
2830 // Return
2831 #ifndef OPT_THREAD
2832 __ get_thread(TREG);
2833 #endif
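// Restore the SP value the prologue stashed in the thread (taken before the
// stack was aligned), then tear down the frame and return.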
2834 __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
2835 __ leave();
2836
2837 __ jr(RA);
2838 __ delayed()->nop();
2839 // Unexpected paths are out of line and go here
2840 /*
2841 if (!is_critical_native) {
2842 // forward the exception
2843 __ bind(exception_pending);
2844
2845 // and forward the exception
2846 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2847 }
2848 */
2849 // Slow path locking & unlocking
2850 if (method->is_synchronized()) {
2851
2852 // BEGIN Slow path lock
2853
2854 __ bind(slow_path_lock);
2855
2856 // protect the args we've loaded
2857 save_args(masm, total_c_args, c_arg, out_regs);
2858
2859 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2860 // args are (oop obj, BasicLock* lock, JavaThread* thread)
2861
2862 __ move(A0, obj_reg);
2863 __ move(A1, lock_reg);
2864 __ move(A2, thread);
2865 __ addi(SP, SP, - 3*wordSize);
2866
2867 __ move(AT, -(StackAlignmentInBytes));
2868 __ move(S2, SP); // use S2 as a sender SP holder
2869 __ andr(SP, SP, AT); // align stack as required by ABI
2870
2871 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
2872 __ delayed()->nop();
2873 __ move(SP, S2);
2874 __ addi(SP, SP, 3*wordSize);
2875
2876 restore_args(masm, total_c_args, c_arg, out_regs);
2877
2878 #ifdef ASSERT
2879 { Label L;
2880 // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
2881 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2882 //__ jcc(Assembler::equal, L);
2883 __ beq(AT, R0, L);
2884 __ delayed()->nop();
2885 __ stop("no pending exception allowed on exit from monitorenter");
2886 __ bind(L);
2887 }
2888 #endif
2889 __ b(lock_done);
2890 __ delayed()->nop();
2891 // END Slow path lock
2892
2893 // BEGIN Slow path unlock
2894 __ bind(slow_path_unlock);
2895
2896 // Slow path unlock
2897
2898 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2899 save_native_result(masm, ret_type, stack_slots);
2900 }
2901 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2902
2903 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
2904 __ push(AT);
2905 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
2906
2907 __ move(AT, -(StackAlignmentInBytes));
2908 __ move(S2, SP); // use S2 as a sender SP holder
2909 __ andr(SP, SP, AT); // align stack as required by ABI
2910
2911 // should be a peal
2912 // +wordSize because of the push above
2913 __ addi(A1, FP, lock_slot_ebp_offset);
2914
2915 __ move(A0, obj_reg);
2916 __ addi(SP,SP, -2*wordSize);
2917 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
2918 relocInfo::runtime_call_type);
2919 __ delayed()->nop();
2920 __ addi(SP,SP, 2*wordSize);
2921 __ move(SP, S2);
2922 //add for compressedoops
2923 __ reinit_heapbase();
2924 #ifdef ASSERT
2925 {
2926 Label L;
2927 // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2928 __ lw( AT, thread, in_bytes(Thread::pending_exception_offset()));
2929 //__ jcc(Assembler::equal, L);
2930 __ beq(AT, R0, L);
2931 __ delayed()->nop();
2932 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2933 __ bind(L);
2934 }
2935 #endif /* ASSERT */
2936
2937 __ pop(AT);
2938 __ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
2939 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2940 restore_native_result(masm, ret_type, stack_slots);
2941 }
2942 __ b(unlock_done);
2943 __ delayed()->nop();
2944 // END Slow path unlock
2945
2946 }
2947
2948 // SLOW PATH Reguard the stack if needed
2949
2950 __ bind(reguard);
2951 save_native_result(masm, ret_type, stack_slots);
2952 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages),
2953 relocInfo::runtime_call_type);
2954 __ delayed()->nop();
2955 //add for compressedoops
2956 __ reinit_heapbase();
2957 restore_native_result(masm, ret_type, stack_slots);
2958 __ b(reguard_done);
2959 __ delayed()->nop();
2960
2961 // BEGIN EXCEPTION PROCESSING
2962 if (!is_critical_native) {
2963 // Forward the exception
2964 __ bind(exception_pending);
2965
2966 // remove possible return value from FPU register stack
2967 __ empty_FPU_stack();
2968
2969 // pop our frame
2970 // forward_exception_entry needs the return address on the stack
2971 __ addiu(SP, FP, wordSize);
2972 __ ld(FP, SP, (-1) * wordSize);
2973
2974 // and forward the exception
2975 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
2976 __ delayed()->nop();
2977 }
2978 __ flush();
2979
2980 nmethod *nm = nmethod::new_native_nmethod(method,
2981 compile_id,
2982 masm->code(),
2983 vep_offset,
2984 frame_complete,
2985 stack_slots / VMRegImpl::slots_per_word,
2986 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2987 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2988 oop_maps);
2989
2990 if (is_critical_native) {
2991 nm->set_lazy_critical_native(true);
2992 }
2993 return nm;
2994
2995
2996 }
2997
2998 #ifdef HAVE_DTRACE_H
2999 // ---------------------------------------------------------------------------
3000 // Generate a dtrace nmethod for a given signature. The method takes arguments
3001 // in the Java compiled code convention, marshals them to the native
3002 // abi and then leaves nops at the position you would expect to call a native
3003 // function. When the probe is enabled the nops are replaced with a trap
3004 // instruction that dtrace inserts and the trace will cause a notification
3005 // to dtrace.
3006 //
3007 // The probes are only able to take primitive types and java/lang/String as
3008 // arguments. No other java types are allowed. Strings are converted to utf8
3009 // strings so that from dtrace point of view java strings are converted to C
3010 // strings. There is an arbitrary fixed limit on the total space that a method
3011 // can use for converting the strings. (256 chars per string in the signature).
3012 // So any java string larger than this is truncated.
3013
3014 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
3015 static bool offsets_initialized = false;
3016
3017 static VMRegPair reg64_to_VMRegPair(Register r) {
3018 VMRegPair ret;
3019 if (wordSize == 8) {
3020 ret.set2(r->as_VMReg());
3021 } else {
3022 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
3023 }
3024 return ret;
3025 }
3026
3027
3028 nmethod *SharedRuntime::generate_dtrace_nmethod(
3029 MacroAssembler *masm, methodHandle method) {
3030
3031
3032 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
3033 // be single threaded in this method.
3034 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
3035
3036 // Fill in the signature array, for the calling-convention call.
3037 int total_args_passed = method->size_of_parameters();
3038
3039 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
3040 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
3041
3042 // The signature we are going to use for the trap that dtrace will see
3043 // java/lang/String is converted. We drop "this" and any other object
3044 // is converted to NULL. (A one-slot java/lang/Long object reference
3045 // is converted to a two-slot long, which is why we double the allocation).
3046 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
3047 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
3048
3049 int i=0;
3050 int total_strings = 0;
3051 int first_arg_to_pass = 0;
3052 int total_c_args = 0;
3053
3054 // Skip the receiver as dtrace doesn't want to see it
3055 if( !method->is_static() ) {
3056 in_sig_bt[i++] = T_OBJECT;
3057 first_arg_to_pass = 1;
3058 }
3059
3060 SignatureStream ss(method->signature());
3061 for ( ; !ss.at_return_type(); ss.next()) {
3062 BasicType bt = ss.type();
3063 in_sig_bt[i++] = bt; // Collect remaining bits of signature
3064 out_sig_bt[total_c_args++] = bt;
3065 if( bt == T_OBJECT) {
3066 symbolOop s = ss.as_symbol_or_null();
3067 if (s == vmSymbols::java_lang_String()) {
3068 total_strings++;
3069 out_sig_bt[total_c_args-1] = T_ADDRESS;
3070 } else if (s == vmSymbols::java_lang_Boolean() ||
3071 s == vmSymbols::java_lang_Byte()) {
3072 out_sig_bt[total_c_args-1] = T_BYTE;
3073 } else if (s == vmSymbols::java_lang_Character() ||
3074 s == vmSymbols::java_lang_Short()) {
3075 out_sig_bt[total_c_args-1] = T_SHORT;
3076 } else if (s == vmSymbols::java_lang_Integer() ||
3077 s == vmSymbols::java_lang_Float()) {
3078 out_sig_bt[total_c_args-1] = T_INT;
3079 } else if (s == vmSymbols::java_lang_Long() ||
3080 s == vmSymbols::java_lang_Double()) {
3081 out_sig_bt[total_c_args-1] = T_LONG;
3082 out_sig_bt[total_c_args++] = T_VOID;
3083 }
3084 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
3085 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
3086 // We convert double to long
3087 out_sig_bt[total_c_args-1] = T_LONG;
3088 out_sig_bt[total_c_args++] = T_VOID;
3089 } else if ( bt == T_FLOAT) {
3090 // We convert float to int
3091 out_sig_bt[total_c_args-1] = T_INT;
3092 }
3093 }
3094
3095 assert(i==total_args_passed, "validly parsed signature");
3096
3097 // Now get the compiled-Java layout as input arguments
3098 int comp_args_on_stack;
3099 comp_args_on_stack = SharedRuntime::java_calling_convention(
3100 in_sig_bt, in_regs, total_args_passed, false);
3101
3102 // We have received a description of where all the java args are located
3103 // on entry to the wrapper. We need to convert these args to where
3104 // a native (non-jni) function would expect them. To figure out
3105 // where they go we convert the java signature to a C signature and remove
3106 // T_VOID for any long/double we might have received.
3107
3108
3109 // Now figure out where the args must be stored and how much stack space
3110 // they require (neglecting out_preserve_stack_slots but including space for
3111 // storing the 1st six register arguments). It's weird; see int_stk_helper.
3112 //
3113 int out_arg_slots;
3114 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
3115
3116 // Calculate the total number of stack slots we will need.
3117
3118 // First count the abi requirement plus all of the outgoing args
3119 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
3120
3121 // Plus a temp for possible conversion of float/double/long register args
3122
3123 int conversion_temp = stack_slots;
3124 stack_slots += 2;
3125
3126
3127 // Now space for the string(s) we must convert
3128
3129 int string_locs = stack_slots;
3130 stack_slots += total_strings *
3131 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
3132
3133 // OK, the space we have allocated will look like:
3134 //
3135 //
3136 // FP-> | |
3137 // |---------------------|
3138 // | string[n] |
3139 // |---------------------| <- string_locs[n]
3140 // | string[n-1] |
3141 // |---------------------| <- string_locs[n-1]
3142 // | ... |
3143 // | ... |
3144 // |---------------------| <- string_locs[1]
3145 // | string[0] |
3146 // |---------------------| <- string_locs[0]
3147 // | temp |
3148 // |---------------------| <- conversion_temp
3149 // | outbound memory |
3150 // | based arguments |
3151 // | |
3152 // |---------------------|
3153 // | |
3154 // SP-> | out_preserved_slots |
3155 //
3156 //
3157
3158 // Now compute the actual number of stack words we need, rounding to keep
3159 // the stack properly aligned.
3160 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
3161
3162 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
3163
3164 intptr_t start = (intptr_t)__ pc();
3165
3166 // First thing make an ic check to see if we should even be here
3167
3168 {
3169 Label L;
3170 const Register temp_reg = G3_scratch;
3171 Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
3172 __ verify_oop(O0);
3173 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
3174 __ cmp(temp_reg, G5_inline_cache_reg);
3175 __ brx(Assembler::equal, true, Assembler::pt, L);
3176 __ delayed()->nop();
3177
3178 __ jump_to(ic_miss, 0);
3179 __ delayed()->nop();
3180 __ align(CodeEntryAlignment);
3181 __ bind(L);
3182 }
3183
3184 int vep_offset = ((intptr_t)__ pc()) - start;
3185
3186
3187 // The instruction at the verified entry point must be 5 bytes or longer
3188 // because it can be patched on the fly by make_non_entrant. The stack bang
3189 // instruction fits that requirement.
3190
3191 // Generate stack overflow check before creating frame
3192 __ generate_stack_overflow_check(stack_size);
3193
3194 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
3195 "valid size for make_non_entrant");
3196
3197 // Generate a new frame for the wrapper.
3198 __ save(SP, -stack_size, SP);
3199
3200 // Frame is now completed as far as size and linkage.
3201
3202 int frame_complete = ((intptr_t)__ pc()) - start;
3203
3204 #ifdef ASSERT
3205 bool reg_destroyed[RegisterImpl::number_of_registers];
3206 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
3207 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
3208 reg_destroyed[r] = false;
3209 }
3210 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
3211 freg_destroyed[f] = false;
3212 }
3213
3214 #endif /* ASSERT */
3215
3216 VMRegPair zero;
3217 const Register g0 = G0; // without this we get a compiler warning (why??)
3218 zero.set2(g0->as_VMReg());
3219
3220 int c_arg, j_arg;
3221
3222 Register conversion_off = noreg;
3223
3224 for (j_arg = first_arg_to_pass, c_arg = 0 ;
3225 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
3226
3227 VMRegPair src = in_regs[j_arg];
3228 VMRegPair dst = out_regs[c_arg];
3229
3230 #ifdef ASSERT
3231 if (src.first()->is_Register()) {
3232 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
3233 } else if (src.first()->is_FloatRegister()) {
3234 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
3235 FloatRegisterImpl::S)], "ack!");
3236 }
3237 if (dst.first()->is_Register()) {
3238 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
3239 } else if (dst.first()->is_FloatRegister()) {
3240 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
3241 FloatRegisterImpl::S)] = true;
3242 }
3243 #endif /* ASSERT */
3244
3245 switch (in_sig_bt[j_arg]) {
3246 case T_ARRAY:
3247 case T_OBJECT:
3248 {
3249 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
3250 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
3251 // need to unbox a one-slot value
3252 Register in_reg = L0;
3253 Register tmp = L2;
3254 if ( src.first()->is_reg() ) {
3255 in_reg = src.first()->as_Register();
3256 } else {
3257 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
3258 "must be");
3259 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
3260 }
3261 // If the final destination is an acceptable register
3262 if ( dst.first()->is_reg() ) {
3263 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
3264 tmp = dst.first()->as_Register();
3265 }
3266 }
3267
3268 Label skipUnbox;
3269 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
3270 __ mov(G0, tmp->successor());
3271 }
3272 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
3273 __ delayed()->mov(G0, tmp);
3274
3275 BasicType bt = out_sig_bt[c_arg];
3276 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
3277 switch (bt) {
3278 case T_BYTE:
3279 __ ldub(in_reg, box_offset, tmp); break;
3280 case T_SHORT:
3281 __ lduh(in_reg, box_offset, tmp); break;
3282 case T_INT:
3283 __ ld(in_reg, box_offset, tmp); break;
3284 case T_LONG:
3285 __ ld_long(in_reg, box_offset, tmp); break;
3286 default: ShouldNotReachHere();
3287 }
3288
3289 __ bind(skipUnbox);
3290 // If tmp wasn't final destination copy to final destination
3291 if (tmp == L2) {
3292 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
3293 if (out_sig_bt[c_arg] == T_LONG) {
3294 long_move(masm, tmp_as_VM, dst);
3295 } else {
3296 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
3297 }
3298 }
3299 if (out_sig_bt[c_arg] == T_LONG) {
3300 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
3301 ++c_arg; // move over the T_VOID to keep the loop indices in sync
3302 }
3303 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
3304 Register s =
3305 src.first()->is_reg() ? src.first()->as_Register() : L2;
3306 Register d =
3307 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
3308
3309 // We store the oop now so that the conversion pass can reach it
3310 // while in the inner frame. This will be the only store if
3311 // the oop is NULL.
3312 if (s != L2) {
3313 // src is register
3314 if (d != L2) {
3315 // dst is register
3316 __ mov(s, d);
3317 } else {
3318 assert(Assembler::is_simm13(reg2offset(dst.first()) +
3319 STACK_BIAS), "must be");
3320 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
3321 }
3322 } else {
3323 // src not a register
3324 assert(Assembler::is_simm13(reg2offset(src.first()) +
3325 STACK_BIAS), "must be");
3326 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
3327 if (d == L2) {
3328 assert(Assembler::is_simm13(reg2offset(dst.first()) +
3329 STACK_BIAS), "must be");
3330 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
3331 }
3332 }
3333 } else if (out_sig_bt[c_arg] != T_VOID) {
3334 // Convert the arg to NULL
3335 if (dst.first()->is_reg()) {
3336 __ mov(G0, dst.first()->as_Register());
3337 } else {
3338 assert(Assembler::is_simm13(reg2offset(dst.first()) +
3339 STACK_BIAS), "must be");
3340 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
3341 }
3342 }
3343 }
3344 break;
3345 case T_VOID:
3346 break;
3347
3348 case T_FLOAT:
3349 if (src.first()->is_stack()) {
3350 // Stack to stack/reg is simple
3351 move32_64(masm, src, dst);
3352 } else {
3353 if (dst.first()->is_reg()) {
3354 // freg -> reg
3355 int off =
3356 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
3357 Register d = dst.first()->as_Register();
3358 if (Assembler::is_simm13(off)) {
3359 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
3360 SP, off);
3361 __ ld(SP, off, d);
3362 } else {
3363 if (conversion_off == noreg) {
3364 __ set(off, L6);
3365 conversion_off = L6;
3366 }
3367 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
3368 SP, conversion_off);
3369 __ ld(SP, conversion_off , d);
3370 }
3371 } else {
3372 // freg -> mem
3373 int off = STACK_BIAS + reg2offset(dst.first());
3374 if (Assembler::is_simm13(off)) {
3375 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
3376 SP, off);
3377 } else {
3378 if (conversion_off == noreg) {
3379 __ set(off, L6);
3380 conversion_off = L6;
3381 }
3382 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
3383 SP, conversion_off);
3384 }
3385 }
3386 }
3387 break;
3388
3389 case T_DOUBLE:
3390 assert( j_arg + 1 < total_args_passed &&
3391 in_sig_bt[j_arg + 1] == T_VOID &&
3392 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
3393 if (src.first()->is_stack()) {
3394 // Stack to stack/reg is simple
3395 long_move(masm, src, dst);
3396 } else {
3397 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
3398
3399 // Destination could be an odd reg on 32bit in which case
3400 // we can't load directly to the destination.
3401
3402 if (!d->is_even() && wordSize == 4) {
3403 d = L2;
3404 }
3405 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
3406 if (Assembler::is_simm13(off)) {
3407 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
3408 SP, off);
3409 __ ld_long(SP, off, d);
3410 } else {
3411 if (conversion_off == noreg) {
3412 __ set(off, L6);
3413 conversion_off = L6;
3414 }
3415 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
3416 SP, conversion_off);
3417 __ ld_long(SP, conversion_off, d);
3418 }
3419 if (d == L2) {
3420 long_move(masm, reg64_to_VMRegPair(L2), dst);
3421 }
3422 }
3423 break;
3424
3425 case T_LONG :
3426 // 32bit can't do a split move of something like g1 -> O0, O1
3427 // so use a memory temp
3428 if (src.is_single_phys_reg() && wordSize == 4) {
3429 Register tmp = L2;
3430 if (dst.first()->is_reg() &&
3431 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
3432 tmp = dst.first()->as_Register();
3433 }
3434
3435 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
3436 if (Assembler::is_simm13(off)) {
3437 __ stx(src.first()->as_Register(), SP, off);
3438 __ ld_long(SP, off, tmp);
3439 } else {
3440 if (conversion_off == noreg) {
3441 __ set(off, L6);
3442 conversion_off = L6;
3443 }
3444 __ stx(src.first()->as_Register(), SP, conversion_off);
3445 __ ld_long(SP, conversion_off, tmp);
3446 }
3447
3448 if (tmp == L2) {
3449 long_move(masm, reg64_to_VMRegPair(L2), dst);
3450 }
3451 } else {
3452 long_move(masm, src, dst);
3453 }
3454 break;
3455
3456 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
3457
3458 default:
3459 move32_64(masm, src, dst);
3460 }
3461 }
3462
3463
3464 // If we have any strings we must store any register-based arg to the stack.
3465 // This includes any still-live xmm registers too.
3466
3467 if (total_strings > 0 ) {
3468
3469 // protect all the arg registers
3470 __ save_frame(0);
3471 __ mov(G2_thread, L7_thread_cache);
3472 const Register L2_string_off = L2;
3473
3474 // Get first string offset
3475 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
3476
3477 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
3478 if (out_sig_bt[c_arg] == T_ADDRESS) {
3479
3480 VMRegPair dst = out_regs[c_arg];
3481 const Register d = dst.first()->is_reg() ?
3482 dst.first()->as_Register()->after_save() : noreg;
3483
3484 // It's a string; the oop was already copied to the out arg
3485 // position
3486 if (d != noreg) {
3487 __ mov(d, O0);
3488 } else {
3489 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
3490 "must be");
3491 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
3492 }
3493 Label skip;
3494
3495 __ br_null(O0, false, Assembler::pn, skip);
3496 __ delayed()->add(FP, L2_string_off, O1);
3497
3498 if (d != noreg) {
3499 __ mov(O1, d);
3500 } else {
3501 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
3502 "must be");
3503 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
3504 }
3505
3506 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
3507 relocInfo::runtime_call_type);
3508 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
3509
3510 __ bind(skip);
3511
3512 }
3513
3514 }
3515 __ mov(L7_thread_cache, G2_thread);
3516 __ restore();
3517
3518 }
3519
3520
3521 // Ok now we are done. Need to place the nop that dtrace wants in order to
3522 // patch in the trap
3523
3524 int patch_offset = ((intptr_t)__ pc()) - start;
3525
3526 __ nop();
3527
3528
3529 // Return
3530
3531 __ ret();
3532 __ delayed()->restore();
3533
3534 __ flush();
3535
3536 nmethod *nm = nmethod::new_dtrace_nmethod(
3537 method, masm->code(), vep_offset, patch_offset, frame_complete,
3538 stack_slots / VMRegImpl::slots_per_word);
3539 return nm;
3540
3541 }
3542
3543 #endif // HAVE_DTRACE_H
3544
3545 // this function returns the adjustment size (in number of words) to a c2i adapter
3546 // activation for use during deoptimization
3547 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
3548 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
3549 }
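// For example, a callee with 2 parameters and 5 locals needs
// (5 - 2) * Interpreter::stackElementWords extra words in its activation.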
3550
3551 // "Top of Stack" slots that may be unused by the calling convention but must
3552 // otherwise be preserved.
3553 // On Intel these are not necessary and the value can be zero.
3554 // On Sparc this describes the words reserved for storing a register window
3555 // when an interrupt occurs.
3556 uint SharedRuntime::out_preserve_stack_slots() {
3557 //return frame::register_save_words * VMRegImpl::slots_per_word;
3558 return 0;
3559 }
3560 /*
3561 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
3562 //
3563 // Common out the new frame generation for deopt and uncommon trap
3564 //
3565 Register G3pcs = G3_scratch; // Array of new pcs (input)
3566 Register Oreturn0 = O0;
3567 Register Oreturn1 = O1;
3568 Register O2UnrollBlock = O2;
3569 Register O3array = O3; // Array of frame sizes (input)
3570 Register O4array_size = O4; // number of frames (input)
3571 Register O7frame_size = O7; // number of frames (input)
3572
3573 __ ld_ptr(O3array, 0, O7frame_size);
3574 __ sub(G0, O7frame_size, O7frame_size);
3575 __ save(SP, O7frame_size, SP);
3576 __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
3577
3578 #ifdef ASSERT
3579 // make sure that the frames are aligned properly
3580 #ifndef _LP64
3581 __ btst(wordSize*2-1, SP);
3582 __ breakpoint_trap(Assembler::notZero);
3583 #endif
3584 #endif
3585
3586 // Deopt needs to pass some extra live values from frame to frame
3587
3588 if (deopt) {
3589 __ mov(Oreturn0->after_save(), Oreturn0);
3590 __ mov(Oreturn1->after_save(), Oreturn1);
3591 }
3592
3593 __ mov(O4array_size->after_save(), O4array_size);
3594 __ sub(O4array_size, 1, O4array_size);
3595 __ mov(O3array->after_save(), O3array);
3596 __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
3597 __ add(G3pcs, wordSize, G3pcs); // point to next pc value
3598
3599 #ifdef ASSERT
3600 // trash registers to show a clear pattern in backtraces
3601 __ set(0xDEAD0000, I0);
3602 __ add(I0, 2, I1);
3603 __ add(I0, 4, I2);
3604 __ add(I0, 6, I3);
3605 __ add(I0, 8, I4);
3606 // Don't touch I5 could have valuable savedSP
3607 __ set(0xDEADBEEF, L0);
3608 __ mov(L0, L1);
3609 __ mov(L0, L2);
3610 __ mov(L0, L3);
3611 __ mov(L0, L4);
3612 __ mov(L0, L5);
3613
3614 // trash the return value as there is nothing to return yet
3615 __ set(0xDEAD0001, O7);
3616 #endif
3617
3618 __ mov(SP, O5_savedSP);
3619 }
3620
3621
3622 static void make_new_frames(MacroAssembler* masm, bool deopt) {
3623 //
3624 // loop through the UnrollBlock info and create new frames
3625 //
3626 Register G3pcs = G3_scratch;
3627 Register Oreturn0 = O0;
3628 Register Oreturn1 = O1;
3629 Register O2UnrollBlock = O2;
3630 Register O3array = O3;
3631 Register O4array_size = O4;
3632 Label loop;
3633
3634 // Before we make new frames, check to see if stack is available.
3635 // Do this after the caller's return address is on top of stack
3636 if (UseStackBanging) {
3637 // Get total frame size for interpreted frames
3638 __ ld(Address(O2UnrollBlock, 0,
3639 Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
3640 __ bang_stack_size(O4, O3, G3_scratch);
3641 }
3642
3643 __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
3644 __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
3645
3646 __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
3647
3648 // Adjust old interpreter frame to make space for new frame's extra java locals
3649 //
3650 // We capture the original sp for the transition frame only because it is needed in
3651 // order to properly calculate interpreter_sp_adjustment. Even though in real life
3652 // every interpreter frame captures a savedSP it is only needed at the transition
3653 // (fortunately). If we had to have it correct everywhere then we would need to
3654 // be told the sp_adjustment for each frame we create. If the frame size array
3655 // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
3656 // for each frame we create and keep up the illusion every where.
3657 //
3658
3659 __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
3660 __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment
3661 __ sub(SP, O7, SP);
3662
3663 #ifdef ASSERT
3664 // make sure that there is at least one entry in the array
3665 __ tst(O4array_size);
3666 __ breakpoint_trap(Assembler::zero);
3667 #endif
3668
3669 // Now push the new interpreter frames
3670 __ bind(loop);
3671
3672 // allocate a new frame, filling the registers
3673
3674 gen_new_frame(masm, deopt); // allocate an interpreter frame
3675
3676 __ tst(O4array_size);
3677 __ br(Assembler::notZero, false, Assembler::pn, loop);
3678 __ delayed()->add(O3array, wordSize, O3array);
3679 __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
3680
3681 }
3682 */
3683
3684 //------------------------------generate_deopt_blob----------------------------
3685 // Ought to generate an ideal graph & compile, but here's some MIPS assembly
3686 // instead.
3687 void SharedRuntime::generate_deopt_blob() {
3688 // allocate space for the code
3689 ResourceMark rm;
3690 // setup code generation tools
3691 //CodeBuffer buffer ("deopt_blob", 4000, 2048);
3692 CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
3693 MacroAssembler* masm = new MacroAssembler( & buffer);
3694 int frame_size_in_words;
3695 OopMap* map = NULL;
3696 // Account for the extra args we place on the stack
3697 // by the time we call fetch_unroll_info
3698 const int additional_words = 2; // deopt kind, thread
3699
3700 OopMapSet *oop_maps = new OopMapSet();
3701
3702 address start = __ pc();
3703 Label cont;
3704 // we use S3 for DeOpt reason register
3705 Register reason = S3;
3706 // use S6 for thread register
3707 Register thread = TREG;
3708 // use S7 for fetch_unroll_info returned UnrollBlock
3709 Register unroll = S7;
3710 // Prolog for the non-exception case!
3711 // Correct the return address we were given.
3712 // FIXME: is the return address on the TOS or in RA?
3713 __ addi(RA, RA, - (NativeCall::return_address_offset));
3714 // Save everything in sight.
3715 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3716 // Normal deoptimization
3717 __ move(reason, Deoptimization::Unpack_deopt);
3718 __ b(cont);
3719 __ delayed()->nop();
3720
3721 int reexecute_offset = __ pc() - start;
3722
3723 // Reexecute case
3724 // return address is the pc that describes what bci to re-execute at
3725
3726 // No need to update map as each call to save_live_registers will produce identical oopmap
3727 //__ addi(RA, RA, - (NativeCall::return_address_offset));
3728 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3729 __ move(reason, Deoptimization::Unpack_reexecute);
3730 __ b(cont);
3731 __ delayed()->nop();
3732
3733 int exception_offset = __ pc() - start;
3734 // Prolog for exception case
3735
3736 // all registers are dead at this entry point, except for V0 and
3737 // V1 which contain the exception oop and exception pc
3738 // respectively. Set them in TLS and fall thru to the
3739 // unpack_with_exception_in_tls entry point.
3740
3741 __ get_thread(thread);
3742 __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3743 __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3744 int exception_in_tls_offset = __ pc() - start;
3745 // new implementation because exception oop is now passed in JavaThread
3746
3747 // Prolog for exception case
3748 // All registers must be preserved because they might be used by LinearScan
3749 // Exception oop and throwing PC are passed in JavaThread
3750 // tos: stack at point of call to method that threw the exception (i.e. only
3751 // args are on the stack, no return address)
3752
3753 // Return address will be patched later with the throwing pc. The correct value is not
3754 // available now because loading it from memory would destroy registers.
3755 // Save everything in sight.
3756 // No need to update map as each call to save_live_registers will produce identical oopmap
3757 __ addi(RA, RA, - (NativeCall::return_address_offset));
3758 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
3759
3760 // Now it is safe to overwrite any register
3761 // store the correct deoptimization type
3762 __ move(reason, Deoptimization::Unpack_exception);
3763 // load throwing pc from JavaThread and patch it as the return address
3764 // of the current frame. Then clear the field in JavaThread
3765 __ get_thread(thread);
3766 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3767 __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
3768 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3769
3770
3771 #ifdef ASSERT
3772 // verify that there is really an exception oop in JavaThread
3773 __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
3774 __ verify_oop(AT);
3775 // verify that there is no pending exception
3776 Label no_pending_exception;
3777 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
3778 __ beq(AT, R0, no_pending_exception);
3779 __ delayed()->nop();
3780 __ stop("must not have pending exception here");
3781 __ bind(no_pending_exception);
3782 #endif
3783 __ bind(cont);
3784 // Compiled code leaves the floating point stack dirty, empty it.
3785 __ empty_FPU_stack();
3786
3787
3788 // Call C code. Need thread and this frame, but NOT official VM entry
3789 // crud. We cannot block on this call, no GC can happen.
3790 #ifndef OPT_THREAD
3791 __ get_thread(thread);
3792 #endif
3793
3794 /*
3795 *
3796 0x000000555bd82aec: dadd a0, s6, zero ; __ move(A0, thread);
3797 0x000000555bd82af0: daddi sp, sp, 0xfffffff0 ; __ addi(SP, SP, -additional_words * wordSize);
3798 0x000000555bd82af4: sd sp, 0x1c8(s6) ; __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
3799 0x000000555bd82af8: lui at, 0x0 ; __ li64(AT, save_pc);
3800 0x000000555bd82afc: ori at, at, 0x55
3801 0x000000555bd82b00: dsll at, at, 16
3802 0x000000555bd82b04: ori at, at, 0x5bd8
3803 0x000000555bd82b08: dsll at, at, 16
3804 0x000000555bd82b0c: ori at, at, 0x2b34 ; save_pc = pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4
3805 0x000000555bd82b10: sd at, 0x1d0(s6)
3806 0x000000555bd82b14: lui t9, 0x0
3807 0x000000555bd82b18: ori t9, t9, 0x55
3808 0x000000555bd82b1c: dsll t9, t9, 16
3809 0x000000555bd82b20: ori t9, t9, 0x5aa6
3810 0x000000555bd82b24: dsll t9, t9, 16
3811 0x000000555bd82b28: ori t9, t9, 0x4074
3812 0x000000555bd82b2c: jalr t9
3813 0x000000555bd82b30: sll zero, zero, 0
3814
3815 0x000000555bd82b34: daddiu sp, sp, 0x10 ; save_pc
3816 */
3817 __ move(A0, thread);
3818 __ addi(SP, SP, -additional_words * wordSize);
3819
3820 __ set_last_Java_frame(NOREG, NOREG, NULL);
3821
3822 // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on
3823 // this call, no GC can happen. Call should capture return values.
3824
3825 __ relocate(relocInfo::internal_pc_type);
3826 {
3827 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
3828 __ li48(AT, save_pc);
3829 }
3830 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3831
3832 __ call((address)Deoptimization::fetch_unroll_info);
3833 //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
3834 __ delayed()->nop();
3835 oop_maps->add_gc_map(__ pc() - start, map);
3836 __ addiu(SP, SP, additional_words * wordSize);
3837 __ get_thread(thread);
3838 __ reset_last_Java_frame(false, true);
3839
3840 // Load UnrollBlock into S7
3841 __ move(unroll, V0);
3842
3843
3844 // Move the unpack kind to a safe place in the UnrollBlock because
3845 // we are very short of registers
3846
3847 Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
3848 //__ pop(reason);
3849 __ sw(reason, unpack_kind);
3850 // save the unpack_kind value
3851 // Retrieve the possible live values (return values)
3852 // All callee save registers representing jvm state
3853 // are now in the vframeArray.
3854
3855 Label noException;
3856 __ move(AT, Deoptimization::Unpack_exception);
3857 __ bne(AT, reason, noException);// Was exception pending?
3858 __ delayed()->nop();
3859 __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
3860 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
3861 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
3862 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
3863
3864 __ verify_oop(V0);
3865
3866 // Overwrite the result registers with the exception results.
3867 __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize);
3868 __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
3869
3870 __ bind(noException);
3871
3872
3873 // Stack is back to only having register save data on the stack.
3874 // Now restore the result registers. Everything else is either dead or captured
3875 // in the vframeArray.
3876
3877 RegisterSaver::restore_result_registers(masm);
3878 // All of the register save area has been popped off the stack. Only the
3879 // return address remains.
3880 // Pop all the frames we must move/replace.
3881 // Frame picture (youngest to oldest)
3882 // 1: self-frame (no frame link)
3883 // 2: deopting frame (no frame link)
3884 // 3: caller of deopting frame (could be compiled/interpreted).
3885 //
3886 // Note: by leaving the return address of self-frame on the stack
3887 // and using the size of frame 2 to adjust the stack
3888 // when we are done the return to frame 3 will still be on the stack.
3889
3890 // register for the sender's sp
3891 Register sender_sp = Rsender;
3892 // register for frame pcs
3893 Register pcs = T0;
3894 // register for frame sizes
3895 Register sizes = T1;
3896 // register for frame count
3897 Register count = T3;
3898
3899 // Pop deoptimized frame
3900 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
3901 __ add(SP, SP, AT);
3902 // sp should be pointing at the return address to the caller (3)
3903
3904 // Load array of frame pcs into pcs
3905 __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
3906 __ addi(SP, SP, wordSize); // trash the old pc
3907 // Load array of frame sizes into T1
3908 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
3909
3910
3911
3912 // Load count of frames into T3
3913 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
3914 // Pick up the initial fp we should save
3915 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
3916 // Now adjust the caller's stack to make up for the extra locals
3917 // but record the original sp so that we can save it in the skeletal interpreter
3918 // frame and the stack walking of interpreter_sender will get the unextended sp
3919 // value and not the "real" sp value.
3920 __ move(sender_sp, SP);
3921 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
3922 __ sub(SP, SP, AT);
3923
3924 // Push interpreter frames in a loop
3925 /*
3926 *
3927 Loop:
3928 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld
3929 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i]
3930 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16
3931 0x000000555bd82d24: daddi sp, sp, 0xfffffff0
3932 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp
3933 0x000000555bd82d2c: sd at, 0x8(sp) ; push at
3934 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp
3935 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2
3936 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3937 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3938 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP);
3939 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
3940 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4
3941 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
3942 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8
3943 */
3944
3945 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc();
3946 Label loop;
3947 __ bind(loop);
3948 __ ld(T2, sizes, 0); // Load frame size
3949 __ ld_ptr(AT, pcs, 0); // save return address
3950 __ addi(T2, T2, -2*wordSize); // we'll push pc and fp by hand
3951 __ push2(AT, FP);
3952 __ move(FP, SP);
3953 __ sub(SP, SP, T2); // Prolog!
3954 // This value is corrected by layout_activation_impl
3955 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3956 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
3957 __ move(sender_sp, SP); // pass to next frame
3958 __ addi(count, count, -1); // decrement counter
3959 __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes)
3960 __ bne(count, R0, loop);
3961 __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
3962 __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
3963 // Re-push self-frame
3964 __ push2(AT, FP);
3965 __ move(FP, SP);
3966 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
3967 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
3968 __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
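// Presumably this re-allocates the rest of the self-frame: the full register-save
// area minus the two words just pushed (pc and FP) and minus the extra argument
// words that are pushed again before the unpack_frames call below.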
3969
3970 // Restore frame locals after moving the frame
3971 __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
3972 __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
3973 __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize); // store the FP result into the save area
3974 __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
3975
3976
3977 // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
3978 // this call, no GC can happen.
3979 __ move(A1, reason); // exec_mode
3980 __ get_thread(thread);
3981 __ move(A0, thread); // thread
3982 __ addi(SP, SP, (-additional_words) *wordSize);
3983
3984 // set last_Java_sp, last_Java_fp
3985 __ set_last_Java_frame(NOREG, FP, NULL);
3986
3987 __ move(AT, -(StackAlignmentInBytes));
3988 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
3989
3990 __ relocate(relocInfo::internal_pc_type);
3991 {
3992 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
3993 __ li48(AT, save_pc);
3994 }
3995 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
3996
3997 //__ call(Deoptimization::unpack_frames);
3998 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
3999 __ delayed()->nop();
4000 // Revert SP alignment after call since we're going to do some SP relative addressing below
4001 __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
4002 // Set an oopmap for the call site
4003 oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
4004
4005 __ push(V0);
4006
4007 __ get_thread(thread);
4008 __ reset_last_Java_frame(false, false);
4009
4010 // Collect return values
4011 __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
4012 __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
4013 __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize); // reload the FP result from the save area
4014 __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
4015 //FIXME,
4016 // Clear floating point stack before returning to interpreter
4017 __ empty_FPU_stack();
4018 // FIXME: we should consider float and double
4019 // Push a float or double return value if necessary.
4020 __ leave();
4021
4022 // Jump to interpreter
4023 __ jr(RA);
4024 __ delayed()->nop();
4025
4026 masm->flush();
4027 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
4028 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
4029 }
4030
4031 #ifdef COMPILER2
4032
4033 //------------------------------generate_uncommon_trap_blob--------------------
4034 // Ought to generate an ideal graph & compile, but here's some MIPS assembly
4035 // instead.
4036 void SharedRuntime::generate_uncommon_trap_blob() {
4037 // allocate space for the code
4038 ResourceMark rm;
4039 // setup code generation tools
4040 CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 );
4041 MacroAssembler* masm = new MacroAssembler(&buffer);
4042
4043 enum frame_layout {
4044 s0_off, s0_off2,
4045 s1_off, s1_off2,
4046 s2_off, s2_off2,
4047 s3_off, s3_off2,
4048 s4_off, s4_off2,
4049 s5_off, s5_off2,
4050 s6_off, s6_off2,
4051 s7_off, s7_off2,
4052 fp_off, fp_off2,
4053 return_off, return_off2, // slot for return address sp + 9
4054 framesize
4055 };
4056 assert(framesize % 4 == 0, "sp not 16-byte aligned");
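// framesize is counted in 32-bit slots (BytesPerInt each), so a multiple of four
// slots keeps SP on the 16-byte boundary the ABI expects.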
4057
4058 address start = __ pc();
4059
4060 // Push self-frame.
4061 __ daddiu(SP, SP, -framesize * BytesPerInt);
4062
4063 __ sd(RA, SP, return_off * BytesPerInt);
4064 __ sd(FP, SP, fp_off * BytesPerInt);
4065
4066 // Save callee-saved registers (S0-S7).
4068 __ sd(S0, SP, s0_off * BytesPerInt);
4069 __ sd(S1, SP, s1_off * BytesPerInt);
4070 __ sd(S2, SP, s2_off * BytesPerInt);
4071 __ sd(S3, SP, s3_off * BytesPerInt);
4072 __ sd(S4, SP, s4_off * BytesPerInt);
4073 __ sd(S5, SP, s5_off * BytesPerInt);
4074 __ sd(S6, SP, s6_off * BytesPerInt);
4075 __ sd(S7, SP, s7_off * BytesPerInt);
4076
4077 __ daddi(FP, SP, fp_off * BytesPerInt);
4078
4079 // Clear the floating point exception stack
4080 __ empty_FPU_stack();
4081
4082 Register thread = TREG;
4083
4084 #ifndef OPT_THREAD
4085 __ get_thread(thread);
4086 #endif
4087 // set last_Java_sp
4088 __ set_last_Java_frame(NOREG, FP, NULL);
4089 __ relocate(relocInfo::internal_pc_type);
4090 assert(NativeCall::return_address_offset == 24, "in sharedRuntime return_address_offset");
4091 {
4092 long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
4093 __ li48(AT, (long)save_pc);
4094 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
4095 }
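// The 28-byte adjustment above presumably covers the instructions emitted between
// the pc() sample and the jalr below (the li48/sd and the two argument moves), so
// save_pc ends up just past the call's delay slot.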
4096 // Call C code. Need thread but NOT official VM entry
4097 // crud. We cannot block on this call, no GC can happen. Call should
4098 // capture callee-saved registers as well as return values.
4099 __ move(A0, thread);
4100 // argument already in T0
4101 __ move(A1, T0);
4102 __ li48(T9, (long)Deoptimization::uncommon_trap);
4103 __ jalr(T9);
4104 __ delayed()->nop();
4105
4106 // Set an oopmap for the call site
4107 OopMapSet *oop_maps = new OopMapSet();
4108 OopMap* map = new OopMap( framesize, 0 );
4109
4110 map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() );
4111 map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() );
4112 map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() );
4113 map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() );
4114 map->set_callee_saved( VMRegImpl::stack2reg(s4_off ), S4->as_VMReg() );
4115 map->set_callee_saved( VMRegImpl::stack2reg(s5_off ), S5->as_VMReg() );
4116 map->set_callee_saved( VMRegImpl::stack2reg(s6_off ), S6->as_VMReg() );
4117 map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() );
4118
4119 //oop_maps->add_gc_map( __ offset(), true, map);
4120 oop_maps->add_gc_map( __ offset(), map);
4121
4122 #ifndef OPT_THREAD
4123 __ get_thread(thread);
4124 #endif
4125 __ reset_last_Java_frame(false,false);
4126
4127 // Load UnrollBlock into S7
4128 Register unroll = S7;
4129 __ move(unroll, V0);
4130
4131 // Pop all the frames we must move/replace.
4132 //
4133 // Frame picture (youngest to oldest)
4134 // 1: self-frame (no frame link)
4135 // 2: deopting frame (no frame link)
4136 // 3: possible-i2c-adapter-frame
4137 // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an
4138 // interpreter frame and a c2i here)
4139
4140 // Pop self-frame. We have no frame, and must rely only on V0 and SP.
4141 __ daddiu(SP, SP, framesize * BytesPerInt);
4142
4143 // Pop deoptimized frame
4144 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
4145 __ dadd(SP, SP, AT);
4146
4147 // register for frame pcs
4148 Register pcs = T8;
4149 // register for frame sizes
4150 Register sizes = T9;
4151 // register for frame count
4152 Register count = T3;
4153 // register for the sender's sp
4154 Register sender_sp = T1;
4155
4156 // sp should be pointing at the return address to the caller (4)
4157 // Load array of frame pcs into pcs (T8)
4158 __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
4159
4160 /* 2012/9/7 Not needed in MIPS
4161 __ addiu(SP, SP, wordSize);
4162 */
4163
4164 // Load array of frame sizes into sizes (T9)
4165 __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
4166 __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
4167
4168 // Pick up the initial fp we should save
4169 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
4170 // Now adjust the caller's stack to make up for the extra locals
4171 // but record the original sp so that we can save it in the skeletal interpreter
4172 // frame and the stack walking of interpreter_sender will get the unextended sp
4173 // value and not the "real" sp value.
4174
4175 __ move(sender_sp, SP);
4176 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
4177 __ dsub(SP, SP, AT);
4178 // Push interpreter frames in a loop
4179 Label loop;
4180 __ bind(loop);
4181 __ ld(T2, sizes, 0); // Load frame size
4182 __ ld(AT, pcs, 0); // save return address
4183 __ daddi(T2, T2, -2*wordSize); // we'll push pc and fp by hand
4184 __ push2(AT, FP);
4185 __ move(FP, SP);
4186 __ dsub(SP, SP, T2); // Prolog!
4187 // This value is corrected by layout_activation_impl
4188 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
4189 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
4190 __ move(sender_sp, SP); // pass to next frame
4191 __ daddi(count, count, -1); // decrement counter
4192 __ daddi(sizes, sizes, wordSize); // Bump array pointer (sizes)
4193 __ addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
4194 __ bne(count, R0, loop);
4195 __ delayed()->nop();
4196
4197 __ ld(RA, pcs, 0);
4198
4199 // Re-push self-frame
4200 __ daddi(SP, SP, - 2 * wordSize); // make room for the saved FP and return address
4201 __ sd(FP, SP, 0 * wordSize); // save old FP
4202 __ sd(RA, SP, 1 * wordSize); // save final return address
4203 __ move(FP, SP);
4204 __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
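// framesize is in 32-bit slots, so framesize / 2 is the frame size in words;
// the two words already pushed above (FP and RA) are excluded here.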
4205
4206 // set last_Java_sp, last_Java_fp
4207 __ set_last_Java_frame(NOREG, FP, NULL);
4208
4209 __ move(AT, -(StackAlignmentInBytes));
4210 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
4211
4212 __ relocate(relocInfo::internal_pc_type);
4213 {
4214 long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
4215 __ li48(AT, (long)save_pc);
4216 }
4217 __ sd(AT, thread,in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
4218
4219 // Call C code. Need thread but NOT official VM entry
4220 // crud. We cannot block on this call, no GC can happen. Call should
4221 // restore return values to their stack-slots with the new SP.
4222 __ move(A0, thread);
4223 __ move(A1, Deoptimization::Unpack_uncommon_trap);
4224 __ li48(T9, (long)Deoptimization::unpack_frames);
4225 __ jalr(T9);
4226 __ delayed()->nop();
4227 // Set an oopmap for the call site
4228 //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
4229 oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );//Fu
4230
4231 __ reset_last_Java_frame(true,true);
4232
4233 // Pop self-frame.
4234 __ leave(); // Epilog!
4235
4236 // Jump to interpreter
4237 __ jr(RA);
4238 __ delayed()->nop();
4239 // -------------
4240 // make sure all code is generated
4241 masm->flush();
4242
4243 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
4244 }
4245
4246 #endif // COMPILER2
4247
4248 //------------------------------generate_handler_blob-------------------
4249 //
4250 // Generate a special Compile2Runtime blob that saves all registers, and sets
4251 // up an OopMap and calls safepoint code to stop the compiled code for
4252 // a safepoint.
4253 //
4254 // This blob is jumped to (via a breakpoint and the signal handler) from a
4255 // safepoint in compiled code.
4256
4257 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {
4258
4259 // Account for thread arg in our frame
4260 const int additional_words = 0;
4261 int frame_size_in_words;
4262
4263 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
4264
4265 ResourceMark rm;
4266 OopMapSet *oop_maps = new OopMapSet();
4267 OopMap* map;
4268
4269 // allocate space for the code
4270 // setup code generation tools
4271 CodeBuffer buffer ("handler_blob", 2048, 512);
4272 MacroAssembler* masm = new MacroAssembler( &buffer);
4273
4274 const Register thread = TREG;
4275 address start = __ pc();
4276 address call_pc = NULL;
4277 bool cause_return = (pool_type == POLL_AT_RETURN);
4278 bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
4279
4280 // If cause_return is true we are at a poll_return and the return
4281 // address in RA points to the caller of the nmethod that is at the
4282 // safepoint. We can leave this return address in RA and effectively
4283 // complete the return and safepoint in the caller.
4284 // Otherwise we load the exception pc into RA.
4285 __ push(thread);
4286 #ifndef OPT_THREAD
4287 __ get_thread(thread);
4288 #endif
4289
4290 if(!cause_return) {
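// For a poll taken inside the method the signal handler stashed the pc of the
// trapping instruction in saved_exception_pc; use it as this frame's return
// address so the register saver records the correct pc.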
4291 __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
4292 }
4293
4294 __ pop(thread);
4295 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);
4296
4297 #ifndef OPT_THREAD
4298 __ get_thread(thread);
4299 #endif
4300 // The following is basically a call_VM. However, we need the precise
4301 // address of the call in order to generate an oopmap. Hence, we do all the
4302 // work ourselves.
4303
4304 __ move(A0, thread);
4305 __ set_last_Java_frame(NOREG, NOREG, NULL);
4306
4307 //__ relocate(relocInfo::internal_pc_type);
4308 if (!cause_return)
4309 {
4310 /*
4311 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
4312 __ li48(AT, save_pc);
4313 __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
4314 */
4315 }
4316
4317
4318 // do the call
4319 //__ lui(T9, Assembler::split_high((int)call_ptr));
4320 //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
4321 __ call(call_ptr);
4322 __ delayed()->nop();
4323
4324 // Set an oopmap for the call site. This oopmap will map all
4325 // oop-registers and debug-info registers as callee-saved. This
4326 // will allow deoptimization at this safepoint to find all possible
4327 // debug-info recordings, as well as let GC find all oops.
4328 oop_maps->add_gc_map(__ offset(), map);
4329
4330 Label noException;
4331
4332 // Clear last_Java_sp again
4333 __ reset_last_Java_frame(false, false);
4334
4335 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
4336 __ beq(AT, R0, noException);
4337 __ delayed()->nop();
4338
4339 // Exception pending
4340
4341 RegisterSaver::restore_live_registers(masm, save_vectors);
4342 // forward_exception_entry needs the return address on the stack
4343 __ push(RA);
4344 //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
4345 //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
4346 __ li(T9, StubRoutines::forward_exception_entry());
4347 __ jr(T9);
4348 __ delayed()->nop();
4349
4350 // No exception case
4351 __ bind(noException);
4352 // Normal exit, register restoring and exit
4353 RegisterSaver::restore_live_registers(masm, save_vectors);
4354 __ jr(RA);
4355 __ delayed()->nop();
4356
4357 masm->flush();
4358
4359 // Fill-out other meta info
4360 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
4361 }
4362
4363 //
4364 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
4365 //
4366 // Generate a stub that calls into vm to find out the proper destination
4367 // of a java call. All the argument registers are live at this point
4368 // but since this is generic code we don't know what they are and the caller
4369 // must do any gc of the args.
4370 //
4371 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
4372 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
4373
4374 // allocate space for the code
4375 ResourceMark rm;
4376
4377 //CodeBuffer buffer(name, 1000, 512);
4378 //FIXME. aoqi. code_size
4379 CodeBuffer buffer(name, 20000, 2048);
4380 MacroAssembler* masm = new MacroAssembler(&buffer);
4381
4382 int frame_size_words;
4383 //we put the thread in A0
4384
4385 OopMapSet *oop_maps = new OopMapSet();
4386 OopMap* map = NULL;
4387
4388 int start = __ offset();
4389 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
4390
4391
4392 int frame_complete = __ offset();
4393
4394 const Register thread = T8;
4395 __ get_thread(thread);
4396
4397 __ move(A0, thread);
4398 __ set_last_Java_frame(noreg, FP, NULL);
4399 //__ addi(SP, SP, -wordSize);
4400 //align the stack before invoke native
4401 __ move(AT, -(StackAlignmentInBytes));
4402 __ andr(SP, SP, AT);
4403 __ relocate(relocInfo::internal_pc_type);
4404 {
4405 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 1 * BytesPerInstWord;
4406 //tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
4407 __ li48(AT, save_pc);
4408 }
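// AT now holds the pc just past the call below; storing it as last_Java_pc keeps
// the frame walkable while we are in the resolution call.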
4409 __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
4410
4411 __ call(destination);
4412 __ delayed()->nop();
4413
4414 // Set an oopmap for the call site.
4415 // We need this not only for callee-saved registers, but also for volatile
4416 // registers that the compiler might be keeping live across a safepoint.
4417 oop_maps->add_gc_map( __ offset() - start, map);
4418 // V0 contains the address we are going to jump to assuming no exception got installed
4419 __ get_thread(thread);
4420 __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
4421 // clear last_Java_sp
4422 __ reset_last_Java_frame(true, true);
4423 // check for pending exceptions
4424 Label pending;
4425 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
4426 __ bne(AT, R0, pending);
4427 __ delayed()->nop();
4428 // get the returned Method*
4429 // FIXME: does MIPS need this?
4430 __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
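// The resolved Method* and the entry address (V0) are written into the register
// save area so that restore_live_registers reloads them before we jump to the callee.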
4431 __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
4432 __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
4433 RegisterSaver::restore_live_registers(masm);
4434
4435 // We are back to the original state on entry and ready to go to the callee method.
4436 __ jr(V0);
4437 __ delayed()->nop();
4438 // Pending exception after the safepoint
4439
4440 __ bind(pending);
4441
4442 RegisterSaver::restore_live_registers(masm);
4443
4444 // exception pending => remove activation and forward to exception handler
4445 // forward_exception_entry needs the return address on the stack
4446 __ push(RA);
4447 __ get_thread(thread);
4448 __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
4449 __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
4450 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
4451 __ delayed() -> nop();
4452 // -------------
4453 // make sure all code is generated
4454 masm->flush();
4455
4456 RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
4457 return tmp;
4458 }
4459
4460 /*void SharedRuntime::generate_stubs() {
4461 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
4462 SharedRuntime::handle_wrong_method),"wrong_method_stub");
4463 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
4464 SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
4465 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
4466 SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
4467 _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
4468 SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
4469 _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
4470 SharedRuntime::resolve_static_call_C),"resolve_static_call");
4471 _polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
4472 SafepointSynchronize::handle_polling_page_exception), false);
4473 _polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
4474 SafepointSynchronize::handle_polling_page_exception), true);
4475 generate_deopt_blob();
4476 #ifdef COMPILER2
4477 generate_uncommon_trap_blob();
4478 #endif // COMPILER2
4479 }*/
4480
4481 extern "C" int SpinPause() {return 0;}
4482 // extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
4483 // extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;
