1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/mips/vm/sharedRuntime_mips_64.cpp Fri Apr 29 00:06:10 2016 +0800 1.3 @@ -0,0 +1,4483 @@ 1.4 +/* 1.5 + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. 1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 + * 1.9 + * This code is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License version 2 only, as 1.11 + * published by the Free Software Foundation. 1.12 + * 1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.16 + * version 2 for more details (a copy is included in the LICENSE file that 1.17 + * accompanied this code). 1.18 + * 1.19 + * You should have received a copy of the GNU General Public License version 1.20 + * 2 along with this work; if not, write to the Free Software Foundation, 1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.22 + * 1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.24 + * or visit www.oracle.com if you need additional information or have any 1.25 + * questions. 
1.26 + * 1.27 + */ 1.28 + 1.29 +#include "precompiled.hpp" 1.30 +#include "asm/macroAssembler.hpp" 1.31 +#include "asm/macroAssembler.inline.hpp" 1.32 +#include "code/debugInfoRec.hpp" 1.33 +#include "code/icBuffer.hpp" 1.34 +#include "code/vtableStubs.hpp" 1.35 +#include "interpreter/interpreter.hpp" 1.36 +#include "oops/compiledICHolder.hpp" 1.37 +#include "prims/jvmtiRedefineClassesTrace.hpp" 1.38 +#include "runtime/sharedRuntime.hpp" 1.39 +#include "runtime/vframeArray.hpp" 1.40 +#include "vmreg_mips.inline.hpp" 1.41 +#ifdef COMPILER1 1.42 +#include "c1/c1_Runtime1.hpp" 1.43 +#endif 1.44 +#ifdef COMPILER2 1.45 +#include "opto/runtime.hpp" 1.46 +#endif 1.47 + 1.48 +#define __ masm-> 1.49 +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; 1.50 + 1.51 +class RegisterSaver { 1.52 + enum { FPU_regs_live = 32 }; 1.53 + // Capture info about frame layout 1.54 + enum layout { 1.55 +#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, 1.56 + DEF_LAYOUT_OFFS(for_16_bytes_aligned) 1.57 + DEF_LAYOUT_OFFS(fpr0) 1.58 + DEF_LAYOUT_OFFS(fpr1) 1.59 + DEF_LAYOUT_OFFS(fpr2) 1.60 + DEF_LAYOUT_OFFS(fpr3) 1.61 + DEF_LAYOUT_OFFS(fpr4) 1.62 + DEF_LAYOUT_OFFS(fpr5) 1.63 + DEF_LAYOUT_OFFS(fpr6) 1.64 + DEF_LAYOUT_OFFS(fpr7) 1.65 + DEF_LAYOUT_OFFS(fpr8) 1.66 + DEF_LAYOUT_OFFS(fpr9) 1.67 + DEF_LAYOUT_OFFS(fpr10) 1.68 + DEF_LAYOUT_OFFS(fpr11) 1.69 + DEF_LAYOUT_OFFS(fpr12) 1.70 + DEF_LAYOUT_OFFS(fpr13) 1.71 + DEF_LAYOUT_OFFS(fpr14) 1.72 + DEF_LAYOUT_OFFS(fpr15) 1.73 + DEF_LAYOUT_OFFS(fpr16) 1.74 + DEF_LAYOUT_OFFS(fpr17) 1.75 + DEF_LAYOUT_OFFS(fpr18) 1.76 + DEF_LAYOUT_OFFS(fpr19) 1.77 + DEF_LAYOUT_OFFS(fpr20) 1.78 + DEF_LAYOUT_OFFS(fpr21) 1.79 + DEF_LAYOUT_OFFS(fpr22) 1.80 + DEF_LAYOUT_OFFS(fpr23) 1.81 + DEF_LAYOUT_OFFS(fpr24) 1.82 + DEF_LAYOUT_OFFS(fpr25) 1.83 + DEF_LAYOUT_OFFS(fpr26) 1.84 + DEF_LAYOUT_OFFS(fpr27) 1.85 + DEF_LAYOUT_OFFS(fpr28) 1.86 + DEF_LAYOUT_OFFS(fpr29) 1.87 + DEF_LAYOUT_OFFS(fpr30) 1.88 + DEF_LAYOUT_OFFS(fpr31) 1.89 + 
1.90 + DEF_LAYOUT_OFFS(v0) 1.91 + DEF_LAYOUT_OFFS(v1) 1.92 + DEF_LAYOUT_OFFS(a0) 1.93 + DEF_LAYOUT_OFFS(a1) 1.94 + DEF_LAYOUT_OFFS(a2) 1.95 + DEF_LAYOUT_OFFS(a3) 1.96 + DEF_LAYOUT_OFFS(a4) 1.97 + DEF_LAYOUT_OFFS(a5) 1.98 + DEF_LAYOUT_OFFS(a6) 1.99 + DEF_LAYOUT_OFFS(a7) 1.100 + DEF_LAYOUT_OFFS(t0) 1.101 + DEF_LAYOUT_OFFS(t1) 1.102 + DEF_LAYOUT_OFFS(t2) 1.103 + DEF_LAYOUT_OFFS(t3) 1.104 + DEF_LAYOUT_OFFS(s0) 1.105 + DEF_LAYOUT_OFFS(s1) 1.106 + DEF_LAYOUT_OFFS(s2) 1.107 + DEF_LAYOUT_OFFS(s3) 1.108 + DEF_LAYOUT_OFFS(s4) 1.109 + DEF_LAYOUT_OFFS(s5) 1.110 + DEF_LAYOUT_OFFS(s6) 1.111 + DEF_LAYOUT_OFFS(s7) 1.112 + DEF_LAYOUT_OFFS(t8) 1.113 + DEF_LAYOUT_OFFS(t9) 1.114 + 1.115 + DEF_LAYOUT_OFFS(gp) 1.116 + DEF_LAYOUT_OFFS(fp) 1.117 + DEF_LAYOUT_OFFS(return) 1.118 +/* 1.119 + fpr0_off, fpr1_off, 1.120 + fpr2_off, fpr3_off, 1.121 + fpr4_off, fpr5_off, 1.122 + fpr6_off, fpr7_off, 1.123 + fpr8_off, fpr9_off, 1.124 + fpr10_off, fpr11_off, 1.125 + fpr12_off, fpr13_off, 1.126 + fpr14_off, fpr15_off, 1.127 + fpr16_off, fpr17_off, 1.128 + fpr18_off, fpr19_off, 1.129 + fpr20_off, fpr21_off, 1.130 + fpr22_off, fpr23_off, 1.131 + fpr24_off, fpr25_off, 1.132 + fpr26_off, fpr27_off, 1.133 + fpr28_off, fpr29_off, 1.134 + fpr30_off, fpr31_off, 1.135 + 1.136 + v0_off, v1_off, 1.137 + a0_off, a1_off, 1.138 + a2_off, a3_off, 1.139 + a4_off, a5_off, 1.140 + a6_off, a7_off, 1.141 + t0_off, t1_off, t2_off, t3_off, 1.142 + s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off, 1.143 + t8_off, t9_off, 1.144 + 1.145 + gp_off, fp_off, 1.146 + return_off, 1.147 +*/ 1.148 + reg_save_size 1.149 + }; 1.150 + 1.151 + public: 1.152 + 1.153 + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); 1.154 + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); 1.155 + //FIXME, I have no idea which register to use 1.156 + static int raOffset(void) { return return_off / 2; } 1.157 + 
//Rmethod 1.158 + static int methodOffset(void) { return s3_off / 2; } 1.159 + 1.160 + static int v0Offset(void) { return v0_off / 2; } 1.161 + static int v1Offset(void) { return v1_off / 2; } 1.162 + 1.163 + static int fpResultOffset(void) { return fpr0_off / 2; } 1.164 + 1.165 + // During deoptimization only the result register need to be restored 1.166 + // all the other values have already been extracted. 1.167 + 1.168 + static void restore_result_registers(MacroAssembler* masm); 1.169 +}; 1.170 + 1.171 +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { 1.172 + 1.173 +/* 1.174 + int frame_words = reg_save_size + additional_frame_words; 1.175 + int frame_size_in_bytes = frame_words * wordSize; 1.176 + *total_frame_words = frame_words; 1.177 + */ 1.178 + // Always make the frame size 16-byte aligned 1.179 + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + 1.180 + reg_save_size*BytesPerInt, 16); 1.181 + // OopMap frame size is in compiler stack slots (jint's) not bytes or words 1.182 + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; 1.183 + // The caller will allocate additional_frame_words 1.184 + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; 1.185 + // CodeBlob frame size is in words. 
1.186 + int frame_size_in_words = frame_size_in_bytes / wordSize; 1.187 + *total_frame_words = frame_size_in_words; 1.188 + 1.189 + // save registers, fpu state, and flags 1.190 + // We assume caller has already has return address slot on the stack 1.191 + // We push epb twice in this sequence because we want the real ebp 1.192 + // to be under the return like a normal enter and we want to use pushad 1.193 + // We push by hand instead of pusing push 1.194 + 1.195 + __ daddiu(SP, SP, - reg_save_size * jintSize); 1.196 + 1.197 + __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); 1.198 + __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); 1.199 + __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); 1.200 + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); 1.201 + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); 1.202 + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); 1.203 + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); 1.204 + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); 1.205 + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); 1.206 + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); 1.207 + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); 1.208 + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); 1.209 + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); 1.210 + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); 1.211 + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); 1.212 + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); 1.213 + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); 1.214 + __ 
sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); 1.215 + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); 1.216 + __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); 1.217 + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); 1.218 + __ sd(T0, SP, t0_off * jintSize); 1.219 + __ sd(T1, SP, t1_off * jintSize); 1.220 + __ sd(T2, SP, t2_off * jintSize); 1.221 + __ sd(T3, SP, t3_off * jintSize); 1.222 + __ sd(S0, SP, s0_off * jintSize); 1.223 + __ sd(S1, SP, s1_off * jintSize); 1.224 + __ sd(S2, SP, s2_off * jintSize); 1.225 + __ sd(S3, SP, s3_off * jintSize); 1.226 + __ sd(S4, SP, s4_off * jintSize); 1.227 + __ sd(S5, SP, s5_off * jintSize); 1.228 + __ sd(S6, SP, s6_off * jintSize); 1.229 + __ sd(S7, SP, s7_off * jintSize); 1.230 + 1.231 + __ sd(T8, SP, t8_off * jintSize); 1.232 + __ sd(T9, SP, t9_off * jintSize); 1.233 + 1.234 + __ sd(GP, SP, gp_off * jintSize); 1.235 + __ sd(FP, SP, fp_off * jintSize); 1.236 + __ sd(RA, SP, return_off * jintSize); 1.237 + __ daddi(FP, SP, fp_off * jintSize); 1.238 + 1.239 + OopMapSet *oop_maps = new OopMapSet(); 1.240 + //OopMap* map = new OopMap( frame_words, 0 ); 1.241 + OopMap* map = new OopMap( frame_size_in_slots, 0 ); 1.242 + 1.243 + 1.244 +//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) 1.245 +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) 1.246 + map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); 1.247 + map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); 1.248 + map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); 1.249 + map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); 1.250 + map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); 1.251 + map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); 1.252 + map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); 1.253 + map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); 1.254 + 
map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); 1.255 + map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); 1.256 + map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); 1.257 + map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); 1.258 + map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); 1.259 + map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); 1.260 + map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); 1.261 + map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); 1.262 + map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); 1.263 + map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); 1.264 + map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); 1.265 + map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); 1.266 + map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); 1.267 + map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); 1.268 + map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); 1.269 + map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); 1.270 + map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); 1.271 + map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); 1.272 + map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); 1.273 + 1.274 + map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); 1.275 + map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); 1.276 + map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); 1.277 + map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); 1.278 + map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); 1.279 + map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); 1.280 + map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); 1.281 + map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); 1.282 + map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); 1.283 + 
map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); 1.284 + map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); 1.285 + map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); 1.286 + map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); 1.287 + map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); 1.288 + map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); 1.289 + map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); 1.290 + map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); 1.291 + map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); 1.292 + map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); 1.293 + map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); 1.294 + map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); 1.295 + map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); 1.296 + map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); 1.297 + map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); 1.298 + map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); 1.299 + map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); 1.300 + map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); 1.301 + map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); 1.302 + map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); 1.303 + map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); 1.304 + map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); 1.305 + map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); 1.306 + 1.307 +/* 1.308 + if (true) { 1.309 + map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next()); 1.310 + map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next()); 1.311 + map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next()); 1.312 + 
map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next()); 1.313 + map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next()); 1.314 + map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next()); 1.315 + map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next()); 1.316 + map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next()); 1.317 + map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next()); 1.318 + map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next()); 1.319 + map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next()); 1.320 + map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next()); 1.321 + map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next()); 1.322 + map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next()); 1.323 + map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next()); 1.324 + map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next()); 1.325 + map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next()); 1.326 + map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next()); 1.327 + map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next()); 1.328 + map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next()); 1.329 + map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next()); 1.330 + map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next()); 1.331 + map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next()); 1.332 + map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next()); 1.333 + map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next()); 1.334 + map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next()); 1.335 + map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next()); 1.336 + 1.337 + map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next()); 1.338 + 
map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next()); 1.339 + map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next()); 1.340 + map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next()); 1.341 + map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next()); 1.342 + map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next()); 1.343 + map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next()); 1.344 + map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next()); 1.345 + map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next()); 1.346 + map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next()); 1.347 + map->set_callee_saved(STACK_OFFSET( fpr20H_off), F20->as_VMReg()->next()); 1.348 + map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next()); 1.349 + map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next()); 1.350 + map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next()); 1.351 + map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next()); 1.352 + map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next()); 1.353 + } 1.354 +*/ 1.355 +#undef STACK_OFFSET 1.356 + return map; 1.357 +} 1.358 + 1.359 + 1.360 +// Pop the current frame and restore all the registers that we 1.361 +// saved. 
1.362 +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { 1.363 + __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); 1.364 + __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); 1.365 + __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); 1.366 + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); 1.367 + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); 1.368 + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); 1.369 + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); 1.370 + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); 1.371 + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); 1.372 + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); 1.373 + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); 1.374 + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); 1.375 + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); 1.376 + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); 1.377 + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); 1.378 + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); 1.379 + 1.380 + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); 1.381 + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); 1.382 + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); 1.383 + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); 1.384 + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); 1.385 + __ ld(T0, SP, t0_off * jintSize); 1.386 + __ ld(T1, SP, t1_off * jintSize); 1.387 + __ ld(T2, SP, t2_off * jintSize); 
1.388 + __ ld(T3, SP, t3_off * jintSize); 1.389 + __ ld(S0, SP, s0_off * jintSize); 1.390 + __ ld(S1, SP, s1_off * jintSize); 1.391 + __ ld(S2, SP, s2_off * jintSize); 1.392 + __ ld(S3, SP, s3_off * jintSize); 1.393 + __ ld(S4, SP, s4_off * jintSize); 1.394 + __ ld(S5, SP, s5_off * jintSize); 1.395 + __ ld(S6, SP, s6_off * jintSize); 1.396 + __ ld(S7, SP, s7_off * jintSize); 1.397 + 1.398 + __ ld(T8, SP, t8_off * jintSize); 1.399 + __ ld(T9, SP, t9_off * jintSize); 1.400 + 1.401 + __ ld(GP, SP, gp_off * jintSize); 1.402 + __ ld(FP, SP, fp_off * jintSize); 1.403 + __ ld(RA, SP, return_off * jintSize); 1.404 + 1.405 + __ addiu(SP, SP, reg_save_size * jintSize); 1.406 +} 1.407 + 1.408 +// Pop the current frame and restore the registers that might be holding 1.409 +// a result. 1.410 +// FIXME, if the result is float? 1.411 +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { 1.412 + // Just restore result register. Only used by deoptimization. By 1.413 + // now any callee save register that needs to be restore to a c2 1.414 + // caller of the deoptee has been extracted into the vframeArray 1.415 + // and will be stuffed into the c2i adapter we create for later 1.416 + // restoration so only result registers need to be restored here. 1.417 + // 1.418 + __ ld(V0, SP, v0_off * jintSize); 1.419 + __ ld(V1, SP, v1_off * jintSize); 1.420 + __ addiu(SP, SP, return_off * jintSize); 1.421 +} 1.422 + 1.423 + // Is vector's size (in bytes) bigger than a size saved by default? 1.424 + // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. 1.425 + bool SharedRuntime::is_wide_vector(int size) { 1.426 + return size > 16; 1.427 + } 1.428 + 1.429 +// The java_calling_convention describes stack locations as ideal slots on 1.430 +// a frame with no abi restrictions. Since we must observe abi restrictions 1.431 +// (like the placement of the register window) the slots must be biased by 1.432 +// the following value. 
1.433 + 1.434 +static int reg2offset_in(VMReg r) { 1.435 + // Account for saved ebp and return address 1.436 + // This should really be in_preserve_stack_slots 1.437 + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); 1.438 +} 1.439 + 1.440 +static int reg2offset_out(VMReg r) { 1.441 + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 1.442 +} 1.443 + 1.444 +// --------------------------------------------------------------------------- 1.445 +// Read the array of BasicTypes from a signature, and compute where the 1.446 +// arguments should go. Values in the VMRegPair regs array refer to 4-byte 1.447 +// quantities. Values less than SharedInfo::stack0 are registers, those above 1.448 +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer 1.449 +// as framesizes are fixed. 1.450 +// VMRegImpl::stack0 refers to the first slot 0(sp). 1.451 +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register 1.452 +// up to RegisterImpl::number_of_registers) are the 32-bit 1.453 +// integer registers. 1.454 + 1.455 +// Pass first five oop/int args in registers T0, A0 - A3. 1.456 +// Pass float/double/long args in stack. 1.457 +// Doubles have precedence, so if you pass a mix of floats and doubles 1.458 +// the doubles will grab the registers before the floats will. 1.459 + 1.460 +// Note: the INPUTS in sig_bt are in units of Java argument words, which are 1.461 +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit 1.462 +// units regardless of build. Of course for i486 there is no 64 bit build 1.463 + 1.464 + 1.465 +// --------------------------------------------------------------------------- 1.466 +// The compiled Java calling convention. 1.467 +// Pass first five oop/int args in registers T0, A0 - A3. 1.468 +// Pass float/double/long args in stack. 
1.469 +// Doubles have precedence, so if you pass a mix of floats and doubles 1.470 +// the doubles will grab the registers before the floats will. 1.471 + 1.472 +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 1.473 + VMRegPair *regs, 1.474 + int total_args_passed, 1.475 + int is_outgoing) { 1.476 +//#define aoqi_test 1.477 +#ifdef aoqi_test 1.478 +tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed); 1.479 +#endif 1.480 + 1.481 + // Create the mapping between argument positions and 1.482 + // registers. 1.483 + //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { 1.484 + static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { 1.485 + T0, A0, A1, A2, A3, A4, A5, A6, A7 1.486 + }; 1.487 + //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { 1.488 + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { 1.489 + F12, F13, F14, F15, F16, F17, F18, F19 1.490 + }; 1.491 + 1.492 + 1.493 + uint args = 0; 1.494 + uint stk_args = 0; // inc by 2 each time 1.495 + 1.496 + for (int i = 0; i < total_args_passed; i++) { 1.497 + switch (sig_bt[i]) { 1.498 + case T_VOID: 1.499 + // halves of T_LONG or T_DOUBLE 1.500 + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1.501 + regs[i].set_bad(); 1.502 + break; 1.503 + case T_BOOLEAN: 1.504 + case T_CHAR: 1.505 + case T_BYTE: 1.506 + case T_SHORT: 1.507 + case T_INT: 1.508 + if (args < Argument::n_register_parameters) { 1.509 + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); 1.510 + } else { 1.511 + regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1.512 + stk_args += 2; 1.513 + } 1.514 + break; 1.515 + case T_LONG: 1.516 + assert(sig_bt[i + 1] == T_VOID, "expecting half"); 1.517 + // fall through 1.518 + case T_OBJECT: 1.519 + case T_ARRAY: 1.520 + case T_ADDRESS: 1.521 + if (args < Argument::n_register_parameters) { 1.522 + 
regs[i].set2(INT_ArgReg[args++]->as_VMReg()); 1.523 + } else { 1.524 + regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1.525 + stk_args += 2; 1.526 + } 1.527 + break; 1.528 + case T_FLOAT: 1.529 + if (args < Argument::n_float_register_parameters) { 1.530 + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); 1.531 + } else { 1.532 + regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1.533 + stk_args += 2; 1.534 + } 1.535 + break; 1.536 + case T_DOUBLE: 1.537 + assert(sig_bt[i + 1] == T_VOID, "expecting half"); 1.538 + if (args < Argument::n_float_register_parameters) { 1.539 + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); 1.540 + } else { 1.541 + regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1.542 + stk_args += 2; 1.543 + } 1.544 + break; 1.545 + default: 1.546 + ShouldNotReachHere(); 1.547 + break; 1.548 + } 1.549 +#ifdef aoqi_test 1.550 +tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args); 1.551 +#endif 1.552 + } 1.553 + 1.554 + return round_to(stk_args, 2); 1.555 +/* 1.556 + // Starting stack position for args on stack 1.557 + uint stack = 0; 1.558 + 1.559 + // Pass first five oop/int args in registers T0, A0 - A3. 1.560 + uint reg_arg0 = 9999; 1.561 + uint reg_arg1 = 9999; 1.562 + uint reg_arg2 = 9999; 1.563 + uint reg_arg3 = 9999; 1.564 + uint reg_arg4 = 9999; 1.565 + 1.566 + 1.567 + // Pass doubles & longs &float ligned on the stack. 
First count stack slots for doubles 1.568 + int i; 1.569 + for( i = 0; i < total_args_passed; i++) { 1.570 + if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) { 1.571 + stack += 2; 1.572 + } 1.573 + } 1.574 + int dstack = 0; // Separate counter for placing doubles 1.575 + for( i = 0; i < total_args_passed; i++) { 1.576 + // From the type and the argument number (count) compute the location 1.577 + switch( sig_bt[i] ) { 1.578 + case T_SHORT: 1.579 + case T_CHAR: 1.580 + case T_BYTE: 1.581 + case T_BOOLEAN: 1.582 + case T_INT: 1.583 + case T_ARRAY: 1.584 + case T_OBJECT: 1.585 + case T_ADDRESS: 1.586 + if( reg_arg0 == 9999 ) { 1.587 + reg_arg0 = i; 1.588 + regs[i].set1(T0->as_VMReg()); 1.589 + } else if( reg_arg1 == 9999 ) { 1.590 + reg_arg1 = i; 1.591 + regs[i].set1(A0->as_VMReg()); 1.592 + } else if( reg_arg2 == 9999 ) { 1.593 + reg_arg2 = i; 1.594 + regs[i].set1(A1->as_VMReg()); 1.595 + }else if( reg_arg3 == 9999 ) { 1.596 + reg_arg3 = i; 1.597 + regs[i].set1(A2->as_VMReg()); 1.598 + }else if( reg_arg4 == 9999 ) { 1.599 + reg_arg4 = i; 1.600 + regs[i].set1(A3->as_VMReg()); 1.601 + } else { 1.602 + regs[i].set1(VMRegImpl::stack2reg(stack++)); 1.603 + } 1.604 + break; 1.605 + case T_FLOAT: 1.606 + regs[i].set1(VMRegImpl::stack2reg(stack++)); 1.607 + break; 1.608 + case T_LONG: 1.609 + assert(sig_bt[i+1] == T_VOID, "missing Half" ); 1.610 + regs[i].set2(VMRegImpl::stack2reg(dstack)); 1.611 + dstack += 2; 1.612 + break; 1.613 + case T_DOUBLE: 1.614 + assert(sig_bt[i+1] == T_VOID, "missing Half" ); 1.615 + regs[i].set2(VMRegImpl::stack2reg(dstack)); 1.616 + dstack += 2; 1.617 + break; 1.618 + case T_VOID: regs[i].set_bad(); break; 1.619 + break; 1.620 + default: 1.621 + ShouldNotReachHere(); 1.622 + break; 1.623 + } 1.624 + } 1.625 + // return value can be odd number of VMRegImpl stack slots make multiple of 2 1.626 + return round_to(stack, 2); 1.627 +*/ 1.628 +} 1.629 + 1.630 +// Helper class mostly to avoid passing masm everywhere, and handle store 1.631 +// 
// displacement overflow logic for LP64

// Emits the two argument-shuffling adapters for this platform:
//   i2c — interpreter layout -> compiled-code layout
//   c2i — compiled-code layout -> interpreter layout
// All emission goes through the wrapped MacroAssembler (the `__ masm->`
// macro defined at the top of this file).
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  // Scratch register for stack-slot displacements that do not fit into an
  // instruction's immediate field (see arg_slot/next_arg_slot below).
  Register Rdisp;
  void set_Rdisp(Register r) { Rdisp = r; }
#endif // _LP64

  // Patch the caller's call site to point at compiled code, if any exists.
  void patch_callers_callsite();
// void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};


// Patch the callers callsite with entry to compiled code if it exists.
// On entry Rmethod holds the target Method*; RA is the caller's return
// address (captured in V0 across the runtime call).
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ verify_oop(Rmethod);
  // If Method::code() is NULL there is no compiled entry to patch in — skip.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // V0 isn't live so capture the return address while we easily can.
  __ move(V0, RA);

  __ pushad();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // VM needs the target method (A0) and the caller's callsite (A1).
  __ move(A0, Rmethod);
  __ move(A1, V0);
  // we should preserve the return address
  __ verify_oop(Rmethod);
  // Save SP in a callee-saved register, then align for the C call.
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes)); // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);

  __ delayed()->nop();
  // Restore the original SP and all saved registers.
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
/*
void
AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
                              Register scratch) {
  Unimplemented();
}*/

#ifdef _LP64
// LP64 slot addressing is handled inline in the generators on this port;
// these helpers are never reached.
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off){
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base.
// Not used on this port: gen_c2i_adapter stores values directly.
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

// Not used on this port: gen_c2i_adapter stores values directly.
void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

// Not used on this port: gen_c2i_adapter stores values directly.
void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base.
// Not used on this port: gen_c2i_adapter stores values directly.
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

// Not used on this port: gen_c2i_adapter stores values directly.
void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}
/*
void AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
  if (TaggedStackInterpreter) {
    int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
    if (sig == T_OBJECT || sig == T_ARRAY) {
      __ move(AT, frame::TagReference);
      __ sw (AT, SP, tag_offset);
    } else if (sig == T_LONG || sig == T_DOUBLE) {
      int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
      __ move(AT, frame::TagValue);
      __ sw (AT, SP,
             next_tag_offset);
      __ move(AT, frame::TagValue);
      __ sw (AT, SP, tag_offset);
    } else {
      __ move(AT, frame::TagValue);
      __ sw (AT, SP, tag_offset);
    }
  }
}*/

// Unpack arguments from the compiled-code layout into the interpreter's
// stack layout, then jump to the interpreter entry of Rmethod.
// On entry: Rmethod = Method*, args are in registers/outgoing stack per
// `regs`; RA is the compiled caller's return address.
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theorectically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */
  // Since all args are passed on the stack,
  // total_args_passed * interpreter stack_element_size is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address (V0 is free here; restored into RA before the jump).
  __ move(V0, RA);
  // set senderSP value
  // refer to interpreter_mips.cpp:generate_asm_entry
  __ move(Rsender, SP);
  // Carve out the interpreter argument area.
  __ addi(SP, SP, -extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID is the placeholder for the second half of a long/double.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
             "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
    // Say 4 args:
    // i   st_off
    // 0   12  T_LONG
    // 1    8  T_VOID
    // 2    4  T_OBJECT
    // 3    0  T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_1->is_stack()) {
      // memory to memory: copy from the compiled outgoing area (above the
      // space we just allocated, hence + extraspace) into the interpreter area.
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off);
#endif

      if (!r_2->is_valid()) {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif

        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
        // st_off == MSW, st_off-wordSize == LSW

        int next_off = st_off - Interpreter::stackElementSize;
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        // A long/double occupies two interpreter slots; mirror the value
        // into the second (lower-address) slot as well — see the matching
        // layout note in the is_Register branch below.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT, SP, st_off - 8);
        //tag_stack(sig_bt[i], next_off);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        __ sd(r, SP, st_off); //aoqi_test FIXME
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        //FIXME, mips will not enter here
        // long/double in gpr
        __ sd(r, SP, st_off); //aoqi_test FIXME
        /* Jin: Needed when transferring a T_LONG parameter from compiled code
         * to a native method, e.g. java/util/zip/ZipFile.java:
         *   private static native long open(String name, int mode, long lastModified);
         *   private static native int getTotal(long jzfile);
         *
         * Call path:
         *   Caller -> lir_static_call -> gen_resolve_stub
         *          -> resolve_static_call_C
         *             `- gen_c2i_adapter()  [*]
         *             `- AdapterHandlerLibrary::get_create_apapter_index
         *                -> generate_native_entry
         *                -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]
         *
         * In [**] the interpreter expects the long in the LOWER of the two
         * 8-byte slots (the T_VOID half above it is unused), whereas here the
         * slot order is reversed.  So store another copy of the value into
         * the T_VOID slot so generate_native_entry() finds it.
         */
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8);   /* T_DOUBLE needs two slots */
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr (AT);
  __ delayed()->nop();
}

// Repack arguments from the interpreter layout into the compiled-code
// layout, then jump to the compiled entry of Rmethod.
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i.  Azul allowed this but we do not.
  // If we lose the race and use a c2i we will remain interpreted for the
  // race loser(s).  This removes all sorts of headaches on the mips side
  // and also eliminates the possibility of having
  // c2i -> i2c -> c2i -> ... endless transitions.

  // Preserve the interpreter's SP before carving out the compiled frame.
  __ move(T9, SP);

  // Cut-out for having no stack args. Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME aoqi
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to miminum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }

  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register
  const Register saved_sp = V0;
  __ move(saved_sp, T9);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
#endif
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address )
      //NOTICE HERE!!!! I sub a wordSize here
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
      //+ wordSize;

      // We can use esi as a temp here because compiled code doesn't
      // need esi as an input
      // and if we end up going thru a c2i because of a miss a reasonable
      // value of esi
      // we be generated.
      if (!r_2->is_valid()) {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())

        /* 2012/4/9 Jin
         * A T_LONG passed on the stack must be read from the lower of its two
         * 8-byte interpreter slots (ld_off - 8), matching the layout described
         * in gen_c2i_adapter.  Found via
         * [./org/eclipse/swt/graphics/GC.java] drawImageXRender(...,
         * long maskPixmap, ...): before this fix Eclipse displayed icons with
         * a solid black background.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      if (r_2->is_valid()) {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        assert(r_2->as_Register() == r_1->as_Register(), "");
        __ ld(r, saved_sp, ld_off);

        /* Jin: for T_LONG the value lives in the LOWER 8-byte slot of the
         * (long, void) pair:
         *
         *   (high)  | 8 bytes (void) |
         *           | 8 bytes (long) |   <-- load this one
         *   (low)
         *
         * so reload from ld_off - 8.
         */
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
#ifdef aoqi_test
tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) { // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find there should this case occur.
  __ get_thread(T8);
  __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));

  // move methodOop to V0 in case we end up in an c2i adapter.
  // the c2i adapters expect methodOop in V0 (c2) because c2's
  // resolve stubs return the result (the method) in V0.
  // I'd love to fix this.
  __ move(V0, Rmethod);
  __ jr(T9);
  __ delayed()->nop();
}

// ---------------------------------------------------------------
// Generate the paired i2c and c2i adapters for one signature fingerprint
// and register them with the AdapterHandlerLibrary.
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know Rmethod holds the methodOop.
  // The args start out packed in the compiled layout.  They need to be
  // unpacked into the interpreter layout.  This will almost always require
  // some stack space.  We grow the current (compiled) stack, then repack the
  // args.  We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest
  // the compiled code, which relys solely on SP and not FP, get sick).
  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  {
    // Inline-cache check: verify the receiver's klass matches the klass
    // cached in the CompiledICHolder before committing to this target.
    Register holder = T1;
    Register receiver = T0;
    Register temp = T8;
    address ic_miss = SharedRuntime::get_ic_miss_stub();

    Label missed;

    __ verify_oop(holder);
    // Load the receiver's klass (compressed-oops aware).
    __ load_klass(temp, receiver);
    __ verify_oop(temp);

    // Compare against the klass recorded in the IC holder; also pick up the
    // cached target method while we are here.
    __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
    __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
    __ bne(AT, temp, missed);
    __ delayed()->nop();
    // Method might have been compiled since the call site was patched to
    // interpreted if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
    __ beq(AT, R0, skip_fixup);
    __ delayed()->nop();
    __ bind(missed);
    __ jmp(ic_miss, relocInfo::runtime_call_type);
    __ delayed()->nop();
  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);

}
/*
// Helper function for native calling conventions (SPARC legacy, unused here).
static VMReg int_stk_helper( int i ) {
  // Bias any stack based VMReg we get by ignoring the window area
  // but not the register parameter save area.  C2 thinks of the abi
  // area as only out_preserve_stack_slots, so the C calling convention
  // must add in a bias when referencing actual stack locations.
1.1325 + 1.1326 + // Another way of cleaning this up would be for out_preserve_stack_slots 1.1327 + // to take a parameter to say whether it was C or java calling conventions. 1.1328 + // Then things might look a little better (but not much). 1.1329 + 1.1330 + int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; 1.1331 + if( mem_parm_offset < 0 ) { 1.1332 + return as_oRegister(i)->as_VMReg(); 1.1333 + } else { 1.1334 + int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; 1.1335 + // Now return a biased offset that will be correct when out_preserve_slots is added back in 1.1336 + return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); 1.1337 + } 1.1338 +} 1.1339 +*/ 1.1340 + 1.1341 + 1.1342 +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 1.1343 + VMRegPair *regs, 1.1344 + VMRegPair *regs2, 1.1345 + int total_args_passed) { 1.1346 + assert(regs2 == NULL, "not needed on MIPS"); 1.1347 +#ifdef aoqi_test 1.1348 +tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); 1.1349 +#endif 1.1350 + // Return the number of VMReg stack_slots needed for the args. 1.1351 + // This value does not include an abi space (like register window 1.1352 + // save area). 1.1353 + 1.1354 + // The native convention is V8 if !LP64 1.1355 + // The LP64 convention is the V9 convention which is slightly more sane. 1.1356 + 1.1357 + // We return the amount of VMReg stack slots we need to reserve for all 1.1358 + // the arguments NOT counting out_preserve_stack_slots. Since we always 1.1359 + // have space for storing at least 6 registers to memory we start with that. 1.1360 + // See int_stk_helper for a further discussion. 1.1361 + // We return the amount of VMRegImpl stack slots we need to reserve for all 1.1362 + // the arguments NOT counting out_preserve_stack_slots. 
1.1363 + static const Register INT_ArgReg[Argument::n_register_parameters] = { 1.1364 + A0, A1, A2, A3, A4, A5, A6, A7 1.1365 + }; 1.1366 + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { 1.1367 + F12, F13, F14, F15, F16, F17, F18, F19 1.1368 + }; 1.1369 + uint args = 0; 1.1370 + uint stk_args = 0; // inc by 2 each time 1.1371 + 1.1372 +/* Example: 1.1373 +--- n java.lang.UNIXProcess::forkAndExec 1.1374 + private native int forkAndExec(byte[] prog, 1.1375 + byte[] argBlock, int argc, 1.1376 + byte[] envBlock, int envc, 1.1377 + byte[] dir, 1.1378 + boolean redirectErrorStream, 1.1379 + FileDescriptor stdin_fd, 1.1380 + FileDescriptor stdout_fd, 1.1381 + FileDescriptor stderr_fd) 1.1382 +JNIEXPORT jint JNICALL 1.1383 +Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, 1.1384 + jobject process, 1.1385 + jbyteArray prog, 1.1386 + jbyteArray argBlock, jint argc, 1.1387 + jbyteArray envBlock, jint envc, 1.1388 + jbyteArray dir, 1.1389 + jboolean redirectErrorStream, 1.1390 + jobject stdin_fd, 1.1391 + jobject stdout_fd, 1.1392 + jobject stderr_fd) 1.1393 + 1.1394 +::c_calling_convention 1.1395 +0: // env <-- a0 1.1396 +1: L // klass/obj <-- t0 => a1 1.1397 +2: [ // prog[] <-- a0 => a2 1.1398 +3: [ // argBlock[] <-- a1 => a3 1.1399 +4: I // argc 1.1400 +5: [ // envBlock[] <-- a3 => a5 1.1401 +6: I // envc 1.1402 +7: [ // dir[] <-- a5 => a7 1.1403 +8: Z // redirectErrorStream a6 => sp[0] 1.1404 +9: L // stdin a7 => sp[8] 1.1405 +10: L // stdout fp[16] => sp[16] 1.1406 +11: L // stderr fp[24] => sp[24] 1.1407 +*/ 1.1408 + for (int i = 0; i < total_args_passed; i++) { 1.1409 + switch (sig_bt[i]) { 1.1410 + case T_VOID: // Halves of longs and doubles 1.1411 + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 1.1412 + regs[i].set_bad(); 1.1413 + break; 1.1414 + case T_BOOLEAN: 1.1415 + case T_CHAR: 1.1416 + case T_BYTE: 1.1417 + case T_SHORT: 1.1418 + case T_INT: 1.1419 + if (args < 
Argument::n_register_parameters) { 1.1420 + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); 1.1421 + } else { 1.1422 + regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1.1423 + stk_args += 2; 1.1424 + } 1.1425 + break; 1.1426 + case T_LONG: 1.1427 + assert(sig_bt[i + 1] == T_VOID, "expecting half"); 1.1428 + // fall through 1.1429 + case T_OBJECT: 1.1430 + case T_ARRAY: 1.1431 + case T_ADDRESS: 1.1432 + case T_METADATA: 1.1433 + if (args < Argument::n_register_parameters) { 1.1434 + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); 1.1435 + } else { 1.1436 + regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1.1437 + stk_args += 2; 1.1438 + } 1.1439 + break; 1.1440 + case T_FLOAT: 1.1441 + if (args < Argument::n_float_register_parameters) { 1.1442 + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); 1.1443 + } else { 1.1444 + regs[i].set1(VMRegImpl::stack2reg(stk_args)); 1.1445 + stk_args += 2; 1.1446 + } 1.1447 + break; 1.1448 + case T_DOUBLE: 1.1449 + assert(sig_bt[i + 1] == T_VOID, "expecting half"); 1.1450 + if (args < Argument::n_float_register_parameters) { 1.1451 + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); 1.1452 + } else { 1.1453 + regs[i].set2(VMRegImpl::stack2reg(stk_args)); 1.1454 + stk_args += 2; 1.1455 + } 1.1456 + break; 1.1457 + default: 1.1458 + ShouldNotReachHere(); 1.1459 + break; 1.1460 + } 1.1461 + } 1.1462 + 1.1463 + return round_to(stk_args, 2); 1.1464 +} 1.1465 +/* 1.1466 +int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, 1.1467 + VMRegPair *regs, 1.1468 + int total_args_passed) { 1.1469 +// We return the amount of VMRegImpl stack slots we need to reserve for all 1.1470 +// the arguments NOT counting out_preserve_stack_slots. 
 bool unalign = 0;
 uint    stack = 0;        // All arguments on stack
#ifdef aoqi_test
tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
#endif

  for( int i = 0; i < total_args_passed; i++) {
    // From the type and the argument number (count) compute the location
    switch( sig_bt[i] ) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_FLOAT:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      regs[i].set1(VMRegImpl::stack2reg(stack++));
      unalign = !unalign;
      break;
    case T_LONG:
    case T_DOUBLE: // The stack numbering is reversed from Java
      // Since C arguments do not get reversed, the ordering for
      // doubles on the stack must be opposite the Java convention
      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      if(unalign){
        stack += 1;
        unalign = !unalign;
      }
      regs[i].set2(VMRegImpl::stack2reg(stack));
      stack += 2;
      break;
    case T_VOID: regs[i].set_bad(); break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
  return stack;
}
*/

// ---------------------------------------------------------------------------
// Spill the live native result into the word just below the frame pointer so
// it survives an intervening VM call (e.g. slow-path unlocking).  The
// frame_slots argument is deliberately ignored: by the time this runs, the
// slot at FP - wordSize is known to be free.  Store widths must match the
// load widths used in restore_native_result() below.
void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    // float results come back in FSF (the FP result register)
    __ swc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ sdc1(FSF, FP, -wordSize );
    break;
  case T_VOID:  break;                  // nothing to save
  case T_LONG:
    __ sd(V0, FP, -wordSize);
    break;
  case T_OBJECT:
  case T_ARRAY:
    __ sd(V0, FP, -wordSize);           // oops are 64-bit on MIPS64
    break;
  default: {
    // remaining integral types fit in 32 bits
    __ sw(V0, FP, -wordSize);
    }
  }
}

// Reload the native result spilled by save_native_result() above; must
// mirror its switch cases and access widths exactly.
void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ lwc1(FSF, FP, -wordSize);
    break;
  case T_DOUBLE:
    __ ldc1(FSF, FP, -wordSize );
    break;
  case T_LONG:
    __ ld(V0, FP, -wordSize);
    break;
  case T_VOID:  break;                  // nothing to restore
  case T_OBJECT:
  case T_ARRAY:
    __ ld(V0, FP, -wordSize);
    break;
  default: {
    __ lw(V0, FP, -wordSize);
    }
  }
}

// Push the argument registers in args[first_arg, arg_count) so a runtime
// call can be made without clobbering them.  Paired with restore_args(),
// which pops in the opposite order.
static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      __ push(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ push(args[i].first()->as_FloatRegister());
    }
  }
}

// Inverse of save_args(): iterate in reverse so each pop matches the
// corresponding push above.
static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
    if (args[i].first()->is_Register()) {
      __ pop(args[i].first()->as_Register());
    } else if (args[i].first()->is_FloatRegister()) {
      __ pop(args[i].first()->as_FloatRegister());
    }
  }
}

// A simple move of integer like type.  Incoming (Java) stack slots are
// FP-relative, outgoing (native) slots are SP-relative.  AT is the scratch
// register for the stack-to-stack case.
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      // lw sign-extends the 32-bit value; the full 64-bit out slot is written.
      __ lw(AT, FP, reg2offset_in(src.first()));
      __ sd(AT,SP, reg2offset_out(dst.first()));
    } else {
      // stack to reg
      //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
      __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  } else {
    //__ mov(src.first()->as_Register(), dst.first()->as_Register());
    if (dst.first() != src.first()){
      __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first()
    }
  }
}
/*
// On 64 bit we will store integer like items to the stack as
// 64 bits items (sparc abi) even though java would only store
// 32bits for a parameter.
// On 32bit it will simply be 32 bits
// So this routine will do 32->32 on 32bit and 32->64 on 64bit
static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
    } else {
      // stack to reg
      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  } else {
    __ mov(src.first()->as_Register(), dst.first()->as_Register());
  }
}
*/

// An oop arg. Must pass a handle not the oop itself.
// The handle is NULL when the oop itself is NULL (JNI requires a NULL
// jobject for a null reference); otherwise it is the address of the stack
// slot holding the oop.  Every handlized oop slot is also recorded in the
// OopMap so the GC can find (and update) it.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  //FIXME, for mips, dst can be register
  if (src.first()->is_stack()) {
    // Oop is already on the stack as an argument
    Register rHandle = V0;
    Label nil;
    // rHandle = 0, i.e. a NULL jobject, unless the oop is non-NULL
    //__ xorl(rHandle, rHandle);
    __ xorr(rHandle, rHandle, rHandle);
    //__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD);
    __ ld(AT, FP, reg2offset_in(src.first()));
    //__ jcc(Assembler::equal, nil);
    __ beq(AT,R0, nil);
    __ delayed()->nop();
    // non-NULL: the handle is the address of the caller's stack slot
    // __ leal(rHandle, Address(ebp, reg2offset_in(src.first())));
    __ lea(rHandle, Address(FP, reg2offset_in(src.first())));
    __ bind(nil);
    //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
    if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
    else                       __ move( (dst.first())->as_Register(),rHandle);
    //if dst is register
    //FIXME, do mips need out preserve stack slots?
    // Record the caller-frame slot (relative to this frame) in the oop map.
    int offset_in_older_frame = src.first()->reg2stack()
      + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame
          + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }
  } else {
    // Oop is in an a register we must store it to the space we reserve
    // on the stack for oop_handles
    const Register rOop = src.first()->as_Register();
    assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
    // const Register rHandle = eax;
    const Register rHandle = V0;
    //Important: refer to java_calling_convertion
    // The oop-handle slot index is derived from the argument register number,
    // so each inbound oop register gets its own reserved slot.
    int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot*VMRegImpl::stack_slot_size;
    Label skip;
    // Park the oop in its reserved handle slot and tell the GC about it.
    // __ movl(Address(esp, offset), rOop);
    __ sd( rOop , SP, offset );
    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // NULL oop -> NULL handle, otherwise handle = address of the slot.
    // __ xorl(rHandle, rHandle);
    __ xorr( rHandle, rHandle, rHandle);
    //__ cmpl(rOop, NULL_WORD);
    // __ jcc(Assembler::equal, skip);
    __ beq(rOop, R0, skip);
    __ delayed()->nop();
    // __ leal(rHandle, Address(esp, offset));
    __ lea(rHandle, Address(SP, offset));
    __ bind(skip);
    // Store the handle parameter
    //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
    if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
    else                       __ move((dst.first())->as_Register(), rHandle);
    //if dst is register

    if (is_receiver) {
      *receiver_offset = offset;
    }
  }
}

// A float arg may have to do float reg int reg conversion
// (single 32-bit value; F12 is used as scratch for stack-to-stack).
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  if (src.first()->is_stack()) {
    if(dst.first()->is_stack()){
      // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
      __ lwc1(F12 , FP, reg2offset_in(src.first()));
      // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
      __ swc1(F12 ,SP, reg2offset_out(dst.first()));
    }
    else
      __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
  } else {
    // reg to stack
    // __ movss(Address(esp, reg2offset_out(dst.first())),
    // src.first()->as_XMMRegister());
    // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
    if(dst.first()->is_stack())
      __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
    else
      __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}
/*
static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  VMRegPair src_lo(src.first());
  VMRegPair src_hi(src.second());
  VMRegPair dst_lo(dst.first());
  VMRegPair dst_hi(dst.second());
  simple_move32(masm, src_lo, dst_lo);
  simple_move32(masm, src_hi, dst_hi);
}
*/
// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The only legal possibility for a long_move VMRegPair is:
  // 1: two stack slots (possibly unaligned)
  // as neither the java or C calling convention will use registers
  // for longs.

  if (src.first()->is_stack()) {
    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
    // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
    if( dst.first()->is_stack()){
      // stack to stack: move the full 64-bit value through scratch AT
      __ ld(AT, FP, reg2offset_in(src.first()));
      // __ movl(ebx, address(ebp, reg2offset_in(src.second())));
      //__ lw(V0, FP, reg2offset_in(src.second()));
      // __ movl(address(esp, reg2offset_out(dst.first())), eax);
      __ sd(AT, SP, reg2offset_out(dst.first()));
      // __ movl(address(esp, reg2offset_out(dst.second())), ebx);
      //__ sw(V0, SP, reg2offset_out(dst.second()));
    } else{
      __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
      //__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second()));
    }
  } else {
    if( dst.first()->is_stack()){
      __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
      //__ sw( (src.second())->as_Register(), SP, reg2offset_out(dst.second()));
    } else{
      __ move( (dst.first())->as_Register() , (src.first())->as_Register());
      //__ move( (dst.second())->as_Register(), (src.second())->as_Register());
    }
  }
}

// A double move (64-bit FP value; F12 is scratch for stack-to-stack).
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The only legal possibilities for a double_move VMRegPair are:
  // The painful thing here is that like long_move a VMRegPair might be

  // Because of the calling convention we know that src is either
  //   1: a single physical register (xmm registers only)
  //   2: two stack slots (possibly unaligned)
  // dst can only be a pair of stack slots.

  // assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() ||
  //        src.first()->is_stack()), "bad args");
  // assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");

  if (src.first()->is_stack()) {
    // source is all stack
    // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
    if( dst.first()->is_stack()){
      __ ldc1(F12, FP, reg2offset_in(src.first()));
      //__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
      //__ lwc1(F14, FP, reg2offset_in(src.second()));

      // __ movl(Address(esp, reg2offset_out(dst.first())), eax);
      __ sdc1(F12, SP, reg2offset_out(dst.first()));
      // __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
      //__ swc1(F14, SP, reg2offset_out(dst.second()));
    } else{
      __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
      //__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
    }

  } else {
    // reg to stack
    // No worries about stack alignment
    // __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
    if( dst.first()->is_stack()){
      __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
      //__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
    }
    else
      __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
      //__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister());

  }
}

// When +VerifyOops, emit a verify_oop check for every oop/array argument,
// whether it lives in a register or in a stack slot.  No-op otherwise.
static void verify_oop_args(MacroAssembler* masm,
                            methodHandle method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = T9;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (sig_bt[i] == T_OBJECT ||
          sig_bt[i] == T_ARRAY) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
//        __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
          __ verify_oop(temp_reg);
        } else {
          __ verify_oop(r->as_Register());
        }
      }
    }
  }
}

// Dispatch a method-handle intrinsic (invokeBasic / linkTo*): locate the
// receiver and/or trailing MemberName argument and jump to the target via
// MethodHandles::generate_method_handle_dispatch.  Mirrors the x86 version
// with S3/SSR standing in for rbx/j_rarg0.
static void gen_special_dispatch(MacroAssembler* masm,
                                 methodHandle method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
//  member_reg = rbx;  // known to be free at this point
    member_reg = S3;  // known to be free at this point
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else {
    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
//    __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
      __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register.  If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
      // NOTE(review): the two statements below are unreachable after the
      // fatal() above; kept to match the x86 template they were ported from.
//    receiver_reg = j_rarg0;  // known to be free at this point
      receiver_reg = SSR;  // known to be free at this point
//    __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
      __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method.
The method takes arguments 1.1906 +// in the Java compiled code convention, marshals them to the native 1.1907 +// convention (handlizes oops, etc), transitions to native, makes the call, 1.1908 +// returns to java state (possibly blocking), unhandlizes any result and 1.1909 +// returns. 1.1910 +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1.1911 + methodHandle method, 1.1912 + int compile_id, 1.1913 + BasicType *in_sig_bt, 1.1914 + VMRegPair *in_regs, 1.1915 + BasicType ret_type) { 1.1916 + 1.1917 + if (method->is_method_handle_intrinsic()) { 1.1918 + vmIntrinsics::ID iid = method->intrinsic_id(); 1.1919 + intptr_t start = (intptr_t)__ pc(); 1.1920 + int vep_offset = ((intptr_t)__ pc()) - start; 1.1921 + 1.1922 + gen_special_dispatch(masm, 1.1923 + method, 1.1924 + in_sig_bt, 1.1925 + in_regs); 1.1926 + 1.1927 + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1.1928 + __ flush(); 1.1929 + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 1.1930 + return nmethod::new_native_nmethod(method, 1.1931 + compile_id, 1.1932 + masm->code(), 1.1933 + vep_offset, 1.1934 + frame_complete, 1.1935 + stack_slots / VMRegImpl::slots_per_word, 1.1936 + in_ByteSize(-1), 1.1937 + in_ByteSize(-1), 1.1938 + (OopMapSet*)NULL); 1.1939 + } 1.1940 + bool is_critical_native = true; 1.1941 + address native_func = method->critical_native_function(); 1.1942 + if (native_func == NULL) { 1.1943 + native_func = method->native_function(); 1.1944 + is_critical_native = false; 1.1945 + } 1.1946 + assert(native_func != NULL, "must have function"); 1.1947 + 1.1948 + // Native nmethod wrappers never take possesion of the oop arguments. 1.1949 + // So the caller will gc the arguments. 
The only thing we need an 1.1950 + // oopMap for is if the call is static 1.1951 + // 1.1952 + // An OopMap for lock (and class if static), and one for the VM call itself 1.1953 + OopMapSet *oop_maps = new OopMapSet(); 1.1954 + 1.1955 + // We have received a description of where all the java arg are located 1.1956 + // on entry to the wrapper. We need to convert these args to where 1.1957 + // the jni function will expect them. To figure out where they go 1.1958 + // we convert the java signature to a C signature by inserting 1.1959 + // the hidden arguments as arg[0] and possibly arg[1] (static method) 1.1960 + 1.1961 + const int total_in_args = method->size_of_parameters(); 1.1962 + int total_c_args = total_in_args; 1.1963 + if (!is_critical_native) { 1.1964 + total_c_args += 1; 1.1965 + if (method->is_static()) { 1.1966 + total_c_args++; 1.1967 + } 1.1968 + } else { 1.1969 + for (int i = 0; i < total_in_args; i++) { 1.1970 + if (in_sig_bt[i] == T_ARRAY) { 1.1971 + total_c_args++; 1.1972 + } 1.1973 + } 1.1974 + } 1.1975 + 1.1976 + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1.1977 + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1.1978 + BasicType* in_elem_bt = NULL; 1.1979 + 1.1980 + int argc = 0; 1.1981 + if (!is_critical_native) { 1.1982 + out_sig_bt[argc++] = T_ADDRESS; 1.1983 + if (method->is_static()) { 1.1984 + out_sig_bt[argc++] = T_OBJECT; 1.1985 + } 1.1986 + 1.1987 + for (int i = 0; i < total_in_args ; i++ ) { 1.1988 + out_sig_bt[argc++] = in_sig_bt[i]; 1.1989 + } 1.1990 + } else { 1.1991 + Thread* THREAD = Thread::current(); 1.1992 + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); 1.1993 + SignatureStream ss(method->signature()); 1.1994 + for (int i = 0; i < total_in_args ; i++ ) { 1.1995 + if (in_sig_bt[i] == T_ARRAY) { 1.1996 + // Arrays are passed as int, elem* pair 1.1997 + out_sig_bt[argc++] = T_INT; 1.1998 + out_sig_bt[argc++] = T_ADDRESS; 1.1999 + Symbol* atype = ss.as_symbol(CHECK_NULL); 
1.2000 + const char* at = atype->as_C_string(); 1.2001 + if (strlen(at) == 2) { 1.2002 + assert(at[0] == '[', "must be"); 1.2003 + switch (at[1]) { 1.2004 + case 'B': in_elem_bt[i] = T_BYTE; break; 1.2005 + case 'C': in_elem_bt[i] = T_CHAR; break; 1.2006 + case 'D': in_elem_bt[i] = T_DOUBLE; break; 1.2007 + case 'F': in_elem_bt[i] = T_FLOAT; break; 1.2008 + case 'I': in_elem_bt[i] = T_INT; break; 1.2009 + case 'J': in_elem_bt[i] = T_LONG; break; 1.2010 + case 'S': in_elem_bt[i] = T_SHORT; break; 1.2011 + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; 1.2012 + default: ShouldNotReachHere(); 1.2013 + } 1.2014 + } 1.2015 + } else { 1.2016 + out_sig_bt[argc++] = in_sig_bt[i]; 1.2017 + in_elem_bt[i] = T_VOID; 1.2018 + } 1.2019 + if (in_sig_bt[i] != T_VOID) { 1.2020 + assert(in_sig_bt[i] == ss.type(), "must match"); 1.2021 + ss.next(); 1.2022 + } 1.2023 + } 1.2024 + } 1.2025 + 1.2026 + // Now figure out where the args must be stored and how much stack space 1.2027 + // they require (neglecting out_preserve_stack_slots but space for storing 1.2028 + // the 1st six register arguments). It's weird see int_stk_helper. 1.2029 + // 1.2030 + int out_arg_slots; 1.2031 + //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); 1.2032 + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 1.2033 + 1.2034 + // Compute framesize for the wrapper. We need to handlize all oops in 1.2035 + // registers. We must create space for them here that is disjoint from 1.2036 + // the windowed save area because we have no control over when we might 1.2037 + // flush the window again and overwrite values that gc has since modified. 1.2038 + // (The live window race) 1.2039 + // 1.2040 + // We always just allocate 6 word for storing down these object. This allow 1.2041 + // us to simply record the base and use the Ireg number to decide which 1.2042 + // slot to use. (Note that the reg number is the inbound number not the 1.2043 + // outbound number). 
1.2044 + // We must shuffle args to match the native convention, and include var-args space. 1.2045 + 1.2046 + // Calculate the total number of stack slots we will need. 1.2047 + 1.2048 + // First count the abi requirement plus all of the outgoing args 1.2049 + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1.2050 + 1.2051 + // Now the space for the inbound oop handle area 1.2052 + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers 1.2053 + if (is_critical_native) { 1.2054 + // Critical natives may have to call out so they need a save area 1.2055 + // for register arguments. 1.2056 + int double_slots = 0; 1.2057 + int single_slots = 0; 1.2058 + for ( int i = 0; i < total_in_args; i++) { 1.2059 + if (in_regs[i].first()->is_Register()) { 1.2060 + const Register reg = in_regs[i].first()->as_Register(); 1.2061 + switch (in_sig_bt[i]) { 1.2062 + case T_BOOLEAN: 1.2063 + case T_BYTE: 1.2064 + case T_SHORT: 1.2065 + case T_CHAR: 1.2066 + case T_INT: single_slots++; break; 1.2067 + case T_ARRAY: // specific to LP64 (7145024) 1.2068 + case T_LONG: double_slots++; break; 1.2069 + default: ShouldNotReachHere(); 1.2070 + } 1.2071 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.2072 + switch (in_sig_bt[i]) { 1.2073 + case T_FLOAT: single_slots++; break; 1.2074 + case T_DOUBLE: double_slots++; break; 1.2075 + default: ShouldNotReachHere(); 1.2076 + } 1.2077 + } 1.2078 + } 1.2079 + total_save_slots = double_slots * 2 + single_slots; 1.2080 + // align the save area 1.2081 + if (double_slots != 0) { 1.2082 + stack_slots = round_to(stack_slots, 2); 1.2083 + } 1.2084 + } 1.2085 + 1.2086 + int oop_handle_offset = stack_slots; 1.2087 +// stack_slots += 9*VMRegImpl::slots_per_word; // T0, A0 ~ A7 1.2088 + stack_slots += total_save_slots; 1.2089 + 1.2090 + // Now any space we need for handlizing a klass if static method 1.2091 + 1.2092 + int klass_slot_offset = 0; 1.2093 + int klass_offset = -1; 1.2094 + int 
lock_slot_offset = 0; 1.2095 + bool is_static = false; 1.2096 + //int oop_temp_slot_offset = 0; 1.2097 + 1.2098 + if (method->is_static()) { 1.2099 + klass_slot_offset = stack_slots; 1.2100 + stack_slots += VMRegImpl::slots_per_word; 1.2101 + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1.2102 + is_static = true; 1.2103 + } 1.2104 + 1.2105 + // Plus a lock if needed 1.2106 + 1.2107 + if (method->is_synchronized()) { 1.2108 + lock_slot_offset = stack_slots; 1.2109 + stack_slots += VMRegImpl::slots_per_word; 1.2110 + } 1.2111 + 1.2112 + // Now a place to save return value or as a temporary for any gpr -> fpr moves 1.2113 + // + 2 for return address (which we own) and saved ebp 1.2114 + //stack_slots += 2; 1.2115 + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) 1.2116 + 1.2117 + // Ok The space we have allocated will look like: 1.2118 + // 1.2119 + // 1.2120 + // FP-> | | 1.2121 + // |---------------------| 1.2122 + // | 2 slots for moves | 1.2123 + // |---------------------| 1.2124 + // | lock box (if sync) | 1.2125 + // |---------------------| <- lock_slot_offset 1.2126 + // | klass (if static) | 1.2127 + // |---------------------| <- klass_slot_offset 1.2128 + // | oopHandle area | 1.2129 + // |---------------------| <- oop_handle_offset 1.2130 + // | outbound memory | 1.2131 + // | based arguments | 1.2132 + // | | 1.2133 + // |---------------------| 1.2134 + // | vararg area | 1.2135 + // |---------------------| 1.2136 + // | | 1.2137 + // SP-> | out_preserved_slots | 1.2138 + // 1.2139 + // 1.2140 + 1.2141 + 1.2142 + // Now compute actual number of stack words we need rounding to make 1.2143 + // stack properly aligned. 
1.2144 + stack_slots = round_to(stack_slots, StackAlignmentInSlots); 1.2145 + 1.2146 + int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1.2147 + 1.2148 + intptr_t start = (intptr_t)__ pc(); 1.2149 + 1.2150 + 1.2151 + 1.2152 + // First thing make an ic check to see if we should even be here 1.2153 + address ic_miss = SharedRuntime::get_ic_miss_stub(); 1.2154 + 1.2155 + // We are free to use all registers as temps without saving them and 1.2156 + // restoring them except ebp. ebp is the only callee save register 1.2157 + // as far as the interpreter and the compiler(s) are concerned. 1.2158 + 1.2159 + //refer to register_mips.hpp:IC_Klass 1.2160 + const Register ic_reg = T1; 1.2161 + const Register receiver = T0; 1.2162 + Label hit; 1.2163 + Label exception_pending; 1.2164 + 1.2165 + __ verify_oop(receiver); 1.2166 + //__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 1.2167 + //add for compressedoops 1.2168 + __ load_klass(AT, receiver); 1.2169 + __ beq(AT, ic_reg, hit); 1.2170 + __ delayed()->nop(); 1.2171 + __ jmp(ic_miss, relocInfo::runtime_call_type); 1.2172 + __ delayed()->nop(); 1.2173 + // verified entry must be aligned for code patching. 1.2174 + // and the first 5 bytes must be in the same cache line 1.2175 + // if we align at 8 then we will be sure 5 bytes are in the same line 1.2176 + __ align(8); 1.2177 + 1.2178 + __ bind(hit); 1.2179 + 1.2180 + 1.2181 + int vep_offset = ((intptr_t)__ pc()) - start; 1.2182 +#ifdef COMPILER1 1.2183 + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { 1.2184 + // Object.hashCode can pull the hashCode from the header word 1.2185 + // instead of doing a full VM transition once it's been computed. 1.2186 + // Since hashCode is usually polymorphic at call sites we can't do 1.2187 + // this optimization at the call site without a lot of work. 
1.2188 + Label slowCase; 1.2189 + Register receiver = T0; 1.2190 + Register result = V0; 1.2191 + __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); 1.2192 + // check if locked 1.2193 + __ andi(AT, result, markOopDesc::unlocked_value); 1.2194 + __ beq(AT, R0, slowCase); 1.2195 + __ delayed()->nop(); 1.2196 + if (UseBiasedLocking) { 1.2197 + // Check if biased and fall through to runtime if so 1.2198 + __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); 1.2199 + __ bne(AT,R0, slowCase); 1.2200 + __ delayed()->nop(); 1.2201 + } 1.2202 + // get hash 1.2203 + __ li(AT, markOopDesc::hash_mask_in_place); 1.2204 + __ andr (AT, result, AT); 1.2205 + // test if hashCode exists 1.2206 + __ beq (AT, R0, slowCase); 1.2207 + __ delayed()->nop(); 1.2208 + __ shr(result, markOopDesc::hash_shift); 1.2209 + __ jr(RA); 1.2210 + __ delayed()->nop(); 1.2211 + __ bind (slowCase); 1.2212 + } 1.2213 +#endif // COMPILER1 1.2214 + 1.2215 + // The instruction at the verified entry point must be 5 bytes or longer 1.2216 + // because it can be patched on the fly by make_non_entrant. The stack bang 1.2217 + // instruction fits that requirement. 1.2218 + 1.2219 + // Generate stack overflow check 1.2220 + 1.2221 + if (UseStackBanging) { 1.2222 + //this function will modify the value in A0 1.2223 + __ push(A0); 1.2224 + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); 1.2225 + __ pop(A0); 1.2226 + } else { 1.2227 + // need a 5 byte instruction to allow MT safe patching to non-entrant 1.2228 + __ nop(); 1.2229 + __ nop(); 1.2230 + __ nop(); 1.2231 + __ nop(); 1.2232 + __ nop(); 1.2233 + } 1.2234 + // Generate a new frame for the wrapper. 1.2235 + // do mips need this ? 
1.2236 +#ifndef OPT_THREAD 1.2237 + __ get_thread(TREG); 1.2238 +#endif 1.2239 +//FIXME here 1.2240 + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); 1.2241 + // -2 because return address is already present and so is saved ebp 1.2242 + __ move(AT, -(StackAlignmentInBytes)); 1.2243 + __ andr(SP, SP, AT); 1.2244 + 1.2245 + __ enter(); 1.2246 + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); 1.2247 + 1.2248 + // Frame is now completed as far a size and linkage. 1.2249 + 1.2250 + int frame_complete = ((intptr_t)__ pc()) - start; 1.2251 + 1.2252 + // Calculate the difference between esp and ebp. We need to know it 1.2253 + // after the native call because on windows Java Natives will pop 1.2254 + // the arguments and it is painful to do esp relative addressing 1.2255 + // in a platform independent way. So after the call we switch to 1.2256 + // ebp relative addressing. 1.2257 +//FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change 1.2258 +//the SP 1.2259 + int fp_adjustment = stack_size - 2*wordSize; 1.2260 + 1.2261 +#ifdef COMPILER2 1.2262 + // C2 may leave the stack dirty if not in SSE2+ mode 1.2263 + // if (UseSSE >= 2) { 1.2264 + // __ verify_FPU(0, "c2i transition should have clean FPU stack"); 1.2265 + //} else { 1.2266 + __ empty_FPU_stack(); 1.2267 + //} 1.2268 +#endif /* COMPILER2 */ 1.2269 + 1.2270 + // Compute the ebp offset for any slots used after the jni call 1.2271 + 1.2272 + int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; 1.2273 + // We use edi as a thread pointer because it is callee save and 1.2274 + // if we load it once it is usable thru the entire wrapper 1.2275 + // const Register thread = edi; 1.2276 + const Register thread = TREG; 1.2277 + 1.2278 + // We use esi as the oop handle for the receiver/klass 1.2279 + // It is callee save so it survives the call to native 1.2280 + 1.2281 + // const Register oop_handle_reg = esi; 1.2282 + const Register 
oop_handle_reg = S4; 1.2283 + if (is_critical_native) { 1.2284 + __ stop("generate_native_wrapper in sharedRuntime <2>"); 1.2285 +//TODO:Fu 1.2286 +/* 1.2287 + check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, 1.2288 + oop_handle_offset, oop_maps, in_regs, in_sig_bt); 1.2289 +*/ 1.2290 + } 1.2291 + 1.2292 +#ifndef OPT_THREAD 1.2293 + __ get_thread(thread); 1.2294 +#endif 1.2295 + 1.2296 + // 1.2297 + // We immediately shuffle the arguments so that any vm call we have to 1.2298 + // make from here on out (sync slow path, jvmpi, etc.) we will have 1.2299 + // captured the oops from our caller and have a valid oopMap for 1.2300 + // them. 1.2301 + 1.2302 + // ----------------- 1.2303 + // The Grand Shuffle 1.2304 + // 1.2305 + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 1.2306 + // and, if static, the class mirror instead of a receiver. This pretty much 1.2307 + // guarantees that register layout will not match (and mips doesn't use reg 1.2308 + // parms though amd does). Since the native abi doesn't use register args 1.2309 + // and the java conventions does we don't have to worry about collisions. 1.2310 + // All of our moved are reg->stack or stack->stack. 1.2311 + // We ignore the extra arguments during the shuffle and handle them at the 1.2312 + // last moment. The shuffle is described by the two calling convention 1.2313 + // vectors we have in our possession. We simply walk the java vector to 1.2314 + // get the source locations and the c vector to get the destinations. 1.2315 + 1.2316 + int c_arg = method->is_static() ? 2 : 1 ; 1.2317 + 1.2318 + // Record esp-based slot for receiver on stack for non-static methods 1.2319 + int receiver_offset = -1; 1.2320 + 1.2321 + // This is a trick. We double the stack slots so we can claim 1.2322 + // the oops in the caller's frame. 
Since we are sure to have 1.2323 + // more args than the caller doubling is enough to make 1.2324 + // sure we can capture all the incoming oop args from the 1.2325 + // caller. 1.2326 + // 1.2327 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.2328 + 1.2329 + // Mark location of rbp (someday) 1.2330 + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); 1.2331 + 1.2332 + // Use eax, ebx as temporaries during any memory-memory moves we have to do 1.2333 + // All inbound args are referenced based on rbp and all outbound args via rsp. 1.2334 + 1.2335 + 1.2336 + 1.2337 +#ifdef ASSERT 1.2338 + bool reg_destroyed[RegisterImpl::number_of_registers]; 1.2339 + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 1.2340 + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1.2341 + reg_destroyed[r] = false; 1.2342 + } 1.2343 + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 1.2344 + freg_destroyed[f] = false; 1.2345 + } 1.2346 + 1.2347 +#endif /* ASSERT */ 1.2348 + 1.2349 + // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx 1.2350 + // Are free to temporaries if we have to do stack to steck moves. 1.2351 + // All inbound args are referenced based on ebp and all outbound args via esp. 1.2352 + 1.2353 + // This may iterate in two different directions depending on the 1.2354 + // kind of native it is. The reason is that for regular JNI natives 1.2355 + // the incoming and outgoing registers are offset upwards and for 1.2356 + // critical natives they are offset down. 
1.2357 + GrowableArray<int> arg_order(2 * total_in_args); 1.2358 + VMRegPair tmp_vmreg; 1.2359 +// tmp_vmreg.set1(rbx->as_VMReg()); 1.2360 + tmp_vmreg.set1(T8->as_VMReg()); 1.2361 + 1.2362 + if (!is_critical_native) { 1.2363 + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 1.2364 + arg_order.push(i); 1.2365 + arg_order.push(c_arg); 1.2366 + } 1.2367 + } else { 1.2368 + // Compute a valid move order, using tmp_vmreg to break any cycles 1.2369 + __ stop("generate_native_wrapper in sharedRuntime <2>"); 1.2370 +//TODO:Fu 1.2371 +// ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); 1.2372 + } 1.2373 + 1.2374 + int temploc = -1; 1.2375 + for (int ai = 0; ai < arg_order.length(); ai += 2) { 1.2376 + int i = arg_order.at(ai); 1.2377 + int c_arg = arg_order.at(ai + 1); 1.2378 + __ block_comment(err_msg("move %d -> %d", i, c_arg)); 1.2379 + if (c_arg == -1) { 1.2380 + assert(is_critical_native, "should only be required for critical natives"); 1.2381 + // This arg needs to be moved to a temporary 1.2382 + __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); 1.2383 + in_regs[i] = tmp_vmreg; 1.2384 + temploc = i; 1.2385 + continue; 1.2386 + } else if (i == -1) { 1.2387 + assert(is_critical_native, "should only be required for critical natives"); 1.2388 + // Read from the temporary location 1.2389 + assert(temploc != -1, "must be valid"); 1.2390 + i = temploc; 1.2391 + temploc = -1; 1.2392 + } 1.2393 +#ifdef ASSERT 1.2394 + if (in_regs[i].first()->is_Register()) { 1.2395 + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 1.2396 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.2397 + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); 1.2398 + } 1.2399 + if (out_regs[c_arg].first()->is_Register()) { 1.2400 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = 
true; 1.2401 + } else if (out_regs[c_arg].first()->is_FloatRegister()) { 1.2402 + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; 1.2403 + } 1.2404 +#endif /* ASSERT */ 1.2405 + switch (in_sig_bt[i]) { 1.2406 + case T_ARRAY: 1.2407 + if (is_critical_native) { 1.2408 + __ stop("generate_native_wrapper in sharedRuntime <2>"); 1.2409 + //TODO:Fu 1.2410 + // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); 1.2411 + c_arg++; 1.2412 +#ifdef ASSERT 1.2413 + if (out_regs[c_arg].first()->is_Register()) { 1.2414 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1.2415 + } else if (out_regs[c_arg].first()->is_FloatRegister()) { 1.2416 + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; 1.2417 + } 1.2418 +#endif 1.2419 + break; 1.2420 + } 1.2421 + case T_OBJECT: 1.2422 + assert(!is_critical_native, "no oop arguments"); 1.2423 + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1.2424 + ((i == 0) && (!is_static)), 1.2425 + &receiver_offset); 1.2426 + break; 1.2427 + case T_VOID: 1.2428 + break; 1.2429 + 1.2430 + case T_FLOAT: 1.2431 + float_move(masm, in_regs[i], out_regs[c_arg]); 1.2432 + break; 1.2433 + 1.2434 + case T_DOUBLE: 1.2435 + assert( i + 1 < total_in_args && 1.2436 + in_sig_bt[i + 1] == T_VOID && 1.2437 + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1.2438 + double_move(masm, in_regs[i], out_regs[c_arg]); 1.2439 + break; 1.2440 + 1.2441 + case T_LONG : 1.2442 + long_move(masm, in_regs[i], out_regs[c_arg]); 1.2443 + break; 1.2444 + 1.2445 + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1.2446 + 1.2447 + default: 1.2448 +// move32_64(masm, in_regs[i], out_regs[c_arg]); 1.2449 + simple_move32(masm, in_regs[i], out_regs[c_arg]); 1.2450 + } 1.2451 + } 1.2452 + 1.2453 + // point c_arg at the first arg that is already loaded in case we 1.2454 + // need to spill before we call out 1.2455 + 
c_arg = total_c_args - total_in_args; 1.2456 + // Pre-load a static method's oop into esi. Used both by locking code and 1.2457 + // the normal JNI call code. 1.2458 + 1.2459 + __ move(oop_handle_reg, A1); 1.2460 + 1.2461 + if (method->is_static() && !is_critical_native) { 1.2462 + 1.2463 + // load opp into a register 1.2464 + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( 1.2465 + (method->method_holder())->java_mirror())); 1.2466 + 1.2467 + 1.2468 + RelocationHolder rspec = oop_Relocation::spec(oop_index); 1.2469 + __ relocate(rspec); 1.2470 + //__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local( 1.2471 + // Klass::cast(method->method_holder())->java_mirror()))); 1.2472 + //__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int) 1.2473 + // JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()))); 1.2474 + __ li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); 1.2475 + // __ verify_oop(oop_handle_reg); 1.2476 + // Now handlize the static class mirror it's known not-null. 1.2477 + __ sd( oop_handle_reg, SP, klass_offset); 1.2478 + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 1.2479 + 1.2480 + // Now get the handle 1.2481 + __ lea(oop_handle_reg, Address(SP, klass_offset)); 1.2482 + // store the klass handle as second argument 1.2483 + __ move(A1, oop_handle_reg); 1.2484 + // and protect the arg if we must spill 1.2485 + c_arg--; 1.2486 + } 1.2487 + // Change state to native (we save the return address in the thread, since it might not 1.2488 + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() 1.2489 + // points into the right code segment. It does not have to be the correct return pc. 
1.2490 + // We use the same pc/oopMap repeatedly when we call out 1.2491 + 1.2492 + intptr_t the_pc = (intptr_t) __ pc(); 1.2493 + 1.2494 + oop_maps->add_gc_map(the_pc - start, map); 1.2495 + 1.2496 + //__ set_last_Java_frame(thread, esp, noreg, (address)the_pc); 1.2497 + __ set_last_Java_frame(SP, noreg, NULL); 1.2498 + __ relocate(relocInfo::internal_pc_type); 1.2499 + { 1.2500 + intptr_t save_pc = (intptr_t)the_pc ; 1.2501 + __ li48(AT, save_pc); 1.2502 + } 1.2503 + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 1.2504 + 1.2505 + 1.2506 + // We have all of the arguments setup at this point. We must not touch any register 1.2507 + // argument registers at this point (what if we save/restore them there are no oop? 1.2508 + { 1.2509 + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); 1.2510 + int metadata_index = __ oop_recorder()->find_index(method()); 1.2511 + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); 1.2512 + __ relocate(rspec); 1.2513 + //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); 1.2514 + //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); 1.2515 + __ li48(AT, (long)(method())); 1.2516 + 1.2517 + __ call_VM_leaf( 1.2518 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 1.2519 + thread, AT); 1.2520 + 1.2521 + } 1.2522 + 1.2523 + // These are register definitions we need for locking/unlocking 1.2524 +// const Register swap_reg = eax; // Must use eax for cmpxchg instruction 1.2525 +// const Register obj_reg = ecx; // Will contain the oop 1.2526 + // const Register lock_reg = edx; // Address of compiler lock object (BasicLock) 1.2527 +//FIXME, I hava no idea which register to use 1.2528 + const Register swap_reg = T8; // Must use eax for cmpxchg instruction 1.2529 + const Register obj_reg = T9; // Will contain the oop 1.2530 + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) 
1.2531 + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) 1.2532 + 1.2533 + 1.2534 + 1.2535 + Label slow_path_lock; 1.2536 + Label lock_done; 1.2537 + 1.2538 + // Lock a synchronized method 1.2539 + if (method->is_synchronized()) { 1.2540 + assert(!is_critical_native, "unhandled"); 1.2541 + 1.2542 + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 1.2543 + 1.2544 + // Get the handle (the 2nd argument) 1.2545 + __ move(oop_handle_reg, A1); 1.2546 + 1.2547 + // Get address of the box 1.2548 + __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); 1.2549 + 1.2550 + // Load the oop from the handle 1.2551 + __ ld(obj_reg, oop_handle_reg, 0); 1.2552 + 1.2553 + if (UseBiasedLocking) { 1.2554 + // Note that oop_handle_reg is trashed during this call 1.2555 + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, 1.2556 + false, lock_done, &slow_path_lock); 1.2557 + } 1.2558 + 1.2559 + // Load immediate 1 into swap_reg %eax 1.2560 + __ move(swap_reg, 1); 1.2561 + 1.2562 + __ ld(AT, obj_reg, 0); 1.2563 + __ orr(swap_reg, swap_reg, AT); 1.2564 + 1.2565 + __ sd( swap_reg, lock_reg, mark_word_offset); 1.2566 + __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); 1.2567 + __ bne(AT, R0, lock_done); 1.2568 + __ delayed()->nop(); 1.2569 + // Test if the oopMark is an obvious stack pointer, i.e., 1.2570 + // 1) (mark & 3) == 0, and 1.2571 + // 2) esp <= mark < mark + os::pagesize() 1.2572 + // These 3 tests can be done by evaluating the following 1.2573 + // expression: ((mark - esp) & (3 - os::vm_page_size())), 1.2574 + // assuming both stack pointer and pagesize have their 1.2575 + // least significant 2 bits clear. 
1.2576 + // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg 1.2577 + 1.2578 + __ dsub(swap_reg, swap_reg,SP); 1.2579 + __ move(AT, 3 - os::vm_page_size()); 1.2580 + __ andr(swap_reg , swap_reg, AT); 1.2581 + // Save the test result, for recursive case, the result is zero 1.2582 + __ sd(swap_reg, lock_reg, mark_word_offset); 1.2583 + //FIXME here, Why notEqual? 1.2584 + __ bne(swap_reg,R0, slow_path_lock); 1.2585 + __ delayed()->nop(); 1.2586 + // Slow path will re-enter here 1.2587 + __ bind(lock_done); 1.2588 + 1.2589 + if (UseBiasedLocking) { 1.2590 + // Re-fetch oop_handle_reg as we trashed it above 1.2591 + __ move(A1, oop_handle_reg); 1.2592 + } 1.2593 + } 1.2594 + 1.2595 + 1.2596 + // Finally just about ready to make the JNI call 1.2597 + 1.2598 + 1.2599 + // get JNIEnv* which is first argument to native 1.2600 + if (!is_critical_native) { 1.2601 + __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); 1.2602 + } 1.2603 + 1.2604 + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) 1.2605 + /* Load the second arguments into A1 */ 1.2606 + //__ ld(A1, SP , wordSize ); // klass 1.2607 + 1.2608 + // Now set thread in native 1.2609 + __ addi(AT, R0, _thread_in_native); 1.2610 + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 1.2611 + /* Jin: do the call */ 1.2612 + __ call(method->native_function(), relocInfo::runtime_call_type); 1.2613 + __ delayed()->nop(); 1.2614 + // WARNING - on Windows Java Natives use pascal calling convention and pop the 1.2615 + // arguments off of the stack. We could just re-adjust the stack pointer here 1.2616 + // and continue to do SP relative addressing but we instead switch to FP 1.2617 + // relative addressing. 1.2618 + 1.2619 + // Unpack native results. 
1.2620 + switch (ret_type) { 1.2621 + case T_BOOLEAN: __ c2bool(V0); break; 1.2622 + case T_CHAR : __ andi(V0,V0, 0xFFFF); break; 1.2623 + case T_BYTE : __ sign_extend_byte (V0); break; 1.2624 + case T_SHORT : __ sign_extend_short(V0); break; 1.2625 + case T_INT : // nothing to do break; 1.2626 + case T_DOUBLE : 1.2627 + case T_FLOAT : 1.2628 + // Result is in st0 we'll save as needed 1.2629 + break; 1.2630 + case T_ARRAY: // Really a handle 1.2631 + case T_OBJECT: // Really a handle 1.2632 + break; // can't de-handlize until after safepoint check 1.2633 + case T_VOID: break; 1.2634 + case T_LONG: break; 1.2635 + default : ShouldNotReachHere(); 1.2636 + } 1.2637 + // Switch thread to "native transition" state before reading the synchronization state. 1.2638 + // This additional state is necessary because reading and testing the synchronization 1.2639 + // state is not atomic w.r.t. GC, as this scenario demonstrates: 1.2640 + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 1.2641 + // VM thread changes sync state to synchronizing and suspends threads for GC. 1.2642 + // Thread A is resumed to finish this native method, but doesn't block here since it 1.2643 + // didn't see any synchronization is progress, and escapes. 1.2644 + // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); 1.2645 + //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); 1.2646 + // __ move(AT, (int)_thread_in_native_trans); 1.2647 + __ addi(AT, R0, _thread_in_native_trans); 1.2648 + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 1.2649 + 1.2650 + Label after_transition; 1.2651 + 1.2652 + // check for safepoint operation in progress and/or pending suspend requests 1.2653 + { Label Continue; 1.2654 +//FIXME here, which regiser should we use? 
1.2655 + // SafepointSynchronize::_not_synchronized); 1.2656 + __ li(AT, SafepointSynchronize::address_of_state()); 1.2657 + __ lw(A0, AT, 0); 1.2658 + __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); 1.2659 + Label L; 1.2660 + __ bne(AT,R0, L); 1.2661 + __ delayed()->nop(); 1.2662 + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); 1.2663 + __ beq(AT, R0, Continue); 1.2664 + __ delayed()->nop(); 1.2665 + __ bind(L); 1.2666 + 1.2667 + // Don't use call_VM as it will see a possible pending exception and forward it 1.2668 + // and never return here preventing us from clearing _last_native_pc down below. 1.2669 + // Also can't use call_VM_leaf either as it will check to see if esi & edi are 1.2670 + // preserved and correspond to the bcp/locals pointers. So we do a runtime call 1.2671 + // by hand. 1.2672 + // 1.2673 + save_native_result(masm, ret_type, stack_slots); 1.2674 + __ move (A0, thread); 1.2675 + __ addi(SP,SP, -wordSize); 1.2676 + if (!is_critical_native) { 1.2677 + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); 1.2678 + __ delayed()->nop(); 1.2679 + } else { 1.2680 + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); 1.2681 + __ delayed()->nop(); 1.2682 + } 1.2683 +// __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); 1.2684 +// __ delayed()->nop(); 1.2685 + __ addi(SP,SP, wordSize); 1.2686 + //add for compressedoops 1.2687 + __ reinit_heapbase(); 1.2688 + // Restore any method result value 1.2689 + restore_native_result(masm, ret_type, stack_slots); 1.2690 + 1.2691 + if (is_critical_native) { 1.2692 + // The call above performed the transition to thread_in_Java so 1.2693 + // skip the transition logic below. 
1.2694 + __ beq(R0, R0, after_transition); 1.2695 + __ delayed()->nop(); 1.2696 + } 1.2697 + 1.2698 + __ bind(Continue); 1.2699 + } 1.2700 + 1.2701 + // change thread state 1.2702 + __ addi(AT, R0, _thread_in_Java); 1.2703 + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 1.2704 + __ bind(after_transition); 1.2705 + Label reguard; 1.2706 + Label reguard_done; 1.2707 + __ ld(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); 1.2708 + __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); 1.2709 + __ beq(AT, R0, reguard); 1.2710 + __ delayed()->nop(); 1.2711 + // slow path reguard re-enters here 1.2712 + __ bind(reguard_done); 1.2713 + 1.2714 + // Handle possible exception (will unlock if necessary) 1.2715 + 1.2716 + // native result if any is live 1.2717 + 1.2718 + // Unlock 1.2719 + Label slow_path_unlock; 1.2720 + Label unlock_done; 1.2721 + if (method->is_synchronized()) { 1.2722 + 1.2723 + Label done; 1.2724 + 1.2725 + // Get locked oop from the handle we passed to jni 1.2726 + __ ld( obj_reg, oop_handle_reg, 0); 1.2727 + //FIXME 1.2728 + if (UseBiasedLocking) { 1.2729 + __ biased_locking_exit(obj_reg, T8, done); 1.2730 + 1.2731 + } 1.2732 + 1.2733 + // Simple recursive lock? 
1.2734 + 1.2735 + __ ld(AT, FP, lock_slot_ebp_offset); 1.2736 + __ beq(AT, R0, done); 1.2737 + __ delayed()->nop(); 1.2738 + // Must save eax if if it is live now because cmpxchg must use it 1.2739 + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 1.2740 + save_native_result(masm, ret_type, stack_slots); 1.2741 + } 1.2742 + 1.2743 + // get old displaced header 1.2744 + __ ld (T8, FP, lock_slot_ebp_offset); 1.2745 + // get address of the stack lock 1.2746 + //FIXME aoqi 1.2747 + //__ addi (T6, FP, lock_slot_ebp_offset); 1.2748 + __ addi (c_rarg0, FP, lock_slot_ebp_offset); 1.2749 + // Atomic swap old header if oop still contains the stack lock 1.2750 + //FIXME aoqi 1.2751 + //__ cmpxchg(T8, Address(obj_reg, 0),T6 ); 1.2752 + __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); 1.2753 + 1.2754 + __ beq(AT, R0, slow_path_unlock); 1.2755 + __ delayed()->nop(); 1.2756 + // slow path re-enters here 1.2757 + __ bind(unlock_done); 1.2758 + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { 1.2759 + restore_native_result(masm, ret_type, stack_slots); 1.2760 + } 1.2761 + 1.2762 + __ bind(done); 1.2763 + 1.2764 + } 1.2765 + { 1.2766 + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); 1.2767 + // Tell dtrace about this method exit 1.2768 + save_native_result(masm, ret_type, stack_slots); 1.2769 + int metadata_index = __ oop_recorder()->find_index( (method())); 1.2770 + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); 1.2771 + __ relocate(rspec); 1.2772 + //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); 1.2773 + //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); 1.2774 + __ li48(AT, (long)(method())); 1.2775 + 1.2776 + __ call_VM_leaf( 1.2777 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 1.2778 + thread, AT); 1.2779 + restore_native_result(masm, ret_type, stack_slots); 1.2780 + } 1.2781 + 1.2782 + // We can finally stop using that 
last_Java_frame we setup ages ago 1.2783 + 1.2784 + __ reset_last_Java_frame(false, true); 1.2785 + 1.2786 + // Unpack oop result 1.2787 + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { 1.2788 + Label L; 1.2789 + // __ cmpl(eax, NULL_WORD); 1.2790 + // __ jcc(Assembler::equal, L); 1.2791 + __ beq(V0, R0,L ); 1.2792 + __ delayed()->nop(); 1.2793 + // __ movl(eax, Address(eax)); 1.2794 + __ ld(V0, V0, 0); 1.2795 + __ bind(L); 1.2796 + // __ verify_oop(eax); 1.2797 + __ verify_oop(V0); 1.2798 + } 1.2799 + 1.2800 + if (!is_critical_native) { 1.2801 + // reset handle block 1.2802 + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); 1.2803 + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); 1.2804 + } 1.2805 + 1.2806 + if (!is_critical_native) { 1.2807 + // Any exception pending? 1.2808 + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.2809 + 1.2810 + __ bne(AT, R0, exception_pending); 1.2811 + __ delayed()->nop(); 1.2812 + } 1.2813 + // no exception, we're almost done 1.2814 + 1.2815 + // check that only result value is on FPU stack 1.2816 + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); 1.2817 + 1.2818 + // Fixup floating pointer results so that result looks like a return from a compiled method 1.2819 +/* if (ret_type == T_FLOAT) { 1.2820 + if (UseSSE >= 1) { 1.2821 + // Pop st0 and store as float and reload into xmm register 1.2822 + __ fstp_s(Address(ebp, -4)); 1.2823 + __ movss(xmm0, Address(ebp, -4)); 1.2824 + } 1.2825 + } else if (ret_type == T_DOUBLE) { 1.2826 + if (UseSSE >= 2) { 1.2827 + // Pop st0 and store as double and reload into xmm register 1.2828 + __ fstp_d(Address(ebp, -8)); 1.2829 + __ movsd(xmm0, Address(ebp, -8)); 1.2830 + } 1.2831 + } 1.2832 +*/ 1.2833 + // Return 1.2834 +#ifndef OPT_THREAD 1.2835 + __ get_thread(TREG); 1.2836 +#endif 1.2837 + __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); 1.2838 + __ leave(); 1.2839 + 1.2840 + __ jr(RA); 1.2841 + __ delayed()->nop(); 1.2842 + // Unexpected paths are out of line and go here 1.2843 +/* 1.2844 + if (!is_critical_native) { 1.2845 + // forward the exception 1.2846 + __ bind(exception_pending); 1.2847 + 1.2848 + // and forward the exception 1.2849 + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1.2850 + } 1.2851 +*/ 1.2852 + // Slow path locking & unlocking 1.2853 + if (method->is_synchronized()) { 1.2854 + 1.2855 + // BEGIN Slow path lock 1.2856 + 1.2857 + __ bind(slow_path_lock); 1.2858 + 1.2859 + // protect the args we've loaded 1.2860 + save_args(masm, total_c_args, c_arg, out_regs); 1.2861 + 1.2862 + // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM 1.2863 + // args are (oop obj, BasicLock* lock, JavaThread* thread) 1.2864 + 1.2865 + __ move(A0, obj_reg); 1.2866 + __ move(A1, lock_reg); 1.2867 + __ move(A2, thread); 1.2868 + __ addi(SP, SP, - 3*wordSize); 1.2869 + 1.2870 + __ move(AT, -(StackAlignmentInBytes)); 1.2871 + __ move(S2, SP); // use S2 as a sender SP holder 1.2872 + __ andr(SP, SP, AT); // align stack as required by ABI 1.2873 + 1.2874 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); 1.2875 + __ delayed()->nop(); 1.2876 + __ move(SP, S2); 1.2877 + __ addi(SP, SP, 3*wordSize); 1.2878 + 1.2879 + restore_args(masm, total_c_args, c_arg, out_regs); 1.2880 + 1.2881 +#ifdef ASSERT 1.2882 + { Label L; 1.2883 + // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); 1.2884 + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.2885 + //__ jcc(Assembler::equal, L); 1.2886 + __ beq(AT, R0, L); 1.2887 + __ delayed()->nop(); 1.2888 + __ stop("no pending exception allowed on exit from monitorenter"); 1.2889 + __ bind(L); 1.2890 + } 1.2891 +#endif 1.2892 + __ b(lock_done); 1.2893 + __ delayed()->nop(); 1.2894 + // END Slow path lock 1.2895 + 1.2896 + // BEGIN Slow path unlock 1.2897 + __ bind(slow_path_unlock); 1.2898 + 1.2899 + // Slow path unlock 1.2900 + 1.2901 + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 1.2902 + save_native_result(masm, ret_type, stack_slots); 1.2903 + } 1.2904 + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 1.2905 + 1.2906 + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.2907 + __ push(AT); 1.2908 + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); 1.2909 + 1.2910 + __ move(AT, -(StackAlignmentInBytes)); 1.2911 + __ move(S2, SP); // use S2 as a sender SP holder 1.2912 + __ andr(SP, SP, AT); // align stack as required by ABI 1.2913 + 1.2914 + // should be a peal 
1.2915 + // +wordSize because of the push above 1.2916 + __ addi(A1, FP, lock_slot_ebp_offset); 1.2917 + 1.2918 + __ move(A0, obj_reg); 1.2919 + __ addi(SP,SP, -2*wordSize); 1.2920 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 1.2921 + relocInfo::runtime_call_type); 1.2922 + __ delayed()->nop(); 1.2923 + __ addi(SP,SP, 2*wordSize); 1.2924 + __ move(SP, S2); 1.2925 + //add for compressedoops 1.2926 + __ reinit_heapbase(); 1.2927 +#ifdef ASSERT 1.2928 + { 1.2929 + Label L; 1.2930 + // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); 1.2931 + __ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); 1.2932 + //__ jcc(Assembler::equal, L); 1.2933 + __ beq(AT, R0, L); 1.2934 + __ delayed()->nop(); 1.2935 + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 1.2936 + __ bind(L); 1.2937 + } 1.2938 +#endif /* ASSERT */ 1.2939 + 1.2940 + __ pop(AT); 1.2941 + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.2942 + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 1.2943 + restore_native_result(masm, ret_type, stack_slots); 1.2944 + } 1.2945 + __ b(unlock_done); 1.2946 + __ delayed()->nop(); 1.2947 + // END Slow path unlock 1.2948 + 1.2949 + } 1.2950 + 1.2951 + // SLOW PATH Reguard the stack if needed 1.2952 + 1.2953 + __ bind(reguard); 1.2954 + save_native_result(masm, ret_type, stack_slots); 1.2955 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 1.2956 + relocInfo::runtime_call_type); 1.2957 + __ delayed()->nop(); 1.2958 + //add for compressedoops 1.2959 + __ reinit_heapbase(); 1.2960 + restore_native_result(masm, ret_type, stack_slots); 1.2961 + __ b(reguard_done); 1.2962 + __ delayed()->nop(); 1.2963 + 1.2964 + // BEGIN EXCEPTION PROCESSING 1.2965 + if (!is_critical_native) { 1.2966 + // Forward the exception 1.2967 + __ bind(exception_pending); 1.2968 + 1.2969 + // remove possible return value from FPU register stack 1.2970 
+ __ empty_FPU_stack(); 1.2971 + 1.2972 + // pop our frame 1.2973 + //forward_exception_entry need return address on stack 1.2974 + __ addiu(SP, FP, wordSize); 1.2975 + __ ld(FP, SP, (-1) * wordSize); 1.2976 + 1.2977 + // and forward the exception 1.2978 + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 1.2979 + __ delayed()->nop(); 1.2980 + } 1.2981 + __ flush(); 1.2982 + 1.2983 + nmethod *nm = nmethod::new_native_nmethod(method, 1.2984 + compile_id, 1.2985 + masm->code(), 1.2986 + vep_offset, 1.2987 + frame_complete, 1.2988 + stack_slots / VMRegImpl::slots_per_word, 1.2989 + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 1.2990 + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), 1.2991 + oop_maps); 1.2992 + 1.2993 + if (is_critical_native) { 1.2994 + nm->set_lazy_critical_native(true); 1.2995 + } 1.2996 + return nm; 1.2997 + 1.2998 + 1.2999 +} 1.3000 + 1.3001 +#ifdef HAVE_DTRACE_H 1.3002 +// --------------------------------------------------------------------------- 1.3003 +// Generate a dtrace nmethod for a given signature. The method takes arguments 1.3004 +// in the Java compiled code convention, marshals them to the native 1.3005 +// abi and then leaves nops at the position you would expect to call a native 1.3006 +// function. When the probe is enabled the nops are replaced with a trap 1.3007 +// instruction that dtrace inserts and the trace will cause a notification 1.3008 +// to dtrace. 1.3009 +// 1.3010 +// The probes are only able to take primitive types and java/lang/String as 1.3011 +// arguments. No other java types are allowed. Strings are converted to utf8 1.3012 +// strings so that from dtrace point of view java strings are converted to C 1.3013 +// strings. There is an arbitrary fixed limit on the total space that a method 1.3014 +// can use for converting the strings. (256 chars per string in the signature). 1.3015 +// So any java string larger then this is truncated. 

// Per-VMReg table of FP-relative save offsets plus a one-shot init flag.
// Neither is referenced in this excerpt -- presumably filled in and used by
// the register save/restore code elsewhere in this file; verify before
// removing or repurposing.
static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
static bool offsets_initialized = false;

// Wrap a 64-bit integer register into a VMRegPair.  With 8-byte words one
// register holds both halves (set2); with 4-byte words the value spans the
// register and its successor, recorded as a pair.
static VMRegPair reg64_to_VMRegPair(Register r) {
  VMRegPair ret;
  if (wordSize == 8) {
    ret.set2(r->as_VMReg());
  } else {
    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  }
  return ret;
}


// ---------------------------------------------------------------------------
// Dtrace nmethod support.
//
// NOTE(review): this routine was carried over from the SPARC port -- it emits
// SPARC instructions (delay slots, save/restore register windows, G/O/L
// registers, STACK_BIAS) that do not exist on MIPS.  The '#endif //
// HAVE_DTRACE_H' below closes a guard whose '#ifdef' is above this excerpt,
// so the region is presumably compiled out -- confirm the guard before
// touching this code.
//
// Generates a 'dtrace' nmethod for the given method: a stub that presents
// the method's arguments to a dtrace probe trap.  In the probe signature
// (out_sig_bt) java/lang/String becomes a C string (T_ADDRESS), one-slot box
// objects are unboxed to their primitive value, any other object becomes
// NULL, and float/double are widened to int/long.
nmethod *SharedRuntime::generate_dtrace_nmethod(
    MacroAssembler *masm, methodHandle method) {


  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  // be single threaded in this method.
  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");

  // Fill in the signature array, for the calling-convention call.
  int total_args_passed = method->size_of_parameters();

  BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);

  // The signature we are going to use for the trap that dtrace will see
  // java/lang/String is converted. We drop "this" and any other object
  // is converted to NULL. (A one-slot java/lang/Long object reference
  // is converted to a two-slot long, which is why we double the allocation).
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);

  int i=0;
  int total_strings = 0;
  int first_arg_to_pass = 0;
  int total_c_args = 0;

  // Skip the receiver as dtrace doesn't want to see it
  if( !method->is_static() ) {
    in_sig_bt[i++] = T_OBJECT;
    first_arg_to_pass = 1;
  }

  // Walk the Java signature, building the Java-side (in_sig_bt) and
  // probe-side (out_sig_bt) type arrays in parallel.
  SignatureStream ss(method->signature());
  for ( ; !ss.at_return_type(); ss.next()) {
    BasicType bt = ss.type();
    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
    out_sig_bt[total_c_args++] = bt;
    if( bt == T_OBJECT) {
      symbolOop s = ss.as_symbol_or_null();
      if (s == vmSymbols::java_lang_String()) {
        total_strings++;
        out_sig_bt[total_c_args-1] = T_ADDRESS;
      } else if (s == vmSymbols::java_lang_Boolean() ||
                 s == vmSymbols::java_lang_Byte()) {
        out_sig_bt[total_c_args-1] = T_BYTE;
      } else if (s == vmSymbols::java_lang_Character() ||
                 s == vmSymbols::java_lang_Short()) {
        out_sig_bt[total_c_args-1] = T_SHORT;
      } else if (s == vmSymbols::java_lang_Integer() ||
                 s == vmSymbols::java_lang_Float()) {
        out_sig_bt[total_c_args-1] = T_INT;
      } else if (s == vmSymbols::java_lang_Long() ||
                 s == vmSymbols::java_lang_Double()) {
        out_sig_bt[total_c_args-1] = T_LONG;
        out_sig_bt[total_c_args++] = T_VOID;
      }
    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
      // We convert double to long
      out_sig_bt[total_c_args-1] = T_LONG;
      out_sig_bt[total_c_args++] = T_VOID;
    } else if ( bt == T_FLOAT) {
      // We convert float to int
      out_sig_bt[total_c_args-1] = T_INT;
    }
  }

  assert(i==total_args_passed, "validly parsed signature");

  // Now get the compiled-Java layout as input arguments
  int comp_args_on_stack;
  comp_args_on_stack = SharedRuntime::java_calling_convention(
      in_sig_bt, in_regs, total_args_passed, false);

  // We have received a description of where all the java arg are located
  // on entry to the wrapper. We need to convert these args to where
  // the a native (non-jni) function would expect them. To figure out
  // where they go we convert the java signature to a C signature and remove
  // T_VOID for any long/double we might have received.


  // Now figure out where the args must be stored and how much stack space
  // they require (neglecting out_preserve_stack_slots but space for storing
  // the 1st six register arguments). It's weird see int_stk_helper.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);

  // Calculate the total number of stack slots we will need.

  // First count the abi requirement plus all of the outgoing args
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

  // Plus a temp for possible conversion of float/double/long register args

  int conversion_temp = stack_slots;
  stack_slots += 2;


  // Now space for the string(s) we must convert

  int string_locs = stack_slots;
  stack_slots += total_strings *
                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);

  // Ok The space we have allocated will look like:
  //
  //
  // FP->  |                     |
  //       |---------------------|
  //       | string[n]           |
  //       |---------------------| <- string_locs[n]
  //       | string[n-1]         |
  //       |---------------------| <- string_locs[n-1]
  //       | ...                 |
  //       | ...                 |
  //       |---------------------| <- string_locs[1]
  //       | string[0]           |
  //       |---------------------| <- string_locs[0]
  //       | temp                |
  //       |---------------------| <- conversion_temp
  //       | outbound memory     |
  //       | based arguments     |
  //       |                     |
  //       |---------------------|
  //       |                     |
  // SP->  | out_preserved_slots |
  //
  //

  // Now compute actual number of stack words we need rounding to make
  // stack properly aligned.
  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);

  int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  intptr_t start = (intptr_t)__ pc();

  // First thing make an ic check to see if we should even be here

  {
    Label L;
    const Register temp_reg = G3_scratch;
    Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
    __ verify_oop(O0);
    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
    __ cmp(temp_reg, G5_inline_cache_reg);
    // Annulled branch: the delay-slot nop only executes on the taken path.
    __ brx(Assembler::equal, true, Assembler::pt, L);
    __ delayed()->nop();

    __ jump_to(ic_miss, 0);
    __ delayed()->nop();
    __ align(CodeEntryAlignment);
    __ bind(L);
  }

  int vep_offset = ((intptr_t)__ pc()) - start;


  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant. The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check before creating frame
  __ generate_stack_overflow_check(stack_size);

  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
         "valid size for make_non_entrant");

  // Generate a new frame for the wrapper.
  __ save(SP, -stack_size, SP);

  // Frame is now completed as far as size and linkage.

  int frame_complete = ((intptr_t)__ pc()) - start;

#ifdef ASSERT
  // Debug-only bookkeeping: assert that no argument source register is read
  // after a previous iteration has already overwritten it.
  bool reg_destroyed[RegisterImpl::number_of_registers];
  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
    reg_destroyed[r] = false;
  }
  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
    freg_destroyed[f] = false;
  }

#endif /* ASSERT */

  VMRegPair zero;
  const Register g0 = G0; // without this we get a compiler warning (why??)
  zero.set2(g0->as_VMReg());

  int c_arg, j_arg;

  // Lazily materialized register holding a stack offset too large for a
  // simm13 immediate; shared by all conversions below.
  Register conversion_off = noreg;

  // Shuffle every Java argument (in_regs) into its probe position
  // (out_regs), converting types as dictated by out_sig_bt.
  for (j_arg = first_arg_to_pass, c_arg = 0 ;
       j_arg < total_args_passed ; j_arg++, c_arg++ ) {

    VMRegPair src = in_regs[j_arg];
    VMRegPair dst = out_regs[c_arg];

#ifdef ASSERT
    if (src.first()->is_Register()) {
      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
    } else if (src.first()->is_FloatRegister()) {
      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
          FloatRegisterImpl::S)], "ack!");
    }
    if (dst.first()->is_Register()) {
      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
    } else if (dst.first()->is_FloatRegister()) {
      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
          FloatRegisterImpl::S)] = true;
    }
#endif /* ASSERT */

    switch (in_sig_bt[j_arg]) {
      case T_ARRAY:
      case T_OBJECT:
        {
          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
              out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
            // need to unbox a one-slot value
            Register in_reg = L0;
            Register tmp = L2;
            if ( src.first()->is_reg() ) {
              in_reg = src.first()->as_Register();
            } else {
              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
                     "must be");
              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
            }
            // If the final destination is an acceptable register
            if ( dst.first()->is_reg() ) {
              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
                tmp = dst.first()->as_Register();
              }
            }

            // A NULL box unboxes to zero: clear tmp in the delay slot and
            // (on 32-bit longs) its successor, then skip the load.
            Label skipUnbox;
            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
              __ mov(G0, tmp->successor());
            }
            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
            __ delayed()->mov(G0, tmp);

            BasicType bt = out_sig_bt[c_arg];
            int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
            switch (bt) {
              case T_BYTE:
                __ ldub(in_reg, box_offset, tmp); break;
              case T_SHORT:
                __ lduh(in_reg, box_offset, tmp); break;
              case T_INT:
                __ ld(in_reg, box_offset, tmp); break;
              case T_LONG:
                __ ld_long(in_reg, box_offset, tmp); break;
              default: ShouldNotReachHere();
            }

            __ bind(skipUnbox);
            // If tmp wasn't final destination copy to final destination
            if (tmp == L2) {
              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
              if (out_sig_bt[c_arg] == T_LONG) {
                long_move(masm, tmp_as_VM, dst);
              } else {
                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
              }
            }
            if (out_sig_bt[c_arg] == T_LONG) {
              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
              ++c_arg; // move over the T_VOID to keep the loop indices in sync
            }
          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
            // String argument: place the oop in the out position now; the
            // string-conversion pass below rewrites it to a char* address.
            Register s =
                src.first()->is_reg() ? src.first()->as_Register() : L2;
            Register d =
                dst.first()->is_reg() ? dst.first()->as_Register() : L2;

            // We store the oop now so that the conversion pass can reach
            // while in the inner frame. This will be the only store if
            // the oop is NULL.
            if (s != L2) {
              // src is register
              if (d != L2) {
                // dst is register
                __ mov(s, d);
              } else {
                assert(Assembler::is_simm13(reg2offset(dst.first()) +
                       STACK_BIAS), "must be");
                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
              }
            } else {
              // src not a register
              assert(Assembler::is_simm13(reg2offset(src.first()) +
                     STACK_BIAS), "must be");
              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
              if (d == L2) {
                assert(Assembler::is_simm13(reg2offset(dst.first()) +
                       STACK_BIAS), "must be");
                __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
              }
            }
          } else if (out_sig_bt[c_arg] != T_VOID) {
            // Convert the arg to NULL
            if (dst.first()->is_reg()) {
              __ mov(G0, dst.first()->as_Register());
            } else {
              assert(Assembler::is_simm13(reg2offset(dst.first()) +
                     STACK_BIAS), "must be");
              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
            }
          }
        }
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          move32_64(masm, src, dst);
        } else {
          if (dst.first()->is_reg()) {
            // freg -> reg: bounce through the conversion_temp stack slot,
            // since there is no direct FPR->GPR move here.
            int off =
              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
            Register d = dst.first()->as_Register();
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
              __ ld(SP, off, d);
            } else {
              if (conversion_off == noreg) {
                __ set(off, L6);
                conversion_off = L6;
              }
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, conversion_off);
              __ ld(SP, conversion_off , d);
            }
          } else {
            // freg -> mem
            int off = STACK_BIAS + reg2offset(dst.first());
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
            } else {
              if (conversion_off == noreg) {
                __ set(off, L6);
                conversion_off = L6;
              }
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, conversion_off);
            }
          }
        }
        break;

      case T_DOUBLE:
        assert( j_arg + 1 < total_args_passed &&
                in_sig_bt[j_arg + 1] == T_VOID &&
                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          long_move(masm, src, dst);
        } else {
          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;

          // Destination could be an odd reg on 32bit in which case
          // we can't load direct to the destination.

          if (!d->is_even() && wordSize == 4) {
            d = L2;
          }
          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, off);
            __ ld_long(SP, off, d);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, conversion_off);
            __ ld_long(SP, conversion_off, d);
          }
          if (d == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        }
        break;

      case T_LONG :
        // 32bit can't do a split move of something like g1 -> O0, O1
        // so use a memory temp
        if (src.is_single_phys_reg() && wordSize == 4) {
          Register tmp = L2;
          if (dst.first()->is_reg() &&
              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
            tmp = dst.first()->as_Register();
          }

          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stx(src.first()->as_Register(), SP, off);
            __ ld_long(SP, off, tmp);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stx(src.first()->as_Register(), SP, conversion_off);
            __ ld_long(SP, conversion_off, tmp);
          }

          if (tmp == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        } else {
          long_move(masm, src, dst);
        }
        break;

      // NOTE(review): no break here -- when asserts are disabled this case
      // falls through into the default move32_64.  Same structure as the
      // upstream SPARC original; left as-is.
      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");

      default:
        move32_64(masm, src, dst);
    }
  }


  // If we have any strings we must store any register based arg to the stack
  // This includes any still live xmm registers too.

  if (total_strings > 0 ) {

    // protect all the arg registers
    __ save_frame(0);
    __ mov(G2_thread, L7_thread_cache);
    const Register L2_string_off = L2;

    // Get first string offset
    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);

    // Second pass: for each T_ADDRESS slot, call get_utf to convert the
    // String oop (stored above) into a C string in its reserved stack area,
    // then replace the oop with that buffer's address.
    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
      if (out_sig_bt[c_arg] == T_ADDRESS) {

        VMRegPair dst = out_regs[c_arg];
        const Register d = dst.first()->is_reg() ?
            dst.first()->as_Register()->after_save() : noreg;

        // The String oop was already copied to the out arg position above;
        // fetch it into O0 for the runtime call.
        if (d != noreg) {
          __ mov(d, O0);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
        }
        Label skip;

        // NULL oops stay NULL; delay slot computes the buffer address.
        __ br_null(O0, false, Assembler::pn, skip);
        __ delayed()->add(FP, L2_string_off, O1);

        if (d != noreg) {
          __ mov(O1, d);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
        }

        // Delay slot bumps the offset to the next string's buffer.
        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
                relocInfo::runtime_call_type);
        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);

        __ bind(skip);

      }

    }
    __ mov(L7_thread_cache, G2_thread);
    __ restore();

  }


  // Ok now we are done. Need to place the nop that dtrace wants in order to
  // patch in the trap

  int patch_offset = ((intptr_t)__ pc()) - start;

  __ nop();


  // Return

  __ ret();
  __ delayed()->restore();

  __ flush();

  nmethod *nm = nmethod::new_dtrace_nmethod(
      method, masm->code(), vep_offset, patch_offset, frame_complete,
      stack_slots / VMRegImpl::slots_per_word);
  return nm;

}

#endif // HAVE_DTRACE_H

// this function returns the adjust size (in number of words) to a c2i adapter
// activation for use during deoptimization
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  // Extra interpreter locals beyond the passed parameters, in stack words.
  return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
}

// "Top of Stack" slots that may be unused by the calling convention but must
// otherwise be preserved.
// On Intel these are not necessary and the value can be zero.
// On Sparc this describes the words reserved for storing a register window
// when an interrupt occurs.
// Number of always-preserved out slots for this port.  MIPS, like x86,
// needs none; the commented-out line shows the SPARC register-window
// formula this was ported from.
uint SharedRuntime::out_preserve_stack_slots() {
  //return frame::register_save_words * VMRegImpl::slots_per_word;
  return 0;
}
// NOTE(review): the block below is dead SPARC frame-building code
// (gen_new_frame / make_new_frames) retained from the port source for
// reference; the live MIPS equivalent is inlined in generate_deopt_blob.
/*
static void gen_new_frame(MacroAssembler* masm, bool deopt) {
//
// Common out the new frame generation for deopt and uncommon trap
//
  Register G3pcs = G3_scratch; // Array of new pcs (input)
  Register Oreturn0 = O0;
  Register Oreturn1 = O1;
  Register O2UnrollBlock = O2;
  Register O3array = O3; // Array of frame sizes (input)
  Register O4array_size = O4; // number of frames (input)
  Register O7frame_size = O7; // size of the current frame (input)

  __ ld_ptr(O3array, 0, O7frame_size);
  __ sub(G0, O7frame_size, O7frame_size);
  __ save(SP, O7frame_size, SP);
  __ ld_ptr(G3pcs, 0, I7); // load frame's new pc

  #ifdef ASSERT
  // make sure that the frames are aligned properly
#ifndef _LP64
  __ btst(wordSize*2-1, SP);
  __ breakpoint_trap(Assembler::notZero);
#endif
  #endif

  // Deopt needs to pass some extra live values from frame to frame

  if (deopt) {
    __ mov(Oreturn0->after_save(), Oreturn0);
    __ mov(Oreturn1->after_save(), Oreturn1);
  }

  __ mov(O4array_size->after_save(), O4array_size);
  __ sub(O4array_size, 1, O4array_size);
  __ mov(O3array->after_save(), O3array);
  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  __ add(G3pcs, wordSize, G3pcs); // point to next pc value

  #ifdef ASSERT
  // trash registers to show a clear pattern in backtraces
  __ set(0xDEAD0000, I0);
  __ add(I0, 2, I1);
  __ add(I0, 4, I2);
  __ add(I0, 6, I3);
  __ add(I0, 8, I4);
  // Don't touch I5 could have valuable savedSP
  __ set(0xDEADBEEF, L0);
  __ mov(L0, L1);
  __ mov(L0, L2);
  __ mov(L0, L3);
  __ mov(L0, L4);
  __ mov(L0, L5);

  // trash the return value as there is nothing to return yet
  __ set(0xDEAD0001, O7);
  #endif

  __ mov(SP, O5_savedSP);
}


static void make_new_frames(MacroAssembler* masm, bool deopt) {
  //
  // loop through the UnrollBlock info and create new frames
  //
  Register G3pcs = G3_scratch;
  Register Oreturn0 = O0;
  Register Oreturn1 = O1;
  Register O2UnrollBlock = O2;
  Register O3array = O3;
  Register O4array_size = O4;
  Label loop;

  // Before we make new frames, check to see if stack is available.
  // Do this after the caller's return address is on top of stack
  if (UseStackBanging) {
    // Get total frame size for interpreted frames
    __ ld(Address(O2UnrollBlock, 0,
         Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
    __ bang_stack_size(O4, O3, G3_scratch);
  }

  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);

  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);

  // Adjust old interpreter frame to make space for new frame's extra java locals
  //
  // We capture the original sp for the transition frame only because it is needed in
  // order to properly calculate interpreter_sp_adjustment. Even though in real life
  // every interpreter frame captures a savedSP it is only needed at the transition
  // (fortunately). If we had to have it correct everywhere then we would need to
  // be told the sp_adjustment for each frame we create. If the frame size array
  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  // for each frame we create and keep up the illusion every where.
  //

  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
  __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment
  __ sub(SP, O7, SP);

#ifdef ASSERT
  // make sure that there is at least one entry in the array
  __ tst(O4array_size);
  __ breakpoint_trap(Assembler::zero);
#endif

  // Now push the new interpreter frames
  __ bind(loop);

  // allocate a new frame, filling the registers

  gen_new_frame(masm, deopt); // allocate an interpreter frame

  __ tst(O4array_size);
  __ br(Assembler::notZero, false, Assembler::pn, loop);
  __ delayed()->add(O3array, wordSize, O3array);
  __ ld_ptr(G3pcs, 0, O7); // load final frame new pc

}
*/

//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some hand-written
// assembly instead. (Comment inherited from the SPARC port; the code that
// follows emits MIPS instructions.)
1.3690 +void SharedRuntime::generate_deopt_blob() { 1.3691 + // allocate space for the code 1.3692 + ResourceMark rm; 1.3693 + // setup code generation tools 1.3694 + //CodeBuffer buffer ("deopt_blob", 4000, 2048); 1.3695 + CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug 1.3696 + MacroAssembler* masm = new MacroAssembler( & buffer); 1.3697 + int frame_size_in_words; 1.3698 + OopMap* map = NULL; 1.3699 + // Account for the extra args we place on the stack 1.3700 + // by the time we call fetch_unroll_info 1.3701 + const int additional_words = 2; // deopt kind, thread 1.3702 + 1.3703 + OopMapSet *oop_maps = new OopMapSet(); 1.3704 + 1.3705 + address start = __ pc(); 1.3706 + Label cont; 1.3707 + // we use S3 for DeOpt reason register 1.3708 + Register reason = S3; 1.3709 + // use S6 for thread register 1.3710 + Register thread = TREG; 1.3711 + // use S7 for fetch_unroll_info returned UnrollBlock 1.3712 + Register unroll = S7; 1.3713 + // Prolog for non exception case! 1.3714 + // Correct the return address we were given. 1.3715 + //FIXME, return address is on the tos or Ra? 1.3716 + __ addi(RA, RA, - (NativeCall::return_address_offset)); 1.3717 + // Save everything in sight. 
1.3718 + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); 1.3719 + // Normal deoptimization 1.3720 + __ move(reason, Deoptimization::Unpack_deopt); 1.3721 + __ b(cont); 1.3722 + __ delayed()->nop(); 1.3723 + 1.3724 + int reexecute_offset = __ pc() - start; 1.3725 + 1.3726 + // Reexecute case 1.3727 + // return address is the pc describes what bci to do re-execute at 1.3728 + 1.3729 + // No need to update map as each call to save_live_registers will produce identical oopmap 1.3730 + //__ addi(RA, RA, - (NativeCall::return_address_offset)); 1.3731 + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); 1.3732 + __ move(reason, Deoptimization::Unpack_reexecute); 1.3733 + __ b(cont); 1.3734 + __ delayed()->nop(); 1.3735 + 1.3736 + int exception_offset = __ pc() - start; 1.3737 + // Prolog for exception case 1.3738 + 1.3739 + // all registers are dead at this entry point, except for eax and 1.3740 + // edx which contain the exception oop and exception pc 1.3741 + // respectively. Set them in TLS and fall thru to the 1.3742 + // unpack_with_exception_in_tls entry point. 1.3743 + 1.3744 + __ get_thread(thread); 1.3745 + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 1.3746 + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); 1.3747 + int exception_in_tls_offset = __ pc() - start; 1.3748 + // new implementation because exception oop is now passed in JavaThread 1.3749 + 1.3750 + // Prolog for exception case 1.3751 + // All registers must be preserved because they might be used by LinearScan 1.3752 + // Exceptiop oop and throwing PC are passed in JavaThread 1.3753 + // tos: stack at point of call to method that threw the exception (i.e. only 1.3754 + // args are on the stack, no return address) 1.3755 + 1.3756 + // Return address will be patched later with the throwing pc. 
The correct value is not 1.3757 + // available now because loading it from memory would destroy registers. 1.3758 + // Save everything in sight. 1.3759 + // No need to update map as each call to save_live_registers will produce identical oopmap 1.3760 + __ addi(RA, RA, - (NativeCall::return_address_offset)); 1.3761 + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); 1.3762 + 1.3763 + // Now it is safe to overwrite any register 1.3764 + // store the correct deoptimization type 1.3765 + __ move(reason, Deoptimization::Unpack_exception); 1.3766 + // load throwing pc from JavaThread and patch it as the return address 1.3767 + // of the current frame. Then clear the field in JavaThread 1.3768 + __ get_thread(thread); 1.3769 + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 1.3770 + __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra 1.3771 + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); 1.3772 + 1.3773 + 1.3774 +#ifdef ASSERT 1.3775 + // verify that there is really an exception oop in JavaThread 1.3776 + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); 1.3777 + __ verify_oop(AT); 1.3778 + // verify that there is no pending exception 1.3779 + Label no_pending_exception; 1.3780 + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); 1.3781 + __ beq(AT, R0, no_pending_exception); 1.3782 + __ delayed()->nop(); 1.3783 + __ stop("must not have pending exception here"); 1.3784 + __ bind(no_pending_exception); 1.3785 +#endif 1.3786 + __ bind(cont); 1.3787 + // Compiled code leaves the floating point stack dirty, empty it. 1.3788 + __ empty_FPU_stack(); 1.3789 + 1.3790 + 1.3791 + // Call C code. Need thread and this frame, but NOT official VM entry 1.3792 + // crud. We cannot block on this call, no GC can happen. 
1.3793 +#ifndef OPT_THREAD 1.3794 + __ get_thread(thread); 1.3795 +#endif 1.3796 + 1.3797 +/* 1.3798 + * 1.3799 + 0x000000555bd82aec: dadd a0, s6, zero ; __ move(A0, thread); 1.3800 + 0x000000555bd82af0: daddi sp, sp, 0xfffffff0 ; __ addi(SP, SP, -additional_words * wordSize); 1.3801 + 0x000000555bd82af4: sd sp, 0x1c8(s6) ; __ set_last_Java_frame(thread, NOREG, NOREG, NULL); 1.3802 + 0x000000555bd82af8: lui at, 0x0 ; __ li64(AT, save_pc); 1.3803 + 0x000000555bd82afc: ori at, at, 0x55 1.3804 + 0x000000555bd82b00: dsll at, at, 16 1.3805 + 0x000000555bd82b04: ori at, at, 0x5bd8 1.3806 + 0x000000555bd82b08: dsll at, at, 16 1.3807 + 0x000000555bd82b0c: ori at, at, 0x2b34 ; save_pc = pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4 1.3808 + 0x000000555bd82b10: sd at, 0x1d0(s6) 1.3809 + 0x000000555bd82b14: lui t9, 0x0 1.3810 + 0x000000555bd82b18: ori t9, t9, 0x55 1.3811 + 0x000000555bd82b1c: dsll t9, t9, 16 1.3812 + 0x000000555bd82b20: ori t9, t9, 0x5aa6 1.3813 + 0x000000555bd82b24: dsll t9, t9, 16 1.3814 + 0x000000555bd82b28: ori t9, t9, 0x4074 1.3815 + 0x000000555bd82b2c: jalr t9 1.3816 + 0x000000555bd82b30: sll zero, zero, 0 1.3817 + 1.3818 + 0x000000555bd82b34: daddiu sp, sp, 0x10 ; save_pc 1.3819 + */ 1.3820 + __ move(A0, thread); 1.3821 + __ addi(SP, SP, -additional_words * wordSize); 1.3822 + 1.3823 + __ set_last_Java_frame(NOREG, NOREG, NULL); 1.3824 + 1.3825 + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on 1.3826 + // this call, no GC can happen. Call should capture return values. 
1.3827 + 1.3828 + __ relocate(relocInfo::internal_pc_type); 1.3829 + { 1.3830 + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; 1.3831 + __ li48(AT, save_pc); 1.3832 + } 1.3833 + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 1.3834 + 1.3835 + __ call((address)Deoptimization::fetch_unroll_info); 1.3836 + //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); 1.3837 + __ delayed()->nop(); 1.3838 + oop_maps->add_gc_map(__ pc() - start, map); 1.3839 + __ addiu(SP, SP, additional_words * wordSize); 1.3840 + __ get_thread(thread); 1.3841 + __ reset_last_Java_frame(false, true); 1.3842 + 1.3843 + // Load UnrollBlock into S7 1.3844 + __ move(unroll, V0); 1.3845 + 1.3846 + 1.3847 + // Move the unpack kind to a safe place in the UnrollBlock because 1.3848 + // we are very short of registers 1.3849 + 1.3850 + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); 1.3851 + //__ pop(reason); 1.3852 + __ sw(reason, unpack_kind); 1.3853 + // save the unpack_kind value 1.3854 + // Retrieve the possible live values (return values) 1.3855 + // All callee save registers representing jvm state 1.3856 + // are now in the vframeArray. 1.3857 + 1.3858 + Label noException; 1.3859 + __ move(AT, Deoptimization::Unpack_exception); 1.3860 + __ bne(AT, reason, noException);// Was exception pending? 1.3861 + __ delayed()->nop(); 1.3862 + __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); 1.3863 + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 1.3864 + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); 1.3865 + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); 1.3866 + 1.3867 + __ verify_oop(V0); 1.3868 + 1.3869 + // Overwrite the result registers with the exception results. 
1.3870 + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); 1.3871 + __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); 1.3872 + 1.3873 + __ bind(noException); 1.3874 + 1.3875 + 1.3876 + // Stack is back to only having register save data on the stack. 1.3877 + // Now restore the result registers. Everything else is either dead or captured 1.3878 + // in the vframeArray. 1.3879 + 1.3880 + RegisterSaver::restore_result_registers(masm); 1.3881 + // All of the register save area has been popped of the stack. Only the 1.3882 + // return address remains. 1.3883 + // Pop all the frames we must move/replace. 1.3884 + // Frame picture (youngest to oldest) 1.3885 + // 1: self-frame (no frame link) 1.3886 + // 2: deopting frame (no frame link) 1.3887 + // 3: caller of deopting frame (could be compiled/interpreted). 1.3888 + // 1.3889 + // Note: by leaving the return address of self-frame on the stack 1.3890 + // and using the size of frame 2 to adjust the stack 1.3891 + // when we are done the return to frame 3 will still be on the stack. 
1.3892 + 1.3893 + // register for the sender's sp 1.3894 + Register sender_sp = Rsender; 1.3895 + // register for frame pcs 1.3896 + Register pcs = T0; 1.3897 + // register for frame sizes 1.3898 + Register sizes = T1; 1.3899 + // register for frame count 1.3900 + Register count = T3; 1.3901 + 1.3902 + // Pop deoptimized frame 1.3903 + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); 1.3904 + __ add(SP, SP, AT); 1.3905 + // sp should be pointing at the return address to the caller (3) 1.3906 + 1.3907 + // Load array of frame pcs into pcs 1.3908 + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); 1.3909 + __ addi(SP, SP, wordSize); // trash the old pc 1.3910 + // Load array of frame sizes into T6 1.3911 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); 1.3912 + 1.3913 + 1.3914 + 1.3915 + // Load count of frams into T3 1.3916 + __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); 1.3917 + // Pick up the initial fp we should save 1.3918 + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); 1.3919 + // Now adjust the caller's stack to make up for the extra locals 1.3920 + // but record the original sp so that we can save it in the skeletal interpreter 1.3921 + // frame and the stack walking of interpreter_sender will get the unextended sp 1.3922 + // value and not the "real" sp value. 
1.3923 + __ move(sender_sp, SP); 1.3924 + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); 1.3925 + __ sub(SP, SP, AT); 1.3926 + 1.3927 + // Push interpreter frames in a loop 1.3928 +/* 1.3929 + * 1.3930 +Loop: 1.3931 + 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld 1.3932 + 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] 1.3933 + 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 1.3934 + 0x000000555bd82d24: daddi sp, sp, 0xfffffff0 1.3935 + 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp 1.3936 + 0x000000555bd82d2c: sd at, 0x8(sp) ; push at 1.3937 + 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp 1.3938 + 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 1.3939 + 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 1.3940 + 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 1.3941 + 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP); 1.3942 + 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count -- 1.3943 + 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4 1.3944 + 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 1.3945 + 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8 1.3946 + */ 1.3947 + 1.3948 +// pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split 1.3949 + Label loop; 1.3950 + __ bind(loop); 1.3951 + __ ld(T2, sizes, 0); // Load frame size 1.3952 + __ ld_ptr(AT, pcs, 0); // save return address 1.3953 + __ addi(T2, T2, -2*wordSize); // we'll push pc and rbp, by hand 1.3954 + __ push2(AT, FP); 1.3955 + __ move(FP, SP); 1.3956 + __ sub(SP, SP, T2); // Prolog! 
1.3957 + // This value is corrected by layout_activation_impl 1.3958 + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 1.3959 + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable 1.3960 + __ move(sender_sp, SP); // pass to next frame 1.3961 + __ addi(count, count, -1); // decrement counter 1.3962 + __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes) 1.3963 + __ bne(count, R0, loop); 1.3964 + __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs) 1.3965 + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); 1.3966 + // Re-push self-frame 1.3967 + __ push2(AT, FP); 1.3968 + __ move(FP, SP); 1.3969 + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 1.3970 + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 1.3971 + __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); 1.3972 + 1.3973 + // Restore frame locals after moving the frame 1.3974 + __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); 1.3975 + __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); 1.3976 + __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local 1.3977 + __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); 1.3978 + 1.3979 + 1.3980 + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on 1.3981 + // this call, no GC can happen. 
1.3982 + __ move(A1, reason); // exec_mode 1.3983 + __ get_thread(thread); 1.3984 + __ move(A0, thread); // thread 1.3985 + __ addi(SP, SP, (-additional_words) *wordSize); 1.3986 + 1.3987 + // set last_Java_sp, last_Java_fp 1.3988 + __ set_last_Java_frame(NOREG, FP, NULL); 1.3989 + 1.3990 + __ move(AT, -(StackAlignmentInBytes)); 1.3991 + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI 1.3992 + 1.3993 + __ relocate(relocInfo::internal_pc_type); 1.3994 + { 1.3995 + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; 1.3996 + __ li48(AT, save_pc); 1.3997 + } 1.3998 + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); 1.3999 + 1.4000 + //__ call(Deoptimization::unpack_frames); 1.4001 + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); 1.4002 + __ delayed()->nop(); 1.4003 + // Revert SP alignment after call since we're going to do some SP relative addressing below 1.4004 + __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); 1.4005 + // Set an oopmap for the call site 1.4006 + oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); 1.4007 + 1.4008 + __ push(V0); 1.4009 + 1.4010 + __ get_thread(thread); 1.4011 + __ reset_last_Java_frame(false, false); 1.4012 + 1.4013 + // Collect return values 1.4014 + __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize); 1.4015 + __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize); 1.4016 + __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local 1.4017 + __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); 1.4018 + //FIXME, 1.4019 + // Clear floating point stack before returning to interpreter 1.4020 + __ empty_FPU_stack(); 1.4021 + //FIXME, we should consider about float and double 1.4022 + // Push a float or double return value if 
necessary.
  __ leave();

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}

#ifdef COMPILER2

//------------------------------generate_uncommon_trap_blob--------------------
// Generates the blob entered on an uncommon trap: it calls into the VM to
// build the Deoptimization::UnrollBlock, replaces the compiled frame with
// the corresponding skeletal interpreter frames, and resumes execution in
// the interpreter via Deoptimization::unpack_frames.
void SharedRuntime::generate_uncommon_trap_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 );
  MacroAssembler* masm = new MacroAssembler(&buffer);

  // Self-frame layout in 32-bit (BytesPerInt) slots; each saved register
  // occupies two slots (xx_off / xx_off2) so each entry is 64-bit wide.
  enum frame_layout {
    s0_off, s0_off2,
    s1_off, s1_off2,
    s2_off, s2_off2,
    s3_off, s3_off2,
    s4_off, s4_off2,
    s5_off, s5_off2,
    s6_off, s6_off2,
    s7_off, s7_off2,
    fp_off, fp_off2,
    return_off, return_off2, // slot for return address sp + 9
    framesize
  };
  // 4 slots * 4 bytes = 16 bytes: keep SP 16-byte aligned per the ABI.
  assert(framesize % 4 == 0, "sp not 16-byte aligned");

  address start = __ pc();

  // Push self-frame.
  __ daddiu(SP, SP, -framesize * BytesPerInt);

  __ sd(RA, SP, return_off * BytesPerInt);
  __ sd(FP, SP, fp_off * BytesPerInt);

  // Save callee saved registers (S0..S7) so the oopmap below can describe
  // them and the VM call can't clobber them.
  __ sd(S0, SP, s0_off * BytesPerInt);
  __ sd(S1, SP, s1_off * BytesPerInt);
  __ sd(S2, SP, s2_off * BytesPerInt);
  __ sd(S3, SP, s3_off * BytesPerInt);
  __ sd(S4, SP, s4_off * BytesPerInt);
  __ sd(S5, SP, s5_off * BytesPerInt);
  __ sd(S6, SP, s6_off * BytesPerInt);
  __ sd(S7, SP, s7_off * BytesPerInt);

  __ daddi(FP, SP, fp_off * BytesPerInt);

  // Clear the floating point exception stack
  __ empty_FPU_stack();

  Register thread = TREG;

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // set last_Java_sp
  __ set_last_Java_frame(NOREG, FP, NULL);
  __ relocate(relocInfo::internal_pc_type);
  assert(NativeCall::return_address_offset == 24, "in sharedRuntime return_address_offset");
  {
    // Record the pc just past the upcoming call as last_Java_pc so the
    // stack walker can find this frame.
    long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
    __ li48(AT, (long)save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
  // Call C code. Need thread but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen. Call should
  // capture callee-saved registers as well as return values.
  __ move(A0, thread);
  // argument already in T0
  __ move(A1, T0);
  __ li48(T9, (long)Deoptimization::uncommon_trap);
  __ jalr(T9);
  __ delayed()->nop();

  // Set an oopmap for the call site
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap( framesize, 0 );

  map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s4_off ), S4->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s5_off ), S5->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s6_off ), S6->as_VMReg() );
  map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() );

  //oop_maps->add_gc_map( __ offset(), true, map);
  oop_maps->add_gc_map( __ offset(), map);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  __ reset_last_Java_frame(false,false);

  // Load UnrollBlock (returned by Deoptimization::uncommon_trap) into S7
  Register unroll = S7;
  __ move(unroll, V0);

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame (no frame link)
  // 3: possible-i2c-adapter-frame
  // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an
  //    and c2i here)

  // Pop self-frame.
  __ daddiu(SP, SP, framesize * BytesPerInt);

  // Pop deoptimized frame
  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  __ dadd(SP, SP, AT);

  // register for frame pcs
  Register pcs = T8;
  // register for frame sizes
  Register sizes = T9;
  // register for frame count
  Register count = T3;
  // register for the sender's sp
  Register sender_sp = T1;

  // sp should be pointing at the return address to the caller (4)
  // Load array of frame pcs
  __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());

/* 2012/9/7 Not needed in MIPS
  __ addiu(SP, SP, wordSize);
*/

  // Load array of frame sizes and the number of frames to push
  __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());

  // Pick up the initial fp we should save
  __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.

  __ move(sender_sp, SP);
  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  __ dsub(SP, SP, AT);
  // Push a skeletal interpreter frame for every entry in the sizes/pcs arrays
  Label loop;
  __ bind(loop);
  __ ld(T2, sizes, 0);                  // Load frame size
  __ ld(AT, pcs, 0);                    // Load return address
  __ daddi(T2, T2, -2*wordSize);        // we'll push pc and fp by hand, so they don't count
  __ push2(AT, FP);                     // push return address and old fp
  __ move(FP, SP);
  __ dsub(SP, SP, T2);                  // Prolog!
  // This value is corrected by layout_activation_impl
  __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  __ move(sender_sp, SP);               // pass to next frame
  __ daddi(count, count, -1);           // decrement counter
  __ daddi(sizes, sizes, wordSize);     // Bump array pointer (sizes)
  __ addi(pcs, pcs, wordSize);          // Bump array pointer (pcs)
  __ bne(count, R0, loop);
  __ delayed()->nop();                  // branch delay slot

  // pcs[number_of_frames]: pc to resume at in the self-frame
  __ ld(RA, pcs, 0);

  // Re-push self-frame
  __ daddi(SP, SP, - 2 * wordSize);     // make room for saved FP and RA
  __ sd(FP, SP, 0 * wordSize);          // save old FP
  __ sd(RA, SP, 1 * wordSize);          // save return address
  __ move(FP, SP);
  __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);

  // set last_Java_sp, last_Java_fp
  __ set_last_Java_frame(NOREG, FP, NULL);

  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);                  // Fix stack alignment as required by ABI

  __ relocate(relocInfo::internal_pc_type);
  {
    // Record last_Java_pc for the unpack_frames call below.
    long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
    __ li48(AT, (long)save_pc);
  }
  __ sd(AT, thread,in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  // Call C code. Need thread but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen. Call should
  // restore return values to their stack-slots with the new SP.
  __ move(A0, thread);
  __ move(A1, Deoptimization::Unpack_uncommon_trap);
  __ li48(T9, (long)Deoptimization::unpack_frames);
  __ jalr(T9);
  __ delayed()->nop();
  // Set an oopmap for the call site
  //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
  oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );//Fu

  __ reset_last_Java_frame(true,true);

  // Pop self-frame.
  __ leave();     // Epilog!

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();
  // -------------
  // make sure all code is generated
  masm->flush();

  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
}

#endif // COMPILER2

//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap and calls safepoint code to stop the compiled code for
// a safepoint.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code.

// Build the SafepointBlob for the given poll type. call_ptr is the VM entry
// invoked after all registers are saved; pool_type selects poll-at-return /
// loop / vector-loop behavior (see cause_return / save_vectors below).
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {

  // Account for thread arg in our frame
  const int additional_words = 0;
  int frame_size_in_words;

  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // allocate space for the code
  // setup code generation tools
  CodeBuffer buffer ("handler_blob", 2048, 512);
  MacroAssembler* masm = new MacroAssembler( &buffer);

  const Register thread = TREG;
  address start = __ pc();
  address call_pc = NULL;
  bool cause_return = (pool_type == POLL_AT_RETURN);
  bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);

  // If cause_return is true we are at a poll_return and there is
  // the return address in RA to the caller on the nmethod
  // that is safepoint. We can leave this return in RA and
  // effectively complete the return and safepoint in the caller.
  // Otherwise we load exception pc to RA.
  __ push(thread);
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif

  if(!cause_return) {
    __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
  }

  __ pop(thread);
  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.

  __ move(A0, thread);
  __ set_last_Java_frame(NOREG, NOREG, NULL);

  //__ relocate(relocInfo::internal_pc_type);
  if (!cause_return)
  {
/*
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
*/
  }


  // do the call
  //__ lui(T9, Assembler::split_high((int)call_ptr));
  //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
  __ call(call_ptr);
  __ delayed()->nop();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(__ offset(), map);

  Label noException;

  // Clear last_Java_sp again
  __ reset_last_Java_frame(false, false);

  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ beq(AT, R0, noException);
  __ delayed()->nop();

  // Exception pending

  RegisterSaver::restore_live_registers(masm, save_vectors);
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
  //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
  __ li(T9, StubRoutines::forward_exception_entry());
  __ jr(T9);
  __ delayed()->nop();

  // No exception case
  __ bind(noException);
  // Normal exit, register restoring and exit
  RegisterSaver::restore_live_registers(masm,
save_vectors);
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
//
// Generate a stub that calls into vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  //CodeBuffer buffer(name, 1000, 512);
  //FIXME. aoqi. code_size
  CodeBuffer buffer(name, 20000, 2048);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_words;
  //we put the thread in A0

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);


  int frame_complete = __ offset();

  const Register thread = T8;
  __ get_thread(thread);

  __ move(A0, thread);
  __ set_last_Java_frame(noreg, FP, NULL);
  //__ addi(SP, SP, -wordSize);
  //align the stack before invoke native
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  __ relocate(relocInfo::internal_pc_type);
  {
    // Record the pc just past the upcoming call as last_Java_pc.
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 1 * BytesPerInstWord;
//tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
    __ li48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));

  __ call(destination);
  __ delayed()->nop();

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  oop_maps->add_gc_map( __ offset() - start, map);
  // V0 contains the address we are going to jump to assuming no exception got installed
  __ get_thread(thread);
  __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // clear last_Java_sp
  __ reset_last_Java_frame(true, true);
  // check for pending exceptions
  Label pending;
  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ bne(AT, R0, pending);
  __ delayed()->nop();
  // get the returned Method*
  //FIXME, do mips need this ?
  __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
  __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
  __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go to the callee method.
  __ jr(V0);
  __ delayed()->nop();
  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  __ get_thread(thread);
  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
  __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
  __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  __ delayed() -> nop();
  // -------------
  // make sure all code is generated
  masm->flush();

  RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  return tmp;
}

/*void SharedRuntime::generate_stubs() {
  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
                        SharedRuntime::handle_wrong_method),"wrong_method_stub");
  _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
                        SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
                        SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
                        SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
                        SharedRuntime::resolve_static_call_C),"resolve_static_call");
  _polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
                        SafepointSynchronize::handle_polling_page_exception), false);
  _polling_page_return_handler_blob
=generate_handler_blob(CAST_FROM_FN_PTR(address,
                        SafepointSynchronize::handle_polling_page_exception), true);
  generate_deopt_blob();
#ifdef COMPILER2
  generate_uncommon_trap_blob();
#endif // COMPILER2
}*/

// Spin-pause hook used by spin loops; this port emits no pause and always
// returns 0 (i.e. no delay was performed).
extern "C" int SpinPause() {return 0;}
// extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
// extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;