src/cpu/mips/vm/sharedRuntime_mips_64.cpp

Mon, 18 Nov 2019 10:41:48 +0800

author
huangjia
date
Mon, 18 Nov 2019 10:41:48 +0800
changeset 9759
8c71022cf5f3
parent 9705
0b27fc8adf1b
child 9932
86ea9a02a717
permissions
-rw-r--r--

#10052 Backport of #9904 compiler/floatingpoint/TestFloatSyncJNIArgs.java failed
Reviewed-by: aoqi

aoqi@1 1 /*
aoqi@1 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@9459 3 * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved.
aoqi@1 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@1 5 *
aoqi@1 6 * This code is free software; you can redistribute it and/or modify it
aoqi@1 7 * under the terms of the GNU General Public License version 2 only, as
aoqi@1 8 * published by the Free Software Foundation.
aoqi@1 9 *
aoqi@1 10 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@1 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@1 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@1 13 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@1 14 * accompanied this code).
aoqi@1 15 *
aoqi@1 16 * You should have received a copy of the GNU General Public License version
aoqi@1 17 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@1 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@1 19 *
aoqi@1 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@1 21 * or visit www.oracle.com if you need additional information or have any
aoqi@1 22 * questions.
aoqi@1 23 *
aoqi@1 24 */
aoqi@1 25
aoqi@1 26 #include "precompiled.hpp"
aoqi@1 27 #include "asm/macroAssembler.hpp"
aoqi@1 28 #include "asm/macroAssembler.inline.hpp"
aoqi@1 29 #include "code/debugInfoRec.hpp"
aoqi@1 30 #include "code/icBuffer.hpp"
aoqi@1 31 #include "code/vtableStubs.hpp"
aoqi@1 32 #include "interpreter/interpreter.hpp"
aoqi@1 33 #include "oops/compiledICHolder.hpp"
aoqi@1 34 #include "prims/jvmtiRedefineClassesTrace.hpp"
aoqi@1 35 #include "runtime/sharedRuntime.hpp"
aoqi@1 36 #include "runtime/vframeArray.hpp"
aoqi@1 37 #include "vmreg_mips.inline.hpp"
aoqi@1 38 #ifdef COMPILER1
aoqi@1 39 #include "c1/c1_Runtime1.hpp"
aoqi@1 40 #endif
aoqi@1 41 #ifdef COMPILER2
aoqi@1 42 #include "opto/runtime.hpp"
aoqi@1 43 #endif
aoqi@1 44
wanghaomin@9639 45 #include <alloca.h>
wanghaomin@9639 46
// Shorthand used by the code generators in this file: `__ insn(...)` expands
// to `masm->insn(...)`, so a variable named `masm` must be in scope.
aoqi@1 47 #define __ masm->
aoqi@6880 48
// Stack alignment expressed in VMReg stack slots instead of bytes.
aoqi@1 49 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
aoqi@1 50
aoqi@1 51 class RegisterSaver {
aoqi@6880 52 enum { FPU_regs_live = 32 };
aoqi@6880 53 // Capture info about frame layout
aoqi@6880 54 enum layout {
aoqi@1 55 #define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
aoqi@6880 56 DEF_LAYOUT_OFFS(for_16_bytes_aligned)
aoqi@6880 57 DEF_LAYOUT_OFFS(fpr0)
aoqi@6880 58 DEF_LAYOUT_OFFS(fpr1)
aoqi@6880 59 DEF_LAYOUT_OFFS(fpr2)
aoqi@6880 60 DEF_LAYOUT_OFFS(fpr3)
aoqi@6880 61 DEF_LAYOUT_OFFS(fpr4)
aoqi@6880 62 DEF_LAYOUT_OFFS(fpr5)
aoqi@6880 63 DEF_LAYOUT_OFFS(fpr6)
aoqi@6880 64 DEF_LAYOUT_OFFS(fpr7)
aoqi@6880 65 DEF_LAYOUT_OFFS(fpr8)
aoqi@6880 66 DEF_LAYOUT_OFFS(fpr9)
aoqi@6880 67 DEF_LAYOUT_OFFS(fpr10)
aoqi@6880 68 DEF_LAYOUT_OFFS(fpr11)
aoqi@6880 69 DEF_LAYOUT_OFFS(fpr12)
aoqi@6880 70 DEF_LAYOUT_OFFS(fpr13)
aoqi@6880 71 DEF_LAYOUT_OFFS(fpr14)
aoqi@6880 72 DEF_LAYOUT_OFFS(fpr15)
aoqi@6880 73 DEF_LAYOUT_OFFS(fpr16)
aoqi@6880 74 DEF_LAYOUT_OFFS(fpr17)
aoqi@6880 75 DEF_LAYOUT_OFFS(fpr18)
aoqi@6880 76 DEF_LAYOUT_OFFS(fpr19)
aoqi@6880 77 DEF_LAYOUT_OFFS(fpr20)
aoqi@6880 78 DEF_LAYOUT_OFFS(fpr21)
aoqi@6880 79 DEF_LAYOUT_OFFS(fpr22)
aoqi@6880 80 DEF_LAYOUT_OFFS(fpr23)
aoqi@6880 81 DEF_LAYOUT_OFFS(fpr24)
aoqi@6880 82 DEF_LAYOUT_OFFS(fpr25)
aoqi@6880 83 DEF_LAYOUT_OFFS(fpr26)
aoqi@6880 84 DEF_LAYOUT_OFFS(fpr27)
aoqi@6880 85 DEF_LAYOUT_OFFS(fpr28)
aoqi@6880 86 DEF_LAYOUT_OFFS(fpr29)
aoqi@6880 87 DEF_LAYOUT_OFFS(fpr30)
aoqi@6880 88 DEF_LAYOUT_OFFS(fpr31)
aoqi@6880 89
aoqi@6880 90 DEF_LAYOUT_OFFS(v0)
aoqi@6880 91 DEF_LAYOUT_OFFS(v1)
aoqi@6880 92 DEF_LAYOUT_OFFS(a0)
aoqi@6880 93 DEF_LAYOUT_OFFS(a1)
aoqi@6880 94 DEF_LAYOUT_OFFS(a2)
aoqi@6880 95 DEF_LAYOUT_OFFS(a3)
aoqi@6880 96 DEF_LAYOUT_OFFS(a4)
aoqi@6880 97 DEF_LAYOUT_OFFS(a5)
aoqi@6880 98 DEF_LAYOUT_OFFS(a6)
aoqi@6880 99 DEF_LAYOUT_OFFS(a7)
aoqi@6880 100 DEF_LAYOUT_OFFS(t0)
aoqi@6880 101 DEF_LAYOUT_OFFS(t1)
aoqi@6880 102 DEF_LAYOUT_OFFS(t2)
aoqi@6880 103 DEF_LAYOUT_OFFS(t3)
aoqi@6880 104 DEF_LAYOUT_OFFS(s0)
aoqi@6880 105 DEF_LAYOUT_OFFS(s1)
aoqi@6880 106 DEF_LAYOUT_OFFS(s2)
aoqi@6880 107 DEF_LAYOUT_OFFS(s3)
aoqi@6880 108 DEF_LAYOUT_OFFS(s4)
aoqi@6880 109 DEF_LAYOUT_OFFS(s5)
aoqi@6880 110 DEF_LAYOUT_OFFS(s6)
aoqi@6880 111 DEF_LAYOUT_OFFS(s7)
aoqi@6880 112 DEF_LAYOUT_OFFS(t8)
aoqi@6880 113 DEF_LAYOUT_OFFS(t9)
aoqi@6880 114
aoqi@6880 115 DEF_LAYOUT_OFFS(gp)
aoqi@6880 116 DEF_LAYOUT_OFFS(fp)
aoqi@6880 117 DEF_LAYOUT_OFFS(return)
aoqi@6880 118 reg_save_size
aoqi@6880 119 };
aoqi@1 120
aoqi@1 121 public:
aoqi@1 122
aoqi@6880 123 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false );
aoqi@6880 124 static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
aoqi@6880 125 static int raOffset(void) { return return_off / 2; }
aoqi@6880 126 //Rmethod
aoqi@6880 127 static int methodOffset(void) { return s3_off / 2; }
aoqi@6880 128
aoqi@6880 129 static int v0Offset(void) { return v0_off / 2; }
aoqi@6880 130 static int v1Offset(void) { return v1_off / 2; }
aoqi@6880 131
aoqi@6880 132 static int fpResultOffset(void) { return fpr0_off / 2; }
aoqi@6880 133
aoqi@6880 134 // During deoptimization only the result register need to be restored
aoqi@6880 135 // all the other values have already been extracted.
aoqi@6880 136 static void restore_result_registers(MacroAssembler* masm);
aoqi@1 137 };
aoqi@1 138
aoqi@1 139 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) {
aoqi@1 140
aoqi@1 141 // Always make the frame size 16-byte aligned
aoqi@1 142 int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
aoqi@1 143 reg_save_size*BytesPerInt, 16);
aoqi@1 144 // OopMap frame size is in compiler stack slots (jint's) not bytes or words
aoqi@1 145 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
aoqi@1 146 // The caller will allocate additional_frame_words
aoqi@1 147 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
aoqi@1 148 // CodeBlob frame size is in words.
aoqi@1 149 int frame_size_in_words = frame_size_in_bytes / wordSize;
aoqi@1 150 *total_frame_words = frame_size_in_words;
aoqi@1 151
aoqi@9459 152 // save registers
aoqi@1 153
aoqi@1 154 __ daddiu(SP, SP, - reg_save_size * jintSize);
aoqi@1 155
aoqi@1 156 __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize);
aoqi@1 157 __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize);
aoqi@1 158 __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize);
aoqi@6880 159 __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize);
aoqi@6880 160 __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize);
aoqi@6880 161 __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize);
aoqi@6880 162 __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize);
aoqi@6880 163 __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize);
aoqi@6880 164 __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize);
aoqi@6880 165 __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize);
aoqi@6880 166 __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize);
aoqi@6880 167 __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize);
aoqi@6880 168 __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize);
aoqi@6880 169 __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize);
aoqi@6880 170 __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize);
aoqi@6880 171 __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize);
aoqi@6880 172 __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize);
aoqi@6880 173 __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize);
aoqi@6880 174 __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize);
aoqi@6880 175 __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize);
aoqi@6880 176 __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize);
aoqi@1 177 __ sd(T0, SP, t0_off * jintSize);
aoqi@1 178 __ sd(T1, SP, t1_off * jintSize);
aoqi@1 179 __ sd(T2, SP, t2_off * jintSize);
aoqi@1 180 __ sd(T3, SP, t3_off * jintSize);
aoqi@1 181 __ sd(S0, SP, s0_off * jintSize);
aoqi@1 182 __ sd(S1, SP, s1_off * jintSize);
aoqi@1 183 __ sd(S2, SP, s2_off * jintSize);
aoqi@1 184 __ sd(S3, SP, s3_off * jintSize);
aoqi@1 185 __ sd(S4, SP, s4_off * jintSize);
aoqi@1 186 __ sd(S5, SP, s5_off * jintSize);
aoqi@1 187 __ sd(S6, SP, s6_off * jintSize);
aoqi@1 188 __ sd(S7, SP, s7_off * jintSize);
aoqi@1 189
aoqi@1 190 __ sd(T8, SP, t8_off * jintSize);
aoqi@1 191 __ sd(T9, SP, t9_off * jintSize);
aoqi@1 192
aoqi@1 193 __ sd(GP, SP, gp_off * jintSize);
aoqi@1 194 __ sd(FP, SP, fp_off * jintSize);
aoqi@1 195 __ sd(RA, SP, return_off * jintSize);
aoqi@1 196 __ daddi(FP, SP, fp_off * jintSize);
aoqi@1 197
aoqi@1 198 OopMapSet *oop_maps = new OopMapSet();
aoqi@6880 199 //OopMap* map = new OopMap( frame_words, 0 );
aoqi@6880 200 OopMap* map = new OopMap( frame_size_in_slots, 0 );
aoqi@1 201
aoqi@1 202
aoqi@1 203 //#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
aoqi@1 204 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
aoqi@1 205 map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
aoqi@1 206 map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
aoqi@1 207 map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
aoqi@1 208 map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
aoqi@1 209 map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
aoqi@1 210 map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
aoqi@1 211 map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg());
aoqi@1 212 map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg());
aoqi@1 213 map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg());
aoqi@1 214 map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg());
aoqi@1 215 map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
aoqi@1 216 map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
aoqi@1 217 map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
aoqi@1 218 map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
aoqi@1 219 map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
aoqi@1 220 map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
aoqi@1 221 map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
aoqi@1 222 map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
aoqi@1 223 map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
aoqi@1 224 map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
aoqi@1 225 map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
aoqi@1 226 map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
aoqi@1 227 map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
aoqi@1 228 map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
aoqi@1 229 map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
aoqi@1 230 map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
aoqi@1 231 map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());
aoqi@1 232
aoqi@1 233 map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
aoqi@1 234 map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg());
aoqi@1 235 map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
aoqi@1 236 map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg());
aoqi@1 237 map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
aoqi@1 238 map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg());
aoqi@1 239 map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
aoqi@1 240 map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg());
aoqi@1 241 map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
aoqi@1 242 map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg());
aoqi@1 243 map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
aoqi@1 244 map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg());
aoqi@1 245 map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
aoqi@1 246 map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg());
aoqi@1 247 map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
aoqi@1 248 map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg());
aoqi@1 249 map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
aoqi@1 250 map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg());
aoqi@1 251 map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
aoqi@1 252 map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg());
aoqi@1 253 map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
aoqi@1 254 map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg());
aoqi@1 255 map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
aoqi@1 256 map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg());
aoqi@1 257 map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
aoqi@1 258 map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg());
aoqi@1 259 map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
aoqi@1 260 map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg());
aoqi@1 261 map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
aoqi@1 262 map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg());
aoqi@1 263 map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
aoqi@1 264 map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg());
aoqi@1 265
aoqi@1 266 #undef STACK_OFFSET
aoqi@1 267 return map;
aoqi@1 268 }
aoqi@1 269
aoqi@1 270
aoqi@1 271 // Pop the current frame and restore all the registers that we
aoqi@1 272 // saved.
aoqi@1 273 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
aoqi@1 274 __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize);
aoqi@1 275 __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize);
aoqi@1 276 __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize);
aoqi@6880 277 __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize);
aoqi@6880 278 __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize);
aoqi@6880 279 __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize);
aoqi@6880 280 __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize);
aoqi@6880 281 __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize);
aoqi@6880 282 __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize);
aoqi@6880 283 __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize);
aoqi@6880 284 __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize);
aoqi@6880 285 __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize);
aoqi@6880 286 __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize);
aoqi@6880 287 __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize);
aoqi@6880 288 __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize);
aoqi@6880 289 __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize);
aoqi@6880 290
aoqi@6880 291 __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize);
aoqi@6880 292 __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize);
aoqi@6880 293 __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize);
aoqi@6880 294 __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize);
aoqi@6880 295 __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize);
aoqi@1 296 __ ld(T0, SP, t0_off * jintSize);
aoqi@1 297 __ ld(T1, SP, t1_off * jintSize);
aoqi@1 298 __ ld(T2, SP, t2_off * jintSize);
aoqi@1 299 __ ld(T3, SP, t3_off * jintSize);
aoqi@1 300 __ ld(S0, SP, s0_off * jintSize);
aoqi@1 301 __ ld(S1, SP, s1_off * jintSize);
aoqi@1 302 __ ld(S2, SP, s2_off * jintSize);
aoqi@1 303 __ ld(S3, SP, s3_off * jintSize);
aoqi@1 304 __ ld(S4, SP, s4_off * jintSize);
aoqi@1 305 __ ld(S5, SP, s5_off * jintSize);
aoqi@1 306 __ ld(S6, SP, s6_off * jintSize);
aoqi@1 307 __ ld(S7, SP, s7_off * jintSize);
aoqi@1 308
aoqi@1 309 __ ld(T8, SP, t8_off * jintSize);
aoqi@1 310 __ ld(T9, SP, t9_off * jintSize);
aoqi@1 311
aoqi@1 312 __ ld(GP, SP, gp_off * jintSize);
aoqi@1 313 __ ld(FP, SP, fp_off * jintSize);
aoqi@1 314 __ ld(RA, SP, return_off * jintSize);
aoqi@1 315
aoqi@1 316 __ addiu(SP, SP, reg_save_size * jintSize);
aoqi@1 317 }
aoqi@1 318
aoqi@1 319 // Pop the current frame and restore the registers that might be holding
aoqi@1 320 // a result.
aoqi@1 321 // FIXME, if the result is float?
aoqi@1 322 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
aoqi@6880 323
aoqi@1 324 // Just restore result register. Only used by deoptimization. By
aoqi@1 325 // now any callee save register that needs to be restore to a c2
aoqi@1 326 // caller of the deoptee has been extracted into the vframeArray
aoqi@1 327 // and will be stuffed into the c2i adapter we create for later
aoqi@1 328 // restoration so only result registers need to be restored here.
aoqi@6880 329
aoqi@1 330 __ ld(V0, SP, v0_off * jintSize);
aoqi@1 331 __ ld(V1, SP, v1_off * jintSize);
aoqi@6880 332 __ addiu(SP, SP, return_off * jintSize);
aoqi@1 333 }
aoqi@1 334
aoqi@6880 335 // Is vector's size (in bytes) bigger than a size saved by default?
aoqi@6880 336 // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
aoqi@6880 337 bool SharedRuntime::is_wide_vector(int size) {
aoqi@6880 338 return size > 16;
aoqi@6880 339 }
aoqi@1 340
aoqi@1 341 // The java_calling_convention describes stack locations as ideal slots on
aoqi@1 342 // a frame with no abi restrictions. Since we must observe abi restrictions
aoqi@1 343 // (like the placement of the register window) the slots must be biased by
aoqi@1 344 // the following value.
aoqi@1 345
aoqi@6880 346 static int reg2offset_in(VMReg r) {
aoqi@9459 347 // Account for saved fp and return address
aoqi@6880 348 // This should really be in_preserve_stack_slots
aoqi@6880 349 return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size);
aoqi@1 350 }
aoqi@1 351
aoqi@6880 352 static int reg2offset_out(VMReg r) {
aoqi@6880 353 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
aoqi@1 354 }
aoqi@1 355
aoqi@1 356 // ---------------------------------------------------------------------------
aoqi@1 357 // Read the array of BasicTypes from a signature, and compute where the
aoqi@1 358 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
aoqi@1 359 // quantities. Values less than SharedInfo::stack0 are registers, those above
aoqi@1 360 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
aoqi@1 361 // as framesizes are fixed.
aoqi@1 362 // VMRegImpl::stack0 refers to the first slot 0(sp),
aoqi@1 363 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
aoqi@1 364 // (up to RegisterImpl::number_of_registers) are the 32-bit
aoqi@1 365 // integer registers.
aoqi@1 366
aoqi@1 367 // Pass first five oop/int args in registers T0, A0 - A3.
aoqi@1 368 // Pass float/double/long args in stack.
aoqi@1 369 // Doubles have precedence, so if you pass a mix of floats and doubles
aoqi@1 370 // the doubles will grab the registers before the floats will.
aoqi@1 371
aoqi@1 372 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
aoqi@1 373 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
aoqi@9459 374 // units regardless of build.
aoqi@1 375
aoqi@1 376
aoqi@1 377 // ---------------------------------------------------------------------------
aoqi@1 378 // The compiled Java calling convention.
aoqi@1 379 // Integer, long and oop/address args are passed in T0, A0, A1, ... and
aoqi@1 380 // float/double args in F12, F13, ... as long as registers remain.
aoqi@1 381 // Both kinds advance one shared position counter; arguments are assigned in
aoqi@1 382 // signature order, and whatever does not get a register goes on the stack.
aoqi@1 383
aoqi@1 384 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
aoqi@1 385 VMRegPair *regs,
aoqi@1 386 int total_args_passed,
aoqi@1 387 int is_outgoing) {
aoqi@1 388
aoqi@1 389 // Create the mapping between argument positions and
aoqi@1 390 // registers.
aoqi@1 391 //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
aoqi@1 392 static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
aoqi@1 393 T0, A0, A1, A2, A3, A4, A5, A6, A7
aoqi@1 394 };
aoqi@1 395 //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
aoqi@1 396 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
aoqi@1 397 F12, F13, F14, F15, F16, F17, F18, F19
aoqi@1 398 };
aoqi@1 399
aoqi@1 400
aoqi@1 401 uint args = 0;
aoqi@1 402 uint stk_args = 0; // inc by 2 each time
aoqi@1 403
aoqi@1 404 for (int i = 0; i < total_args_passed; i++) {
aoqi@1 405 switch (sig_bt[i]) {
aoqi@1 406 case T_VOID:
aoqi@1 407 // halves of T_LONG or T_DOUBLE
aoqi@1 408 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
aoqi@1 409 regs[i].set_bad();
aoqi@1 410 break;
aoqi@1 411 case T_BOOLEAN:
aoqi@1 412 case T_CHAR:
aoqi@1 413 case T_BYTE:
aoqi@1 414 case T_SHORT:
aoqi@1 415 case T_INT:
aoqi@1 416 if (args < Argument::n_register_parameters) {
aoqi@1 417 regs[i].set1(INT_ArgReg[args++]->as_VMReg());
aoqi@1 418 } else {
aoqi@1 419 regs[i].set1(VMRegImpl::stack2reg(stk_args));
aoqi@1 420 stk_args += 2;
aoqi@1 421 }
aoqi@1 422 break;
aoqi@1 423 case T_LONG:
aoqi@1 424 assert(sig_bt[i + 1] == T_VOID, "expecting half");
aoqi@1 425 // fall through
aoqi@1 426 case T_OBJECT:
aoqi@1 427 case T_ARRAY:
aoqi@1 428 case T_ADDRESS:
aoqi@1 429 if (args < Argument::n_register_parameters) {
aoqi@1 430 regs[i].set2(INT_ArgReg[args++]->as_VMReg());
aoqi@1 431 } else {
aoqi@1 432 regs[i].set2(VMRegImpl::stack2reg(stk_args));
aoqi@1 433 stk_args += 2;
aoqi@1 434 }
aoqi@1 435 break;
aoqi@1 436 case T_FLOAT:
aoqi@1 437 if (args < Argument::n_float_register_parameters) {
aoqi@1 438 regs[i].set1(FP_ArgReg[args++]->as_VMReg());
aoqi@1 439 } else {
aoqi@1 440 regs[i].set1(VMRegImpl::stack2reg(stk_args));
aoqi@1 441 stk_args += 2;
aoqi@1 442 }
aoqi@1 443 break;
aoqi@1 444 case T_DOUBLE:
aoqi@1 445 assert(sig_bt[i + 1] == T_VOID, "expecting half");
aoqi@1 446 if (args < Argument::n_float_register_parameters) {
aoqi@1 447 regs[i].set2(FP_ArgReg[args++]->as_VMReg());
aoqi@1 448 } else {
aoqi@1 449 regs[i].set2(VMRegImpl::stack2reg(stk_args));
aoqi@1 450 stk_args += 2;
aoqi@1 451 }
aoqi@1 452 break;
aoqi@1 453 default:
aoqi@1 454 ShouldNotReachHere();
aoqi@1 455 break;
aoqi@1 456 }
aoqi@1 457 }
aoqi@1 458
aoqi@1 459 return round_to(stk_args, 2);
aoqi@1 460 }
aoqi@1 461
aoqi@1 462 // Helper class mostly to avoid passing masm everywhere, and handle store
aoqi@1 463 // displacement overflow logic for LP64
aoqi@1 464 class AdapterGenerator {
aoqi@1 465 MacroAssembler *masm;
aoqi@1 466 #ifdef _LP64
aoqi@1 467 Register Rdisp;
aoqi@1 468 void set_Rdisp(Register r) { Rdisp = r; }
aoqi@1 469 #endif // _LP64
aoqi@1 470
aoqi@1 471 void patch_callers_callsite();
aoqi@1 472
aoqi@1 473 // base+st_off points to top of argument
aoqi@1 474 int arg_offset(const int st_off) { return st_off; }
aoqi@1 475 int next_arg_offset(const int st_off) {
aoqi@1 476 return st_off - Interpreter::stackElementSize;
aoqi@1 477 }
aoqi@1 478
aoqi@1 479 #ifdef _LP64
aoqi@1 480 // On _LP64 argument slot values are loaded first into a register
aoqi@1 481 // because they might not fit into displacement.
aoqi@1 482 Register arg_slot(const int st_off);
aoqi@1 483 Register next_arg_slot(const int st_off);
aoqi@1 484 #else
aoqi@1 485 int arg_slot(const int st_off) { return arg_offset(st_off); }
aoqi@1 486 int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
aoqi@1 487 #endif // _LP64
aoqi@1 488
aoqi@1 489 // Stores long into offset pointed to by base
aoqi@1 490 void store_c2i_long(Register r, Register base,
aoqi@1 491 const int st_off, bool is_stack);
aoqi@1 492 void store_c2i_object(Register r, Register base,
aoqi@1 493 const int st_off);
aoqi@1 494 void store_c2i_int(Register r, Register base,
aoqi@1 495 const int st_off);
aoqi@1 496 void store_c2i_double(VMReg r_2,
aoqi@1 497 VMReg r_1, Register base, const int st_off);
aoqi@1 498 void store_c2i_float(FloatRegister f, Register base,
aoqi@1 499 const int st_off);
aoqi@1 500
aoqi@1 501 public:
aoqi@1 502 //void tag_stack(const BasicType sig, int st_off);
aoqi@1 503 void gen_c2i_adapter(int total_args_passed,
aoqi@1 504 // VMReg max_arg,
aoqi@1 505 int comp_args_on_stack, // VMRegStackSlots
aoqi@1 506 const BasicType *sig_bt,
aoqi@1 507 const VMRegPair *regs,
aoqi@1 508 Label& skip_fixup);
aoqi@1 509 void gen_i2c_adapter(int total_args_passed,
aoqi@1 510 // VMReg max_arg,
aoqi@1 511 int comp_args_on_stack, // VMRegStackSlots
aoqi@1 512 const BasicType *sig_bt,
aoqi@1 513 const VMRegPair *regs);
aoqi@1 514
aoqi@1 515 AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
aoqi@1 516 };
aoqi@1 517
aoqi@1 518
aoqi@1 519 // Patch the callers callsite with entry to compiled code if it exists.
aoqi@1 520 void AdapterGenerator::patch_callers_callsite() {
aoqi@6880 521 Label L;
aoqi@6880 522 __ verify_oop(Rmethod);
aoqi@6880 523 __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
huangjia@9645 524 __ beq(AT, R0, L);
aoqi@6880 525 __ delayed()->nop();
aoqi@6880 526 // Schedule the branch target address early.
aoqi@6880 527 // Call into the VM to patch the caller, then jump to compiled callee
aoqi@9459 528 // V0 isn't live so capture return address while we easily can
aoqi@6880 529 __ move(V0, RA);
aoqi@6880 530
aoqi@6880 531 __ pushad();
aoqi@1 532 #ifdef COMPILER2
aoqi@6880 533 // C2 may leave the stack dirty if not in SSE2+ mode
aoqi@6880 534 __ empty_FPU_stack();
aoqi@9459 535 #endif
aoqi@1 536
aoqi@6880 537 // VM needs caller's callsite
aoqi@6880 538 // VM needs target method
aoqi@6880 539
aoqi@6880 540 __ move(A0, Rmethod);
aoqi@6880 541 __ move(A1, V0);
aoqi@9459 542 // we should preserve the return address
aoqi@6880 543 __ verify_oop(Rmethod);
aoqi@6880 544 __ move(S0, SP);
aoqi@6880 545 __ move(AT, -(StackAlignmentInBytes)); // align the stack
aoqi@6880 546 __ andr(SP, SP, AT);
aoqi@6880 547 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
aoqi@6880 548 relocInfo::runtime_call_type);
aoqi@6880 549
aoqi@6880 550 __ delayed()->nop();
aoqi@6880 551 __ move(SP, S0);
aoqi@6880 552 __ popad();
aoqi@6880 553 __ bind(L);
aoqi@1 554 }
aoqi@1 555
aoqi@1 556 #ifdef _LP64
aoqi@1 557 Register AdapterGenerator::arg_slot(const int st_off) {
aoqi@6880 558 Unimplemented();
aoqi@1 559 }
aoqi@1 560
aoqi@1 561 Register AdapterGenerator::next_arg_slot(const int st_off){
aoqi@6880 562 Unimplemented();
aoqi@1 563 }
aoqi@1 564 #endif // _LP64
aoqi@1 565
aoqi@1 566 // Stores long into offset pointed to by base
aoqi@1 567 void AdapterGenerator::store_c2i_long(Register r, Register base,
aoqi@1 568 const int st_off, bool is_stack) {
aoqi@6880 569 Unimplemented();
aoqi@1 570 }
aoqi@1 571
aoqi@1 572 void AdapterGenerator::store_c2i_object(Register r, Register base,
aoqi@6880 573 const int st_off) {
aoqi@6880 574 Unimplemented();
aoqi@1 575 }
aoqi@1 576
aoqi@1 577 void AdapterGenerator::store_c2i_int(Register r, Register base,
aoqi@6880 578 const int st_off) {
aoqi@6880 579 Unimplemented();
aoqi@1 580 }
aoqi@1 581
aoqi@1 582 // Stores into offset pointed to by base
aoqi@1 583 void AdapterGenerator::store_c2i_double(VMReg r_2,
aoqi@1 584 VMReg r_1, Register base, const int st_off) {
aoqi@6880 585 Unimplemented();
aoqi@1 586 }
aoqi@1 587
aoqi@1 588 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
aoqi@1 589 const int st_off) {
aoqi@6880 590 Unimplemented();
aoqi@1 591 }
aoqi@1 592
aoqi@1 593 void AdapterGenerator::gen_c2i_adapter(
aoqi@1 594 int total_args_passed,
aoqi@1 595 // VMReg max_arg,
aoqi@1 596 int comp_args_on_stack, // VMRegStackSlots
aoqi@1 597 const BasicType *sig_bt,
aoqi@1 598 const VMRegPair *regs,
aoqi@1 599 Label& skip_fixup) {
aoqi@1 600
aoqi@1 601 // Before we get into the guts of the C2I adapter, see if we should be here
aoqi@1 602 // at all. We've come from compiled code and are attempting to jump to the
aoqi@1 603 // interpreter, which means the caller made a static call to get here
aoqi@1 604 // (vcalls always get a compiled target if there is one). Check for a
aoqi@1 605 // compiled target. If there is one, we need to patch the caller's call.
aoqi@1 606 // However we will run interpreted if we come thru here. The next pass
aoqi@1 607 // thru the call site will run compiled. If we ran compiled here then
aoqi@1 608 // we can (theorectically) do endless i2c->c2i->i2c transitions during
aoqi@1 609 // deopt/uncommon trap cycles. If we always go interpreted here then
aoqi@1 610 // we can have at most one and don't need to play any tricks to keep
aoqi@1 611 // from endlessly growing the stack.
aoqi@1 612 //
aoqi@1 613 // Actually if we detected that we had an i2c->c2i transition here we
aoqi@1 614 // ought to be able to reset the world back to the state of the interpreted
aoqi@1 615 // call and not bother building another interpreter arg area. We don't
aoqi@1 616 // do that at this point.
aoqi@1 617
aoqi@6880 618 patch_callers_callsite();
aoqi@6880 619
aoqi@6880 620 __ bind(skip_fixup);
aoqi@1 621
aoqi@1 622 #ifdef COMPILER2
aoqi@6880 623 __ empty_FPU_stack();
aoqi@9459 624 #endif
aoqi@6880 625 //this is for native ?
aoqi@6880 626 // Since all args are passed on the stack, total_args_passed * interpreter_
aoqi@6880 627 // stack_element_size is the
aoqi@6880 628 // space we need.
aoqi@6880 629 int extraspace = total_args_passed * Interpreter::stackElementSize;
aoqi@6880 630
aoqi@6880 631 // stack is aligned, keep it that way
aoqi@6880 632 extraspace = round_to(extraspace, 2*wordSize);
aoqi@6880 633
aoqi@6880 634 // Get return address
aoqi@6880 635 __ move(V0, RA);
aoqi@6880 636 // set senderSP value
aoqi@6880 637 //refer to interpreter_mips.cpp:generate_asm_entry
aoqi@6880 638 __ move(Rsender, SP);
aoqi@6880 639 __ addi(SP, SP, -extraspace);
aoqi@6880 640
aoqi@6880 641 // Now write the args into the outgoing interpreter space
aoqi@6880 642 for (int i = 0; i < total_args_passed; i++) {
aoqi@6880 643 if (sig_bt[i] == T_VOID) {
aoqi@6880 644 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
aoqi@6880 645 continue;
aoqi@6880 646 }
aoqi@6880 647
aoqi@6880 648 // st_off points to lowest address on stack.
aoqi@6880 649 int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
aoqi@6880 650 // Say 4 args:
aoqi@6880 651 // i st_off
aoqi@6880 652 // 0 12 T_LONG
aoqi@6880 653 // 1 8 T_VOID
aoqi@6880 654 // 2 4 T_OBJECT
aoqi@6880 655 // 3 0 T_BOOL
aoqi@6880 656 VMReg r_1 = regs[i].first();
aoqi@6880 657 VMReg r_2 = regs[i].second();
aoqi@6880 658 if (!r_1->is_valid()) {
aoqi@6880 659 assert(!r_2->is_valid(), "");
aoqi@6880 660 continue;
aoqi@6880 661 }
aoqi@6880 662 if (r_1->is_stack()) {
aoqi@6880 663 // memory to memory use fpu stack top
aoqi@6880 664 int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
aoqi@6880 665 if (!r_2->is_valid()) {
aoqi@6880 666 __ ld_ptr(AT, SP, ld_off);
aoqi@6880 667 __ st_ptr(AT, SP, st_off);
aoqi@6880 668
aoqi@6880 669 } else {
aoqi@6880 670
aoqi@6880 671
aoqi@6880 672 int next_off = st_off - Interpreter::stackElementSize;
aoqi@6880 673 __ ld_ptr(AT, SP, ld_off);
aoqi@6880 674 __ st_ptr(AT, SP, st_off);
aoqi@6880 675
aoqi@9459 676 // Ref to is_Register condition
aoqi@6880 677 if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
huangjia@9645 678 __ st_ptr(AT, SP, st_off - 8);
aoqi@6880 679 }
aoqi@6880 680 } else if (r_1->is_Register()) {
aoqi@6880 681 Register r = r_1->as_Register();
aoqi@6880 682 if (!r_2->is_valid()) {
huangjia@9645 683 __ sd(r, SP, st_off);
aoqi@6880 684 } else {
aoqi@6880 685 //FIXME, mips will not enter here
aoqi@6880 686 // long/double in gpr
huangjia@9645 687 __ sd(r, SP, st_off);
huangjia@9645 688 // In [java/util/zip/ZipFile.java]
aoqi@9459 689 //
aoqi@9459 690 // private static native long open(String name, int mode, long lastModified);
aoqi@9459 691 // private static native int getTotal(long jzfile);
aoqi@9459 692 //
aoqi@9459 693 // We need to transfer T_LONG paramenters from a compiled method to a native method.
aoqi@9459 694 // It's a complex process:
aoqi@9459 695 //
aoqi@9459 696 // Caller -> lir_static_call -> gen_resolve_stub
aoqi@9459 697 // -> -- resolve_static_call_C
aoqi@9459 698 // `- gen_c2i_adapter() [*]
aoqi@9459 699 // |
aoqi@9459 700 // `- AdapterHandlerLibrary::get_create_apapter_index
aoqi@9459 701 // -> generate_native_entry
aoqi@9459 702 // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]
aoqi@9459 703 //
aoqi@9459 704 // In [**], T_Long parameter is stored in stack as:
aoqi@9459 705 //
aoqi@9459 706 // (high)
aoqi@9459 707 // | |
aoqi@9459 708 // -----------
aoqi@9459 709 // | 8 bytes |
aoqi@9459 710 // | (void) |
aoqi@9459 711 // -----------
aoqi@9459 712 // | 8 bytes |
aoqi@9459 713 // | (long) |
aoqi@9459 714 // -----------
aoqi@9459 715 // | |
aoqi@9459 716 // (low)
aoqi@9459 717 //
aoqi@9459 718 // However, the sequence is reversed here:
aoqi@9459 719 //
aoqi@9459 720 // (high)
aoqi@9459 721 // | |
aoqi@9459 722 // -----------
aoqi@9459 723 // | 8 bytes |
aoqi@9459 724 // | (long) |
aoqi@9459 725 // -----------
aoqi@9459 726 // | 8 bytes |
aoqi@9459 727 // | (void) |
aoqi@9459 728 // -----------
aoqi@9459 729 // | |
aoqi@9459 730 // (low)
aoqi@9459 731 //
aoqi@9459 732 // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry().
aoqi@9459 733 //
aoqi@6880 734 if (sig_bt[i] == T_LONG)
huangjia@9645 735 __ sd(r, SP, st_off - 8);
aoqi@6880 736 }
aoqi@6880 737 } else if (r_1->is_FloatRegister()) {
aoqi@6880 738 assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");
aoqi@6880 739
aoqi@6880 740 FloatRegister fr = r_1->as_FloatRegister();
aoqi@6880 741 if (sig_bt[i] == T_FLOAT)
huangjia@9645 742 __ swc1(fr, SP, st_off);
aoqi@6880 743 else {
huangjia@9645 744 __ sdc1(fr, SP, st_off);
huangjia@9645 745 __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots
aoqi@6880 746 }
aoqi@6880 747 }
aoqi@6880 748 }
aoqi@6880 749
aoqi@6880 750 // Schedule the branch target address early.
huangjia@9645 751 __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) );
aoqi@6880 752 // And repush original return address
aoqi@6880 753 __ move(RA, V0);
aoqi@6880 754 __ jr (AT);
aoqi@6880 755 __ delayed()->nop();
aoqi@1 756 }
aoqi@1 757
// gen_i2c_adapter: emits the interpreter-to-compiled (I2C) argument shuffle.
//
// Parameters:
//   total_args_passed  - number of entries in sig_bt / regs (T_VOID halves included)
//   comp_args_on_stack - outgoing stack argument area, counted in 4-byte slots
//   sig_bt             - basic type of every argument slot
//   regs               - target location (stack slot, GPR or FPR) of every argument
//
// Emitted sequence, in order:
//   1. T9 <- SP; SP is lowered by the rounded outgoing stack-argument area
//      (only when comp_args_on_stack != 0) and masked with -StackAlignmentInBytes.
//   2. V0 (saved_sp) <- the original SP kept in T9; T9 <- Method::from_compiled entry.
//   3. Every argument is read from saved_sp + ld_off and placed in regs[i].
//   4. Rmethod is stored to JavaThread::callee_target, copied to V0 (which
//      overwrites saved_sp, no longer needed), and control jumps through T9.
aoqi@1 758 void AdapterGenerator::gen_i2c_adapter(
aoqi@6880 759 int total_args_passed,
aoqi@6880 760 // VMReg max_arg,
aoqi@6880 761 int comp_args_on_stack, // VMRegStackSlots
aoqi@6880 762 const BasicType *sig_bt,
aoqi@6880 763 const VMRegPair *regs) {
aoqi@1 764
aoqi@1 765 // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
aoqi@1 766 // layout. Lesp was saved by the calling I-frame and will be restored on
aoqi@1 767 // return. Meanwhile, outgoing arg space is all owned by the callee
aoqi@1 768 // C-frame, so we can mangle it at will. After adjusting the frame size,
aoqi@1 769 // hoist register arguments and repack other args according to the compiled
aoqi@1 770 // code convention. Finally, end in a jump to the compiled code. The entry
aoqi@1 771 // point address is the start of the buffer.
aoqi@1 772
aoqi@1 773 // We will only enter here from an interpreted frame and never from after
aoqi@1 774 // passing thru a c2i. Azul allowed this but we do not. If we lose the
aoqi@1 775 // race and use a c2i we will remain interpreted for the race loser(s).
aoqi@1 776 // This removes all sorts of headaches on the mips side and also eliminates
aoqi@1 777 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
aoqi@1 778
aoqi@1 779
// Remember the incoming (interpreter) SP; it becomes saved_sp below.
aoqi@1 780 __ move(T9, SP);
aoqi@1 781
aoqi@1 782 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
aoqi@1 783 // in registers, we will occasionally have no stack args.
aoqi@1 784 int comp_words_on_stack = 0;
aoqi@1 785 if (comp_args_on_stack) {
aoqi@1 786 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
aoqi@1 787 // registers are below. By subtracting stack0, we either get a negative
aoqi@1 788 // number (all values in registers) or the maximum stack slot accessed.
aoqi@1 789 // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
aoqi@1 790 // Convert 4-byte stack slots to words.
aoqi@1 791 // did mips need round? FIXME aoqi
// slots * 4 gives bytes; round up to a whole word, then shift to get a word count.
aoqi@1 792 comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
aoqi@1 793 // Round up to miminum stack alignment, in wordSize
aoqi@1 794 comp_words_on_stack = round_to(comp_words_on_stack, 2);
aoqi@1 795 __ daddi(SP, SP, -comp_words_on_stack * wordSize);
aoqi@1 796 }
aoqi@1 797
aoqi@1 798 // Align the outgoing SP
aoqi@1 799 __ move(AT, -(StackAlignmentInBytes));
aoqi@6880 800 __ andr(SP, SP, AT);
aoqi@1 801 // push the return address on the stack (note that pushing, rather
aoqi@1 802 // than storing it, yields the correct frame alignment for the callee)
aoqi@1 803 // Put saved SP in another register
// NOTE(review): no return address is pushed by the code in this function; the
// "push the return address" sentence above looks inherited from another port.
aoqi@1 804 const Register saved_sp = V0;
aoqi@1 805 __ move(saved_sp, T9);
aoqi@1 806
aoqi@1 807
aoqi@1 808 // Will jump to the compiled code just as if compiled code was doing it.
aoqi@1 809 // Pre-load the register-jump target early, to schedule it better.
aoqi@1 810 __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));
aoqi@1 811
aoqi@1 812 // Now generate the shuffle code. Pick up all register args and move the
aoqi@1 813 // rest through the floating point stack top.
// In this implementation stack-to-stack copies go through the AT register.
aoqi@1 814 for (int i = 0; i < total_args_passed; i++) {
aoqi@1 815 if (sig_bt[i] == T_VOID) {
aoqi@1 816 // Longs and doubles are passed in native word order, but misaligned
aoqi@1 817 // in the 32-bit build.
aoqi@1 818 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
aoqi@1 819 continue;
aoqi@1 820 }
aoqi@1 821
aoqi@6880 822 // Pick up 0, 1 or 2 words from SP+offset.
aoqi@6880 823
aoqi@6880 824 //FIXME. aoqi. just delete the assert
aoqi@1 825 //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
aoqi@1 826 // Load in argument order going down.
// Argument 0 sits at the highest offset from saved_sp, the last argument at offset 0.
aoqi@1 827 int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize;
aoqi@1 828 // Point to interpreter value (vs. tag)
// next_off is computed but not referenced again in this loop body.
aoqi@1 829 int next_off = ld_off - Interpreter::stackElementSize;
aoqi@1 830 VMReg r_1 = regs[i].first();
aoqi@1 831 VMReg r_2 = regs[i].second();
aoqi@1 832 if (!r_1->is_valid()) {
aoqi@1 833 assert(!r_2->is_valid(), "");
aoqi@1 834 continue;
aoqi@1 835 }
aoqi@6880 836 if (r_1->is_stack()) {
aoqi@6880 837 // Convert stack slot to an SP offset (+ wordSize to
aoqi@1 838 // account for return address )
aoqi@9459 839 // NOTICE HERE!!!! I sub a wordSize here
// The store offset is relative to the new SP and carries no extra wordSize.
aoqi@6880 840 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
aoqi@1 841 //+ wordSize;
aoqi@1 842
aoqi@1 843 if (!r_2->is_valid()) {
aoqi@6880 844 __ ld(AT, saved_sp, ld_off);
aoqi@6880 845 __ sd(AT, SP, st_off);
aoqi@1 846 } else {
aoqi@6880 847 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
aoqi@6880 848 // are accessed as negative so LSW is at LOW address
aoqi@6880 849
aoqi@6880 850 // ld_off is MSW so get LSW
aoqi@6880 851 // st_off is LSW (i.e. reg.first())
aoqi@9459 852
aoqi@9459 853 // [./org/eclipse/swt/graphics/GC.java]
aoqi@9459 854 // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
aoqi@9459 855 // int destX, int destY, int destWidth, int destHeight,
aoqi@9459 856 // boolean simple,
aoqi@9459 857 // int imgWidth, int imgHeight,
aoqi@9459 858 // long maskPixmap, <-- Pass T_LONG in stack
aoqi@9459 859 // int maskType);
aoqi@9459 860 // Before this modification, Eclipse displays icons with solid black background.
aoqi@9459 861 //
// For T_LONG / T_DOUBLE the second load overwrites AT, so the value that gets
// stored is the one found at ld_off - 8.
aoqi@6880 862 __ ld(AT, saved_sp, ld_off);
aoqi@1 863 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
aoqi@6880 864 __ ld(AT, saved_sp, ld_off - 8);
aoqi@6880 865 __ sd(AT, SP, st_off);
aoqi@1 866 }
aoqi@1 867 } else if (r_1->is_Register()) { // Register argument
aoqi@1 868 Register r = r_1->as_Register();
aoqi@1 869 if (r_2->is_valid()) {
aoqi@6880 870 // Remember r_1 is low address (and LSB on mips)
aoqi@6880 871 // So r_2 gets loaded from high address regardless of the platform
aoqi@6880 872 assert(r_2->as_Register() == r_1->as_Register(), "");
aoqi@6880 873 __ ld(r, saved_sp, ld_off);
aoqi@6880 874
aoqi@9459 875 //
aoqi@9459 876 // For T_LONG type, the real layout is as below:
aoqi@9459 877 //
aoqi@9459 878 // (high)
aoqi@9459 879 // | |
aoqi@9459 880 // -----------
aoqi@9459 881 // | 8 bytes |
aoqi@9459 882 // | (void) |
aoqi@9459 883 // -----------
aoqi@9459 884 // | 8 bytes |
aoqi@9459 885 // | (long) |
aoqi@9459 886 // -----------
aoqi@9459 887 // | |
aoqi@9459 888 // (low)
aoqi@9459 889 //
aoqi@9459 890 // We should load the low-8 bytes.
aoqi@9459 891 //
// For T_LONG this reload replaces the value fetched from ld_off just above.
aoqi@6880 892 if (sig_bt[i] == T_LONG)
aoqi@6880 893 __ ld(r, saved_sp, ld_off - 8);
aoqi@1 894 } else {
// Single-slot register argument: 32-bit load.
aoqi@6880 895 __ lw(r, saved_sp, ld_off);
aoqi@1 896 }
aoqi@1 897 } else if (r_1->is_FloatRegister()) { // Float Register
aoqi@6880 898 assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");
aoqi@6880 899
aoqi@6880 900 FloatRegister fr = r_1->as_FloatRegister();
aoqi@6880 901 if (sig_bt[i] == T_FLOAT)
aoqi@6880 902 __ lwc1(fr, saved_sp, ld_off);
aoqi@6880 903 else {
// Two loads into the same register: the final value comes from ld_off - 8.
aoqi@6880 904 __ ldc1(fr, saved_sp, ld_off);
aoqi@6880 905 __ ldc1(fr, saved_sp, ld_off - 8);
aoqi@6880 906 }
aoqi@6880 907 }
aoqi@1 908 }
aoqi@1 909
aoqi@1 910 // 6243940 We might end up in handle_wrong_method if
aoqi@1 911 // the callee is deoptimized as we race thru here. If that
aoqi@1 912 // happens we don't want to take a safepoint because the
aoqi@1 913 // caller frame will look interpreted and arguments are now
aoqi@1 914 // "compiled" so it is much better to make this transition
aoqi@1 915 // invisible to the stack walking code. Unfortunately if
aoqi@1 916 // we try and find the callee by normal means a safepoint
aoqi@1 917 // is possible. So we stash the desired callee in the thread
aoqi@1 918 // and the vm will find there should this case occur.
aoqi@1 919 __ get_thread(T8);
aoqi@1 920 __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));
aoqi@1 921
aoqi@9459 922 // move methodOop to V0 in case we end up in an c2i adapter.
aoqi@9459 923 // the c2i adapters expect methodOop in V0 (c2) because c2's
aoqi@9459 924 // resolve stubs return the result (the method) in V0.
aoqi@6880 925 // I'd love to fix this.
aoqi@6880 926 __ move(V0, Rmethod);
// Jump to the compiled entry loaded into T9 earlier; the delay slot is a nop.
aoqi@1 927 __ jr(T9);
aoqi@1 928 __ delayed()->nop();
aoqi@1 929 }
aoqi@1 930
aoqi@1 931 // ---------------------------------------------------------------
// generate_i2c2i_adapters: emits three entry points back to back into masm and
// returns them wrapped by AdapterHandlerLibrary::new_entry():
//   i2c_entry            - start of the code produced by gen_i2c_adapter()
//   c2i_unverified_entry - inline-cache check that precedes the c2i adapter
//   c2i_entry            - start of the code produced by gen_c2i_adapter()
// total_args_passed, comp_args_on_stack, sig_bt and regs are forwarded unchanged
// to both generators; fingerprint is forwarded to new_entry().
aoqi@1 932 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
aoqi@1 933 int total_args_passed,
aoqi@1 934 // VMReg max_arg,
aoqi@1 935 int comp_args_on_stack, // VMRegStackSlots
aoqi@1 936 const BasicType *sig_bt,
aoqi@1 937 const VMRegPair *regs,
aoqi@1 938 AdapterFingerPrint* fingerprint) {
aoqi@1 939 address i2c_entry = __ pc();
aoqi@1 940
aoqi@1 941 AdapterGenerator agen(masm);
aoqi@1 942
aoqi@1 943 agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
aoqi@1 944
aoqi@1 945
aoqi@1 946 // -------------------------------------------------------------------------
aoqi@1 947 // Generate a C2I adapter. On entry we know G5 holds the methodOop. The
aoqi@1 948 // args start out packed in the compiled layout. They need to be unpacked
aoqi@1 949 // into the interpreter layout. This will almost always require some stack
aoqi@1 950 // space. We grow the current (compiled) stack, then repack the args. We
aoqi@1 951 // finally end in a jump to the generic interpreter entry point. On exit
aoqi@1 952 // from the interpreter, the interpreter will restore our SP (lest the
aoqi@1 953 // compiled code, which relys solely on SP and not FP, get sick).
// NOTE(review): "G5" is not a register used below; the code here keeps the
// method in Rmethod. The wording above looks inherited from another port.
aoqi@1 954
aoqi@1 955 address c2i_unverified_entry = __ pc();
aoqi@1 956 Label skip_fixup;
aoqi@1 957 {
// Registers used by the inline-cache check: T1 = CompiledICHolder,
// T0 = receiver, T8 = scratch for the receiver's klass.
aoqi@1 958 Register holder = T1;
aoqi@1 959 Register receiver = T0;
aoqi@1 960 Register temp = T8;
aoqi@1 961 address ic_miss = SharedRuntime::get_ic_miss_stub();
aoqi@1 962
aoqi@1 963 Label missed;
aoqi@1 964
aoqi@1 965 __ verify_oop(holder);
aoqi@1 966 //add for compressedoops
aoqi@1 967 __ load_klass(temp, receiver);
aoqi@1 968 __ verify_oop(temp);
aoqi@1 969
// Compare the receiver's klass with the klass cached in the holder. Rmethod is
// loaded from the holder before the branch, so it is set on both paths.
aoqi@6880 970 __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset());
aoqi@9043 971 __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset());
aoqi@6880 972 __ bne(AT, temp, missed);
aoqi@6880 973 __ delayed()->nop();
aoqi@1 974 // Method might have been compiled since the call site was patched to
aoqi@1 975 // interpreted if that is the case treat it as a miss so we can get
aoqi@1 976 // the call site corrected.
// A zero Method::code field branches to skip_fixup, which is bound inside
// gen_c2i_adapter(); any other value falls through into the miss path.
aoqi@1 977 __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
aoqi@6880 978 __ beq(AT, R0, skip_fixup);
aoqi@6880 979 __ delayed()->nop();
aoqi@1 980 __ bind(missed);
aoqi@1 981
aoqi@1 982 __ jmp(ic_miss, relocInfo::runtime_call_type);
aoqi@6880 983 __ delayed()->nop();
aoqi@1 984 }
aoqi@1 985
aoqi@1 986 address c2i_entry = __ pc();
aoqi@1 987
aoqi@1 988 agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
aoqi@1 989
aoqi@1 990 __ flush();
huangjia@9645 991 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
aoqi@1 992 }
aoqi@1 993
aoqi@1 994 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
aoqi@1 995 VMRegPair *regs,
aoqi@1 996 VMRegPair *regs2,
aoqi@1 997 int total_args_passed) {
aoqi@6880 998 assert(regs2 == NULL, "not needed on MIPS");
aoqi@6880 999 // Return the number of VMReg stack_slots needed for the args.
aoqi@6880 1000 // This value does not include an abi space (like register window
aoqi@6880 1001 // save area).
aoqi@6880 1002
aoqi@6880 1003 // The native convention is V8 if !LP64
aoqi@6880 1004 // The LP64 convention is the V9 convention which is slightly more sane.
aoqi@6880 1005
aoqi@6880 1006 // We return the amount of VMReg stack slots we need to reserve for all
aoqi@6880 1007 // the arguments NOT counting out_preserve_stack_slots. Since we always
aoqi@6880 1008 // have space for storing at least 6 registers to memory we start with that.
aoqi@6880 1009 // See int_stk_helper for a further discussion.
aoqi@6880 1010 // We return the amount of VMRegImpl stack slots we need to reserve for all
aoqi@6880 1011 // the arguments NOT counting out_preserve_stack_slots.
aoqi@1 1012 static const Register INT_ArgReg[Argument::n_register_parameters] = {
aoqi@1 1013 A0, A1, A2, A3, A4, A5, A6, A7
aoqi@1 1014 };
aoqi@1 1015 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
aoqi@1 1016 F12, F13, F14, F15, F16, F17, F18, F19
aoqi@1 1017 };
aoqi@6880 1018 uint args = 0;
aoqi@6880 1019 uint stk_args = 0; // inc by 2 each time
aoqi@1 1020
aoqi@9459 1021 // Example:
aoqi@9459 1022 // n java.lang.UNIXProcess::forkAndExec
aoqi@9459 1023 // private native int forkAndExec(byte[] prog,
aoqi@9459 1024 // byte[] argBlock, int argc,
aoqi@9459 1025 // byte[] envBlock, int envc,
aoqi@9459 1026 // byte[] dir,
aoqi@9459 1027 // boolean redirectErrorStream,
aoqi@9459 1028 // FileDescriptor stdin_fd,
aoqi@9459 1029 // FileDescriptor stdout_fd,
aoqi@9459 1030 // FileDescriptor stderr_fd)
aoqi@9459 1031 // JNIEXPORT jint JNICALL
aoqi@9459 1032 // Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
aoqi@9459 1033 // jobject process,
aoqi@9459 1034 // jbyteArray prog,
aoqi@9459 1035 // jbyteArray argBlock, jint argc,
aoqi@9459 1036 // jbyteArray envBlock, jint envc,
aoqi@9459 1037 // jbyteArray dir,
aoqi@9459 1038 // jboolean redirectErrorStream,
aoqi@9459 1039 // jobject stdin_fd,
aoqi@9459 1040 // jobject stdout_fd,
aoqi@9459 1041 // jobject stderr_fd)
aoqi@9459 1042 //
aoqi@9459 1043 // ::c_calling_convention
aoqi@9459 1044 // 0: // env <-- a0
aoqi@9459 1045 // 1: L // klass/obj <-- t0 => a1
aoqi@9459 1046 // 2: [ // prog[] <-- a0 => a2
aoqi@9459 1047 // 3: [ // argBlock[] <-- a1 => a3
aoqi@9459 1048 // 4: I // argc
aoqi@9459 1049 // 5: [ // envBlock[] <-- a3 => a5
aoqi@9459 1050 // 6: I // envc
aoqi@9459 1051 // 7: [ // dir[] <-- a5 => a7
aoqi@9459 1052 // 8: Z // redirectErrorStream a6 => sp[0]
aoqi@9459 1053 // 9: L // stdin a7 => sp[8]
aoqi@9459 1054 // 10: L // stdout fp[16] => sp[16]
aoqi@9459 1055 // 11: L // stderr fp[24] => sp[24]
aoqi@9459 1056 //
aoqi@6880 1057 for (int i = 0; i < total_args_passed; i++) {
aoqi@6880 1058 switch (sig_bt[i]) {
aoqi@6880 1059 case T_VOID: // Halves of longs and doubles
aoqi@6880 1060 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
aoqi@6880 1061 regs[i].set_bad();
aoqi@6880 1062 break;
aoqi@1 1063 case T_BOOLEAN:
aoqi@1 1064 case T_CHAR:
aoqi@1 1065 case T_BYTE:
aoqi@1 1066 case T_SHORT:
aoqi@1 1067 case T_INT:
aoqi@6880 1068 if (args < Argument::n_register_parameters) {
aoqi@6880 1069 regs[i].set1(INT_ArgReg[args++]->as_VMReg());
aoqi@6880 1070 } else {
aoqi@6880 1071 regs[i].set1(VMRegImpl::stack2reg(stk_args));
aoqi@6880 1072 stk_args += 2;
aoqi@6880 1073 }
aoqi@6880 1074 break;
aoqi@6880 1075 case T_LONG:
aoqi@6880 1076 assert(sig_bt[i + 1] == T_VOID, "expecting half");
aoqi@6880 1077 // fall through
aoqi@1 1078 case T_OBJECT:
aoqi@1 1079 case T_ARRAY:
aoqi@1 1080 case T_ADDRESS:
aoqi@6880 1081 case T_METADATA:
aoqi@6880 1082 if (args < Argument::n_register_parameters) {
aoqi@6880 1083 regs[i].set2(INT_ArgReg[args++]->as_VMReg());
aoqi@6880 1084 } else {
aoqi@6880 1085 regs[i].set2(VMRegImpl::stack2reg(stk_args));
aoqi@6880 1086 stk_args += 2;
aoqi@6880 1087 }
aoqi@1 1088 break;
aoqi@6880 1089 case T_FLOAT:
aoqi@6880 1090 if (args < Argument::n_float_register_parameters) {
aoqi@6880 1091 regs[i].set1(FP_ArgReg[args++]->as_VMReg());
aoqi@6880 1092 } else {
aoqi@6880 1093 regs[i].set1(VMRegImpl::stack2reg(stk_args));
aoqi@6880 1094 stk_args += 2;
aoqi@6880 1095 }
aoqi@1 1096 break;
aoqi@6880 1097 case T_DOUBLE:
aoqi@6880 1098 assert(sig_bt[i + 1] == T_VOID, "expecting half");
aoqi@6880 1099 if (args < Argument::n_float_register_parameters) {
aoqi@6880 1100 regs[i].set2(FP_ArgReg[args++]->as_VMReg());
aoqi@6880 1101 } else {
aoqi@6880 1102 regs[i].set2(VMRegImpl::stack2reg(stk_args));
aoqi@6880 1103 stk_args += 2;
aoqi@6880 1104 }
aoqi@6880 1105 break;
aoqi@1 1106 default:
aoqi@1 1107 ShouldNotReachHere();
aoqi@1 1108 break;
aoqi@1 1109 }
aoqi@1 1110 }
aoqi@6880 1111
aoqi@6880 1112 return round_to(stk_args, 2);
aoqi@1 1113 }
aoqi@1 1114
aoqi@1 1115 // ---------------------------------------------------------------------------
aoqi@1 1116 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
aoqi@6880 1117 // We always ignore the frame_slots arg and just use the space just below frame pointer
aoqi@6880 1118 // which by this time is free to use
aoqi@6880 1119 switch (ret_type) {
aoqi@6880 1120 case T_FLOAT:
aoqi@6880 1121 __ swc1(FSF, FP, -wordSize);
aoqi@6880 1122 break;
aoqi@6880 1123 case T_DOUBLE:
aoqi@6880 1124 __ sdc1(FSF, FP, -wordSize );
aoqi@6880 1125 break;
aoqi@6880 1126 case T_VOID: break;
aoqi@6880 1127 case T_LONG:
aoqi@8009 1128 __ sd(V0, FP, -wordSize);
aoqi@8009 1129 break;
aoqi@6880 1130 case T_OBJECT:
aoqi@6880 1131 case T_ARRAY:
aoqi@6880 1132 __ sd(V0, FP, -wordSize);
aoqi@6880 1133 break;
aoqi@6880 1134 default: {
aoqi@8009 1135 __ sw(V0, FP, -wordSize);
aoqi@8009 1136 }
aoqi@6880 1137 }
aoqi@1 1138 }
aoqi@1 1139
aoqi@1 1140 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
aoqi@6880 1141 // We always ignore the frame_slots arg and just use the space just below frame pointer
aoqi@6880 1142 // which by this time is free to use
aoqi@6880 1143 switch (ret_type) {
aoqi@6880 1144 case T_FLOAT:
aoqi@6880 1145 __ lwc1(FSF, FP, -wordSize);
aoqi@6880 1146 break;
aoqi@6880 1147 case T_DOUBLE:
aoqi@6880 1148 __ ldc1(FSF, FP, -wordSize );
aoqi@6880 1149 break;
aoqi@6880 1150 case T_LONG:
aoqi@6880 1151 __ ld(V0, FP, -wordSize);
aoqi@6880 1152 break;
aoqi@6880 1153 case T_VOID: break;
aoqi@6880 1154 case T_OBJECT:
aoqi@6880 1155 case T_ARRAY:
aoqi@6880 1156 __ ld(V0, FP, -wordSize);
aoqi@6880 1157 break;
aoqi@6880 1158 default: {
aoqi@8009 1159 __ lw(V0, FP, -wordSize);
aoqi@8009 1160 }
aoqi@6880 1161 }
aoqi@1 1162 }
aoqi@1 1163
aoqi@1 1164 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
aoqi@6880 1165 for ( int i = first_arg ; i < arg_count ; i++ ) {
aoqi@6880 1166 if (args[i].first()->is_Register()) {
aoqi@6880 1167 __ push(args[i].first()->as_Register());
aoqi@6880 1168 } else if (args[i].first()->is_FloatRegister()) {
aoqi@6880 1169 __ push(args[i].first()->as_FloatRegister());
aoqi@1 1170 }
aoqi@6880 1171 }
aoqi@1 1172 }
aoqi@1 1173
aoqi@1 1174 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
aoqi@6880 1175 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
aoqi@6880 1176 if (args[i].first()->is_Register()) {
aoqi@6880 1177 __ pop(args[i].first()->as_Register());
aoqi@6880 1178 } else if (args[i].first()->is_FloatRegister()) {
aoqi@6880 1179 __ pop(args[i].first()->as_FloatRegister());
aoqi@1 1180 }
aoqi@6880 1181 }
aoqi@1 1182 }
aoqi@1 1183
aoqi@1 1184 // A simple move of integer like type
aoqi@1 1185 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
aoqi@1 1186 if (src.first()->is_stack()) {
aoqi@1 1187 if (dst.first()->is_stack()) {
aoqi@1 1188 // stack to stack
aoqi@6880 1189 __ lw(AT, FP, reg2offset_in(src.first()));
huangjia@9645 1190 __ sd(AT, SP, reg2offset_out(dst.first()));
aoqi@1 1191 } else {
aoqi@1 1192 // stack to reg
aoqi@6880 1193 __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
aoqi@1 1194 }
aoqi@1 1195 } else if (dst.first()->is_stack()) {
aoqi@1 1196 // reg to stack
aoqi@6880 1197 __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
aoqi@1 1198 } else {
aoqi@6880 1199 if (dst.first() != src.first()){
aoqi@6880 1200 __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first()
aoqi@6880 1201 }
aoqi@1 1202 }
aoqi@1 1203 }
aoqi@1 1204
aoqi@1 1205 // An oop arg. Must pass a handle not the oop itself
// The handle handed to dst is the address of a stack slot holding the oop, or
// zero when the oop itself is null. V0 is used as the scratch handle register.
//
//   map                - oop map that records the stack slot holding the oop
//   oop_handle_offset  - first slot of the area used for oops arriving in registers
//   framesize_in_slots - added to the caller-frame slot number of an oop that
//                        is already on the stack before it is recorded in map
//   src / dst          - incoming location of the oop / outgoing location of the handle
//   is_receiver        - when true, *receiver_offset is set to the byte offset
//                        of the slot holding the oop
aoqi@1 1206 static void object_move(MacroAssembler* masm,
aoqi@1 1207 OopMap* map,
aoqi@1 1208 int oop_handle_offset,
aoqi@1 1209 int framesize_in_slots,
aoqi@1 1210 VMRegPair src,
aoqi@1 1211 VMRegPair dst,
aoqi@1 1212 bool is_receiver,
aoqi@1 1213 int* receiver_offset) {
aoqi@1 1214
aoqi@1 1215 // must pass a handle. First figure out the location we use as a handle
aoqi@1 1216
aoqi@6880 1217 //FIXME, for mips, dst can be register
aoqi@6880 1218 if (src.first()->is_stack()) {
aoqi@6880 1219 // Oop is already on the stack as an argument
aoqi@6880 1220 Register rHandle = V0;
aoqi@6880 1221 Label nil;
// rHandle starts as zero and is replaced by the slot address only when the
// loaded oop is non-zero.
aoqi@6880 1222 __ xorr(rHandle, rHandle, rHandle);
aoqi@6880 1223 __ ld(AT, FP, reg2offset_in(src.first()));
huangjia@9645 1224 __ beq(AT, R0, nil);
aoqi@6880 1225 __ delayed()->nop();
aoqi@6880 1226 __ lea(rHandle, Address(FP, reg2offset_in(src.first())));
aoqi@6880 1227 __ bind(nil);
aoqi@6880 1228 if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
huangjia@9645 1229 else __ move( (dst.first())->as_Register(), rHandle);
aoqi@6880 1230 //if dst is register
aoqi@6880 1231 //FIXME, do mips need out preserve stack slots?
// Record the caller-frame slot in the oop map, expressed relative to this frame.
aoqi@6880 1232 int offset_in_older_frame = src.first()->reg2stack()
aoqi@6880 1233 + SharedRuntime::out_preserve_stack_slots();
aoqi@6880 1234 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
aoqi@6880 1235 if (is_receiver) {
aoqi@6880 1236 *receiver_offset = (offset_in_older_frame
aoqi@6880 1237 + framesize_in_slots) * VMRegImpl::stack_slot_size;
aoqi@6880 1238 }
aoqi@6880 1239 } else {
aoqi@6880 1240 // Oop is in an a register we must store it to the space we reserve
aoqi@6880 1241 // on the stack for oop_handles
aoqi@6880 1242 const Register rOop = src.first()->as_Register();
aoqi@6880 1243 assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
aoqi@6880 1244 const Register rHandle = V0;
aoqi@6880 1245 //Important: refer to java_calling_convertion
// The slot is chosen from the register's encoding distance from A0, so each
// source register gets its own slot in the handle area.
aoqi@6880 1246 int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
aoqi@6880 1247 int offset = oop_slot*VMRegImpl::stack_slot_size;
aoqi@6880 1248 Label skip;
// The oop is stored and recorded in the map unconditionally; only the handle
// value depends on whether the oop is zero.
aoqi@6880 1249 __ sd( rOop , SP, offset );
aoqi@6880 1250 map->set_oop(VMRegImpl::stack2reg(oop_slot));
aoqi@6880 1251 __ xorr( rHandle, rHandle, rHandle);
aoqi@6880 1252 __ beq(rOop, R0, skip);
aoqi@6880 1253 __ delayed()->nop();
aoqi@6880 1254 __ lea(rHandle, Address(SP, offset));
aoqi@6880 1255 __ bind(skip);
aoqi@6880 1256 // Store the handle parameter
aoqi@6880 1257 if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
aoqi@6880 1258 else __ move((dst.first())->as_Register(), rHandle);
aoqi@6880 1259 //if dst is register
aoqi@6880 1260
aoqi@6880 1261 if (is_receiver) {
aoqi@6880 1262 *receiver_offset = offset;
aoqi@6880 1263 }
aoqi@6880 1264 }
aoqi@1 1265 }
aoqi@1 1266
aoqi@1 1267 // A float arg may have to do float reg int reg conversion
aoqi@1 1268 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
aoqi@1 1269 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
aoqi@1 1270
aoqi@6880 1271 if (src.first()->is_stack()) {
aoqi@6880 1272 if (dst.first()->is_stack()) {
huangjia@9759 1273 __ lw(AT, FP, reg2offset_in(src.first()));
huangjia@9759 1274 __ sw(AT, SP, reg2offset_out(dst.first()));
aoqi@6880 1275 }
aoqi@6880 1276 else
huangjia@9645 1277 __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first()));
aoqi@6880 1278 } else {
aoqi@6880 1279 // reg to stack
aoqi@6880 1280 if(dst.first()->is_stack())
huangjia@9645 1281 __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
aoqi@6880 1282 else
huangjia@9645 1283 __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
aoqi@6880 1284 }
aoqi@1 1285 }
aoqi@6880 1286
aoqi@1 1287 // A long move
aoqi@1 1288 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
aoqi@1 1289
aoqi@6880 1290 // The only legal possibility for a long_move VMRegPair is:
aoqi@6880 1291 // 1: two stack slots (possibly unaligned)
aoqi@6880 1292 // as neither the java or C calling convention will use registers
aoqi@6880 1293 // for longs.
aoqi@6880 1294
aoqi@6880 1295 if (src.first()->is_stack()) {
aoqi@6880 1296 assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
aoqi@6880 1297 if( dst.first()->is_stack()){
aoqi@6880 1298 __ ld(AT, FP, reg2offset_in(src.first()));
aoqi@6880 1299 __ sd(AT, SP, reg2offset_out(dst.first()));
aoqi@6880 1300 } else {
aoqi@6880 1301 __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
aoqi@6880 1302 }
aoqi@6880 1303 } else {
aoqi@6880 1304 if( dst.first()->is_stack()){
aoqi@6880 1305 __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
huangjia@9645 1306 } else {
aoqi@6880 1307 __ move( (dst.first())->as_Register() , (src.first())->as_Register());
aoqi@6880 1308 }
aoqi@6880 1309 }
aoqi@1 1310 }
aoqi@1 1311
aoqi@1 1312 // A double move
aoqi@1 1313 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
aoqi@1 1314
aoqi@6880 1315 // The only legal possibilities for a double_move VMRegPair are:
aoqi@6880 1316 // The painful thing here is that like long_move a VMRegPair might be
aoqi@6880 1317
aoqi@6880 1318 // Because of the calling convention we know that src is either
aoqi@6880 1319 // 1: a single physical register (xmm registers only)
aoqi@6880 1320 // 2: two stack slots (possibly unaligned)
aoqi@6880 1321 // dst can only be a pair of stack slots.
aoqi@6880 1322
aoqi@6880 1323
aoqi@6880 1324 if (src.first()->is_stack()) {
aoqi@6880 1325 // source is all stack
aoqi@6880 1326 if( dst.first()->is_stack()){
huangjia@9759 1327 __ ld(AT, FP, reg2offset_in(src.first()));
huangjia@9759 1328 __ sd(AT, SP, reg2offset_out(dst.first()));
huangjia@9645 1329 } else {
aoqi@6880 1330 __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
aoqi@6880 1331 }
aoqi@6880 1332
aoqi@6880 1333 } else {
aoqi@6880 1334 // reg to stack
aoqi@6880 1335 // No worries about stack alignment
aoqi@6880 1336 if( dst.first()->is_stack()){
huangjia@9645 1337 __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first()));
aoqi@6880 1338 }
aoqi@6880 1339 else
aoqi@6880 1340 __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
aoqi@6880 1341
aoqi@6880 1342 }
aoqi@1 1343 }
aoqi@1 1344
aoqi@1 1345 static void verify_oop_args(MacroAssembler* masm,
aoqi@1 1346 methodHandle method,
aoqi@1 1347 const BasicType* sig_bt,
aoqi@1 1348 const VMRegPair* regs) {
aoqi@1 1349 Register temp_reg = T9; // not part of any compiled calling seq
aoqi@1 1350 if (VerifyOops) {
aoqi@1 1351 for (int i = 0; i < method->size_of_parameters(); i++) {
aoqi@1 1352 if (sig_bt[i] == T_OBJECT ||
aoqi@1 1353 sig_bt[i] == T_ARRAY) {
aoqi@1 1354 VMReg r = regs[i].first();
aoqi@1 1355 assert(r->is_valid(), "bad oop arg");
aoqi@1 1356 if (r->is_stack()) {
aoqi@1 1357 __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
aoqi@1 1358 __ verify_oop(temp_reg);
aoqi@1 1359 } else {
aoqi@1 1360 __ verify_oop(r->as_Register());
aoqi@1 1361 }
aoqi@1 1362 }
aoqi@1 1363 }
aoqi@1 1364 }
aoqi@1 1365 }
aoqi@1 1366
aoqi@1 1367 static void gen_special_dispatch(MacroAssembler* masm,
aoqi@1 1368 methodHandle method,
aoqi@1 1369 const BasicType* sig_bt,
aoqi@1 1370 const VMRegPair* regs) {
aoqi@1 1371 verify_oop_args(masm, method, sig_bt, regs);
aoqi@1 1372 vmIntrinsics::ID iid = method->intrinsic_id();
aoqi@1 1373
aoqi@1 1374 // Now write the args into the outgoing interpreter space
aoqi@1 1375 bool has_receiver = false;
aoqi@1 1376 Register receiver_reg = noreg;
aoqi@1 1377 int member_arg_pos = -1;
aoqi@1 1378 Register member_reg = noreg;
aoqi@1 1379 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
aoqi@1 1380 if (ref_kind != 0) {
aoqi@1 1381 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
aoqi@1 1382 member_reg = S3; // known to be free at this point
aoqi@1 1383 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
aoqi@1 1384 } else if (iid == vmIntrinsics::_invokeBasic) {
aoqi@1 1385 has_receiver = true;
aoqi@1 1386 } else {
aoqi@1 1387 fatal(err_msg_res("unexpected intrinsic id %d", iid));
aoqi@1 1388 }
aoqi@1 1389
aoqi@1 1390 if (member_reg != noreg) {
aoqi@1 1391 // Load the member_arg into register, if necessary.
aoqi@1 1392 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
aoqi@1 1393 VMReg r = regs[member_arg_pos].first();
aoqi@1 1394 if (r->is_stack()) {
fujie@410 1395 __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
aoqi@1 1396 } else {
aoqi@1 1397 // no data motion is needed
aoqi@1 1398 member_reg = r->as_Register();
aoqi@1 1399 }
aoqi@1 1400 }
aoqi@1 1401
aoqi@1 1402 if (has_receiver) {
aoqi@1 1403 // Make sure the receiver is loaded into a register.
aoqi@1 1404 assert(method->size_of_parameters() > 0, "oob");
aoqi@1 1405 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
aoqi@1 1406 VMReg r = regs[0].first();
aoqi@1 1407 assert(r->is_valid(), "bad receiver arg");
aoqi@1 1408 if (r->is_stack()) {
aoqi@1 1409 // Porting note: This assumes that compiled calling conventions always
aoqi@1 1410 // pass the receiver oop in a register. If this is not true on some
aoqi@1 1411 // platform, pick a temp and load the receiver from stack.
aoqi@1 1412 fatal("receiver always in a register");
aoqi@1 1413 receiver_reg = SSR; // known to be free at this point
fujie@410 1414 __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
aoqi@1 1415 } else {
aoqi@1 1416 // no data motion is needed
aoqi@1 1417 receiver_reg = r->as_Register();
aoqi@1 1418 }
aoqi@1 1419 }
aoqi@1 1420
aoqi@1 1421 // Figure out which address we are really jumping to:
aoqi@1 1422 MethodHandles::generate_method_handle_dispatch(masm, iid,
aoqi@1 1423 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
aoqi@1 1424 }
aoqi@1 1425
aoqi@1 1426 // ---------------------------------------------------------------------------
aoqi@1 1427 // Generate a native wrapper for a given method. The method takes arguments
aoqi@1 1428 // in the Java compiled code convention, marshals them to the native
aoqi@1 1429 // convention (handlizes oops, etc), transitions to native, makes the call,
aoqi@1 1430 // returns to java state (possibly blocking), unhandlizes any result and
aoqi@1 1431 // returns.
aoqi@1 1432 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
aoqi@1 1433 methodHandle method,
aoqi@1 1434 int compile_id,
aoqi@6880 1435 BasicType* in_sig_bt,
aoqi@6880 1436 VMRegPair* in_regs,
aoqi@1 1437 BasicType ret_type) {
aoqi@1 1438 if (method->is_method_handle_intrinsic()) {
aoqi@1 1439 vmIntrinsics::ID iid = method->intrinsic_id();
aoqi@1 1440 intptr_t start = (intptr_t)__ pc();
aoqi@1 1441 int vep_offset = ((intptr_t)__ pc()) - start;
aoqi@1 1442 gen_special_dispatch(masm,
aoqi@1 1443 method,
aoqi@1 1444 in_sig_bt,
aoqi@1 1445 in_regs);
aoqi@1 1446 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
aoqi@1 1447 __ flush();
aoqi@1 1448 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
aoqi@1 1449 return nmethod::new_native_nmethod(method,
aoqi@1 1450 compile_id,
aoqi@1 1451 masm->code(),
aoqi@1 1452 vep_offset,
aoqi@1 1453 frame_complete,
aoqi@1 1454 stack_slots / VMRegImpl::slots_per_word,
aoqi@1 1455 in_ByteSize(-1),
aoqi@1 1456 in_ByteSize(-1),
aoqi@1 1457 (OopMapSet*)NULL);
aoqi@1 1458 }
aoqi@1 1459 bool is_critical_native = true;
aoqi@1 1460 address native_func = method->critical_native_function();
aoqi@1 1461 if (native_func == NULL) {
aoqi@1 1462 native_func = method->native_function();
aoqi@1 1463 is_critical_native = false;
aoqi@1 1464 }
aoqi@1 1465 assert(native_func != NULL, "must have function");
aoqi@1 1466
aoqi@1 1467 // Native nmethod wrappers never take possession of the oop arguments.
aoqi@1 1468 // So the caller will gc the arguments. The only thing we need an
aoqi@1 1469 // oopMap for is if the call is static
aoqi@1 1470 //
aoqi@1 1471 // An OopMap for lock (and class if static), and one for the VM call itself
aoqi@1 1472 OopMapSet *oop_maps = new OopMapSet();
aoqi@1 1473
aoqi@6880 1474 // We have received a description of where all the java arg are located
aoqi@6880 1475 // on entry to the wrapper. We need to convert these args to where
aoqi@6880 1476 // the jni function will expect them. To figure out where they go
aoqi@6880 1477 // we convert the java signature to a C signature by inserting
aoqi@6880 1478 // the hidden arguments as arg[0] and possibly arg[1] (static method)
aoqi@1 1479
aoqi@1 1480 const int total_in_args = method->size_of_parameters();
aoqi@1 1481 int total_c_args = total_in_args;
aoqi@1 1482 if (!is_critical_native) {
aoqi@1 1483 total_c_args += 1;
aoqi@1 1484 if (method->is_static()) {
aoqi@1 1485 total_c_args++;
aoqi@1 1486 }
aoqi@1 1487 } else {
aoqi@1 1488 for (int i = 0; i < total_in_args; i++) {
aoqi@1 1489 if (in_sig_bt[i] == T_ARRAY) {
aoqi@1 1490 total_c_args++;
aoqi@1 1491 }
aoqi@1 1492 }
aoqi@1 1493 }
aoqi@1 1494
aoqi@6880 1495 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
aoqi@6880 1496 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
aoqi@1 1497 BasicType* in_elem_bt = NULL;
aoqi@1 1498
aoqi@1 1499 int argc = 0;
aoqi@1 1500 if (!is_critical_native) {
aoqi@1 1501 out_sig_bt[argc++] = T_ADDRESS;
aoqi@1 1502 if (method->is_static()) {
aoqi@1 1503 out_sig_bt[argc++] = T_OBJECT;
aoqi@1 1504 }
aoqi@1 1505
aoqi@1 1506 for (int i = 0; i < total_in_args ; i++ ) {
aoqi@1 1507 out_sig_bt[argc++] = in_sig_bt[i];
aoqi@1 1508 }
aoqi@1 1509 } else {
aoqi@1 1510 Thread* THREAD = Thread::current();
aoqi@1 1511 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
aoqi@1 1512 SignatureStream ss(method->signature());
aoqi@1 1513 for (int i = 0; i < total_in_args ; i++ ) {
aoqi@1 1514 if (in_sig_bt[i] == T_ARRAY) {
aoqi@1 1515 // Arrays are passed as int, elem* pair
aoqi@1 1516 out_sig_bt[argc++] = T_INT;
aoqi@1 1517 out_sig_bt[argc++] = T_ADDRESS;
aoqi@1 1518 Symbol* atype = ss.as_symbol(CHECK_NULL);
aoqi@1 1519 const char* at = atype->as_C_string();
aoqi@1 1520 if (strlen(at) == 2) {
aoqi@1 1521 assert(at[0] == '[', "must be");
aoqi@1 1522 switch (at[1]) {
aoqi@1 1523 case 'B': in_elem_bt[i] = T_BYTE; break;
aoqi@1 1524 case 'C': in_elem_bt[i] = T_CHAR; break;
aoqi@1 1525 case 'D': in_elem_bt[i] = T_DOUBLE; break;
aoqi@1 1526 case 'F': in_elem_bt[i] = T_FLOAT; break;
aoqi@1 1527 case 'I': in_elem_bt[i] = T_INT; break;
aoqi@1 1528 case 'J': in_elem_bt[i] = T_LONG; break;
aoqi@1 1529 case 'S': in_elem_bt[i] = T_SHORT; break;
aoqi@1 1530 case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
aoqi@1 1531 default: ShouldNotReachHere();
aoqi@1 1532 }
aoqi@1 1533 }
aoqi@1 1534 } else {
aoqi@1 1535 out_sig_bt[argc++] = in_sig_bt[i];
aoqi@1 1536 in_elem_bt[i] = T_VOID;
aoqi@1 1537 }
aoqi@1 1538 if (in_sig_bt[i] != T_VOID) {
aoqi@1 1539 assert(in_sig_bt[i] == ss.type(), "must match");
aoqi@1 1540 ss.next();
aoqi@1 1541 }
aoqi@1 1542 }
aoqi@1 1543 }
aoqi@1 1544
aoqi@1 1545 // Now figure out where the args must be stored and how much stack space
aoqi@1 1546 // they require (neglecting out_preserve_stack_slots but space for storing
aoqi@1 1547 // the 1st six register arguments). It's weird see int_stk_helper.
aoqi@1 1548 //
aoqi@1 1549 int out_arg_slots;
aoqi@6880 1550 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
aoqi@1 1551
aoqi@1 1552 // Compute framesize for the wrapper. We need to handlize all oops in
aoqi@1 1553 // registers. We must create space for them here that is disjoint from
aoqi@1 1554 // the windowed save area because we have no control over when we might
aoqi@1 1555 // flush the window again and overwrite values that gc has since modified.
aoqi@1 1556 // (The live window race)
aoqi@1 1557 //
aoqi@1 1558 // We always just allocate 6 word for storing down these object. This allow
aoqi@1 1559 // us to simply record the base and use the Ireg number to decide which
aoqi@1 1560 // slot to use. (Note that the reg number is the inbound number not the
aoqi@1 1561 // outbound number).
aoqi@1 1562 // We must shuffle args to match the native convention, and include var-args space.
aoqi@1 1563
aoqi@1 1564 // Calculate the total number of stack slots we will need.
aoqi@1 1565
aoqi@1 1566 // First count the abi requirement plus all of the outgoing args
aoqi@1 1567 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
aoqi@1 1568
aoqi@1 1569 // Now the space for the inbound oop handle area
aoqi@1 1570 int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers
aoqi@1 1571 if (is_critical_native) {
aoqi@1 1572 // Critical natives may have to call out so they need a save area
aoqi@1 1573 // for register arguments.
aoqi@1 1574 int double_slots = 0;
aoqi@1 1575 int single_slots = 0;
aoqi@1 1576 for ( int i = 0; i < total_in_args; i++) {
aoqi@1 1577 if (in_regs[i].first()->is_Register()) {
aoqi@1 1578 const Register reg = in_regs[i].first()->as_Register();
aoqi@1 1579 switch (in_sig_bt[i]) {
aoqi@1 1580 case T_BOOLEAN:
aoqi@1 1581 case T_BYTE:
aoqi@1 1582 case T_SHORT:
aoqi@1 1583 case T_CHAR:
aoqi@1 1584 case T_INT: single_slots++; break;
aoqi@1 1585 case T_ARRAY: // specific to LP64 (7145024)
aoqi@1 1586 case T_LONG: double_slots++; break;
aoqi@1 1587 default: ShouldNotReachHere();
aoqi@1 1588 }
aoqi@1 1589 } else if (in_regs[i].first()->is_FloatRegister()) {
aoqi@1 1590 switch (in_sig_bt[i]) {
aoqi@1 1591 case T_FLOAT: single_slots++; break;
aoqi@1 1592 case T_DOUBLE: double_slots++; break;
aoqi@1 1593 default: ShouldNotReachHere();
aoqi@1 1594 }
aoqi@1 1595 }
aoqi@1 1596 }
aoqi@1 1597 total_save_slots = double_slots * 2 + single_slots;
aoqi@1 1598 // align the save area
aoqi@1 1599 if (double_slots != 0) {
aoqi@1 1600 stack_slots = round_to(stack_slots, 2);
aoqi@1 1601 }
aoqi@1 1602 }
aoqi@1 1603
aoqi@1 1604 int oop_handle_offset = stack_slots;
aoqi@1 1605 stack_slots += total_save_slots;
aoqi@1 1606
aoqi@1 1607 // Now any space we need for handlizing a klass if static method
aoqi@1 1608
aoqi@6880 1609 int klass_slot_offset = 0;
aoqi@6880 1610 int klass_offset = -1;
aoqi@6880 1611 int lock_slot_offset = 0;
aoqi@6880 1612 bool is_static = false;
aoqi@1 1613
aoqi@1 1614 if (method->is_static()) {
aoqi@1 1615 klass_slot_offset = stack_slots;
aoqi@1 1616 stack_slots += VMRegImpl::slots_per_word;
aoqi@1 1617 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
aoqi@1 1618 is_static = true;
aoqi@1 1619 }
aoqi@1 1620
aoqi@1 1621 // Plus a lock if needed
aoqi@1 1622
aoqi@1 1623 if (method->is_synchronized()) {
aoqi@1 1624 lock_slot_offset = stack_slots;
aoqi@1 1625 stack_slots += VMRegImpl::slots_per_word;
aoqi@1 1626 }
aoqi@1 1627
aoqi@1 1628 // Now a place to save return value or as a temporary for any gpr -> fpr moves
aoqi@9459 1629 // + 2 for return address (which we own) and saved fp
aoqi@6880 1630 stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7)
aoqi@1 1631
aoqi@1 1632 // Ok The space we have allocated will look like:
aoqi@1 1633 //
aoqi@1 1634 //
aoqi@1 1635 // FP-> | |
aoqi@1 1636 // |---------------------|
aoqi@1 1637 // | 2 slots for moves |
aoqi@1 1638 // |---------------------|
aoqi@1 1639 // | lock box (if sync) |
aoqi@1 1640 // |---------------------| <- lock_slot_offset
aoqi@1 1641 // | klass (if static) |
aoqi@1 1642 // |---------------------| <- klass_slot_offset
aoqi@1 1643 // | oopHandle area |
aoqi@1 1644 // |---------------------| <- oop_handle_offset
aoqi@1 1645 // | outbound memory |
aoqi@1 1646 // | based arguments |
aoqi@1 1647 // | |
aoqi@1 1648 // |---------------------|
aoqi@1 1649 // | vararg area |
aoqi@1 1650 // |---------------------|
aoqi@1 1651 // | |
aoqi@1 1652 // SP-> | out_preserved_slots |
aoqi@1 1653 //
aoqi@1 1654 //
aoqi@1 1655
aoqi@1 1656
aoqi@1 1657 // Now compute actual number of stack words we need rounding to make
aoqi@1 1658 // stack properly aligned.
aoqi@1 1659 stack_slots = round_to(stack_slots, StackAlignmentInSlots);
aoqi@1 1660
aoqi@1 1661 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
aoqi@1 1662
aoqi@6880 1663 intptr_t start = (intptr_t)__ pc();
aoqi@6880 1664
aoqi@6880 1665
aoqi@6880 1666
aoqi@6880 1667 // First thing make an ic check to see if we should even be here
aoqi@6880 1668 address ic_miss = SharedRuntime::get_ic_miss_stub();
aoqi@6880 1669
aoqi@6880 1670 // We are free to use all registers as temps without saving them and
aoqi@9459 1671 // restoring them except fp. fp is the only callee save register
aoqi@6880 1672 // as far as the interpreter and the compiler(s) are concerned.
aoqi@1 1673
aoqi@1 1674 //refer to register_mips.hpp:IC_Klass
aoqi@6880 1675 const Register ic_reg = T1;
aoqi@6880 1676 const Register receiver = T0;
aoqi@6880 1677
aoqi@6880 1678 Label hit;
aoqi@6880 1679 Label exception_pending;
aoqi@6880 1680
aoqi@6880 1681 __ verify_oop(receiver);
aoqi@6880 1682 //add for compressedoops
aoqi@6880 1683 __ load_klass(T9, receiver);
aoqi@6880 1684 __ beq(T9, ic_reg, hit);
aoqi@6880 1685 __ delayed()->nop();
aoqi@6880 1686 __ jmp(ic_miss, relocInfo::runtime_call_type);
aoqi@6880 1687 __ delayed()->nop();
aoqi@6880 1688 // verified entry must be aligned for code patching.
aoqi@6880 1689 // and the first 5 bytes must be in the same cache line
aoqi@6880 1690 // if we align at 8 then we will be sure 5 bytes are in the same line
aoqi@6880 1691 __ align(8);
aoqi@6880 1692
aoqi@6880 1693 __ bind(hit);
aoqi@6880 1694
aoqi@6880 1695
aoqi@6880 1696 int vep_offset = ((intptr_t)__ pc()) - start;
aoqi@1 1697 #ifdef COMPILER1
aoqi@6880 1698 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
aoqi@6880 1699 // Object.hashCode can pull the hashCode from the header word
aoqi@6880 1700 // instead of doing a full VM transition once it's been computed.
aoqi@6880 1701 // Since hashCode is usually polymorphic at call sites we can't do
aoqi@6880 1702 // this optimization at the call site without a lot of work.
aoqi@6880 1703 Label slowCase;
aoqi@6880 1704 Register receiver = T0;
aoqi@6880 1705 Register result = V0;
aoqi@6880 1706 __ ld ( result, receiver, oopDesc::mark_offset_in_bytes());
aoqi@6880 1707 // check if locked
aoqi@6880 1708 __ andi(AT, result, markOopDesc::unlocked_value);
aoqi@6880 1709 __ beq(AT, R0, slowCase);
aoqi@6880 1710 __ delayed()->nop();
aoqi@6880 1711 if (UseBiasedLocking) {
aoqi@6880 1712 // Check if biased and fall through to runtime if so
aoqi@6880 1713 __ andi (AT, result, markOopDesc::biased_lock_bit_in_place);
huangjia@9645 1714 __ bne(AT, R0, slowCase);
aoqi@6880 1715 __ delayed()->nop();
aoqi@6880 1716 }
aoqi@6880 1717 // get hash
aoqi@6880 1718 __ li(AT, markOopDesc::hash_mask_in_place);
aoqi@6880 1719 __ andr (AT, result, AT);
aoqi@6880 1720 // test if hashCode exists
aoqi@6880 1721 __ beq (AT, R0, slowCase);
aoqi@6880 1722 __ delayed()->nop();
aoqi@6880 1723 __ shr(result, markOopDesc::hash_shift);
aoqi@6880 1724 __ jr(RA);
aoqi@6880 1725 __ delayed()->nop();
aoqi@6880 1726 __ bind (slowCase);
aoqi@6880 1727 }
aoqi@1 1728 #endif // COMPILER1
aoqi@1 1729
aoqi@6880 1730 // The instruction at the verified entry point must be 5 bytes or longer
aoqi@6880 1731 // because it can be patched on the fly by make_non_entrant. The stack bang
aoqi@6880 1732 // instruction fits that requirement.
aoqi@6880 1733
aoqi@6880 1734 // Generate stack overflow check
aoqi@6880 1735
aoqi@6880 1736 if (UseStackBanging) {
aoqi@6880 1737 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
aoqi@6880 1738 } else {
aoqi@6880 1739 // need a 5 byte instruction to allow MT safe patching to non-entrant
aoqi@6880 1740 __ nop();
aoqi@6880 1741 __ nop();
aoqi@6880 1742 __ nop();
aoqi@6880 1743 __ nop();
aoqi@6880 1744 __ nop();
aoqi@6880 1745 }
aoqi@6880 1746 // Generate a new frame for the wrapper.
aoqi@6880 1747 // do mips need this ?
aoqi@1 1748 #ifndef OPT_THREAD
aoqi@6880 1749 __ get_thread(TREG);
aoqi@1 1750 #endif
aoqi@6880 1751 __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@6880 1752 __ move(AT, -(StackAlignmentInBytes));
aoqi@6880 1753 __ andr(SP, SP, AT);
aoqi@6880 1754
aoqi@6880 1755 __ enter();
aoqi@9459 1756 // -2 because return address is already present and so is saved fp
aoqi@6880 1757 __ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
aoqi@6880 1758
aoqi@6880 1759 // Frame is now completed as far as size and linkage.
aoqi@6880 1760
aoqi@6880 1761 int frame_complete = ((intptr_t)__ pc()) - start;
aoqi@6880 1762
aoqi@9459 1763 // Calculate the difference between sp and fp. We need to know it
aoqi@6880 1764 // after the native call because on windows Java Natives will pop
aoqi@9459 1765 // the arguments and it is painful to do sp relative addressing
aoqi@6880 1766 // in a platform independent way. So after the call we switch to
aoqi@9459 1767 // fp relative addressing.
aoqi@9459 1768 //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change
aoqi@7997 1769 //the SP
aoqi@6880 1770 int fp_adjustment = stack_size - 2*wordSize;
aoqi@1 1771
aoqi@1 1772 #ifdef COMPILER2
aoqi@6880 1773 // C2 may leave the stack dirty if not in SSE2+ mode
aoqi@6880 1774 __ empty_FPU_stack();
aoqi@9459 1775 #endif
aoqi@9459 1776
aoqi@9459 1777 // Compute the fp offset for any slots used after the jni call
aoqi@9459 1778
aoqi@9459 1779 int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
aoqi@9459 1780 // We use TREG as a thread pointer because it is callee save and
aoqi@6880 1781 // if we load it once it is usable thru the entire wrapper
aoqi@6880 1782 const Register thread = TREG;
aoqi@6880 1783
aoqi@9459 1784 // We use S4 as the oop handle for the receiver/klass
aoqi@6880 1785 // It is callee save so it survives the call to native
aoqi@6880 1786
aoqi@6880 1787 const Register oop_handle_reg = S4;
aoqi@1 1788 if (is_critical_native) {
aoqi@1 1789 __ stop("generate_native_wrapper in sharedRuntime <2>");
aoqi@9459 1790 //TODO:Fu
aoqi@9459 1791 // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
aoqi@9459 1792 // oop_handle_offset, oop_maps, in_regs, in_sig_bt);
aoqi@1 1793 }
aoqi@1 1794
aoqi@1 1795 #ifndef OPT_THREAD
aoqi@6880 1796 __ get_thread(thread);
aoqi@1 1797 #endif
aoqi@1 1798
aoqi@1 1799 //
aoqi@1 1800 // We immediately shuffle the arguments so that any vm call we have to
aoqi@1 1801 // make from here on out (sync slow path, jvmpi, etc.) we will have
aoqi@1 1802 // captured the oops from our caller and have a valid oopMap for
aoqi@1 1803 // them.
aoqi@1 1804
aoqi@1 1805 // -----------------
aoqi@6880 1806 // The Grand Shuffle
aoqi@1 1807 //
aoqi@1 1808 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
aoqi@1 1809 // and, if static, the class mirror instead of a receiver. This pretty much
aoqi@1 1810 // guarantees that register layout will not match (and mips doesn't use reg
aoqi@1 1811 // parms though amd does). Since the native abi doesn't use register args
aoqi@1 1812 // and the java conventions does we don't have to worry about collisions.
aoqi@1 1813 // All of our moved are reg->stack or stack->stack.
aoqi@1 1814 // We ignore the extra arguments during the shuffle and handle them at the
aoqi@1 1815 // last moment. The shuffle is described by the two calling convention
aoqi@1 1816 // vectors we have in our possession. We simply walk the java vector to
aoqi@1 1817 // get the source locations and the c vector to get the destinations.
aoqi@1 1818
aoqi@6880 1819 int c_arg = method->is_static() ? 2 : 1 ;
aoqi@6880 1820
aoqi@9459 1821 // Record sp-based slot for receiver on stack for non-static methods
aoqi@6880 1822 int receiver_offset = -1;
aoqi@6880 1823
aoqi@6880 1824 // This is a trick. We double the stack slots so we can claim
aoqi@6880 1825 // the oops in the caller's frame. Since we are sure to have
aoqi@6880 1826 // more args than the caller doubling is enough to make
aoqi@6880 1827 // sure we can capture all the incoming oop args from the
aoqi@6880 1828 // caller.
aoqi@6880 1829 //
aoqi@6880 1830 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
aoqi@1 1831
aoqi@9459 1832 // Mark location of fp (someday)
aoqi@9459 1833 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp));
aoqi@1 1834
aoqi@1 1835 #ifdef ASSERT
aoqi@1 1836 bool reg_destroyed[RegisterImpl::number_of_registers];
aoqi@1 1837 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
aoqi@1 1838 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
aoqi@1 1839 reg_destroyed[r] = false;
aoqi@1 1840 }
aoqi@1 1841 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
aoqi@1 1842 freg_destroyed[f] = false;
aoqi@1 1843 }
aoqi@1 1844
aoqi@1 1845 #endif /* ASSERT */
aoqi@1 1846
aoqi@1 1847 // This may iterate in two different directions depending on the
aoqi@1 1848 // kind of native it is. The reason is that for regular JNI natives
aoqi@1 1849 // the incoming and outgoing registers are offset upwards and for
aoqi@1 1850 // critical natives they are offset down.
aoqi@1 1851 GrowableArray<int> arg_order(2 * total_in_args);
aoqi@1 1852 VMRegPair tmp_vmreg;
aoqi@1 1853 tmp_vmreg.set1(T8->as_VMReg());
aoqi@1 1854
aoqi@1 1855 if (!is_critical_native) {
aoqi@1 1856 for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
aoqi@1 1857 arg_order.push(i);
aoqi@1 1858 arg_order.push(c_arg);
aoqi@1 1859 }
aoqi@1 1860 } else {
aoqi@1 1861 // Compute a valid move order, using tmp_vmreg to break any cycles
aoqi@1 1862 __ stop("generate_native_wrapper in sharedRuntime <2>");
aoqi@9459 1863 //TODO:Fu
aoqi@9459 1864 // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
aoqi@1 1865 }
aoqi@1 1866
aoqi@1 1867 int temploc = -1;
aoqi@1 1868 for (int ai = 0; ai < arg_order.length(); ai += 2) {
aoqi@1 1869 int i = arg_order.at(ai);
aoqi@1 1870 int c_arg = arg_order.at(ai + 1);
aoqi@1 1871 __ block_comment(err_msg("move %d -> %d", i, c_arg));
aoqi@1 1872 if (c_arg == -1) {
aoqi@1 1873 assert(is_critical_native, "should only be required for critical natives");
aoqi@1 1874 // This arg needs to be moved to a temporary
aoqi@1 1875 __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
aoqi@1 1876 in_regs[i] = tmp_vmreg;
aoqi@1 1877 temploc = i;
aoqi@1 1878 continue;
aoqi@1 1879 } else if (i == -1) {
aoqi@1 1880 assert(is_critical_native, "should only be required for critical natives");
aoqi@1 1881 // Read from the temporary location
aoqi@1 1882 assert(temploc != -1, "must be valid");
aoqi@1 1883 i = temploc;
aoqi@1 1884 temploc = -1;
aoqi@1 1885 }
aoqi@1 1886 #ifdef ASSERT
aoqi@1 1887 if (in_regs[i].first()->is_Register()) {
aoqi@1 1888 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
aoqi@1 1889 } else if (in_regs[i].first()->is_FloatRegister()) {
aoqi@1 1890 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
aoqi@1 1891 }
aoqi@1 1892 if (out_regs[c_arg].first()->is_Register()) {
aoqi@1 1893 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
aoqi@1 1894 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
aoqi@1 1895 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
aoqi@1 1896 }
aoqi@1 1897 #endif /* ASSERT */
aoqi@1 1898 switch (in_sig_bt[i]) {
aoqi@1 1899 case T_ARRAY:
aoqi@1 1900 if (is_critical_native) {
aoqi@9459 1901 __ stop("generate_native_wrapper in sharedRuntime <2>");
aoqi@9459 1902 //TODO:Fu
aoqi@9459 1903 // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
aoqi@1 1904 c_arg++;
aoqi@1 1905 #ifdef ASSERT
aoqi@1 1906 if (out_regs[c_arg].first()->is_Register()) {
aoqi@1 1907 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
aoqi@1 1908 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
aoqi@1 1909 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
aoqi@1 1910 }
aoqi@1 1911 #endif
aoqi@1 1912 break;
aoqi@1 1913 }
aoqi@1 1914 case T_OBJECT:
aoqi@1 1915 assert(!is_critical_native, "no oop arguments");
aoqi@1 1916 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
aoqi@1 1917 ((i == 0) && (!is_static)),
aoqi@1 1918 &receiver_offset);
aoqi@1 1919 break;
aoqi@1 1920 case T_VOID:
aoqi@1 1921 break;
aoqi@1 1922
aoqi@1 1923 case T_FLOAT:
aoqi@1 1924 float_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1 1925 break;
aoqi@1 1926
aoqi@1 1927 case T_DOUBLE:
aoqi@1 1928 assert( i + 1 < total_in_args &&
aoqi@1 1929 in_sig_bt[i + 1] == T_VOID &&
aoqi@1 1930 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
aoqi@1 1931 double_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1 1932 break;
aoqi@1 1933
aoqi@1 1934 case T_LONG :
aoqi@1 1935 long_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1 1936 break;
aoqi@1 1937
aoqi@1 1938 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
aoqi@1 1939
aoqi@1 1940 default:
aoqi@1 1941 simple_move32(masm, in_regs[i], out_regs[c_arg]);
aoqi@1 1942 }
aoqi@1 1943 }
aoqi@1 1944
aoqi@1 1945 // point c_arg at the first arg that is already loaded in case we
aoqi@1 1946 // need to spill before we call out
aoqi@9459 1947 c_arg = total_c_args - total_in_args;
aoqi@9459 1948 // Pre-load a static method's oop. Used both by locking code and
aoqi@6880 1949 // the normal JNI call code.
aoqi@6880 1950
aoqi@6880 1951 __ move(oop_handle_reg, A1);
aoqi@6880 1952
aoqi@6880 1953 if (method->is_static() && !is_critical_native) {
aoqi@6880 1954
aoqi@6880 1955 // load oop into a register
aoqi@6880 1956 int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
aoqi@6880 1957 (method->method_holder())->java_mirror()));
aoqi@6880 1958
aoqi@6880 1959
aoqi@6880 1960 RelocationHolder rspec = oop_Relocation::spec(oop_index);
aoqi@6880 1961 __ relocate(rspec);
aoqi@6880 1962 __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
aoqi@6880 1963 // Now handlize the static class mirror it's known not-null.
aoqi@6880 1964 __ sd( oop_handle_reg, SP, klass_offset);
aoqi@6880 1965 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
aoqi@6880 1966
aoqi@6880 1967 // Now get the handle
aoqi@6880 1968 __ lea(oop_handle_reg, Address(SP, klass_offset));
aoqi@6880 1969 // store the klass handle as second argument
aoqi@6880 1970 __ move(A1, oop_handle_reg);
aoqi@6880 1971 // and protect the arg if we must spill
aoqi@6880 1972 c_arg--;
aoqi@6880 1973 }
aoqi@6880 1974
aoqi@1 1975 // Change state to native (we save the return address in the thread, since it might not
aoqi@1 1976 // be pushed on the stack when we do a stack traversal). It is enough that the pc()
aoqi@1 1977 // points into the right code segment. It does not have to be the correct return pc.
aoqi@1 1978 // We use the same pc/oopMap repeatedly when we call out
aoqi@1 1979
aoqi@6880 1980 intptr_t the_pc = (intptr_t) __ pc();
aoqi@6880 1981 oop_maps->add_gc_map(the_pc - start, map);
aoqi@6880 1982
aoqi@6880 1983 __ set_last_Java_frame(SP, noreg, NULL);
aoqi@6880 1984 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 1985 {
aoqi@6880 1986 intptr_t save_pc = (intptr_t)the_pc ;
aoqi@6880 1987 __ patchable_set48(AT, save_pc);
aoqi@6880 1988 }
aoqi@6880 1989 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@6880 1990
aoqi@6880 1991
aoqi@6880 1992 // We have all of the arguments setup at this point. We must not touch any register
aoqi@6880 1993 // argument registers at this point (what if we save/restore them there are no oop?
aoqi@6880 1994 {
aoqi@6880 1995 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
aoqi@6880 1996 int metadata_index = __ oop_recorder()->find_index(method());
aoqi@6880 1997 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
aoqi@6880 1998 __ relocate(rspec);
aoqi@6880 1999 __ patchable_set48(AT, (long)(method()));
aoqi@6880 2000
aoqi@6880 2001 __ call_VM_leaf(
aoqi@6880 2002 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
aoqi@6880 2003 thread, AT);
aoqi@6880 2004
aoqi@6880 2005 }
aoqi@6880 2006
aoqi@6880 2007 // These are register definitions we need for locking/unlocking
aoqi@9459 2008 const Register swap_reg = T8; // Must use T8 for cmpxchg instruction
aoqi@6880 2009 const Register obj_reg = T9; // Will contain the oop
aoqi@6880 2010 //const Register lock_reg = T6; // Address of compiler lock object (BasicLock)
aoqi@6880 2011 const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock)
aoqi@6880 2012
aoqi@6880 2013
aoqi@6880 2014
aoqi@6880 2015 Label slow_path_lock;
aoqi@6880 2016 Label lock_done;
aoqi@6880 2017
aoqi@6880 2018 // Lock a synchronized method
aoqi@6880 2019 if (method->is_synchronized()) {
aoqi@6880 2020 assert(!is_critical_native, "unhandled");
aoqi@6880 2021
aoqi@6880 2022 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
aoqi@6880 2023
aoqi@6880 2024 // Get the handle (the 2nd argument)
aoqi@6880 2025 __ move(oop_handle_reg, A1);
aoqi@6880 2026
aoqi@6880 2027 // Get address of the box
aoqi@9459 2028 __ lea(lock_reg, Address(FP, lock_slot_fp_offset));
aoqi@6880 2029
aoqi@6880 2030 // Load the oop from the handle
aoqi@6880 2031 __ ld(obj_reg, oop_handle_reg, 0);
aoqi@6880 2032
aoqi@6880 2033 if (UseBiasedLocking) {
aoqi@6880 2034 // Note that oop_handle_reg is trashed during this call
aoqi@6880 2035 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock);
aoqi@6880 2036 }
aoqi@6880 2037
aoqi@9459 2038 // Load immediate 1 into swap_reg %T8
aoqi@6880 2039 __ move(swap_reg, 1);
aoqi@6880 2040
aoqi@6880 2041 __ ld(AT, obj_reg, 0);
aoqi@6880 2042 __ orr(swap_reg, swap_reg, AT);
aoqi@6880 2043
aoqi@6880 2044 __ sd( swap_reg, lock_reg, mark_word_offset);
aoqi@6880 2045 __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
aoqi@6880 2046 __ bne(AT, R0, lock_done);
aoqi@6880 2047 __ delayed()->nop();
aoqi@6880 2048 // Test if the oopMark is an obvious stack pointer, i.e.,
aoqi@6880 2049 // 1) (mark & 3) == 0, and
aoqi@9459 2050 // 2) sp <= mark < mark + os::pagesize()
aoqi@6880 2051 // These 3 tests can be done by evaluating the following
aoqi@9459 2052 // expression: ((mark - sp) & (3 - os::vm_page_size())),
aoqi@6880 2053 // assuming both stack pointer and pagesize have their
aoqi@6880 2054 // least significant 2 bits clear.
aoqi@9459 2055 // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg
aoqi@6880 2056
aoqi@8009 2057 __ dsub(swap_reg, swap_reg, SP);
aoqi@8009 2058 __ move(AT, 3 - os::vm_page_size());
aoqi@6880 2059 __ andr(swap_reg , swap_reg, AT);
aoqi@6880 2060 // Save the test result, for recursive case, the result is zero
aoqi@6880 2061 __ sd(swap_reg, lock_reg, mark_word_offset);
aoqi@8009 2062 __ bne(swap_reg, R0, slow_path_lock);
aoqi@6880 2063 __ delayed()->nop();
aoqi@6880 2064 // Slow path will re-enter here
aoqi@6880 2065 __ bind(lock_done);
aoqi@6880 2066
aoqi@6880 2067 if (UseBiasedLocking) {
aoqi@6880 2068 // Re-fetch oop_handle_reg as we trashed it above
aoqi@6880 2069 __ move(A1, oop_handle_reg);
aoqi@6880 2070 }
aoqi@6880 2071 }
aoqi@6880 2072
aoqi@6880 2073
aoqi@6880 2074 // Finally just about ready to make the JNI call
aoqi@6880 2075
aoqi@6880 2076
aoqi@6880 2077 // get JNIEnv* which is first argument to native
aoqi@1 2078 if (!is_critical_native) {
aoqi@8009 2079 __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
aoqi@1 2080 }
aoqi@1 2081
aoqi@6880 2082 // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
aoqi@9459 2083 // Load the second arguments into A1
aoqi@6880 2084 //__ ld(A1, SP , wordSize ); // klass
aoqi@6880 2085
aoqi@6880 2086 // Now set thread in native
aoqi@6880 2087 __ addi(AT, R0, _thread_in_native);
aoqi@6880 2088 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
aoqi@9459 2089 // do the call
aoqi@6880 2090 __ call(method->native_function(), relocInfo::runtime_call_type);
aoqi@6880 2091 __ delayed()->nop();
aoqi@6880 2092 // WARNING - on Windows Java Natives use pascal calling convention and pop the
aoqi@6880 2093 // arguments off of the stack. We could just re-adjust the stack pointer here
aoqi@6880 2094 // and continue to do SP relative addressing but we instead switch to FP
aoqi@6880 2095 // relative addressing.
aoqi@6880 2096
aoqi@6880 2097 // Unpack native results.
aoqi@6880 2098 switch (ret_type) {
aoqi@6880 2099 case T_BOOLEAN: __ c2bool(V0); break;
huangjia@9645 2100 case T_CHAR : __ andi(V0, V0, 0xFFFF); break;
aoqi@6880 2101 case T_BYTE : __ sign_extend_byte (V0); break;
aoqi@6880 2102 case T_SHORT : __ sign_extend_short(V0); break;
aoqi@6880 2103 case T_INT : // nothing to do break;
aoqi@6880 2104 case T_DOUBLE :
aoqi@6880 2105 case T_FLOAT :
aoqi@6880 2106 // Result is in st0 we'll save as needed
aoqi@6880 2107 break;
aoqi@6880 2108 case T_ARRAY: // Really a handle
aoqi@6880 2109 case T_OBJECT: // Really a handle
aoqi@6880 2110 break; // can't de-handlize until after safepoint check
aoqi@6880 2111 case T_VOID: break;
aoqi@6880 2112 case T_LONG: break;
aoqi@6880 2113 default : ShouldNotReachHere();
aoqi@6880 2114 }
aoqi@6880 2115 // Switch thread to "native transition" state before reading the synchronization state.
aoqi@6880 2116 // This additional state is necessary because reading and testing the synchronization
aoqi@6880 2117 // state is not atomic w.r.t. GC, as this scenario demonstrates:
aoqi@6880 2118 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
aoqi@6880 2119 // VM thread changes sync state to synchronizing and suspends threads for GC.
aoqi@6880 2120 // Thread A is resumed to finish this native method, but doesn't block here since it
aoqi@6880 2121 // didn't see any synchronization in progress, and escapes.
aoqi@6880 2122 __ addi(AT, R0, _thread_in_native_trans);
aoqi@6880 2123 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
aoqi@6880 2124
aoqi@8009 2125 //if(os::is_MP()) {}
aoqi@8009 2126
aoqi@1 2127 Label after_transition;
aoqi@1 2128
aoqi@6880 2129 // check for safepoint operation in progress and/or pending suspend requests
aoqi@8009 2130 {
aoqi@8009 2131 Label Continue;
aoqi@6880 2132 __ li(AT, SafepointSynchronize::address_of_state());
aoqi@6880 2133 __ lw(A0, AT, 0);
aoqi@6880 2134 __ addi(AT, A0, -SafepointSynchronize::_not_synchronized);
aoqi@6880 2135 Label L;
huangjia@9645 2136 __ bne(AT, R0, L);
aoqi@6880 2137 __ delayed()->nop();
aoqi@6880 2138 __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset()));
aoqi@6880 2139 __ beq(AT, R0, Continue);
aoqi@6880 2140 __ delayed()->nop();
aoqi@6880 2141 __ bind(L);
aoqi@6880 2142
aoqi@6880 2143 // Don't use call_VM as it will see a possible pending exception and forward it
aoqi@6880 2144 // and never return here preventing us from clearing _last_native_pc down below.
aoqi@6880 2145 //
aoqi@6880 2146 save_native_result(masm, ret_type, stack_slots);
aoqi@8009 2147 __ move(A0, thread);
aoqi@8009 2148 __ addi(SP, SP, -wordSize);
aoqi@21 2149 __ push(S2);
aoqi@21 2150 __ move(AT, -(StackAlignmentInBytes));
aoqi@21 2151 __ move(S2, SP); // use S2 as a sender SP holder
aoqi@21 2152 __ andr(SP, SP, AT); // align stack as required by ABI
aoqi@1 2153 if (!is_critical_native) {
aoqi@1 2154 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
aoqi@6880 2155 __ delayed()->nop();
aoqi@1 2156 } else {
aoqi@1 2157 __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
aoqi@6880 2158 __ delayed()->nop();
aoqi@1 2159 }
aoqi@21 2160 __ move(SP, S2); // use S2 as a sender SP holder
aoqi@21 2161 __ pop(S2);
huangjia@9645 2162 __ addi(SP, SP, wordSize);
aoqi@6880 2163 //add for compressedoops
aoqi@6880 2164 __ reinit_heapbase();
aoqi@6880 2165 // Restore any method result value
aoqi@6880 2166 restore_native_result(masm, ret_type, stack_slots);
aoqi@1 2167
aoqi@1 2168 if (is_critical_native) {
aoqi@1 2169 // The call above performed the transition to thread_in_Java so
aoqi@1 2170 // skip the transition logic below.
aoqi@1 2171 __ beq(R0, R0, after_transition);
aoqi@6880 2172 __ delayed()->nop();
aoqi@1 2173 }
aoqi@1 2174
aoqi@6880 2175 __ bind(Continue);
aoqi@6880 2176 }
aoqi@6880 2177
aoqi@6880 2178 // change thread state
aoqi@6880 2179 __ addi(AT, R0, _thread_in_Java);
aoqi@6880 2180 __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));
aoqi@1 2181 __ bind(after_transition);
aoqi@6880 2182 Label reguard;
aoqi@6880 2183 Label reguard_done;
aoqi@6880 2184 __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset()));
aoqi@6880 2185 __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled);
aoqi@6880 2186 __ beq(AT, R0, reguard);
aoqi@6880 2187 __ delayed()->nop();
aoqi@6880 2188 // slow path reguard re-enters here
aoqi@6880 2189 __ bind(reguard_done);
aoqi@6880 2190
aoqi@6880 2191 // Handle possible exception (will unlock if necessary)
aoqi@6880 2192
aoqi@6880 2193 // native result if any is live
aoqi@6880 2194
aoqi@6880 2195 // Unlock
aoqi@6880 2196 Label slow_path_unlock;
aoqi@6880 2197 Label unlock_done;
aoqi@6880 2198 if (method->is_synchronized()) {
aoqi@6880 2199
aoqi@6880 2200 Label done;
aoqi@6880 2201
aoqi@6880 2202 // Get locked oop from the handle we passed to jni
aoqi@6880 2203 __ ld( obj_reg, oop_handle_reg, 0);
aoqi@6880 2204 if (UseBiasedLocking) {
aoqi@6880 2205 __ biased_locking_exit(obj_reg, T8, done);
aoqi@6880 2206
aoqi@6880 2207 }
aoqi@6880 2208
aoqi@6880 2209 // Simple recursive lock?
aoqi@6880 2210
aoqi@9459 2211 __ ld(AT, FP, lock_slot_fp_offset);
aoqi@6880 2212 __ beq(AT, R0, done);
aoqi@6880 2213 __ delayed()->nop();
aoqi@9459 2214 // Must save FSF if it is live now because cmpxchg must use it
aoqi@6880 2215 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
aoqi@6880 2216 save_native_result(masm, ret_type, stack_slots);
aoqi@6880 2217 }
aoqi@6880 2218
aoqi@6880 2219 // get old displaced header
aoqi@9459 2220 __ ld (T8, FP, lock_slot_fp_offset);
aoqi@6880 2221 // get address of the stack lock
aoqi@9459 2222 __ addi (c_rarg0, FP, lock_slot_fp_offset);
aoqi@6880 2223 // Atomic swap old header if oop still contains the stack lock
aoqi@6880 2224 __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
aoqi@6880 2225
aoqi@6880 2226 __ beq(AT, R0, slow_path_unlock);
aoqi@6880 2227 __ delayed()->nop();
aoqi@6880 2228 // slow path re-enters here
aoqi@6880 2229 __ bind(unlock_done);
aoqi@6880 2230 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
aoqi@6880 2231 restore_native_result(masm, ret_type, stack_slots);
aoqi@6880 2232 }
aoqi@6880 2233
aoqi@6880 2234 __ bind(done);
aoqi@6880 2235
aoqi@6880 2236 }
aoqi@6880 2237 {
aoqi@6880 2238 SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
aoqi@6880 2239 // Tell dtrace about this method exit
aoqi@6880 2240 save_native_result(masm, ret_type, stack_slots);
aoqi@6880 2241 int metadata_index = __ oop_recorder()->find_index( (method()));
aoqi@6880 2242 RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
aoqi@6880 2243 __ relocate(rspec);
aoqi@6880 2244 __ patchable_set48(AT, (long)(method()));
aoqi@6880 2245
aoqi@6880 2246 __ call_VM_leaf(
aoqi@6880 2247 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
aoqi@6880 2248 thread, AT);
aoqi@6880 2249 restore_native_result(masm, ret_type, stack_slots);
aoqi@6880 2250 }
aoqi@6880 2251
aoqi@6880 2252 // We can finally stop using that last_Java_frame we setup ages ago
aoqi@6880 2253
fujie@9171 2254 __ reset_last_Java_frame(false);
aoqi@6880 2255
huangjia@9705 2256 // Unpack oop result, e.g. JNIHandles::resolve value.
aoqi@6880 2257 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
huangjia@9705 2258 __ resolve_jobject(V0, thread, T9);
aoqi@6880 2259 }
aoqi@1 2260
aoqi@1 2261 if (!is_critical_native) {
aoqi@8009 2262 // reset handle block
aoqi@8009 2263 __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
aoqi@8009 2264 __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes());
aoqi@1 2265 }
aoqi@1 2266
aoqi@1 2267 if (!is_critical_native) {
aoqi@6880 2268 // Any exception pending?
aoqi@6880 2269 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2270 __ bne(AT, R0, exception_pending);
aoqi@6880 2271 __ delayed()->nop();
aoqi@1 2272 }
aoqi@6880 2273 // no exception, we're almost done
aoqi@6880 2274
aoqi@6880 2275 // check that only result value is on FPU stack
aoqi@6880 2276 __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
aoqi@6880 2277
aoqi@1 2278 // Return
aoqi@1 2279 #ifndef OPT_THREAD
aoqi@6880 2280 __ get_thread(TREG);
aoqi@1 2281 #endif
aoqi@8009 2282 //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@6880 2283 __ leave();
aoqi@6880 2284
aoqi@6880 2285 __ jr(RA);
aoqi@6880 2286 __ delayed()->nop();
aoqi@6880 2287 // Unexpected paths are out of line and go here
aoqi@6880 2288 // Slow path locking & unlocking
aoqi@6880 2289 if (method->is_synchronized()) {
aoqi@6880 2290
aoqi@6880 2291 // BEGIN Slow path lock
aoqi@6880 2292 __ bind(slow_path_lock);
aoqi@6880 2293
aoqi@6880 2294 // protect the args we've loaded
aoqi@6880 2295 save_args(masm, total_c_args, c_arg, out_regs);
aoqi@6880 2296
aoqi@6880 2297 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
aoqi@6880 2298 // args are (oop obj, BasicLock* lock, JavaThread* thread)
aoqi@6880 2299
aoqi@6880 2300 __ move(A0, obj_reg);
aoqi@6880 2301 __ move(A1, lock_reg);
aoqi@6880 2302 __ move(A2, thread);
aoqi@6880 2303 __ addi(SP, SP, - 3*wordSize);
aoqi@6880 2304
aoqi@6880 2305 __ move(AT, -(StackAlignmentInBytes));
aoqi@6880 2306 __ move(S2, SP); // use S2 as a sender SP holder
aoqi@6880 2307 __ andr(SP, SP, AT); // align stack as required by ABI
aoqi@6880 2308
aoqi@6880 2309 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
aoqi@6880 2310 __ delayed()->nop();
aoqi@6880 2311 __ move(SP, S2);
aoqi@6880 2312 __ addi(SP, SP, 3*wordSize);
aoqi@6880 2313
aoqi@6880 2314 restore_args(masm, total_c_args, c_arg, out_regs);
aoqi@6880 2315
aoqi@6880 2316 #ifdef ASSERT
aoqi@6880 2317 { Label L;
aoqi@6880 2318 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2319 __ beq(AT, R0, L);
aoqi@6880 2320 __ delayed()->nop();
aoqi@6880 2321 __ stop("no pending exception allowed on exit from monitorenter");
aoqi@6880 2322 __ bind(L);
aoqi@6880 2323 }
aoqi@6880 2324 #endif
aoqi@6880 2325 __ b(lock_done);
aoqi@6880 2326 __ delayed()->nop();
aoqi@6880 2327 // END Slow path lock
aoqi@6880 2328
aoqi@6880 2329 // BEGIN Slow path unlock
aoqi@6880 2330 __ bind(slow_path_unlock);
aoqi@6880 2331
aoqi@6880 2332 // Slow path unlock
aoqi@6880 2333
aoqi@6880 2334 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
aoqi@6880 2335 save_native_result(masm, ret_type, stack_slots);
aoqi@6880 2336 }
aoqi@6880 2337 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
aoqi@6880 2338
aoqi@6880 2339 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2340 __ push(AT);
aoqi@6880 2341 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 2342
aoqi@1 2343 __ move(AT, -(StackAlignmentInBytes));
aoqi@1 2344 __ move(S2, SP); // use S2 as a sender SP holder
aoqi@1 2345 __ andr(SP, SP, AT); // align stack as required by ABI
aoqi@1 2346
aoqi@6880 2347 // should be a peal
aoqi@6880 2348 // +wordSize because of the push above
aoqi@9459 2349 __ addi(A1, FP, lock_slot_fp_offset);
aoqi@6880 2350
aoqi@6880 2351 __ move(A0, obj_reg);
aoqi@6880 2352 __ addi(SP,SP, -2*wordSize);
aoqi@6880 2353 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
aoqi@6880 2354 relocInfo::runtime_call_type);
aoqi@6880 2355 __ delayed()->nop();
huangjia@9645 2356 __ addi(SP, SP, 2*wordSize);
aoqi@1 2357 __ move(SP, S2);
aoqi@6880 2358 //add for compressedoops
aoqi@6880 2359 __ reinit_heapbase();
aoqi@1 2360 #ifdef ASSERT
aoqi@6880 2361 {
aoqi@6880 2362 Label L;
aoqi@6880 2363 __ lw( AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2364 __ beq(AT, R0, L);
aoqi@6880 2365 __ delayed()->nop();
aoqi@6880 2366 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
aoqi@6880 2367 __ bind(L);
aoqi@6880 2368 }
aoqi@1 2369 #endif /* ASSERT */
aoqi@1 2370
aoqi@6880 2371 __ pop(AT);
aoqi@6880 2372 __ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 2373 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
aoqi@6880 2374 restore_native_result(masm, ret_type, stack_slots);
aoqi@1 2375 }
aoqi@6880 2376 __ b(unlock_done);
aoqi@6880 2377 __ delayed()->nop();
aoqi@6880 2378 // END Slow path unlock
aoqi@6880 2379
aoqi@6880 2380 }
aoqi@6880 2381
aoqi@6880 2382 // SLOW PATH Reguard the stack if needed
aoqi@6880 2383
aoqi@6880 2384 __ bind(reguard);
aoqi@6880 2385 save_native_result(masm, ret_type, stack_slots);
aoqi@6880 2386 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages),
aoqi@6880 2387 relocInfo::runtime_call_type);
aoqi@6880 2388 __ delayed()->nop();
aoqi@6880 2389 //add for compressedoops
aoqi@6880 2390 __ reinit_heapbase();
aoqi@6880 2391 restore_native_result(masm, ret_type, stack_slots);
aoqi@6880 2392 __ b(reguard_done);
aoqi@6880 2393 __ delayed()->nop();
aoqi@6880 2394
aoqi@6880 2395 // BEGIN EXCEPTION PROCESSING
aoqi@6880 2396 if (!is_critical_native) {
aoqi@6880 2397 // Forward the exception
aoqi@6880 2398 __ bind(exception_pending);
aoqi@6880 2399
aoqi@6880 2400 // remove possible return value from FPU register stack
aoqi@6880 2401 __ empty_FPU_stack();
aoqi@6880 2402
aoqi@6880 2403 // pop our frame
aoqi@8009 2404 // forward_exception_entry needs the return address on the stack
aoqi@8009 2405 __ addiu(SP, FP, wordSize);
aoqi@6880 2406 __ ld(FP, SP, (-1) * wordSize);
aoqi@6880 2407
aoqi@6880 2408 // and forward the exception
aoqi@6880 2409 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
aoqi@6880 2410 __ delayed()->nop();
aoqi@6880 2411 }
aoqi@6880 2412 __ flush();
aoqi@6880 2413
aoqi@6880 2414 nmethod *nm = nmethod::new_native_nmethod(method,
aoqi@6880 2415 compile_id,
aoqi@6880 2416 masm->code(),
aoqi@6880 2417 vep_offset,
aoqi@6880 2418 frame_complete,
aoqi@6880 2419 stack_slots / VMRegImpl::slots_per_word,
aoqi@6880 2420 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
aoqi@6880 2421 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
aoqi@6880 2422 oop_maps);
aoqi@1 2423
aoqi@1 2424 if (is_critical_native) {
aoqi@1 2425 nm->set_lazy_critical_native(true);
aoqi@1 2426 }
aoqi@6880 2427
aoqi@6880 2428 return nm;
aoqi@1 2429
aoqi@1 2430 }
aoqi@1 2431
aoqi@1 2432 #ifdef HAVE_DTRACE_H
aoqi@1 2433 // ---------------------------------------------------------------------------
aoqi@1 2434 // Generate a dtrace nmethod for a given signature. The method takes arguments
aoqi@1 2435 // in the Java compiled code convention, marshals them to the native
aoqi@1 2436 // abi and then leaves nops at the position you would expect to call a native
aoqi@1 2437 // function. When the probe is enabled the nops are replaced with a trap
aoqi@1 2438 // instruction that dtrace inserts and the trace will cause a notification
aoqi@1 2439 // to dtrace.
aoqi@1 2440 //
aoqi@1 2441 // The probes are only able to take primitive types and java/lang/String as
aoqi@1 2442 // arguments. No other java types are allowed. Strings are converted to utf8
aoqi@1 2443 // strings so that from dtrace point of view java strings are converted to C
aoqi@1 2444 // strings. There is an arbitrary fixed limit on the total space that a method
aoqi@1 2445 // can use for converting the strings. (256 chars per string in the signature).
aoqi@1 2446 // So any java string larger than this is truncated.
aoqi@1 2447
aoqi@1 2448 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
aoqi@1 2449 static bool offsets_initialized = false;
aoqi@1 2450
aoqi@1 2451 static VMRegPair reg64_to_VMRegPair(Register r) {
aoqi@1 2452 VMRegPair ret;
aoqi@1 2453 if (wordSize == 8) {
aoqi@1 2454 ret.set2(r->as_VMReg());
aoqi@1 2455 } else {
aoqi@1 2456 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
aoqi@1 2457 }
aoqi@1 2458 return ret;
aoqi@1 2459 }
aoqi@1 2460
aoqi@1 2461
aoqi@6880 2462 nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
aoqi@6880 2463 methodHandle method) {
aoqi@1 2464
aoqi@1 2465
aoqi@1 2466 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
aoqi@1 2467 // be single threaded in this method.
aoqi@1 2468 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
aoqi@1 2469
aoqi@1 2470 // Fill in the signature array, for the calling-convention call.
aoqi@1 2471 int total_args_passed = method->size_of_parameters();
aoqi@1 2472
aoqi@1 2473 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
aoqi@1 2474 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
aoqi@1 2475
aoqi@1 2476 // The signature we are going to use for the trap that dtrace will see
aoqi@1 2477 // java/lang/String is converted. We drop "this" and any other object
aoqi@1 2478 // is converted to NULL. (A one-slot java/lang/Long object reference
aoqi@1 2479 // is converted to a two-slot long, which is why we double the allocation).
aoqi@1 2480 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
aoqi@1 2481 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
aoqi@1 2482
aoqi@1 2483 int i=0;
aoqi@1 2484 int total_strings = 0;
aoqi@1 2485 int first_arg_to_pass = 0;
aoqi@1 2486 int total_c_args = 0;
aoqi@1 2487
aoqi@1 2488 // Skip the receiver as dtrace doesn't want to see it
aoqi@1 2489 if( !method->is_static() ) {
aoqi@1 2490 in_sig_bt[i++] = T_OBJECT;
aoqi@1 2491 first_arg_to_pass = 1;
aoqi@1 2492 }
aoqi@1 2493
aoqi@1 2494 SignatureStream ss(method->signature());
aoqi@1 2495 for ( ; !ss.at_return_type(); ss.next()) {
aoqi@1 2496 BasicType bt = ss.type();
aoqi@1 2497 in_sig_bt[i++] = bt; // Collect remaining bits of signature
aoqi@1 2498 out_sig_bt[total_c_args++] = bt;
aoqi@1 2499 if( bt == T_OBJECT) {
aoqi@1 2500 symbolOop s = ss.as_symbol_or_null();
aoqi@1 2501 if (s == vmSymbols::java_lang_String()) {
aoqi@1 2502 total_strings++;
aoqi@1 2503 out_sig_bt[total_c_args-1] = T_ADDRESS;
aoqi@1 2504 } else if (s == vmSymbols::java_lang_Boolean() ||
aoqi@1 2505 s == vmSymbols::java_lang_Byte()) {
aoqi@1 2506 out_sig_bt[total_c_args-1] = T_BYTE;
aoqi@1 2507 } else if (s == vmSymbols::java_lang_Character() ||
aoqi@1 2508 s == vmSymbols::java_lang_Short()) {
aoqi@1 2509 out_sig_bt[total_c_args-1] = T_SHORT;
aoqi@1 2510 } else if (s == vmSymbols::java_lang_Integer() ||
aoqi@1 2511 s == vmSymbols::java_lang_Float()) {
aoqi@1 2512 out_sig_bt[total_c_args-1] = T_INT;
aoqi@1 2513 } else if (s == vmSymbols::java_lang_Long() ||
aoqi@1 2514 s == vmSymbols::java_lang_Double()) {
aoqi@1 2515 out_sig_bt[total_c_args-1] = T_LONG;
aoqi@1 2516 out_sig_bt[total_c_args++] = T_VOID;
aoqi@1 2517 }
aoqi@1 2518 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
aoqi@1 2519 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
aoqi@1 2520 // We convert double to long
aoqi@1 2521 out_sig_bt[total_c_args-1] = T_LONG;
aoqi@1 2522 out_sig_bt[total_c_args++] = T_VOID;
aoqi@1 2523 } else if ( bt == T_FLOAT) {
aoqi@1 2524 // We convert float to int
aoqi@1 2525 out_sig_bt[total_c_args-1] = T_INT;
aoqi@1 2526 }
aoqi@1 2527 }
aoqi@1 2528
aoqi@1 2529 assert(i==total_args_passed, "validly parsed signature");
aoqi@1 2530
aoqi@1 2531 // Now get the compiled-Java layout as input arguments
aoqi@1 2532 int comp_args_on_stack;
aoqi@1 2533 comp_args_on_stack = SharedRuntime::java_calling_convention(
aoqi@1 2534 in_sig_bt, in_regs, total_args_passed, false);
aoqi@1 2535
aoqi@1 2536 // We have received a description of where all the java arg are located
aoqi@1 2537 // on entry to the wrapper. We need to convert these args to where
aoqi@1 2538 // a native (non-jni) function would expect them. To figure out
aoqi@1 2539 // where they go we convert the java signature to a C signature and remove
aoqi@1 2540 // T_VOID for any long/double we might have received.
aoqi@1 2541
aoqi@1 2542
aoqi@1 2543 // Now figure out where the args must be stored and how much stack space
aoqi@1 2544 // they require (neglecting out_preserve_stack_slots but space for storing
aoqi@1 2545 // the 1st six register arguments). It's weird see int_stk_helper.
aoqi@6880 2546
aoqi@1 2547 int out_arg_slots;
aoqi@1 2548 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
aoqi@1 2549
aoqi@1 2550 // Calculate the total number of stack slots we will need.
aoqi@1 2551
aoqi@1 2552 // First count the abi requirement plus all of the outgoing args
aoqi@1 2553 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
aoqi@1 2554
aoqi@1 2555 // Plus a temp for possible conversion of float/double/long register args
aoqi@1 2556
aoqi@1 2557 int conversion_temp = stack_slots;
aoqi@1 2558 stack_slots += 2;
aoqi@1 2559
aoqi@1 2560
aoqi@1 2561 // Now space for the string(s) we must convert
aoqi@1 2562
aoqi@1 2563 int string_locs = stack_slots;
aoqi@1 2564 stack_slots += total_strings *
aoqi@1 2565 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
aoqi@1 2566
aoqi@1 2567 // Ok The space we have allocated will look like:
aoqi@1 2568 //
aoqi@1 2569 //
aoqi@1 2570 // FP-> | |
aoqi@1 2571 // |---------------------|
aoqi@1 2572 // | string[n] |
aoqi@1 2573 // |---------------------| <- string_locs[n]
aoqi@1 2574 // | string[n-1] |
aoqi@1 2575 // |---------------------| <- string_locs[n-1]
aoqi@1 2576 // | ... |
aoqi@1 2577 // | ... |
aoqi@1 2578 // |---------------------| <- string_locs[1]
aoqi@1 2579 // | string[0] |
aoqi@1 2580 // |---------------------| <- string_locs[0]
aoqi@1 2581 // | temp |
aoqi@1 2582 // |---------------------| <- conversion_temp
aoqi@1 2583 // | outbound memory |
aoqi@1 2584 // | based arguments |
aoqi@1 2585 // | |
aoqi@1 2586 // |---------------------|
aoqi@1 2587 // | |
aoqi@1 2588 // SP-> | out_preserved_slots |
aoqi@1 2589 //
aoqi@1 2590 //
aoqi@1 2591
aoqi@1 2592 // Now compute actual number of stack words we need rounding to make
aoqi@1 2593 // stack properly aligned.
aoqi@1 2594 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
aoqi@1 2595
aoqi@1 2596 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
aoqi@1 2597
aoqi@1 2598 intptr_t start = (intptr_t)__ pc();
aoqi@1 2599
aoqi@1 2600 // First thing make an ic check to see if we should even be here
aoqi@1 2601
aoqi@1 2602 {
aoqi@1 2603 Label L;
aoqi@1 2604 const Register temp_reg = G3_scratch;
aoqi@1 2605 Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
aoqi@1 2606 __ verify_oop(O0);
aoqi@1 2607 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
aoqi@1 2608 __ cmp(temp_reg, G5_inline_cache_reg);
aoqi@1 2609 __ brx(Assembler::equal, true, Assembler::pt, L);
aoqi@1 2610 __ delayed()->nop();
aoqi@1 2611
aoqi@1 2612 __ jump_to(ic_miss, 0);
aoqi@1 2613 __ delayed()->nop();
aoqi@1 2614 __ align(CodeEntryAlignment);
aoqi@1 2615 __ bind(L);
aoqi@1 2616 }
aoqi@1 2617
aoqi@1 2618 int vep_offset = ((intptr_t)__ pc()) - start;
aoqi@1 2619
aoqi@1 2620
aoqi@1 2621 // The instruction at the verified entry point must be 5 bytes or longer
aoqi@1 2622 // because it can be patched on the fly by make_non_entrant. The stack bang
aoqi@1 2623 // instruction fits that requirement.
aoqi@1 2624
aoqi@1 2625 // Generate stack overflow check before creating frame
aoqi@1 2626 __ generate_stack_overflow_check(stack_size);
aoqi@1 2627
aoqi@1 2628 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
aoqi@1 2629 "valid size for make_non_entrant");
aoqi@1 2630
aoqi@1 2631 // Generate a new frame for the wrapper.
aoqi@1 2632 __ save(SP, -stack_size, SP);
aoqi@1 2633
aoqi@1 2634 // Frame is now completed as far as size and linkage.
aoqi@1 2635
aoqi@1 2636 int frame_complete = ((intptr_t)__ pc()) - start;
aoqi@1 2637
aoqi@1 2638 #ifdef ASSERT
aoqi@1 2639 bool reg_destroyed[RegisterImpl::number_of_registers];
aoqi@1 2640 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
aoqi@1 2641 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
aoqi@1 2642 reg_destroyed[r] = false;
aoqi@1 2643 }
aoqi@1 2644 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
aoqi@1 2645 freg_destroyed[f] = false;
aoqi@1 2646 }
aoqi@1 2647
aoqi@1 2648 #endif /* ASSERT */
aoqi@1 2649
aoqi@1 2650 VMRegPair zero;
aoqi@1 2651 const Register g0 = G0; // without this we get a compiler warning (why??)
aoqi@1 2652 zero.set2(g0->as_VMReg());
aoqi@1 2653
aoqi@1 2654 int c_arg, j_arg;
aoqi@1 2655
aoqi@1 2656 Register conversion_off = noreg;
aoqi@1 2657
aoqi@1 2658 for (j_arg = first_arg_to_pass, c_arg = 0 ;
aoqi@1 2659 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
aoqi@1 2660
aoqi@1 2661 VMRegPair src = in_regs[j_arg];
aoqi@1 2662 VMRegPair dst = out_regs[c_arg];
aoqi@1 2663
aoqi@1 2664 #ifdef ASSERT
aoqi@1 2665 if (src.first()->is_Register()) {
aoqi@1 2666 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
aoqi@1 2667 } else if (src.first()->is_FloatRegister()) {
aoqi@1 2668 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
aoqi@1 2669 FloatRegisterImpl::S)], "ack!");
aoqi@1 2670 }
aoqi@1 2671 if (dst.first()->is_Register()) {
aoqi@1 2672 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
aoqi@1 2673 } else if (dst.first()->is_FloatRegister()) {
aoqi@1 2674 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
aoqi@1 2675 FloatRegisterImpl::S)] = true;
aoqi@1 2676 }
aoqi@1 2677 #endif /* ASSERT */
aoqi@1 2678
aoqi@1 2679 switch (in_sig_bt[j_arg]) {
aoqi@1 2680 case T_ARRAY:
aoqi@1 2681 case T_OBJECT:
aoqi@1 2682 {
aoqi@1 2683 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
aoqi@1 2684 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
aoqi@1 2685 // need to unbox a one-slot value
aoqi@1 2686 Register in_reg = L0;
aoqi@1 2687 Register tmp = L2;
aoqi@1 2688 if ( src.first()->is_reg() ) {
aoqi@1 2689 in_reg = src.first()->as_Register();
aoqi@1 2690 } else {
aoqi@1 2691 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
aoqi@1 2692 "must be");
aoqi@1 2693 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
aoqi@1 2694 }
aoqi@1 2695 // If the final destination is an acceptable register
aoqi@1 2696 if ( dst.first()->is_reg() ) {
aoqi@1 2697 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
aoqi@1 2698 tmp = dst.first()->as_Register();
aoqi@1 2699 }
aoqi@1 2700 }
aoqi@1 2701
aoqi@1 2702 Label skipUnbox;
aoqi@1 2703 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
aoqi@1 2704 __ mov(G0, tmp->successor());
aoqi@1 2705 }
aoqi@1 2706 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
aoqi@1 2707 __ delayed()->mov(G0, tmp);
aoqi@1 2708
aoqi@1 2709 BasicType bt = out_sig_bt[c_arg];
aoqi@1 2710 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
aoqi@1 2711 switch (bt) {
aoqi@1 2712 case T_BYTE:
aoqi@1 2713 __ ldub(in_reg, box_offset, tmp); break;
aoqi@1 2714 case T_SHORT:
aoqi@1 2715 __ lduh(in_reg, box_offset, tmp); break;
aoqi@1 2716 case T_INT:
aoqi@1 2717 __ ld(in_reg, box_offset, tmp); break;
aoqi@1 2718 case T_LONG:
aoqi@1 2719 __ ld_long(in_reg, box_offset, tmp); break;
aoqi@1 2720 default: ShouldNotReachHere();
aoqi@1 2721 }
aoqi@1 2722
aoqi@1 2723 __ bind(skipUnbox);
aoqi@1 2724 // If tmp wasn't final destination copy to final destination
aoqi@1 2725 if (tmp == L2) {
aoqi@1 2726 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
aoqi@1 2727 if (out_sig_bt[c_arg] == T_LONG) {
aoqi@1 2728 long_move(masm, tmp_as_VM, dst);
aoqi@1 2729 } else {
aoqi@1 2730 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
aoqi@1 2731 }
aoqi@1 2732 }
aoqi@1 2733 if (out_sig_bt[c_arg] == T_LONG) {
aoqi@1 2734 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
aoqi@1 2735 ++c_arg; // move over the T_VOID to keep the loop indices in sync
aoqi@1 2736 }
aoqi@1 2737 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
aoqi@1 2738 Register s =
aoqi@1 2739 src.first()->is_reg() ? src.first()->as_Register() : L2;
aoqi@1 2740 Register d =
aoqi@1 2741 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
aoqi@1 2742
aoqi@1 2743 // We store the oop now so that the conversion pass can reach
aoqi@1 2744 // while in the inner frame. This will be the only store if
aoqi@1 2745 // the oop is NULL.
aoqi@1 2746 if (s != L2) {
aoqi@1 2747 // src is register
aoqi@1 2748 if (d != L2) {
aoqi@1 2749 // dst is register
aoqi@1 2750 __ mov(s, d);
aoqi@1 2751 } else {
aoqi@1 2752 assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1 2753 STACK_BIAS), "must be");
aoqi@1 2754 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1 2755 }
aoqi@1 2756 } else {
aoqi@1 2757 // src not a register
aoqi@1 2758 assert(Assembler::is_simm13(reg2offset(src.first()) +
aoqi@1 2759 STACK_BIAS), "must be");
aoqi@1 2760 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
aoqi@1 2761 if (d == L2) {
aoqi@1 2762 assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1 2763 STACK_BIAS), "must be");
aoqi@1 2764 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1 2765 }
aoqi@1 2766 }
aoqi@1 2767 } else if (out_sig_bt[c_arg] != T_VOID) {
aoqi@1 2768 // Convert the arg to NULL
aoqi@1 2769 if (dst.first()->is_reg()) {
aoqi@1 2770 __ mov(G0, dst.first()->as_Register());
aoqi@1 2771 } else {
aoqi@1 2772 assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1 2773 STACK_BIAS), "must be");
aoqi@1 2774 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1 2775 }
aoqi@1 2776 }
aoqi@1 2777 }
aoqi@1 2778 break;
aoqi@1 2779 case T_VOID:
aoqi@1 2780 break;
aoqi@1 2781
aoqi@1 2782 case T_FLOAT:
aoqi@1 2783 if (src.first()->is_stack()) {
aoqi@1 2784 // Stack to stack/reg is simple
aoqi@1 2785 move32_64(masm, src, dst);
aoqi@1 2786 } else {
aoqi@1 2787 if (dst.first()->is_reg()) {
aoqi@1 2788 // freg -> reg
aoqi@1 2789 int off =
aoqi@1 2790 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1 2791 Register d = dst.first()->as_Register();
aoqi@1 2792 if (Assembler::is_simm13(off)) {
aoqi@1 2793 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1 2794 SP, off);
aoqi@1 2795 __ ld(SP, off, d);
aoqi@1 2796 } else {
aoqi@1 2797 if (conversion_off == noreg) {
aoqi@1 2798 __ set(off, L6);
aoqi@1 2799 conversion_off = L6;
aoqi@1 2800 }
aoqi@1 2801 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1 2802 SP, conversion_off);
aoqi@1 2803 __ ld(SP, conversion_off , d);
aoqi@1 2804 }
aoqi@1 2805 } else {
aoqi@1 2806 // freg -> mem
aoqi@1 2807 int off = STACK_BIAS + reg2offset(dst.first());
aoqi@1 2808 if (Assembler::is_simm13(off)) {
aoqi@1 2809 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1 2810 SP, off);
aoqi@1 2811 } else {
aoqi@1 2812 if (conversion_off == noreg) {
aoqi@1 2813 __ set(off, L6);
aoqi@1 2814 conversion_off = L6;
aoqi@1 2815 }
aoqi@1 2816 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1 2817 SP, conversion_off);
aoqi@1 2818 }
aoqi@1 2819 }
aoqi@1 2820 }
aoqi@1 2821 break;
aoqi@1 2822
aoqi@1 2823 case T_DOUBLE:
aoqi@1 2824 assert( j_arg + 1 < total_args_passed &&
aoqi@1 2825 in_sig_bt[j_arg + 1] == T_VOID &&
aoqi@1 2826 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
aoqi@1 2827 if (src.first()->is_stack()) {
aoqi@1 2828 // Stack to stack/reg is simple
aoqi@1 2829 long_move(masm, src, dst);
aoqi@1 2830 } else {
aoqi@1 2831 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
aoqi@1 2832
aoqi@1 2833 // Destination could be an odd reg on 32bit in which case
aoqi@1 2834 // we can't load direct to the destination.
aoqi@1 2835
aoqi@1 2836 if (!d->is_even() && wordSize == 4) {
aoqi@1 2837 d = L2;
aoqi@1 2838 }
aoqi@1 2839 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1 2840 if (Assembler::is_simm13(off)) {
aoqi@1 2841 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
aoqi@1 2842 SP, off);
aoqi@1 2843 __ ld_long(SP, off, d);
aoqi@1 2844 } else {
aoqi@1 2845 if (conversion_off == noreg) {
aoqi@1 2846 __ set(off, L6);
aoqi@1 2847 conversion_off = L6;
aoqi@1 2848 }
aoqi@1 2849 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
aoqi@1 2850 SP, conversion_off);
aoqi@1 2851 __ ld_long(SP, conversion_off, d);
aoqi@1 2852 }
aoqi@1 2853 if (d == L2) {
aoqi@1 2854 long_move(masm, reg64_to_VMRegPair(L2), dst);
aoqi@1 2855 }
aoqi@1 2856 }
aoqi@1 2857 break;
aoqi@1 2858
aoqi@1 2859 case T_LONG :
aoqi@1 2860 // 32bit can't do a split move of something like g1 -> O0, O1
aoqi@1 2861 // so use a memory temp
aoqi@1 2862 if (src.is_single_phys_reg() && wordSize == 4) {
aoqi@1 2863 Register tmp = L2;
aoqi@1 2864 if (dst.first()->is_reg() &&
aoqi@1 2865 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
aoqi@1 2866 tmp = dst.first()->as_Register();
aoqi@1 2867 }
aoqi@1 2868
aoqi@1 2869 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1 2870 if (Assembler::is_simm13(off)) {
aoqi@1 2871 __ stx(src.first()->as_Register(), SP, off);
aoqi@1 2872 __ ld_long(SP, off, tmp);
aoqi@1 2873 } else {
aoqi@1 2874 if (conversion_off == noreg) {
aoqi@1 2875 __ set(off, L6);
aoqi@1 2876 conversion_off = L6;
aoqi@1 2877 }
aoqi@1 2878 __ stx(src.first()->as_Register(), SP, conversion_off);
aoqi@1 2879 __ ld_long(SP, conversion_off, tmp);
aoqi@1 2880 }
aoqi@1 2881
aoqi@1 2882 if (tmp == L2) {
aoqi@1 2883 long_move(masm, reg64_to_VMRegPair(L2), dst);
aoqi@1 2884 }
aoqi@1 2885 } else {
aoqi@1 2886 long_move(masm, src, dst);
aoqi@1 2887 }
aoqi@1 2888 break;
aoqi@1 2889
aoqi@1 2890 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
aoqi@1 2891
aoqi@1 2892 default:
aoqi@1 2893 move32_64(masm, src, dst);
aoqi@1 2894 }
aoqi@1 2895 }
aoqi@1 2896
aoqi@1 2897
aoqi@1 2898 // If we have any strings we must store any register based arg to the stack
aoqi@1 2899 // This includes any still live xmm registers too.
aoqi@1 2900
aoqi@1 2901 if (total_strings > 0 ) {
aoqi@1 2902
aoqi@1 2903 // protect all the arg registers
aoqi@1 2904 __ save_frame(0);
aoqi@1 2905 __ mov(G2_thread, L7_thread_cache);
aoqi@1 2906 const Register L2_string_off = L2;
aoqi@1 2907
aoqi@1 2908 // Get first string offset
aoqi@1 2909 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
aoqi@1 2910
aoqi@1 2911 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
aoqi@1 2912 if (out_sig_bt[c_arg] == T_ADDRESS) {
aoqi@1 2913
aoqi@1 2914 VMRegPair dst = out_regs[c_arg];
aoqi@1 2915 const Register d = dst.first()->is_reg() ?
aoqi@1 2916 dst.first()->as_Register()->after_save() : noreg;
aoqi@1 2917
aoqi@1 2918 // It's a string the oop and it was already copied to the out arg
aoqi@1 2919 // position
aoqi@1 2920 if (d != noreg) {
aoqi@1 2921 __ mov(d, O0);
aoqi@1 2922 } else {
aoqi@1 2923 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
aoqi@1 2924 "must be");
aoqi@1 2925 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
aoqi@1 2926 }
aoqi@1 2927 Label skip;
aoqi@1 2928
aoqi@1 2929 __ br_null(O0, false, Assembler::pn, skip);
aoqi@1 2930 __ delayed()->add(FP, L2_string_off, O1);
aoqi@1 2931
aoqi@1 2932 if (d != noreg) {
aoqi@1 2933 __ mov(O1, d);
aoqi@1 2934 } else {
aoqi@1 2935 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
aoqi@1 2936 "must be");
aoqi@1 2937 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1 2938 }
aoqi@1 2939
aoqi@1 2940 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
aoqi@1 2941 relocInfo::runtime_call_type);
aoqi@1 2942 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
aoqi@1 2943
aoqi@1 2944 __ bind(skip);
aoqi@1 2945
aoqi@1 2946 }
aoqi@1 2947
aoqi@1 2948 }
aoqi@1 2949 __ mov(L7_thread_cache, G2_thread);
aoqi@1 2950 __ restore();
aoqi@1 2951
aoqi@1 2952 }
aoqi@1 2953
aoqi@1 2954
aoqi@1 2955 // Ok now we are done. Need to place the nop that dtrace wants in order to
aoqi@1 2956 // patch in the trap
aoqi@1 2957
aoqi@1 2958 int patch_offset = ((intptr_t)__ pc()) - start;
aoqi@1 2959
aoqi@1 2960 __ nop();
aoqi@1 2961
aoqi@1 2962
aoqi@1 2963 // Return
aoqi@1 2964
aoqi@1 2965 __ ret();
aoqi@1 2966 __ delayed()->restore();
aoqi@1 2967
aoqi@1 2968 __ flush();
aoqi@1 2969
aoqi@1 2970 nmethod *nm = nmethod::new_dtrace_nmethod(
aoqi@1 2971 method, masm->code(), vep_offset, patch_offset, frame_complete,
aoqi@1 2972 stack_slots / VMRegImpl::slots_per_word);
aoqi@1 2973 return nm;
aoqi@1 2974
aoqi@1 2975 }
aoqi@1 2976
aoqi@1 2977 #endif // HAVE_DTRACE_H
aoqi@1 2978
aoqi@1 2979 // Returns the size adjustment (in number of words) to apply to a c2i adapter
aoqi@1 2980 // activation during deoptimization.
// The value is the number of callee locals beyond the callee parameters,
// scaled by Interpreter::stackElementWords.
aoqi@1 2981 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
aoqi@6880 2982 return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
aoqi@1 2983 }
aoqi@1 2984
aoqi@1 2985 // "Top of Stack" slots that may be unused by the calling convention but must
aoqi@1 2986 // otherwise be preserved.
aoqi@1 2987 // On Intel these are not necessary and the value can be zero.
aoqi@1 2988 // On Sparc this describes the words reserved for storing a register window
aoqi@1 2989 // when an interrupt occurs.
// This port reserves no such slots: the function always returns 0.
aoqi@1 2990 uint SharedRuntime::out_preserve_stack_slots() {
aoqi@6880 2991 return 0;
aoqi@1 2992 }
aoqi@1 2993
aoqi@1 2994 //------------------------------generate_deopt_blob----------------------------
aoqi@1 2995 // Ought to generate an ideal graph & compile, but here's some MIPS ASM
aoqi@1 2996 // instead.
//
// Emits the deoptimization blob into a CodeBuffer and publishes it in
// _deopt_blob. The blob has four entry points: normal deopt (at the start of
// the blob), reexecute, exception, and exception-already-in-TLS (the exception
// entry falls through into it). Every path saves the live registers and
// records its Deoptimization::Unpack_* kind in `reason` before reaching
// `cont`. The shared tail calls Deoptimization::fetch_unroll_info, pops the
// deoptimized frame, pushes one skeletal frame per UnrollBlock entry plus a
// re-pushed self-frame, calls Deoptimization::unpack_frames and finally jumps
// through RA.
aoqi@1 2997 void SharedRuntime::generate_deopt_blob() {
aoqi@1 2998 // allocate space for the code
aoqi@1 2999 ResourceMark rm;
aoqi@1 3000 // setup code generation tools
aoqi@1 3001 //CodeBuffer buffer ("deopt_blob", 4000, 2048);
aoqi@1 3002 CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
aoqi@1 3003 MacroAssembler* masm = new MacroAssembler( & buffer);
aoqi@1 3004 int frame_size_in_words;
aoqi@1 3005 OopMap* map = NULL;
aoqi@1 3006 // Account for the extra args we place on the stack
aoqi@1 3007 // by the time we call fetch_unroll_info
aoqi@1 3008 const int additional_words = 2; // deopt kind, thread
aoqi@1 3009
aoqi@1 3010 OopMapSet *oop_maps = new OopMapSet();
aoqi@1 3011
aoqi@1 3012 address start = __ pc();
aoqi@1 3013 Label cont;
aoqi@1 3014 // we use S3 for DeOpt reason register
aoqi@1 3015 Register reason = S3;
aoqi@1 3016 // use TREG for thread register (the original note says S6 - presumably the same register; confirm)
aoqi@1 3017 Register thread = TREG;
aoqi@1 3018 // use S7 for fetch_unroll_info returned UnrollBlock
aoqi@1 3019 Register unroll = S7;
aoqi@1 3020 // Prolog for non exception case!
aoqi@1 3021 // Correct the return address we were given.
aoqi@6880 3022 //FIXME, return address is on the tos or Ra?
fujie@375 3023 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
aoqi@1 3024 // Save everything in sight.
aoqi@1 3025 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@1 3026 // Normal deoptimization
aoqi@1 3027 __ move(reason, Deoptimization::Unpack_deopt);
aoqi@1 3028 __ b(cont);
aoqi@1 3029 __ delayed()->nop();
aoqi@1 3030
aoqi@1 3031 int reexecute_offset = __ pc() - start;
aoqi@1 3032
aoqi@6880 3033 // Reexecute case
aoqi@6880 3034 // return address is the pc describes what bci to do re-execute at
aoqi@6880 3035
aoqi@6880 3036 // No need to update map as each call to save_live_registers will produce identical oopmap
aoqi@1 3037 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@6880 3038 __ move(reason, Deoptimization::Unpack_reexecute);
aoqi@1 3039 __ b(cont);
aoqi@1 3040 __ delayed()->nop();
aoqi@1 3041
aoqi@1 3042 int exception_offset = __ pc() - start;
aoqi@1 3043 // Prolog for exception case
aoqi@1 3044
aoqi@9228 3045 // all registers are dead at this entry point, except for V0 and
aoqi@9228 3046 // V1 which contain the exception oop and exception pc
aoqi@1 3047 // respectively. Set them in TLS and fall thru to the
aoqi@1 3048 // unpack_with_exception_in_tls entry point.
aoqi@6880 3049
aoqi@1 3050 __ get_thread(thread);
aoqi@6880 3051 __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1 3052 __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1 3053 int exception_in_tls_offset = __ pc() - start;
aoqi@1 3054 // new implementation because exception oop is now passed in JavaThread
aoqi@1 3055
aoqi@1 3056 // Prolog for exception case
aoqi@1 3057 // All registers must be preserved because they might be used by LinearScan
aoqi@1 3058 // Exception oop and throwing PC are passed in JavaThread
aoqi@1 3059 // tos: stack at point of call to method that threw the exception (i.e. only
aoqi@1 3060 // args are on the stack, no return address)
aoqi@1 3061
aoqi@6880 3062 // Return address will be patched later with the throwing pc. The correct value is not
aoqi@1 3063 // available now because loading it from memory would destroy registers.
aoqi@6880 3064 // Save everything in sight.
aoqi@1 3065 // No need to update map as each call to save_live_registers will produce identical oopmap
fujie@375 3066 __ addi(RA, RA, - (NativeCall::return_address_offset_long));
aoqi@1 3067 (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@1 3068
aoqi@1 3069 // Now it is safe to overwrite any register
aoqi@1 3070 // store the correct deoptimization type
aoqi@1 3071 __ move(reason, Deoptimization::Unpack_exception);
aoqi@6880 3072 // load throwing pc from JavaThread and patch it as the return address
aoqi@1 3073 // of the current frame. Then clear the field in JavaThread
aoqi@1 3074 __ get_thread(thread);
aoqi@1 3075 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1 3076 __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
aoqi@1 3077 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1 3078
aoqi@1 3079
aoqi@1 3080 #ifdef ASSERT
aoqi@1 3081 // verify that there is really an exception oop in JavaThread
aoqi@1 3082 __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1 3083 __ verify_oop(AT);
aoqi@1 3084 // verify that there is no pending exception
aoqi@1 3085 Label no_pending_exception;
aoqi@1 3086 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@6880 3087 __ beq(AT, R0, no_pending_exception);
aoqi@6880 3088 __ delayed()->nop();
aoqi@1 3089 __ stop("must not have pending exception here");
aoqi@1 3090 __ bind(no_pending_exception);
aoqi@1 3091 #endif
aoqi@1 3092 __ bind(cont);
aoqi@1 3093 // Compiled code leaves the floating point stack dirty, empty it.
aoqi@1 3094 __ empty_FPU_stack();
aoqi@1 3095
aoqi@1 3096
aoqi@1 3097 // Call C code. Need thread and this frame, but NOT official VM entry
aoqi@6880 3098 // crud. We cannot block on this call, no GC can happen.
aoqi@1 3099 #ifndef OPT_THREAD
aoqi@1 3100 __ get_thread(thread);
aoqi@1 3101 #endif
aoqi@1 3102
aoqi@1 3103 __ move(A0, thread);
aoqi@1 3104 __ addi(SP, SP, -additional_words * wordSize);
aoqi@1 3105
aoqi@1 3106 __ set_last_Java_frame(NOREG, NOREG, NULL);
aoqi@1 3107
aoqi@1 3108 // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on
aoqi@1 3109 // this call, no GC can happen. Call should capture return values.
aoqi@1 3110
aoqi@6880 3111 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 3112 {
// NOTE(review): the "+ 28" is a hard-coded byte distance; presumably it covers
// the sd, the call sequence and its delay slot emitted below so that save_pc
// equals the call's return address. Any change to those instructions must
// update it - confirm.
fujie@373 3113 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
fujie@368 3114 __ patchable_set48(AT, save_pc);
aoqi@1 3115 }
aoqi@1 3116 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1 3117
aoqi@1 3118 __ call((address)Deoptimization::fetch_unroll_info);
aoqi@1 3119 //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
aoqi@1 3120 __ delayed()->nop();
aoqi@1 3121 oop_maps->add_gc_map(__ pc() - start, map);
aoqi@1 3122 __ addiu(SP, SP, additional_words * wordSize);
aoqi@1 3123 __ get_thread(thread);
fujie@9171 3124 __ reset_last_Java_frame(false);
aoqi@1 3125
aoqi@1 3126 // Load UnrollBlock into S7
aoqi@1 3127 __ move(unroll, V0);
aoqi@1 3128
aoqi@1 3129
aoqi@1 3130 // Move the unpack kind to a safe place in the UnrollBlock because
aoqi@1 3131 // we are very short of registers
aoqi@1 3132
aoqi@1 3133 Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
aoqi@1 3134 __ sw(reason, unpack_kind);
aoqi@1 3135 // save the unpack_kind value
aoqi@1 3136 // Retrieve the possible live values (return values)
aoqi@1 3137 // All callee save registers representing jvm state
aoqi@1 3138 // are now in the vframeArray.
aoqi@1 3139
aoqi@1 3140 Label noException;
aoqi@1 3141 __ move(AT, Deoptimization::Unpack_exception);
aoqi@1 3142 __ bne(AT, reason, noException);// Was exception pending?
aoqi@1 3143 __ delayed()->nop();
aoqi@1 3144 __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1 3145 __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1 3146 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1 3147 __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@6880 3148
aoqi@1 3149 __ verify_oop(V0);
aoqi@1 3150
aoqi@1 3151 // Overwrite the result registers with the exception results.
aoqi@6880 3152 __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize);
aoqi@1 3153 __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
aoqi@6880 3154
aoqi@1 3155 __ bind(noException);
aoqi@1 3156
aoqi@1 3157
aoqi@1 3158 // Stack is back to only having register save data on the stack.
aoqi@1 3159 // Now restore the result registers. Everything else is either dead or captured
aoqi@1 3160 // in the vframeArray.
aoqi@1 3161
aoqi@1 3162 RegisterSaver::restore_result_registers(masm);
aoqi@1 3163 // All of the register save area has been popped of the stack. Only the
aoqi@1 3164 // return address remains.
aoqi@6880 3165 // Pop all the frames we must move/replace.
aoqi@1 3166 // Frame picture (youngest to oldest)
aoqi@1 3167 // 1: self-frame (no frame link)
aoqi@1 3168 // 2: deopting frame (no frame link)
aoqi@6880 3169 // 3: caller of deopting frame (could be compiled/interpreted).
aoqi@1 3170 //
aoqi@1 3171 // Note: by leaving the return address of self-frame on the stack
aoqi@1 3172 // and using the size of frame 2 to adjust the stack
aoqi@1 3173 // when we are done the return to frame 3 will still be on the stack.
aoqi@1 3174
aoqi@1 3175 // register for the sender's sp
aoqi@1 3176 Register sender_sp = Rsender;
aoqi@1 3177 // register for frame pcs
aoqi@1 3178 Register pcs = T0;
aoqi@1 3179 // register for frame sizes
aoqi@1 3180 Register sizes = T1;
aoqi@1 3181 // register for frame count
aoqi@1 3182 Register count = T3;
aoqi@6880 3183
aoqi@1 3184 // Pop deoptimized frame
aoqi@1 3185 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
aoqi@1 3186 __ add(SP, SP, AT);
aoqi@1 3187 // sp should be pointing at the return address to the caller (3)
aoqi@6880 3188
aoqi@1 3189 // Load array of frame pcs into pcs
aoqi@1 3190 __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
aoqi@1 3191 __ addi(SP, SP, wordSize); // trash the old pc
aoqi@1 3192 // Load array of frame sizes into sizes (T1)
aoqi@1 3193 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
aoqi@1 3194
aoqi@6880 3195
aoqi@1 3196
aoqi@1 3197 // Load count of frames into T3
aoqi@1 3198 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
aoqi@1 3199 // Pick up the initial fp we should save
aoqi@1 3200 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
aoqi@1 3201 // Now adjust the caller's stack to make up for the extra locals
aoqi@1 3202 // but record the original sp so that we can save it in the skeletal interpreter
aoqi@1 3203 // frame and the stack walking of interpreter_sender will get the unextended sp
aoqi@1 3204 // value and not the "real" sp value.
aoqi@1 3205 __ move(sender_sp, SP);
aoqi@1 3206 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
aoqi@1 3207 __ sub(SP, SP, AT);
aoqi@1 3208
aoqi@1 3209 // Push interpreter frames in a loop
// The listing below is presumably a disassembly of an earlier, faulty version
// of this loop: the marked lines used a 32-bit load and a 4-byte stride, where
// the code that follows uses ld and wordSize.
aoqi@9459 3210 //
aoqi@9459 3211 //Loop:
aoqi@9459 3212 // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld
aoqi@9459 3213 // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i]
aoqi@9459 3214 // 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16
aoqi@9459 3215 // 0x000000555bd82d24: daddi sp, sp, 0xfffffff0
aoqi@9459 3216 // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp
aoqi@9459 3217 // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at
aoqi@9459 3218 // 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp
aoqi@9459 3219 // 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2
aoqi@9459 3220 // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@9459 3221 // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
aoqi@9459 3222 // 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP);
aoqi@9459 3223 // 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
aoqi@9459 3224 // 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4
aoqi@9459 3225 // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
aoqi@9459 3226 // 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8
aoqi@9459 3227 //
aoqi@9459 3228 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split
aoqi@1 3229 Label loop;
aoqi@1 3230 __ bind(loop);
aoqi@6880 3231 __ ld(T2, sizes, 0); // Load frame size
aoqi@6880 3232 __ ld_ptr(AT, pcs, 0); // save return address
aoqi@9459 3233 __ addi(T2, T2, -2*wordSize); // we'll push pc and fp, by hand
aoqi@6880 3234 __ push2(AT, FP);
aoqi@1 3235 __ move(FP, SP);
aoqi@6880 3236 __ sub(SP, SP, T2); // Prolog!
aoqi@1 3237 // This value is corrected by layout_activation_impl
aoqi@6880 3238 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1 3239 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
aoqi@6880 3240 __ move(sender_sp, SP); // pass to next frame
aoqi@6880 3241 __ addi(count, count, -1); // decrement counter
aoqi@6880 3242 __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes)
aoqi@1 3243 __ bne(count, R0, loop);
aoqi@6880 3244 __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
aoqi@6880 3245 __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
aoqi@1 3246 // Re-push self-frame
aoqi@6880 3247 __ push2(AT, FP);
aoqi@1 3248 __ move(FP, SP);
aoqi@6880 3249 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@6880 3250 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
aoqi@1 3251 __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
aoqi@1 3252
aoqi@1 3253 // Restore frame locals after moving the frame
aoqi@1 3254 __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
aoqi@1 3255 __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
aoqi@1 3256 __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Spill the FP result registers F0/F1 as well
aoqi@1 3257 __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
aoqi@1 3258
aoqi@6880 3259
aoqi@1 3260 // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
aoqi@1 3261 // this call, no GC can happen.
aoqi@6880 3262 __ move(A1, reason); // exec_mode
aoqi@1 3263 __ get_thread(thread);
aoqi@6880 3264 __ move(A0, thread); // thread
aoqi@1 3265 __ addi(SP, SP, (-additional_words) *wordSize);
aoqi@1 3266
aoqi@1 3267 // set last_Java_sp, last_Java_fp
aoqi@1 3268 __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@1 3269
aoqi@1 3270 __ move(AT, -(StackAlignmentInBytes));
aoqi@1 3271 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
aoqi@1 3272
aoqi@6880 3273 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 3274 {
// NOTE(review): same hard-coded "+ 28" byte distance as at the
// fetch_unroll_info call site above; keep it in sync with the instructions
// emitted up to the end of the call's delay slot - confirm.
fujie@373 3275 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
fujie@368 3276 __ patchable_set48(AT, save_pc);
aoqi@1 3277 }
aoqi@1 3278 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@6880 3279
aoqi@1 3280 __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
aoqi@1 3281 __ delayed()->nop();
aoqi@1 3282 // Revert SP alignment after call since we're going to do some SP relative addressing below
aoqi@1 3283 __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1 3284 // Set an oopmap for the call site
aoqi@1 3285 oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
aoqi@1 3286
aoqi@1 3287 __ push(V0);
aoqi@6880 3288
aoqi@1 3289 __ get_thread(thread);
fujie@9171 3290 __ reset_last_Java_frame(true);
aoqi@1 3291
aoqi@1 3292 // Collect return values
aoqi@1 3293 __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
aoqi@1 3294 __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
// NOTE(review): V0/V1 above are reloaded with an extra (additional_words + 1)
// words, presumably compensating for the SP adjustment before the call and the
// push(V0) after it. F0/F1 were stored relative to the same SP as V0/V1 but
// are reloaded here without that adjustment - this looks inconsistent; confirm.
aoqi@1 3295 __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Reload the FP result registers F0/F1
aoqi@1 3296 __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
aoqi@6880 3297 //FIXME,
aoqi@1 3298 // Clear floating point stack before returning to interpreter
aoqi@1 3299 __ empty_FPU_stack();
aoqi@1 3300 //FIXME, we should consider about float and double
aoqi@1 3301 // Push a float or double return value if necessary.
aoqi@1 3302 __ leave();
aoqi@1 3303
aoqi@1 3304 // Jump to interpreter
aoqi@1 3305 __ jr(RA);
aoqi@1 3306 __ delayed()->nop();
aoqi@1 3307
aoqi@1 3308 masm->flush();
// Publish the blob together with the entry offsets recorded above.
aoqi@1 3309 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
aoqi@1 3310 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
aoqi@1 3311 }
aoqi@1 3312
aoqi@1 3313 #ifdef COMPILER2
aoqi@1 3314
aoqi@1 3315 //------------------------------generate_uncommon_trap_blob--------------------
aoqi@1 3316 // Ought to generate an ideal graph & compile, but here's some MIPS ASM
aoqi@1 3317 // instead.
//
// Emits the uncommon trap blob and publishes it in _uncommon_trap_blob.
// The generated code pushes a self-frame holding RA, FP and S0-S7, calls
// Deoptimization::uncommon_trap with the thread in A0 and the value found in
// T0 in A1, then pops the self-frame and the deoptimized frame, pushes one
// skeletal frame per UnrollBlock entry, re-pushes a self-frame, calls
// Deoptimization::unpack_frames with Deoptimization::Unpack_uncommon_trap and
// finally jumps through RA.
aoqi@1 3318 void SharedRuntime::generate_uncommon_trap_blob() {
aoqi@1 3319 // allocate space for the code
aoqi@1 3320 ResourceMark rm;
aoqi@1 3321 // setup code generation tools
aoqi@6880 3322 CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 );
aoqi@6880 3323 MacroAssembler* masm = new MacroAssembler(&buffer);
aoqi@1 3324
// Self-frame layout; indices are multiplied by BytesPerInt when used as
// offsets below, and every saved register gets a pair of slots.
aoqi@1 3325 enum frame_layout {
aoqi@6880 3326 s0_off, s0_off2,
aoqi@6880 3327 s1_off, s1_off2,
aoqi@6880 3328 s2_off, s2_off2,
aoqi@6880 3329 s3_off, s3_off2,
aoqi@6880 3330 s4_off, s4_off2,
aoqi@6880 3331 s5_off, s5_off2,
aoqi@6880 3332 s6_off, s6_off2,
aoqi@6880 3333 s7_off, s7_off2,
aoqi@6880 3334 fp_off, fp_off2,
aoqi@6880 3335 return_off, return_off2, // slot for return address sp + 9
aoqi@1 3336 framesize
aoqi@1 3337 };
aoqi@1 3338 assert(framesize % 4 == 0, "sp not 16-byte aligned");
aoqi@1 3339
aoqi@1 3340 address start = __ pc();
aoqi@1 3341
aoqi@1 3342 // Push self-frame.
aoqi@1 3343 __ daddiu(SP, SP, -framesize * BytesPerInt);
aoqi@1 3344
aoqi@1 3345 __ sd(RA, SP, return_off * BytesPerInt);
aoqi@1 3346 __ sd(FP, SP, fp_off * BytesPerInt);
aoqi@1 3347
aoqi@6880 3348 // Save callee saved registers S0-S7; each one is stored at its
aoqi@1 3349 // sN_off slot of the frame_layout above.
aoqi@1 3350 __ sd(S0, SP, s0_off * BytesPerInt);
aoqi@1 3351 __ sd(S1, SP, s1_off * BytesPerInt);
aoqi@1 3352 __ sd(S2, SP, s2_off * BytesPerInt);
aoqi@1 3353 __ sd(S3, SP, s3_off * BytesPerInt);
aoqi@1 3354 __ sd(S4, SP, s4_off * BytesPerInt);
aoqi@1 3355 __ sd(S5, SP, s5_off * BytesPerInt);
aoqi@1 3356 __ sd(S6, SP, s6_off * BytesPerInt);
aoqi@1 3357 __ sd(S7, SP, s7_off * BytesPerInt);
aoqi@1 3358
aoqi@1 3359 __ daddi(FP, SP, fp_off * BytesPerInt);
aoqi@1 3360
aoqi@1 3361 // Clear the floating point exception stack
aoqi@1 3362 __ empty_FPU_stack();
aoqi@1 3363
aoqi@1 3364 Register thread = TREG;
aoqi@1 3365
aoqi@1 3366 #ifndef OPT_THREAD
aoqi@1 3367 __ get_thread(thread);
aoqi@1 3368 #endif
aoqi@1 3369 // set last_Java_sp
aoqi@1 3370 __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@6880 3371 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 3372 {
// NOTE(review): the "+ 52" is a hard-coded byte distance; presumably it makes
// save_pc equal the return address of the patchable_call below. Any change to
// the instructions emitted in between must update it - confirm.
fujie@373 3373 long save_pc = (long)__ pc() + 52;
fujie@368 3374 __ patchable_set48(AT, (long)save_pc);
aoqi@1 3375 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1 3376 }
aoqi@1 3377 // Call C code. Need thread but NOT official VM entry
aoqi@1 3378 // crud. We cannot block on this call, no GC can happen. Call should
aoqi@1 3379 // capture callee-saved registers as well as return values.
aoqi@1 3380 __ move(A0, thread);
aoqi@1 3381 // argument already in T0
aoqi@1 3382 __ move(A1, T0);
fujie@386 3383 __ patchable_call((address)Deoptimization::uncommon_trap);
aoqi@1 3384
aoqi@1 3385 // Set an oopmap for the call site
aoqi@1 3386 OopMapSet *oop_maps = new OopMapSet();
aoqi@1 3387 OopMap* map = new OopMap( framesize, 0 );
aoqi@1 3388
aoqi@6880 3389 map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() );
aoqi@1 3390 map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() );
aoqi@1 3391 map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() );
aoqi@1 3392 map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() );
aoqi@1 3393 map->set_callee_saved( VMRegImpl::stack2reg(s4_off ), S4->as_VMReg() );
aoqi@1 3394 map->set_callee_saved( VMRegImpl::stack2reg(s5_off ), S5->as_VMReg() );
aoqi@1 3395 map->set_callee_saved( VMRegImpl::stack2reg(s6_off ), S6->as_VMReg() );
aoqi@1 3396 map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() );
aoqi@1 3397
aoqi@1 3398 //oop_maps->add_gc_map( __ offset(), true, map);
aoqi@6880 3399 oop_maps->add_gc_map( __ offset(), map);
aoqi@1 3400
aoqi@1 3401 #ifndef OPT_THREAD
aoqi@1 3402 __ get_thread(thread);
aoqi@1 3403 #endif
fujie@9171 3404 __ reset_last_Java_frame(false);
aoqi@1 3405
aoqi@1 3406 // Load UnrollBlock into S7
aoqi@1 3407 Register unroll = S7;
aoqi@1 3408 __ move(unroll, V0);
aoqi@1 3409
aoqi@6880 3410 // Pop all the frames we must move/replace.
aoqi@6880 3411 //
aoqi@1 3412 // Frame picture (youngest to oldest)
aoqi@1 3413 // 1: self-frame (no frame link)
aoqi@1 3414 // 2: deopting frame (no frame link)
aoqi@6880 3415 // 3: possible-i2c-adapter-frame
aoqi@1 3416 // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an
aoqi@1 3417 // and c2i here)
aoqi@1 3418
aoqi@1 3419 __ daddiu(SP, SP, framesize * BytesPerInt);
aoqi@1 3420
aoqi@1 3421 // Pop deoptimized frame
aoqi@1 3422 __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
aoqi@1 3423 __ dadd(SP, SP, AT);
aoqi@1 3424
aoqi@1 3425 // register for frame pcs
aoqi@1 3426 Register pcs = T8;
aoqi@1 3427 // register for frame sizes
aoqi@1 3428 Register sizes = T9;
aoqi@1 3429 // register for frame count
aoqi@1 3430 Register count = T3;
aoqi@1 3431 // register for the sender's sp
aoqi@1 3432 Register sender_sp = T1;
aoqi@1 3433
aoqi@1 3434 // sp should be pointing at the return address to the caller (4)
aoqi@9459 3435 // Load array of frame pcs
aoqi@1 3436 __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
aoqi@1 3437
aoqi@9459 3438 // Load array of frame sizes
aoqi@1 3439 __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
aoqi@1 3440 __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
aoqi@1 3441
aoqi@1 3442 // Pick up the initial fp we should save
aoqi@1 3443 __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
aoqi@1 3444 // Now adjust the caller's stack to make up for the extra locals
aoqi@1 3445 // but record the original sp so that we can save it in the skeletal interpreter
aoqi@1 3446 // frame and the stack walking of interpreter_sender will get the unextended sp
aoqi@1 3447 // value and not the "real" sp value.
aoqi@1 3448
aoqi@1 3449 __ move(sender_sp, SP);
aoqi@1 3450 __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
aoqi@1 3451 __ dsub(SP, SP, AT);
aoqi@1 3452 // Push interpreter frames in a loop
aoqi@1 3453 Label loop;
aoqi@1 3454 __ bind(loop);
aoqi@1 3455 __ ld(T2, sizes, 0); // Load frame size
aoqi@1 3456 __ ld(AT, pcs, 0); // save return address
aoqi@9459 3457 __ daddi(T2, T2, -2*wordSize); // we'll push pc and fp, by hand
aoqi@1 3458 __ push2(AT, FP);
aoqi@1 3459 __ move(FP, SP);
aoqi@1 3460 __ dsub(SP, SP, T2); // Prolog!
aoqi@1 3461 // This value is corrected by layout_activation_impl
aoqi@1 3462 __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1 3463 __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
aoqi@1 3464 __ move(sender_sp, SP); // pass to next frame
aoqi@1 3465 __ daddi(count, count, -1); // decrement counter
aoqi@1 3466 __ daddi(sizes, sizes, wordSize); // Bump array pointer (sizes)
aoqi@1 3467 __ addi(pcs, pcs, wordSize); // Bump array pointer (pcs)
aoqi@1 3468 __ bne(count, R0, loop);
aoqi@1 3469 __ delayed()->nop(); // delay slot; pcs was already bumped before the branch
aoqi@1 3470
aoqi@1 3471 __ ld(RA, pcs, 0);
aoqi@1 3472
aoqi@1 3473 // Re-push self-frame
aoqi@1 3474 __ daddi(SP, SP, - 2 * wordSize); // make room for the old FP and the return address
aoqi@1 3475 __ sd(FP, SP, 0 * wordSize); // save old FP
aoqi@1 3476 __ sd(RA, SP, 1 * wordSize); // save final return address
aoqi@6880 3477 __ move(FP, SP); // set new FP
aoqi@1 3478 __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
aoqi@1 3479
aoqi@1 3480 // set last_Java_sp, last_Java_fp
aoqi@1 3481 __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@1 3482
aoqi@1 3483 __ move(AT, -(StackAlignmentInBytes));
aoqi@1 3484 __ andr(SP, SP, AT); // Fix stack alignment as required by ABI
aoqi@1 3485
aoqi@6880 3486 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 3487 {
// NOTE(review): another hard-coded "+ 52" byte distance to the return address
// of the call below (presumably); keep it in sync with the emitted
// instructions - confirm.
fujie@373 3488 long save_pc = (long)__ pc() + 52;
fujie@368 3489 __ patchable_set48(AT, (long)save_pc);
aoqi@1 3490 }
huangjia@9645 3491 __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1 3492
aoqi@1 3493 // Call C code. Need thread but NOT official VM entry
aoqi@1 3494 // crud. We cannot block on this call, no GC can happen. Call should
aoqi@1 3495 // restore return values to their stack-slots with the new SP.
aoqi@1 3496 __ move(A0, thread);
aoqi@1 3497 __ move(A1, Deoptimization::Unpack_uncommon_trap);
fujie@386 3498 __ patchable_call((address)Deoptimization::unpack_frames);
aoqi@1 3499 // Set an oopmap for the call site
huangjia@9645 3500 oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );
aoqi@1 3501
fujie@9171 3502 __ reset_last_Java_frame(true);
aoqi@1 3503
aoqi@1 3504 // Pop self-frame.
aoqi@1 3505 __ leave(); // Epilog!
aoqi@1 3506
aoqi@1 3507 // Jump to interpreter
aoqi@1 3508 __ jr(RA);
aoqi@1 3509 __ delayed()->nop();
aoqi@1 3510 // -------------
aoqi@1 3511 // make sure all code is generated
aoqi@1 3512 masm->flush();
aoqi@1 3513
// framesize counts BytesPerInt-sized slots; it is halved here, presumably to
// express the frame size in words - confirm against UncommonTrapBlob::create.
aoqi@1 3514 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
aoqi@1 3515 }
aoqi@1 3516
aoqi@1 3517 #endif // COMPILER2
aoqi@1 3518
aoqi@1 3519 //------------------------------generate_handler_blob-------------------
aoqi@1 3520 //
aoqi@1 3521 // Generate a special Compile2Runtime blob that saves all registers, and sets
aoqi@1 3522 // up an OopMap and calls safepoint code to stop the compiled code for
aoqi@1 3523 // a safepoint.
aoqi@1 3524 //
aoqi@1 3525 // This blob is jumped to (via a breakpoint and the signal handler) from a
aoqi@6880 3526 // safepoint in compiled code.
aoqi@6880 3527
aoqi@1 3528 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {
aoqi@1 3529
aoqi@1 3530 // Account for thread arg in our frame
aoqi@6880 3531 const int additional_words = 0;
aoqi@1 3532 int frame_size_in_words;
aoqi@1 3533
aoqi@6880 3534 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
aoqi@1 3535
aoqi@1 3536 ResourceMark rm;
aoqi@1 3537 OopMapSet *oop_maps = new OopMapSet();
aoqi@1 3538 OopMap* map;
aoqi@1 3539
aoqi@1 3540 // allocate space for the code
aoqi@6880 3541 // setup code generation tools
aoqi@1 3542 CodeBuffer buffer ("handler_blob", 2048, 512);
aoqi@1 3543 MacroAssembler* masm = new MacroAssembler( &buffer);
aoqi@6880 3544
aoqi@6880 3545 const Register thread = TREG;
aoqi@6880 3546 address start = __ pc();
aoqi@6880 3547 address call_pc = NULL;
aoqi@1 3548 bool cause_return = (pool_type == POLL_AT_RETURN);
aoqi@1 3549 bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
aoqi@1 3550
aoqi@1 3551 // If cause_return is true we are at a poll_return and there is
aoqi@1 3552 // the return address in RA to the caller on the nmethod
aoqi@1 3553 // that is safepoint. We can leave this return in RA and
aoqi@1 3554 // effectively complete the return and safepoint in the caller.
aoqi@1 3555 // Otherwise we load exception pc to RA.
aoqi@1 3556 __ push(thread);
aoqi@1 3557 #ifndef OPT_THREAD
aoqi@1 3558 __ get_thread(thread);
aoqi@1 3559 #endif
aoqi@1 3560
aoqi@1 3561 if(!cause_return) {
aoqi@1 3562 __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
aoqi@1 3563 }
aoqi@6880 3564
aoqi@1 3565 __ pop(thread);
aoqi@1 3566 map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);
aoqi@1 3567
aoqi@1 3568 #ifndef OPT_THREAD
aoqi@1 3569 __ get_thread(thread);
aoqi@1 3570 #endif
aoqi@1 3571 // The following is basically a call_VM. However, we need the precise
aoqi@1 3572 // address of the call in order to generate an oopmap. Hence, we do all the
aoqi@1 3573 // work outselvs.
aoqi@1 3574
aoqi@1 3575 __ move(A0, thread);
aoqi@1 3576 __ set_last_Java_frame(NOREG, NOREG, NULL);
aoqi@1 3577
aoqi@1 3578
aoqi@1 3579 // do the call
aoqi@1 3580 __ call(call_ptr);
aoqi@1 3581 __ delayed()->nop();
aoqi@1 3582
aoqi@1 3583 // Set an oopmap for the call site. This oopmap will map all
aoqi@1 3584 // oop-registers and debug-info registers as callee-saved. This
aoqi@1 3585 // will allow deoptimization at this safepoint to find all possible
aoqi@1 3586 // debug-info recordings, as well as let GC find all oops.
aoqi@1 3587 oop_maps->add_gc_map(__ offset(), map);
aoqi@1 3588
aoqi@1 3589 Label noException;
aoqi@1 3590
aoqi@1 3591 // Clear last_Java_sp again
fujie@9171 3592 __ reset_last_Java_frame(false);
aoqi@1 3593
aoqi@1 3594 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 3595 __ beq(AT, R0, noException);
aoqi@1 3596 __ delayed()->nop();
aoqi@1 3597
aoqi@1 3598 // Exception pending
aoqi@1 3599
aoqi@1 3600 RegisterSaver::restore_live_registers(masm, save_vectors);
aoqi@1 3601 //forward_exception_entry need return address on the stack
aoqi@1 3602 __ push(RA);
fujie@386 3603 __ patchable_jump((address)StubRoutines::forward_exception_entry());
aoqi@1 3604
aoqi@1 3605 // No exception case
aoqi@1 3606 __ bind(noException);
aoqi@6880 3607 // Normal exit, register restoring and exit
aoqi@1 3608 RegisterSaver::restore_live_registers(masm, save_vectors);
aoqi@1 3609 __ jr(RA);
aoqi@1 3610 __ delayed()->nop();
aoqi@6880 3611
aoqi@6880 3612 masm->flush();
aoqi@1 3613
aoqi@1 3614 // Fill-out other meta info
aoqi@6880 3615 return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
aoqi@1 3616 }
aoqi@1 3617
aoqi@1 3618 //
aoqi@1 3619 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
aoqi@1 3620 //
aoqi@1 3621 // Generate a stub that calls into vm to find out the proper destination
aoqi@1 3622 // of a java call. All the argument registers are live at this point
aoqi@1 3623 // but since this is generic code we don't know what they are and the caller
aoqi@1 3624 // must do any gc of the args.
aoqi@1 3625 //
aoqi@1 3626 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
aoqi@1 3627 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
aoqi@1 3628
aoqi@1 3629 // allocate space for the code
aoqi@1 3630 ResourceMark rm;
aoqi@1 3631
aoqi@1 3632 //CodeBuffer buffer(name, 1000, 512);
aoqi@1 3633 //FIXME. aoqi. code_size
aoqi@8009 3634 CodeBuffer buffer(name, 2000, 2048);
aoqi@1 3635 MacroAssembler* masm = new MacroAssembler(&buffer);
aoqi@1 3636
aoqi@1 3637 int frame_size_words;
aoqi@6880 3638 //we put the thread in A0
aoqi@1 3639
aoqi@1 3640 OopMapSet *oop_maps = new OopMapSet();
aoqi@1 3641 OopMap* map = NULL;
aoqi@1 3642
aoqi@1 3643 int start = __ offset();
aoqi@1 3644 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
aoqi@1 3645
aoqi@1 3646
aoqi@1 3647 int frame_complete = __ offset();
aoqi@1 3648
aoqi@1 3649 const Register thread = T8;
aoqi@1 3650 __ get_thread(thread);
aoqi@1 3651
aoqi@6880 3652 __ move(A0, thread);
aoqi@1 3653 __ set_last_Java_frame(noreg, FP, NULL);
aoqi@6880 3654 //align the stack before invoke native
aoqi@1 3655 __ move(AT, -(StackAlignmentInBytes));
aoqi@6880 3656 __ andr(SP, SP, AT);
aoqi@6880 3657 __ relocate(relocInfo::internal_pc_type);
aoqi@6880 3658 {
fujie@373 3659 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord;
fujie@368 3660 __ patchable_set48(AT, save_pc);
aoqi@1 3661 }
aoqi@1 3662 __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
aoqi@1 3663
aoqi@1 3664 __ call(destination);
aoqi@1 3665 __ delayed()->nop();
aoqi@1 3666
aoqi@1 3667 // Set an oopmap for the call site.
aoqi@1 3668 // We need this not only for callee-saved registers, but also for volatile
aoqi@1 3669 // registers that the compiler might be keeping live across a safepoint.
aoqi@1 3670 oop_maps->add_gc_map( __ offset() - start, map);
aoqi@1 3671 // V0 contains the address we are going to jump to assuming no exception got installed
aoqi@1 3672 __ get_thread(thread);
aoqi@1 3673 __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1 3674 // clear last_Java_sp
fujie@9171 3675 __ reset_last_Java_frame(true);
aoqi@1 3676 // check for pending exceptions
aoqi@1 3677 Label pending;
aoqi@1 3678 __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 3679 __ bne(AT, R0, pending);
aoqi@6880 3680 __ delayed()->nop();
aoqi@6880 3681 // get the returned Method*
aoqi@6880 3682 //FIXME, do mips need this ?
aoqi@1 3683 __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
aoqi@1 3684 __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
aoqi@1 3685 __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
aoqi@1 3686 RegisterSaver::restore_live_registers(masm);
aoqi@1 3687
aoqi@1 3688 // We are back the the original state on entry and ready to go the callee method.
aoqi@1 3689 __ jr(V0);
aoqi@1 3690 __ delayed()->nop();
aoqi@1 3691 // Pending exception after the safepoint
aoqi@1 3692
aoqi@1 3693 __ bind(pending);
aoqi@1 3694
aoqi@1 3695 RegisterSaver::restore_live_registers(masm);
aoqi@1 3696
aoqi@1 3697 // exception pending => remove activation and forward to exception handler
aoqi@6880 3698 //forward_exception_entry need return address on the stack
aoqi@1 3699 __ push(RA);
aoqi@1 3700 __ get_thread(thread);
aoqi@6880 3701 __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
aoqi@1 3702 __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 3703 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
zhaixiang@9144 3704 __ delayed()->nop();
aoqi@9459 3705 //
aoqi@1 3706 // make sure all code is generated
aoqi@6880 3707 masm->flush();
aoqi@1 3708
aoqi@1 3709 RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
aoqi@1 3710 return tmp;
aoqi@1 3711 }
aoqi@1 3712
// C-linkage SpinPause entry point; this implementation does nothing and
// always returns 0.
extern "C" int SpinPause() {
  return 0;
}
wanghaomin@9639 3714
wanghaomin@9639 3715
wanghaomin@9639 3716 //------------------------------Montgomery multiplication------------------------
wanghaomin@9639 3717 //
wanghaomin@9639 3718
wanghaomin@9639 3719 // Subtract 0:b from carry:a. Return carry.
wanghaomin@9639 3720 static unsigned long
wanghaomin@9639 3721 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
wanghaomin@9639 3722 long borrow = 0, t = 0;
wanghaomin@9639 3723 unsigned long tmp0, tmp1;
wanghaomin@9639 3724 __asm__ __volatile__ (
wanghaomin@9639 3725 "0: \n"
wanghaomin@9639 3726 "ld %[tmp0], 0(%[a]) \n"
wanghaomin@9639 3727 "ld %[tmp1], 0(%[b]) \n"
wanghaomin@9639 3728 "sltu %[t], %[tmp0], %[borrow] \n"
wanghaomin@9639 3729 "dsubu %[tmp0], %[tmp0], %[borrow] \n"
wanghaomin@9639 3730 "sltu %[borrow], %[tmp0], %[tmp1] \n"
wanghaomin@9639 3731 "or %[borrow], %[borrow], %[t] \n"
wanghaomin@9639 3732 "dsubu %[tmp0], %[tmp0], %[tmp1] \n"
wanghaomin@9639 3733 "sd %[tmp0], 0(%[a]) \n"
wanghaomin@9639 3734 "daddiu %[a], %[a], 8 \n"
wanghaomin@9639 3735 "daddiu %[b], %[b], 8 \n"
wanghaomin@9639 3736 "daddiu %[len], %[len], -1 \n"
wanghaomin@9639 3737 "bgtz %[len], 0b \n"
wanghaomin@9639 3738 "dsubu %[tmp0], %[carry], %[borrow] \n"
wanghaomin@9639 3739 : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t)
wanghaomin@9639 3740 : [carry]"r"(carry)
wanghaomin@9639 3741 : "memory"
wanghaomin@9639 3742 );
wanghaomin@9639 3743 return tmp0;
wanghaomin@9639 3744 }
wanghaomin@9639 3745
wanghaomin@9639 3746 // Multiply (unsigned) Long A by Long B, accumulating the double-
wanghaomin@9639 3747 // length result into the accumulator formed of t0, t1, and t2.
wanghaomin@9639 3748 inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) {
wanghaomin@9639 3749 unsigned long hi, lo, carry = 0, t = 0;
wanghaomin@9639 3750 __asm__ __volatile__(
wanghaomin@9639 3751 "dmultu %[A], %[B] \n"
wanghaomin@9639 3752 "mfhi %[hi] \n"
wanghaomin@9639 3753 "mflo %[lo] \n"
wanghaomin@9639 3754 "daddu %[t0], %[t0], %[lo] \n"
wanghaomin@9639 3755 "sltu %[carry], %[t0], %[lo] \n"
wanghaomin@9639 3756 "daddu %[t1], %[t1], %[carry] \n"
wanghaomin@9639 3757 "sltu %[t], %[t1], %[carry] \n"
wanghaomin@9639 3758 "daddu %[t1], %[t1], %[hi] \n"
wanghaomin@9639 3759 "sltu %[carry], %[t1], %[hi] \n"
wanghaomin@9639 3760 "or %[carry], %[carry], %[t] \n"
wanghaomin@9639 3761 "daddu %[t2], %[t2], %[carry] \n"
wanghaomin@9639 3762 : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t)
wanghaomin@9639 3763 : [A]"r"(A), [B]"r"(B)
wanghaomin@9639 3764 :
wanghaomin@9639 3765 );
wanghaomin@9639 3766 }
wanghaomin@9639 3767
wanghaomin@9639 3768 // As above, but add twice the double-length result into the
wanghaomin@9639 3769 // accumulator.
wanghaomin@9639 3770 inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) {
wanghaomin@9639 3771 unsigned long hi, lo, carry = 0, t = 0;
wanghaomin@9639 3772 __asm__ __volatile__(
wanghaomin@9639 3773 "dmultu %[A], %[B] \n"
wanghaomin@9639 3774 "mfhi %[hi] \n"
wanghaomin@9639 3775 "mflo %[lo] \n"
wanghaomin@9639 3776 "daddu %[t0], %[t0], %[lo] \n"
wanghaomin@9639 3777 "sltu %[carry], %[t0], %[lo] \n"
wanghaomin@9639 3778 "daddu %[t1], %[t1], %[carry] \n"
wanghaomin@9639 3779 "sltu %[t], %[t1], %[carry] \n"
wanghaomin@9639 3780 "daddu %[t1], %[t1], %[hi] \n"
wanghaomin@9639 3781 "sltu %[carry], %[t1], %[hi] \n"
wanghaomin@9639 3782 "or %[carry], %[carry], %[t] \n"
wanghaomin@9639 3783 "daddu %[t2], %[t2], %[carry] \n"
wanghaomin@9639 3784 "daddu %[t0], %[t0], %[lo] \n"
wanghaomin@9639 3785 "sltu %[carry], %[t0], %[lo] \n"
wanghaomin@9639 3786 "daddu %[t1], %[t1], %[carry] \n"
wanghaomin@9639 3787 "sltu %[t], %[t1], %[carry] \n"
wanghaomin@9639 3788 "daddu %[t1], %[t1], %[hi] \n"
wanghaomin@9639 3789 "sltu %[carry], %[t1], %[hi] \n"
wanghaomin@9639 3790 "or %[carry], %[carry], %[t] \n"
wanghaomin@9639 3791 "daddu %[t2], %[t2], %[carry] \n"
wanghaomin@9639 3792 : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t)
wanghaomin@9639 3793 : [A]"r"(A), [B]"r"(B)
wanghaomin@9639 3794 :
wanghaomin@9639 3795 );
wanghaomin@9639 3796 }
wanghaomin@9639 3797
wanghaomin@9639 3798 // Fast Montgomery multiplication. The derivation of the algorithm is
wanghaomin@9639 3799 // in A Cryptographic Library for the Motorola DSP56000,
wanghaomin@9639 3800 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
wanghaomin@9639 3801
wanghaomin@9639 3802 static void __attribute__((noinline))
wanghaomin@9639 3803 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
wanghaomin@9639 3804 unsigned long m[], unsigned long inv, int len) {
wanghaomin@9639 3805 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
wanghaomin@9639 3806 int i;
wanghaomin@9639 3807
wanghaomin@9639 3808 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
wanghaomin@9639 3809
wanghaomin@9639 3810 for (i = 0; i < len; i++) {
wanghaomin@9639 3811 int j;
wanghaomin@9639 3812 for (j = 0; j < i; j++) {
wanghaomin@9639 3813 MACC(a[j], b[i-j], t0, t1, t2);
wanghaomin@9639 3814 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3815 }
wanghaomin@9639 3816 MACC(a[i], b[0], t0, t1, t2);
wanghaomin@9639 3817 m[i] = t0 * inv;
wanghaomin@9639 3818 MACC(m[i], n[0], t0, t1, t2);
wanghaomin@9639 3819
wanghaomin@9639 3820 assert(t0 == 0, "broken Montgomery multiply");
wanghaomin@9639 3821
wanghaomin@9639 3822 t0 = t1; t1 = t2; t2 = 0;
wanghaomin@9639 3823 }
wanghaomin@9639 3824
wanghaomin@9639 3825 for (i = len; i < 2*len; i++) {
wanghaomin@9639 3826 int j;
wanghaomin@9639 3827 for (j = i-len+1; j < len; j++) {
wanghaomin@9639 3828 MACC(a[j], b[i-j], t0, t1, t2);
wanghaomin@9639 3829 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3830 }
wanghaomin@9639 3831 m[i-len] = t0;
wanghaomin@9639 3832 t0 = t1; t1 = t2; t2 = 0;
wanghaomin@9639 3833 }
wanghaomin@9639 3834
wanghaomin@9639 3835 while (t0)
wanghaomin@9639 3836 t0 = sub(m, n, t0, len);
wanghaomin@9639 3837 }
wanghaomin@9639 3838
wanghaomin@9639 3839 // Fast Montgomery squaring. This uses asymptotically 25% fewer
wanghaomin@9639 3840 // multiplies so it should be up to 25% faster than Montgomery
wanghaomin@9639 3841 // multiplication. However, its loop control is more complex and it
wanghaomin@9639 3842 // may actually run slower on some machines.
wanghaomin@9639 3843
wanghaomin@9639 3844 static void __attribute__((noinline))
wanghaomin@9639 3845 montgomery_square(unsigned long a[], unsigned long n[],
wanghaomin@9639 3846 unsigned long m[], unsigned long inv, int len) {
wanghaomin@9639 3847 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
wanghaomin@9639 3848 int i;
wanghaomin@9639 3849
wanghaomin@9639 3850 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
wanghaomin@9639 3851
wanghaomin@9639 3852 for (i = 0; i < len; i++) {
wanghaomin@9639 3853 int j;
wanghaomin@9639 3854 int end = (i+1)/2;
wanghaomin@9639 3855 for (j = 0; j < end; j++) {
wanghaomin@9639 3856 MACC2(a[j], a[i-j], t0, t1, t2);
wanghaomin@9639 3857 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3858 }
wanghaomin@9639 3859 if ((i & 1) == 0) {
wanghaomin@9639 3860 MACC(a[j], a[j], t0, t1, t2);
wanghaomin@9639 3861 }
wanghaomin@9639 3862 for (; j < i; j++) {
wanghaomin@9639 3863 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3864 }
wanghaomin@9639 3865 m[i] = t0 * inv;
wanghaomin@9639 3866 MACC(m[i], n[0], t0, t1, t2);
wanghaomin@9639 3867
wanghaomin@9639 3868 assert(t0 == 0, "broken Montgomery square");
wanghaomin@9639 3869
wanghaomin@9639 3870 t0 = t1; t1 = t2; t2 = 0;
wanghaomin@9639 3871 }
wanghaomin@9639 3872
wanghaomin@9639 3873 for (i = len; i < 2*len; i++) {
wanghaomin@9639 3874 int start = i-len+1;
wanghaomin@9639 3875 int end = start + (len - start)/2;
wanghaomin@9639 3876 int j;
wanghaomin@9639 3877 for (j = start; j < end; j++) {
wanghaomin@9639 3878 MACC2(a[j], a[i-j], t0, t1, t2);
wanghaomin@9639 3879 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3880 }
wanghaomin@9639 3881 if ((i & 1) == 0) {
wanghaomin@9639 3882 MACC(a[j], a[j], t0, t1, t2);
wanghaomin@9639 3883 }
wanghaomin@9639 3884 for (; j < len; j++) {
wanghaomin@9639 3885 MACC(m[j], n[i-j], t0, t1, t2);
wanghaomin@9639 3886 }
wanghaomin@9639 3887 m[i-len] = t0;
wanghaomin@9639 3888 t0 = t1; t1 = t2; t2 = 0;
wanghaomin@9639 3889 }
wanghaomin@9639 3890
wanghaomin@9639 3891 while (t0)
wanghaomin@9639 3892 t0 = sub(m, n, t0, len);
wanghaomin@9639 3893 }
wanghaomin@9639 3894
wanghaomin@9639 3895 // Swap words in a longword.
// Exchange the upper and lower 32-bit halves of a 64-bit longword.
static unsigned long swap(unsigned long x) {
  const unsigned long moved_up = x << 32;    // old low half, now on top
  const unsigned long moved_down = x >> 32;  // old high half, now at the bottom
  return moved_up | moved_down;
}
wanghaomin@9639 3899
wanghaomin@9639 3900 // Copy len longwords from s to d, word-swapping as we go. The
wanghaomin@9639 3901 // destination array is reversed.
wanghaomin@9639 3902 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
wanghaomin@9639 3903 d += len;
wanghaomin@9639 3904 while(len-- > 0) {
wanghaomin@9639 3905 d--;
wanghaomin@9639 3906 *d = swap(*s);
wanghaomin@9639 3907 s++;
wanghaomin@9639 3908 }
wanghaomin@9639 3909 }
wanghaomin@9639 3910
wanghaomin@9639 3911 // The threshold at which squaring is advantageous was determined
wanghaomin@9639 3912 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
wanghaomin@9639 3913 // Doesn't seem to be relevant for MIPS64 so we use the same value.
// The value is compared against the operand length in jints:
// SharedRuntime::montgomery_square uses the dedicated squaring routine when
// len >= MONTGOMERY_SQUARING_THRESHOLD and the general multiply otherwise.
wanghaomin@9639 3914 #define MONTGOMERY_SQUARING_THRESHOLD 64
wanghaomin@9639 3915
wanghaomin@9639 3916 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
wanghaomin@9639 3917 jint len, jlong inv,
wanghaomin@9639 3918 jint *m_ints) {
wanghaomin@9639 3919 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
wanghaomin@9639 3920 int longwords = len/2;
wanghaomin@9639 3921
wanghaomin@9639 3922 // Make very sure we don't use so much space that the stack might
wanghaomin@9639 3923 // overflow. 512 jints corresponds to an 16384-bit integer and
wanghaomin@9639 3924 // will use here a total of 8k bytes of stack space.
wanghaomin@9639 3925 int total_allocation = longwords * sizeof (unsigned long) * 4;
wanghaomin@9639 3926 guarantee(total_allocation <= 8192, "must be");
wanghaomin@9639 3927 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
wanghaomin@9639 3928
wanghaomin@9639 3929 // Local scratch arrays
wanghaomin@9639 3930 unsigned long
wanghaomin@9639 3931 *a = scratch + 0 * longwords,
wanghaomin@9639 3932 *b = scratch + 1 * longwords,
wanghaomin@9639 3933 *n = scratch + 2 * longwords,
wanghaomin@9639 3934 *m = scratch + 3 * longwords;
wanghaomin@9639 3935
wanghaomin@9639 3936 reverse_words((unsigned long *)a_ints, a, longwords);
wanghaomin@9639 3937 reverse_words((unsigned long *)b_ints, b, longwords);
wanghaomin@9639 3938 reverse_words((unsigned long *)n_ints, n, longwords);
wanghaomin@9639 3939
wanghaomin@9639 3940 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
wanghaomin@9639 3941
wanghaomin@9639 3942 reverse_words(m, (unsigned long *)m_ints, longwords);
wanghaomin@9639 3943 }
wanghaomin@9639 3944
wanghaomin@9639 3945 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
wanghaomin@9639 3946 jint len, jlong inv,
wanghaomin@9639 3947 jint *m_ints) {
wanghaomin@9639 3948 assert(len % 2 == 0, "array length in montgomery_square must be even");
wanghaomin@9639 3949 int longwords = len/2;
wanghaomin@9639 3950
wanghaomin@9639 3951 // Make very sure we don't use so much space that the stack might
wanghaomin@9639 3952 // overflow. 512 jints corresponds to an 16384-bit integer and
wanghaomin@9639 3953 // will use here a total of 6k bytes of stack space.
wanghaomin@9639 3954 int total_allocation = longwords * sizeof (unsigned long) * 3;
wanghaomin@9639 3955 guarantee(total_allocation <= 8192, "must be");
wanghaomin@9639 3956 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
wanghaomin@9639 3957
wanghaomin@9639 3958 // Local scratch arrays
wanghaomin@9639 3959 unsigned long
wanghaomin@9639 3960 *a = scratch + 0 * longwords,
wanghaomin@9639 3961 *n = scratch + 1 * longwords,
wanghaomin@9639 3962 *m = scratch + 2 * longwords;
wanghaomin@9639 3963
wanghaomin@9639 3964 reverse_words((unsigned long *)a_ints, a, longwords);
wanghaomin@9639 3965 reverse_words((unsigned long *)n_ints, n, longwords);
wanghaomin@9639 3966
wanghaomin@9639 3967 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
wanghaomin@9639 3968 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
wanghaomin@9639 3969 } else {
wanghaomin@9639 3970 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
wanghaomin@9639 3971 }
wanghaomin@9639 3972
wanghaomin@9639 3973 reverse_words(m, (unsigned long *)m_ints, longwords);
wanghaomin@9639 3974 }

mercurial