Mon, 30 May 2016 02:01:38 -0400
[C2] Rewrite generate_disjoint_short_copy.
Eliminated unaligned accesses and optimized the copy algorithm.
xml.transform improved by 50%, total GEO improved by 13%.
Copy Algorithm:
Generate stub for disjoint short copy. If "aligned" is true, the
"from" and "to" addresses are assumed to be heapword aligned.
Arguments for generated stub:
from: A0
to: A1
elm.count: A2 treated as signed
one element: 2 bytes
Strategy for aligned==true:
If length <= 9:
1. copy 1 element at a time (l_5)
If length > 9:
1. copy 4 elements at a time until less than 4 elements are left (l_7)
2. copy 2 elements at a time until less than 2 elements are left (l_6)
3. copy last element if one was left in step 2. (l_1)
Strategy for aligned==false:
If length <= 9: same as aligned==true case
If length > 9:
1. continue with step 7. if the alignment of from and to mod 4
is different.
2. align from and to to 4 bytes by copying 1 element if necessary
3. at l_2 from and to are 4-byte aligned; continue with
step 6 if they cannot be aligned to 8 bytes because they
have different alignment mod 8.
4. at this point we know that both, from and to, have the same
alignment mod 8, now copy one element if necessary to get
8 byte alignment of from and to.
5. copy 4 elements at a time until less than 4 elements are
left; depending on step 3. all load/stores are aligned.
6. copy 2 elements at a time until less than 2 elements are
left. (l_6)
7. copy 1 element at a time. (l_5)
8. copy last element if one was left in step 6. (l_1)
TODO:
1. use loongson 128-bit load/store
2. use loop unrolling optimization when len is big enough, for example if
len > 0x2000:
__ bind(l_x);
__ ld(AT, tmp1, 0);
__ ld(tmp, tmp1, 8);
__ sd(AT, tmp2, 0);
__ sd(tmp, tmp2, 8);
__ ld(AT, tmp1, 16);
__ ld(tmp, tmp1, 24);
__ sd(AT, tmp2, 16);
__ sd(tmp, tmp2, 24);
__ daddi(tmp1, tmp1, 32);
__ daddi(tmp2, tmp2, 32);
__ daddi(tmp3, tmp3, -16);
__ daddi(AT, tmp3, -16);
__ bgez(AT, l_x);
__ delayed()->nop();
aoqi@1 | 1 | /* |
aoqi@1 | 2 | * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. |
aoqi@1 | 3 | * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. |
aoqi@1 | 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@1 | 5 | * |
aoqi@1 | 6 | * This code is free software; you can redistribute it and/or modify it |
aoqi@1 | 7 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@1 | 8 | * published by the Free Software Foundation. |
aoqi@1 | 9 | * |
aoqi@1 | 10 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@1 | 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@1 | 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@1 | 13 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@1 | 14 | * accompanied this code). |
aoqi@1 | 15 | * |
aoqi@1 | 16 | * You should have received a copy of the GNU General Public License version |
aoqi@1 | 17 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@1 | 18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@1 | 19 | * |
aoqi@1 | 20 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@1 | 21 | * or visit www.oracle.com if you need additional information or have any |
aoqi@1 | 22 | * questions. |
aoqi@1 | 23 | * |
aoqi@1 | 24 | */ |
aoqi@1 | 25 | |
aoqi@1 | 26 | #include "precompiled.hpp" |
aoqi@1 | 27 | #include "asm/macroAssembler.hpp" |
aoqi@1 | 28 | #include "asm/macroAssembler.inline.hpp" |
aoqi@1 | 29 | #include "interpreter/interpreter.hpp" |
aoqi@1 | 30 | #include "nativeInst_mips.hpp" |
aoqi@1 | 31 | #include "oops/instanceOop.hpp" |
aoqi@1 | 32 | #include "oops/method.hpp" |
aoqi@1 | 33 | #include "oops/objArrayKlass.hpp" |
aoqi@1 | 34 | #include "oops/oop.inline.hpp" |
aoqi@1 | 35 | #include "prims/methodHandles.hpp" |
aoqi@1 | 36 | #include "runtime/frame.inline.hpp" |
aoqi@1 | 37 | #include "runtime/handles.inline.hpp" |
aoqi@1 | 38 | #include "runtime/sharedRuntime.hpp" |
aoqi@1 | 39 | #include "runtime/stubCodeGenerator.hpp" |
aoqi@1 | 40 | #include "runtime/stubRoutines.hpp" |
aoqi@1 | 41 | #include "runtime/thread.inline.hpp" |
aoqi@1 | 42 | #include "utilities/top.hpp" |
aoqi@1 | 43 | #ifdef COMPILER2 |
aoqi@1 | 44 | #include "opto/runtime.hpp" |
aoqi@1 | 45 | #endif |
aoqi@1 | 46 | |
aoqi@1 | 47 | |
aoqi@1 | 48 | // Declaration and definition of StubGenerator (no .hpp file). |
aoqi@1 | 49 | // For a more detailed description of the stub routine structure |
aoqi@1 | 50 | // see the comment in stubRoutines.hpp |
aoqi@1 | 51 | |
aoqi@1 | 52 | #define __ _masm-> |
aoqi@1 | 53 | //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) |
aoqi@1 | 54 | //#define a__ ((Assembler*)_masm)-> |
aoqi@1 | 55 | |
aoqi@1 | 56 | //#ifdef PRODUCT |
aoqi@1 | 57 | //#define BLOCK_COMMENT(str) /* nothing */ |
aoqi@1 | 58 | //#else |
aoqi@1 | 59 | //#define BLOCK_COMMENT(str) __ block_comment(str) |
aoqi@1 | 60 | //#endif |
aoqi@1 | 61 | |
aoqi@1 | 62 | //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") |
aoqi@1 | 63 | const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions. NOTE(review): MXCSR is an x86 SSE register name and no use of this constant is visible in this part of the file - presumably a leftover from the x86 port; confirm before removing. |
aoqi@1 | 64 | |
aoqi@1 | 65 | // Stub Code definitions |
aoqi@1 | 66 | |
aoqi@1 | 67 | static address handle_unsafe_access() { |
aoqi@1 | 68 | JavaThread* thread = JavaThread::current(); |
aoqi@1 | 69 | address pc = thread->saved_exception_pc(); |
aoqi@1 | 70 | // Runtime helper called by the handler_for_unsafe_access stub. pc is the faulting instruction which we must emulate; |
aoqi@1 | 71 | // doing a no-op is fine (return garbage from the load), therefore execution resumes at the next instruction. |
aoqi@1 | 72 | // MIPS instructions are 4 bytes wide, so npc = pc + 4; the former stride sizeof(unsigned long) is 8 on LP64 and skipped one instruction too many. |
aoqi@1 | 73 | //address npc = Assembler::locate_next_instruction(pc); |
aoqi@1 | 74 | address npc = (address)((unsigned long)pc + sizeof(unsigned int)); |
aoqi@1 | 75 | |
aoqi@1 | 76 | // request an async exception on the current thread |
aoqi@1 | 77 | thread->set_pending_unsafe_access_error(); |
aoqi@1 | 78 | |
aoqi@1 | 79 | // return the address of the next instruction to execute |
aoqi@1 | 80 | return npc; |
aoqi@1 | 81 | } |
aoqi@1 | 82 | |
aoqi@1 | 83 | class StubGenerator: public StubCodeGenerator { |
aoqi@1 | 84 | private: |
aoqi@1 | 85 | |
aoqi@1 | 86 | // ABI mips n64 |
aoqi@1 | 87 | // This figure is not the MIPS ABI itself; it shows the stack layout used when calling Java from C. |
aoqi@1 | 88 | // Call stubs are used to call Java from C |
aoqi@1 | 89 | // |
aoqi@1 | 90 | // [ return_from_Java ] |
aoqi@1 | 91 | // [ argument word n-1 ] <--- sp |
aoqi@1 | 92 | // ... |
aoqi@1 | 93 | // [ argument word 0 ] |
aoqi@1 | 94 | // ... |
aoqi@1 | 95 | //-10 [ S6 ] |
aoqi@1 | 96 | // -9 [ S5 ] |
aoqi@1 | 97 | // -8 [ S4 ] |
aoqi@1 | 98 | // -7 [ S3 ] |
aoqi@1 | 99 | // -6 [ S0 ] |
aoqi@1 | 100 | // -5 [ TSR(S2) ] |
aoqi@1 | 101 | // -4 [ LVP(S7) ] |
aoqi@1 | 102 | // -3 [ BCP(S1) ] |
aoqi@1 | 103 | // -2 [ saved fp ] <--- fp_after_call |
aoqi@1 | 104 | // -1 [ return address ] |
aoqi@1 | 105 | // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp |
aoqi@1 | 106 | // 1 [ result ] <--- a1 |
aoqi@1 | 107 | // 2 [ result_type ] <--- a2 |
aoqi@1 | 108 | // 3 [ method ] <--- a3 |
aoqi@1 | 109 | // 4 [ entry_point ] <--- a4 |
aoqi@1 | 110 | // 5 [ parameters ] <--- a5 |
aoqi@1 | 111 | // 6 [ parameter_size ] <--- a6 |
aoqi@1 | 112 | // 7 [ thread ] <--- a7 |
aoqi@1 | 113 | |
aoqi@1 | 114 | // |
aoqi@1 | 115 | // _LP64: n64 does not save paras in sp. |
aoqi@1 | 116 | // |
aoqi@1 | 117 | // [ return_from_Java ] |
aoqi@1 | 118 | // [ argument word n-1 ] <--- sp |
aoqi@1 | 119 | // ... |
aoqi@1 | 120 | // [ argument word 0 ] |
aoqi@1 | 121 | // ... |
aoqi@1 | 122 | //-14 [ thread ] |
aoqi@1 | 123 | //-13 [ result_type ] <--- a2 |
aoqi@1 | 124 | //-12 [ result ] <--- a1 |
aoqi@1 | 125 | //-11 [ ptr. to call wrapper ] <--- a0 |
aoqi@1 | 126 | //-10 [ S6 ] |
aoqi@1 | 127 | // -9 [ S5 ] |
aoqi@1 | 128 | // -8 [ S4 ] |
aoqi@1 | 129 | // -7 [ S3 ] |
aoqi@1 | 130 | // -6 [ S0 ] |
aoqi@1 | 131 | // -5 [ TSR(S2) ] |
aoqi@1 | 132 | // -4 [ LVP(S7) ] |
aoqi@1 | 133 | // -3 [ BCP(S1) ] |
aoqi@1 | 134 | // -2 [ saved fp ] <--- fp_after_call |
aoqi@1 | 135 | // -1 [ return address ] |
aoqi@1 | 136 | // 0 [ ] <--- old sp |
aoqi@1 | 137 | /* |
aoqi@1 | 138 | * 2014/01/16 Fu: Find a right place in the call_stub for GP. |
aoqi@1 | 139 | * GP will point to the starting point of Interpreter::dispatch_table(itos). |
aoqi@1 | 140 | * It should be saved/restored before/after Java calls. |
aoqi@1 | 141 | * |
aoqi@1 | 142 | */ |
aoqi@1 | 143 | enum call_stub_layout { /* Slot offsets, in words, used by generate_call_stub and generate_catch_exception. */ |
aoqi@1 | 144 | RA_off = -1, /* RA_off through S6_off (and GP_off) are applied to the incoming SP by generate_call_stub. */ |
aoqi@1 | 145 | FP_off = -2, |
aoqi@1 | 146 | BCP_off = -3, |
aoqi@1 | 147 | LVP_off = -4, |
aoqi@1 | 148 | TSR_off = -5, |
aoqi@1 | 149 | S1_off = -6, /* NOTE(review): the layout diagram above labels slot -6 as S0, while the stub saves S1 here - confirm which is intended. */ |
aoqi@1 | 150 | S3_off = -7, |
aoqi@1 | 151 | S4_off = -8, |
aoqi@1 | 152 | S5_off = -9, |
aoqi@1 | 153 | S6_off = -10, |
aoqi@1 | 154 | result_off = -11, /* result_off, result_type_off and thread_off are applied to FP (incoming SP - 2 words), not to SP. */ |
aoqi@1 | 155 | result_type_off = -12, |
aoqi@1 | 156 | thread_off = -13, |
aoqi@1 | 157 | total_off = thread_off - 3, /* = -16; generate_call_stub moves SP down by this many words. */ |
aoqi@1 | 158 | GP_off = -16, /* Equals total_off, so the GP slot is the word at the adjusted SP. NOTE(review): confirm this coincidence is intended. */ |
aoqi@1 | 159 | }; |
aoqi@1 | 160 | |
aoqi@1 | 161 | address generate_call_stub(address& return_address) { |
aoqi@1 | 162 | // Emits the stub used to call Java from C and returns its entry address. Incoming registers as used below: A0 call wrapper, A1 result address, A2 result type, A3 method, A4 entry point, A5 parameters, A6 parameter count, A7 thread. return_address receives the pc right after the jalr into Java. |
aoqi@1 | 163 | StubCodeMark mark(this, "StubRoutines", "call_stub"); |
aoqi@1 | 164 | address start = __ pc(); |
aoqi@1 | 165 | |
aoqi@1 | 166 | // same as in generate_catch_exception()! |
aoqi@1 | 167 | |
aoqi@1 | 168 | // stub code |
aoqi@1 | 169 | // save RA, FP and the other preserved registers below the incoming SP (offsets from call_stub_layout) |
aoqi@1 | 170 | __ sd(RA, SP, RA_off * wordSize); |
aoqi@1 | 171 | __ sd(FP, SP, FP_off * wordSize); |
aoqi@1 | 172 | __ sd(BCP, SP, BCP_off * wordSize); |
aoqi@1 | 173 | __ sd(LVP, SP, LVP_off * wordSize); |
aoqi@1 | 174 | __ sd(GP, SP, GP_off * wordSize); |
aoqi@1 | 175 | __ sd(TSR, SP, TSR_off * wordSize); |
aoqi@1 | 176 | __ sd(S1, SP, S1_off * wordSize); |
aoqi@1 | 177 | __ sd(S3, SP, S3_off * wordSize); |
aoqi@1 | 178 | __ sd(S4, SP, S4_off * wordSize); |
aoqi@1 | 179 | __ sd(S5, SP, S5_off * wordSize); |
aoqi@1 | 180 | __ sd(S6, SP, S6_off * wordSize); |
aoqi@1 | 181 | |
aoqi@1 | 182 | // GP is loaded with the address of Interpreter::dispatch_table(itos); its old value was saved above and is restored on exit |
aoqi@1 | 183 | __ li48(GP, (long)Interpreter::dispatch_table(itos)); |
aoqi@1 | 184 | |
aoqi@1 | 185 | // I think 14 is the max gap between argument and callee saved register |
aoqi@1 | 186 | __ daddi(FP, SP, (-2) * wordSize); |
aoqi@1 | 187 | __ daddi(SP, SP, total_off * wordSize); |
aoqi@1 | 188 | //FIXME, aoqi. find a suitable place to save A1 & A2. |
aoqi@1 | 189 | /* |
aoqi@1 | 190 | __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize); |
aoqi@1 | 191 | __ sd(A1, FP, 3 * wordSize); |
aoqi@1 | 192 | __ sd(A2, FP, 4 * wordSize); |
aoqi@1 | 193 | __ sd(A3, FP, 5 * wordSize); |
aoqi@1 | 194 | __ sd(A4, FP, 6 * wordSize); |
aoqi@1 | 195 | __ sd(A5, FP, 7 * wordSize); |
aoqi@1 | 196 | __ sd(A6, FP, 8 * wordSize); |
aoqi@1 | 197 | __ sd(A7, FP, 9 * wordSize); |
aoqi@1 | 198 | */ |
aoqi@1 | 199 | __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize); |
aoqi@1 | 200 | __ sd(A1, FP, result_off * wordSize); |
aoqi@1 | 201 | __ sd(A2, FP, result_type_off * wordSize); |
aoqi@1 | 202 | __ sd(A7, FP, thread_off * wordSize); |
aoqi@1 | 203 | |
aoqi@1 | 204 | #ifdef OPT_THREAD |
aoqi@1 | 205 | //__ get_thread(TREG); |
aoqi@1 | 206 | __ move(TREG, A7); |
aoqi@1 | 207 | |
aoqi@1 | 208 | //__ ld(TREG, FP, thread_off * wordSize); |
aoqi@1 | 209 | #endif |
aoqi@1 | 210 | //add for compressedoops |
aoqi@1 | 211 | __ reinit_heapbase(); |
aoqi@1 | 212 | |
aoqi@1 | 213 | #ifdef ASSERT |
aoqi@1 | 214 | // make sure we have no pending exceptions |
aoqi@1 | 215 | { |
aoqi@1 | 216 | Label L; |
aoqi@1 | 217 | __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); |
aoqi@1 | 218 | __ beq(AT, R0, L); |
aoqi@1 | 219 | __ delayed()->nop(); |
aoqi@1 | 220 | /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ |
aoqi@1 | 221 | __ stop("StubRoutines::call_stub: entered with pending exception"); |
aoqi@1 | 222 | __ bind(L); |
aoqi@1 | 223 | } |
aoqi@1 | 224 | #endif |
aoqi@1 | 225 | |
aoqi@1 | 226 | // pass parameters if any |
aoqi@1 | 227 | // A5: parameter |
aoqi@1 | 228 | // A6: parameter_size |
aoqi@1 | 229 | // T0: parameter_size_tmp(--) |
aoqi@1 | 230 | // T2: offset(++) |
aoqi@1 | 231 | // T3: tmp |
aoqi@1 | 232 | Label parameters_done; |
aoqi@1 | 233 | // skip the copy entirely when parameter_size is 0 |
aoqi@1 | 234 | __ beq(A6, R0, parameters_done); |
aoqi@1 | 235 | __ delayed()->nop(); |
aoqi@1 | 236 | __ dsll(AT, A6, Interpreter::logStackElementSize); |
aoqi@1 | 237 | __ dsub(SP, SP, AT); |
aoqi@1 | 238 | __ move(AT, -StackAlignmentInBytes); |
aoqi@1 | 239 | __ andr(SP, SP , AT); |
aoqi@1 | 240 | // Copy Java parameters in reverse order (receiver last) |
aoqi@1 | 241 | // Note that the argument order is inverted in the process |
aoqi@1 | 242 | // source is A5[T0 - 1], with T0 counting down from N to 1 |
aoqi@1 | 243 | // dest is SP[T2], with T2 counting up from 0 to N-1 |
aoqi@1 | 244 | Label loop; |
aoqi@1 | 245 | __ move(T0, A6); |
aoqi@1 | 246 | __ move(T2, R0); |
aoqi@1 | 247 | __ bind(loop); |
aoqi@1 | 248 | |
aoqi@1 | 249 | // get parameter |
aoqi@1 | 250 | __ dsll(T3, T0, LogBytesPerWord); |
aoqi@1 | 251 | __ dadd(T3, T3, A5); |
aoqi@1 | 252 | __ ld(AT, T3, -wordSize); |
aoqi@1 | 253 | __ dsll(T3, T2, LogBytesPerWord); |
aoqi@1 | 254 | __ dadd(T3, T3, SP); |
aoqi@1 | 255 | __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); |
aoqi@1 | 256 | __ daddi(T2, T2, 1); |
aoqi@1 | 257 | __ daddi(T0, T0, -1); |
aoqi@1 | 258 | __ bne(T0, R0, loop); |
aoqi@1 | 259 | __ delayed()->nop(); |
aoqi@1 | 260 | // advance to next parameter |
aoqi@1 | 261 | |
aoqi@1 | 262 | // call Java function |
aoqi@1 | 263 | __ bind(parameters_done); |
aoqi@1 | 264 | |
aoqi@1 | 265 | // method in Rmethod, sender sp in Rsender |
aoqi@1 | 266 | |
aoqi@1 | 267 | __ move(Rmethod, A3); |
aoqi@1 | 268 | __ move(Rsender, SP); //set sender sp |
aoqi@1 | 269 | __ jalr(A4); |
aoqi@1 | 270 | __ delayed()->nop(); |
aoqi@1 | 271 | return_address = __ pc(); |
aoqi@1 | 272 | |
aoqi@1 | 273 | Label common_return; |
aoqi@1 | 274 | __ bind(common_return); |
aoqi@1 | 275 | |
aoqi@1 | 276 | // store result depending on type |
aoqi@1 | 277 | // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) |
aoqi@1 | 278 | __ ld(T0, FP, result_off * wordSize); // result --> T0 |
aoqi@1 | 279 | Label is_long, is_float, is_double, exit; |
aoqi@1 | 280 | __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 |
aoqi@1 | 281 | __ daddi(T3, T2, (-1) * T_LONG); |
aoqi@1 | 282 | __ beq(T3, R0, is_long); |
aoqi@1 | 283 | __ delayed()->daddi(T3, T2, (-1) * T_FLOAT); |
aoqi@1 | 284 | __ beq(T3, R0, is_float); |
aoqi@1 | 285 | __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE); |
aoqi@1 | 286 | __ beq(T3, R0, is_double); |
aoqi@1 | 287 | __ delayed()->nop(); |
aoqi@1 | 288 | |
aoqi@1 | 289 | // handle T_INT case |
aoqi@1 | 290 | __ sd(V0, T0, 0 * wordSize); |
aoqi@1 | 291 | __ bind(exit); |
aoqi@1 | 292 | |
aoqi@1 | 293 | // restore: FP + 2 words is the SP this stub was entered with |
aoqi@1 | 294 | __ daddi(SP, FP, 2 * wordSize ); |
aoqi@1 | 295 | __ ld(RA, SP, RA_off * wordSize); |
aoqi@1 | 296 | __ ld(FP, SP, FP_off * wordSize); |
aoqi@1 | 297 | __ ld(BCP, SP, BCP_off * wordSize); |
aoqi@1 | 298 | __ ld(LVP, SP, LVP_off * wordSize); |
aoqi@1 | 299 | __ ld(GP, SP, GP_off * wordSize); |
aoqi@1 | 300 | __ ld(TSR, SP, TSR_off * wordSize); |
aoqi@1 | 301 | |
aoqi@1 | 302 | __ ld(S1, SP, S1_off * wordSize); |
aoqi@1 | 303 | __ ld(S3, SP, S3_off * wordSize); |
aoqi@1 | 304 | __ ld(S4, SP, S4_off * wordSize); |
aoqi@1 | 305 | __ ld(S5, SP, S5_off * wordSize); |
aoqi@1 | 306 | __ ld(S6, SP, S6_off * wordSize); |
aoqi@1 | 307 | |
aoqi@1 | 308 | // return |
aoqi@1 | 309 | __ jr(RA); |
aoqi@1 | 310 | __ delayed()->nop(); |
aoqi@1 | 311 | |
aoqi@1 | 312 | // handle return types different from T_INT |
aoqi@1 | 313 | __ bind(is_long); |
aoqi@1 | 314 | __ sd(V0, T0, 0 * wordSize); |
aoqi@1 | 315 | //__ sd(V1, T0, 1 * wordSize); |
aoqi@1 | 316 | // The 64-bit sd above stores the whole jlong. The former extra zero store to T0 + wordSize wrote past the 8-byte value and was removed. |
aoqi@1 | 317 | __ b(exit); |
aoqi@1 | 318 | __ delayed()->nop(); |
aoqi@1 | 319 | |
aoqi@1 | 320 | __ bind(is_float); |
aoqi@1 | 321 | __ swc1(F0, T0, 0 * wordSize); |
aoqi@1 | 322 | __ b(exit); |
aoqi@1 | 323 | __ delayed()->nop(); |
aoqi@1 | 324 | |
aoqi@1 | 325 | __ bind(is_double); |
aoqi@1 | 326 | __ sdc1(F0, T0, 0 * wordSize); |
aoqi@1 | 327 | //__ sdc1(F1, T0, 1 * wordSize); |
aoqi@1 | 328 | // sdc1 stores the whole 8-byte double; as for T_LONG, the former zero store to T0 + wordSize was removed. |
aoqi@1 | 329 | __ b(exit); |
aoqi@1 | 330 | __ delayed()->nop(); |
aoqi@1 | 331 | //FIXME, 1.6 mips version add operation of fpu here |
aoqi@1 | 332 | StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); |
aoqi@1 | 333 | __ b(common_return); |
aoqi@1 | 334 | __ delayed()->nop(); |
aoqi@1 | 335 | return start; |
aoqi@1 | 336 | } |
aoqi@1 | 337 | |
aoqi@1 | 338 | // Return point for a Java call if there's an exception thrown in |
aoqi@1 | 339 | // Java code. The exception is caught and transformed into a |
aoqi@1 | 340 | // pending exception stored in JavaThread that can be tested from |
aoqi@1 | 341 | // within the VM. |
aoqi@1 | 342 | // |
aoqi@1 | 343 | // Note: Usually the parameters are removed by the callee. In case |
aoqi@1 | 344 | // of an exception crossing an activation frame boundary, that is |
aoqi@1 | 345 | // not the case if the callee is compiled code => need to set up the |
aoqi@1 | 346 | // stack pointer. |
aoqi@1 | 347 | // |
aoqi@1 | 348 | // V0: exception oop |
aoqi@1 | 349 | |
aoqi@1 | 350 | address generate_catch_exception() { |
aoqi@1 | 351 | StubCodeMark mark(this, "StubRoutines", "catch_exception"); |
aoqi@1 | 352 | address start = __ pc(); |
aoqi@1 | 353 | /* Emits the stub that records the exception oop in V0 as the thread's pending exception and then jumps to the call stub's return address. Returns the stub's entry address. */ |
aoqi@1 | 354 | Register thread = TREG; |
aoqi@1 | 355 | |
aoqi@1 | 356 | // get thread directly: without OPT_THREAD it is reloaded from the FP-relative thread_off slot that generate_call_stub filled from A7 |
aoqi@1 | 357 | #ifndef OPT_THREAD |
aoqi@1 | 358 | __ ld(thread, FP, thread_off * wordSize); |
aoqi@1 | 359 | #endif |
aoqi@1 | 360 | |
aoqi@1 | 361 | #ifdef ASSERT |
aoqi@1 | 362 | // verify that the thread register matches what get_thread() reports |
aoqi@1 | 363 | { Label L; |
aoqi@1 | 364 | __ get_thread(T8); |
aoqi@1 | 365 | __ beq(T8, thread, L); |
aoqi@1 | 366 | __ delayed()->nop(); |
aoqi@1 | 367 | __ stop("StubRoutines::catch_exception: threads must correspond"); |
aoqi@1 | 368 | __ bind(L); |
aoqi@1 | 369 | } |
aoqi@1 | 370 | #endif |
aoqi@1 | 371 | // set pending exception (V0), together with this source file and line as the exception file/line |
aoqi@1 | 372 | __ verify_oop(V0); |
aoqi@1 | 373 | __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); |
aoqi@1 | 374 | __ li(AT, (long)__FILE__); |
aoqi@1 | 375 | __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); |
aoqi@1 | 376 | __ li(AT, (long)__LINE__); |
aoqi@1 | 377 | __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); |
aoqi@1 | 378 | |
aoqi@1 | 379 | // complete return to VM by jumping to the call stub's recorded return address (must already be generated) |
aoqi@1 | 380 | assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); |
aoqi@1 | 381 | __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); |
aoqi@1 | 382 | __ delayed()->nop(); |
aoqi@1 | 383 | |
aoqi@1 | 384 | return start; |
aoqi@1 | 385 | } |
aoqi@1 | 386 | |
aoqi@1 | 387 | // Continuation point for runtime calls returning with a pending |
aoqi@1 | 388 | // exception. The pending exception check happened in the runtime |
aoqi@1 | 389 | // or native call stub. The pending exception in Thread is |
aoqi@1 | 390 | // converted into a Java-level exception. |
aoqi@1 | 391 | // |
aoqi@1 | 392 | // Contract with Java-level exception handlers: |
aoqi@1 | 393 | // V0: exception |
aoqi@1 | 394 | // V1: throwing pc |
aoqi@1 | 395 | // |
aoqi@1 | 396 | // NOTE: At entry of this stub, exception-pc must be on stack !! |
aoqi@1 | 397 | |
aoqi@1 | 398 | address generate_forward_exception() { |
aoqi@1 | 399 | StubCodeMark mark(this, "StubRoutines", "forward exception"); |
aoqi@1 | 400 | //Register thread = TREG; |
aoqi@1 | 401 | Register thread = TREG; |
aoqi@1 | 402 | address start = __ pc(); |
aoqi@1 | 403 | /* Emits the stub that turns the thread's pending exception into a jump to the handler for the return address on top of the stack. Returns the stub's entry address. */ |
aoqi@1 | 404 | // Upon entry, the sp points to the return address returning into Java |
aoqi@1 | 405 | // (interpreted or compiled) code; i.e., the return address becomes the |
aoqi@1 | 406 | // throwing pc. |
aoqi@1 | 407 | // |
aoqi@1 | 408 | // Arguments pushed before the runtime call are still on the stack but |
aoqi@1 | 409 | // the exception handler will reset the stack pointer -> ignore them. |
aoqi@1 | 410 | // A potential result in registers can be ignored as well. |
aoqi@1 | 411 | |
aoqi@1 | 412 | #ifdef ASSERT |
aoqi@1 | 413 | // make sure this code is only executed if there is a pending exception |
aoqi@1 | 414 | #ifndef OPT_THREAD |
aoqi@1 | 415 | __ get_thread(thread); |
aoqi@1 | 416 | #endif |
aoqi@1 | 417 | { Label L; |
aoqi@1 | 418 | __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); |
aoqi@1 | 419 | __ bne(AT, R0, L); |
aoqi@1 | 420 | __ delayed()->nop(); |
aoqi@1 | 421 | __ stop("StubRoutines::forward exception: no pending exception (1)"); |
aoqi@1 | 422 | __ bind(L); |
aoqi@1 | 423 | } |
aoqi@1 | 424 | #endif |
aoqi@1 | 425 | |
aoqi@1 | 426 | // compute exception handler into T9: look up the handler for the return address at the top of the stack, then pop that address into V1 (the throwing pc) |
aoqi@1 | 427 | __ ld(A1, SP, 0); |
aoqi@1 | 428 | __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); |
aoqi@1 | 429 | __ move(T9, V0); |
aoqi@1 | 430 | __ pop(V1); |
aoqi@1 | 431 | /* Load the pending exception into V0 and clear the thread's pending-exception field. */ |
aoqi@1 | 432 | #ifndef OPT_THREAD |
aoqi@1 | 433 | __ get_thread(thread); |
aoqi@1 | 434 | #endif |
aoqi@1 | 435 | __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); |
aoqi@1 | 436 | __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); |
aoqi@1 | 437 | |
aoqi@1 | 438 | #ifdef ASSERT |
aoqi@1 | 439 | // make sure exception is set |
aoqi@1 | 440 | { Label L; |
aoqi@1 | 441 | __ bne(V0, R0, L); |
aoqi@1 | 442 | __ delayed()->nop(); |
aoqi@1 | 443 | __ stop("StubRoutines::forward exception: no pending exception (2)"); |
aoqi@1 | 444 | __ bind(L); |
aoqi@1 | 445 | } |
aoqi@1 | 446 | #endif |
aoqi@1 | 447 | |
aoqi@1 | 448 | // continue at exception handler (return address removed) |
aoqi@1 | 449 | // V0: exception |
aoqi@1 | 450 | // T9: exception handler |
aoqi@1 | 451 | // V1: throwing pc |
aoqi@1 | 452 | __ verify_oop(V0); |
aoqi@1 | 453 | __ jr(T9); |
aoqi@1 | 454 | __ delayed()->nop(); |
aoqi@1 | 455 | |
aoqi@1 | 456 | return start; |
aoqi@1 | 457 | } |
aoqi@1 | 458 | |
aoqi@1 | 459 | // Support for intptr_t get_previous_fp() |
aoqi@1 | 460 | // |
aoqi@1 | 461 | // This routine is used to find the previous frame pointer for the |
aoqi@1 | 462 | // caller (current_frame_guess). This is used as part of debugging |
aoqi@1 | 463 | // ps() is seemingly lost trying to find frames. |
aoqi@1 | 464 | // This code assumes that caller current_frame_guess) has a frame. |
aoqi@1 | 465 | address generate_get_previous_fp() { |
aoqi@1 | 466 | StubCodeMark mark(this, "StubRoutines", "get_previous_fp"); |
aoqi@1 | 467 | // Emits a stub that follows two frame-pointer links starting at FP and returns the result in V0. Returns the stub's entry address. |
aoqi@1 | 468 | // Frame pointers are 64-bit on this n64 target, so they are loaded with ld; the former lw read only 32 bits of each pointer. |
aoqi@1 | 469 | address start = __ pc(); |
aoqi@1 | 470 | __ enter(); |
aoqi@1 | 471 | __ ld(V0, FP, 0); // callers fp |
aoqi@1 | 472 | __ ld(V0, V0, 0); // the frame for ps() |
aoqi@1 | 473 | __ leave(); |
aoqi@1 | 474 | __ jr(RA); |
aoqi@1 | 475 | __ delayed()->nop(); |
aoqi@1 | 476 | return start; |
aoqi@1 | 477 | } |
aoqi@1 | 478 | // The following routine generates a subroutine to throw an |
aoqi@1 | 479 | // asynchronous UnknownError when an unsafe access gets a fault that |
aoqi@1 | 480 | // could not be reasonably prevented by the programmer. (Example: |
aoqi@1 | 481 | // SIGBUS/OBJERR.) |
aoqi@1 | 482 | address generate_handler_for_unsafe_access() { |
aoqi@1 | 483 | StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); |
aoqi@1 | 484 | address start = __ pc(); |
aoqi@1 | 485 | __ pushad(); // push registers |
aoqi@1 | 486 | // Call handle_unsafe_access() and store the continuation address it returns (V0) into the word just above the pushed registers. |
aoqi@1 | 487 | __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); |
aoqi@1 | 488 | __ delayed()->nop(); |
aoqi@1 | 489 | __ sd(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); // full-width store: the slot is BytesPerWord wide and V0 is a 64-bit address (was a 32-bit sw) |
aoqi@1 | 490 | __ popad(); |
aoqi@1 | 491 | __ jr(RA); |
aoqi@1 | 492 | __ delayed()->nop(); |
aoqi@1 | 493 | return start; |
aoqi@1 | 494 | } |
aoqi@1 | 495 | |
aoqi@1 | 496 | // Non-destructive plausibility checks for oops |
aoqi@1 | 497 | // |
aoqi@1 | 498 | // Arguments: |
aoqi@1 | 499 | // all args on stack! |
aoqi@1 | 500 | // |
aoqi@1 | 501 | // Stack after saving c_rarg3: |
aoqi@1 | 502 | // [tos + 0]: saved c_rarg3 |
aoqi@1 | 503 | // [tos + 1]: saved c_rarg2 |
aoqi@1 | 504 | // [tos + 2]: saved r12 (several TemplateTable methods use it) |
aoqi@1 | 505 | // [tos + 3]: saved flags |
aoqi@1 | 506 | // [tos + 4]: return address |
aoqi@1 | 507 | // * [tos + 5]: error message (char*) |
aoqi@1 | 508 | // * [tos + 6]: object to verify (oop) |
aoqi@1 | 509 | // * [tos + 7]: saved rax - saved by caller and bashed |
aoqi@1 | 510 | // * = popped on exit |
aoqi@1 | 511 | address generate_verify_oop() { |
aoqi@1 | 512 | StubCodeMark mark(this, "StubRoutines", "verify_oop"); |
aoqi@1 | 513 | address start = __ pc(); |
aoqi@1 | 514 | __ reinit_heapbase(); |
aoqi@1 | 515 | __ verify_oop_subroutine(); |
aoqi@1 | 516 | // Only the entry address is returned; the end pc that used to be captured in an unused local is no longer taken. |
aoqi@1 | 517 | return start; |
aoqi@1 | 518 | } |
aoqi@1 | 519 | |
aoqi@1 | 520 | // |
aoqi@1 | 521 | // Generate overlap test for array copy stubs |
aoqi@1 | 522 | // |
aoqi@1 | 523 | // Input: |
aoqi@1 | 524 | // A0 - array1 |
aoqi@1 | 525 | // A1 - array2 |
aoqi@1 | 526 | // A2 - element count |
aoqi@1 | 527 | // |
aoqi@1 | 528 | // Note: this code reads A0, A1 and A2 and clobbers only AT and T9 |
aoqi@1 | 529 | // |
aoqi@1 | 530 | |
aoqi@1 | 531 | // use T9 as temp |
aoqi@1 | 532 | void array_overlap_test(address no_overlap_target, int log2_elem_size) { |
aoqi@1 | 533 | int elem_size = 1 << log2_elem_size; |
aoqi@1 | 534 | Address::ScaleFactor sf = Address::times_1; |
aoqi@1 | 535 | /* Emits a test on A0 (array1), A1 (array2) and A2 (element count) that branches to no_overlap_target in the cases below and falls through otherwise. log2_elem_size values other than 0..3 leave sf at times_1. */ |
aoqi@1 | 536 | switch (log2_elem_size) { |
aoqi@1 | 537 | case 0: sf = Address::times_1; break; |
aoqi@1 | 538 | case 1: sf = Address::times_2; break; |
aoqi@1 | 539 | case 2: sf = Address::times_4; break; |
aoqi@1 | 540 | case 3: sf = Address::times_8; break; |
aoqi@1 | 541 | } |
aoqi@1 | 542 | |
aoqi@1 | 543 | __ dsll(AT, A2, sf); /* the ScaleFactor value is used as the shift amount - assumes times_N equals log2(N), TODO confirm */ |
aoqi@1 | 544 | __ dadd(AT, AT, A0); /* AT = A0 + scaled element count */ |
aoqi@1 | 545 | __ lea(T9, Address(AT, -elem_size)); /* T9 = address of the last element of array1 */ |
aoqi@1 | 546 | __ dsub(AT, A1, A0); |
aoqi@1 | 547 | __ blez(AT, no_overlap_target); /* A1 - A0 <= 0 (signed): no overlap */ |
aoqi@1 | 548 | __ delayed()->nop(); |
aoqi@1 | 549 | __ dsub(AT, A1, T9); |
aoqi@1 | 550 | __ bgtz(AT, no_overlap_target); /* A1 - T9 > 0 (signed): array2 starts after the last element of array1 */ |
aoqi@1 | 551 | __ delayed()->nop(); |
aoqi@1 | 552 | |
aoqi@8 | 553 | // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then go to no_overlap_target (A0 negative and A1 positive when viewed as signed values) |
aoqi@8 | 554 | Label L; |
aoqi@8 | 555 | __ bgez(A0, L); |
aoqi@8 | 556 | __ delayed()->nop(); |
aoqi@8 | 557 | __ bgtz(A1, no_overlap_target); |
aoqi@8 | 558 | __ delayed()->nop(); |
aoqi@8 | 559 | __ bind(L); |
aoqi@8 | 560 | |
aoqi@1 | 561 | } |
aoqi@1 | 562 | |
aoqi@1 | 563 | // |
aoqi@1 | 564 | // Generate store check for array |
aoqi@1 | 565 | // |
aoqi@1 | 566 | // Input: |
aoqi@1 | 567 | // %edi - starting address |
aoqi@1 | 568 | // %ecx - element count |
aoqi@1 | 569 | // |
aoqi@1 | 570 | // The 2 input registers are overwritten |
aoqi@1 | 571 | // |
aoqi@1 | 572 | |
aoqi@1 | 573 | // |
aoqi@1 | 574 | // Generate store check for array |
aoqi@1 | 575 | // |
aoqi@1 | 576 | // Input: |
aoqi@1 | 577 | // T0 - starting address(edi) |
aoqi@1 | 578 | // T1 - element count (ecx) |
aoqi@1 | 579 | // |
aoqi@1 | 580 | // The 2 input registers are overwritten |
aoqi@1 | 581 | // |
aoqi@1 | 582 | |
aoqi@1 | 583 | #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) |
aoqi@1 | 584 | |
aoqi@1 | 585 | void array_store_check() { |
aoqi@1 | 586 | BarrierSet* bs = Universe::heap()->barrier_set(); |
aoqi@1 | 587 | assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); |
aoqi@1 | 588 | CardTableModRefBS* ct = (CardTableModRefBS*)bs; |
aoqi@1 | 589 | assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); |
aoqi@1 | 590 | Label l_0; |
aoqi@1 | 591 | /* Emits code that writes a zero byte to every card-table entry covering the oop array starting at T0 with T1 elements. Both T0 and T1 are overwritten; AT is used as scratch. */ |
aoqi@1 | 592 | __ dsll(AT, T1, TIMES_OOP); |
aoqi@1 | 593 | __ dadd(AT, T0, AT); |
aoqi@1 | 594 | __ daddiu(T1, AT, - BytesPerHeapOop); /* T1 = address of the last element */ |
aoqi@1 | 595 | /* Convert the first and last element addresses into card indices. */ |
aoqi@1 | 596 | __ shr(T0, CardTableModRefBS::card_shift); |
aoqi@1 | 597 | __ shr(T1, CardTableModRefBS::card_shift); |
aoqi@1 | 598 | |
aoqi@1 | 599 | __ dsub(T1, T1, T0); // T1 = last card index - first card index; the loop below counts it down to 0 |
aoqi@1 | 600 | __ bind(l_0); |
aoqi@1 | 601 | /* NOTE(review): byte_map_base is re-materialized into AT on every iteration because AT also holds the entry address. */ |
aoqi@1 | 602 | __ li48(AT, (long)ct->byte_map_base); |
aoqi@1 | 603 | __ dadd(AT, AT, T0); |
aoqi@1 | 604 | __ dadd(AT, AT, T1); |
aoqi@1 | 605 | __ sb(R0, AT, 0); /* presumably zero is the dirty-card value - TODO confirm */ |
aoqi@1 | 606 | //__ daddi(T1, T1, -4); |
aoqi@1 | 607 | __ daddi(T1, T1, - 1); |
aoqi@1 | 608 | __ bgez(T1, l_0); |
aoqi@1 | 609 | __ delayed()->nop(); |
aoqi@1 | 610 | } |
aoqi@1 | 611 | |
aoqi@1 | 612 | // Arguments: |
aoqi@1 | 613 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 614 | // ignored |
aoqi@1 | 615 | // name - stub name string |
aoqi@1 | 616 | // |
aoqi@1 | 617 | // Inputs: |
aoqi@1 | 618 | // c_rarg0 - source array address |
aoqi@1 | 619 | // c_rarg1 - destination array address |
aoqi@1 | 620 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 621 | // |
aoqi@1 | 622 | // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, |
aoqi@1 | 623 | // we let the hardware handle it. The one to eight bytes within words, |
aoqi@1 | 624 | // dwords or qwords that span cache line boundaries will still be loaded |
aoqi@1 | 625 | // and stored atomically. |
aoqi@1 | 626 | // |
aoqi@1 | 627 | // Side Effects: |
aoqi@1 | 628 | // disjoint_byte_copy_entry is set to the no-overlap entry point |
aoqi@1 | 629 | // used by generate_conjoint_byte_copy(). |
aoqi@1 | 630 | // |
aoqi@1 | 631 | address generate_disjoint_byte_copy(bool aligned, const char *name) { |
aoqi@1 | 632 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 633 | __ align(CodeEntryAlignment); |
aoqi@1 | 634 | address start = __ pc(); |
aoqi@1 | 635 | Label l_0, l_1, l_2, l_3, l_4, l_5, l_6; |
aoqi@1 | 636 | // Emits a forward byte copy of A2 bytes from A0 to A1 and returns the stub's entry address. T3 = source, T0 = destination, T1 = working count, T8 = bytes still to copy; these four are saved and restored, AT and T9 are scratch. Pointer and count arithmetic uses the 64-bit dsub/daddi forms, as the rest of this file does for addresses. |
aoqi@1 | 637 | __ push(T3); |
aoqi@1 | 638 | __ push(T0); |
aoqi@1 | 639 | __ push(T1); |
aoqi@1 | 640 | __ push(T8); |
aoqi@1 | 641 | __ move(T3, A0); |
aoqi@1 | 642 | __ move(T0, A1); |
aoqi@1 | 643 | __ move(T1, A2); |
aoqi@1 | 644 | __ move(T8, T1); // original count in T1 |
aoqi@1 | 645 | __ daddi(AT, T1, -3); |
aoqi@1 | 646 | __ blez(AT, l_4); // 3 bytes or fewer: go straight to the byte-wise tail |
aoqi@1 | 647 | __ delayed()->nop(); |
aoqi@1 | 648 | if (!aligned) { |
aoqi@8 | 649 | //TODO: copy 8 bytes at one time |
Jin@7 | 650 | // 2016/5/8 Jin: only when src and dest have the same alignment mod 4 can we do lw/sw; otherwise copy everything byte by byte (T1 still holds the full count) |
Jin@7 | 651 | __ andi(AT, T3, 3); |
Jin@7 | 652 | __ andi(T9, T0, 3); |
Jin@7 | 653 | __ bne(AT, T9, l_5); |
Jin@7 | 654 | __ delayed()->nop(); |
Jin@7 | 655 | |
aoqi@1 | 656 | // align source address at a 4-byte boundary: T1 = (4 - source) & 3 leading bytes |
aoqi@1 | 657 | __ move(T1, 4); |
aoqi@1 | 658 | __ dsub(T1, T1, T3); |
aoqi@1 | 659 | __ andi(T1, T1, 3); |
aoqi@1 | 660 | __ beq(T1, R0, l_1); |
aoqi@1 | 661 | __ delayed()->nop(); |
aoqi@1 | 662 | __ dsub(T8,T8,T1); |
aoqi@1 | 663 | __ bind(l_0); |
aoqi@1 | 664 | __ lb(AT, T3, 0); |
aoqi@1 | 665 | __ sb(AT, T0, 0); |
aoqi@1 | 666 | __ daddi(T3, T3, 1); |
aoqi@1 | 667 | __ daddi(T0, T0, 1); |
aoqi@1 | 668 | __ daddi(T1 ,T1, -1); |
aoqi@1 | 669 | __ bne(T1, R0, l_0); |
aoqi@1 | 670 | __ delayed()->nop(); |
aoqi@1 | 671 | __ bind(l_1); |
aoqi@1 | 672 | __ move(T1, T8); |
aoqi@1 | 673 | } |
aoqi@1 | 674 | __ shr(T1, 2); |
aoqi@1 | 675 | __ beq(T1, R0, l_4); // no dwords to move |
aoqi@1 | 676 | __ delayed()->nop(); |
aoqi@1 | 677 | // copy aligned dwords (4 bytes per iteration, T1 = number of iterations) |
aoqi@1 | 678 | __ bind(l_2); |
aoqi@1 | 679 | __ align(16); |
aoqi@1 | 680 | __ bind(l_3); |
aoqi@1 | 681 | __ lw(AT, T3, 0); |
aoqi@1 | 682 | __ sw(AT, T0, 0 ); |
aoqi@1 | 683 | __ daddi(T3, T3, 4); |
aoqi@1 | 684 | __ daddi(T0, T0, 4); |
aoqi@1 | 685 | __ daddi(T1, T1, -1); |
aoqi@1 | 686 | __ bne(T1, R0, l_3); |
aoqi@1 | 687 | __ delayed()->nop(); |
aoqi@1 | 688 | __ bind(l_4); |
aoqi@1 | 689 | __ move(T1, T8); |
aoqi@1 | 690 | __ andi(T1, T1, 3); |
aoqi@1 | 691 | __ beq(T1, R0, l_6); |
aoqi@1 | 692 | __ delayed()->nop(); |
aoqi@1 | 693 | // copy suffix one byte at a time (T1 = bytes left) |
aoqi@1 | 694 | __ bind(l_5); |
aoqi@1 | 695 | __ lb(AT, T3, 0); |
aoqi@1 | 696 | __ sb(AT, T0, 0); |
aoqi@1 | 697 | __ daddi(T3, T3, 1); |
aoqi@1 | 698 | __ daddi(T0, T0, 1); |
aoqi@1 | 699 | __ daddi(T1, T1, -1); |
aoqi@1 | 700 | __ bne(T1, R0, l_5 ); |
aoqi@1 | 701 | __ delayed()->nop(); |
aoqi@1 | 702 | __ bind(l_6); |
aoqi@1 | 703 | __ pop(T8); |
aoqi@1 | 704 | __ pop(T1); |
aoqi@1 | 705 | __ pop(T0); |
aoqi@1 | 706 | __ pop(T3); |
aoqi@1 | 707 | __ jr(RA); |
aoqi@1 | 708 | __ delayed()->nop(); |
aoqi@1 | 709 | return start; |
aoqi@1 | 710 | } |
aoqi@1 | 711 | |
aoqi@1 | 712 | // Arguments: |
aoqi@1 | 713 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 714 | // ignored |
aoqi@1 | 715 | // name - stub name string |
aoqi@1 | 716 | // |
aoqi@1 | 717 | // Inputs: |
aoqi@8 | 718 | // A0 - source array address |
aoqi@8 | 719 | // A1 - destination array address |
aoqi@8 | 720 | // A2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 721 | // |
aoqi@1 | 722 | // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, |
aoqi@1 | 723 | // we let the hardware handle it. The one to eight bytes within words, |
aoqi@1 | 724 | // dwords or qwords that span cache line boundaries will still be loaded |
aoqi@1 | 725 | // and stored atomically. |
aoqi@1 | 726 | // |
aoqi@1 | 727 | address generate_conjoint_byte_copy(bool aligned, const char *name) { |
aoqi@8 | 728 | __ align(CodeEntryAlignment); |
aoqi@8 | 729 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@8 | 730 | address start = __ pc(); |
aoqi@1 | 731 | |
aoqi@8 | 732 | Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; |
aoqi@8 | 733 | Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; |
aoqi@1 | 734 | |
aoqi@8 | 735 | address nooverlap_target = aligned ? |
aoqi@8 | 736 | StubRoutines::arrayof_jbyte_disjoint_arraycopy() : |
aoqi@8 | 737 | StubRoutines::jbyte_disjoint_arraycopy(); |
aoqi@1 | 738 | |
aoqi@8 | 739 | array_overlap_test(nooverlap_target, 0); |
Jin@7 | 740 | |
aoqi@8 | 741 | const Register from = A0; // source array address |
aoqi@8 | 742 | const Register to = A1; // destination array address |
aoqi@8 | 743 | const Register count = A2; // elements count |
aoqi@8 | 744 | const Register end_from = T3; // source array end address |
aoqi@8 | 745 | const Register end_to = T0; // destination array end address |
aoqi@8 | 746 | const Register end_count = T1; // destination array end address |
Jin@7 | 747 | |
aoqi@8 | 748 | __ push(end_from); |
aoqi@8 | 749 | __ push(end_to); |
aoqi@8 | 750 | __ push(end_count); |
aoqi@8 | 751 | __ push(T8); |
Jin@7 | 752 | |
aoqi@8 | 753 | // copy from high to low |
aoqi@8 | 754 | __ move(end_count, count); |
aoqi@8 | 755 | __ dadd(end_from, from, end_count); |
aoqi@8 | 756 | __ dadd(end_to, to, end_count); |
Jin@7 | 757 | |
aoqi@8 | 758 | // 2016/05/08 aoqi: If end_from and end_to has differante alignment, unaligned copy is performed. |
aoqi@8 | 759 | __ andi(AT, end_from, 3); |
aoqi@8 | 760 | __ andi(T8, end_to, 3); |
aoqi@8 | 761 | __ bne(AT, T8, l_copy_byte); |
aoqi@8 | 762 | __ delayed()->nop(); |
Jin@7 | 763 | |
aoqi@8 | 764 | // First deal with the unaligned data at the top. |
aoqi@8 | 765 | __ bind(l_unaligned); |
aoqi@8 | 766 | __ beq(end_count, R0, l_exit); |
aoqi@8 | 767 | __ delayed()->nop(); |
aoqi@8 | 768 | |
aoqi@8 | 769 | __ andi(AT, end_from, 3); |
aoqi@8 | 770 | __ bne(AT, R0, l_from_unaligned); |
aoqi@8 | 771 | __ delayed()->nop(); |
aoqi@8 | 772 | |
aoqi@8 | 773 | __ andi(AT, end_to, 3); |
aoqi@8 | 774 | __ beq(AT, R0, l_4_bytes_aligned); |
aoqi@8 | 775 | __ delayed()->nop(); |
aoqi@8 | 776 | |
aoqi@8 | 777 | __ bind(l_from_unaligned); |
aoqi@8 | 778 | __ lb(AT, end_from, -1); |
aoqi@8 | 779 | __ sb(AT, end_to, -1); |
aoqi@8 | 780 | __ daddi(end_from, end_from, -1); |
aoqi@8 | 781 | __ daddi(end_to, end_to, -1); |
aoqi@8 | 782 | __ daddi(end_count, end_count, -1); |
aoqi@8 | 783 | __ b(l_unaligned); |
aoqi@8 | 784 | __ delayed()->nop(); |
aoqi@8 | 785 | |
aoqi@8 | 786 | // now end_to, end_from point to 4-byte aligned high-ends |
aoqi@8 | 787 | // end_count contains byte count that is not copied. |
aoqi@8 | 788 | // copy 4 bytes at a time |
aoqi@8 | 789 | __ bind(l_4_bytes_aligned); |
aoqi@8 | 790 | |
aoqi@8 | 791 | __ move(T8, end_count); |
aoqi@8 | 792 | __ daddi(AT, end_count, -3); |
aoqi@8 | 793 | __ blez(AT, l_copy_suffix); |
aoqi@8 | 794 | __ delayed()->nop(); |
aoqi@8 | 795 | |
aoqi@8 | 796 | //__ andi(T8, T8, 3); |
aoqi@8 | 797 | __ lea(end_from, Address(end_from, -4)); |
aoqi@8 | 798 | __ lea(end_to, Address(end_to, -4)); |
aoqi@8 | 799 | |
aoqi@8 | 800 | __ dsrl(end_count, end_count, 2); |
aoqi@8 | 801 | __ align(16); |
aoqi@8 | 802 | __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes |
aoqi@8 | 803 | __ lw(AT, end_from, 0); |
aoqi@8 | 804 | __ sw(AT, end_to, 0); |
aoqi@8 | 805 | __ addi(end_from, end_from, -4); |
aoqi@8 | 806 | __ addi(end_to, end_to, -4); |
aoqi@8 | 807 | __ addi(end_count, end_count, -1); |
aoqi@8 | 808 | __ bne(end_count, R0, l_copy_4_bytes_loop); |
aoqi@8 | 809 | __ delayed()->nop(); |
aoqi@8 | 810 | |
aoqi@8 | 811 | __ b(l_copy_suffix); |
aoqi@8 | 812 | __ delayed()->nop(); |
aoqi@8 | 813 | // copy dwords aligned or not with repeat move |
aoqi@8 | 814 | // l_copy_suffix |
aoqi@8 | 815 | // copy suffix (0-3 bytes) |
aoqi@8 | 816 | __ bind(l_copy_suffix); |
aoqi@8 | 817 | __ andi(T8, T8, 3); |
aoqi@8 | 818 | __ beq(T8, R0, l_exit); |
aoqi@8 | 819 | __ delayed()->nop(); |
aoqi@8 | 820 | __ addi(end_from, end_from, 3); |
aoqi@8 | 821 | __ addi(end_to, end_to, 3); |
aoqi@8 | 822 | __ bind(l_copy_suffix_loop); |
aoqi@8 | 823 | __ lb(AT, end_from, 0); |
aoqi@8 | 824 | __ sb(AT, end_to, 0); |
aoqi@8 | 825 | __ addi(end_from, end_from, -1); |
aoqi@8 | 826 | __ addi(end_to, end_to, -1); |
aoqi@8 | 827 | __ addi(T8, T8, -1); |
aoqi@8 | 828 | __ bne(T8, R0, l_copy_suffix_loop); |
aoqi@8 | 829 | __ delayed()->nop(); |
aoqi@8 | 830 | |
aoqi@8 | 831 | __ bind(l_copy_byte); |
aoqi@8 | 832 | __ beq(end_count, R0, l_exit); |
aoqi@8 | 833 | __ delayed()->nop(); |
aoqi@8 | 834 | __ lb(AT, end_from, -1); |
aoqi@8 | 835 | __ sb(AT, end_to, -1); |
aoqi@8 | 836 | __ daddi(end_from, end_from, -1); |
aoqi@8 | 837 | __ daddi(end_to, end_to, -1); |
aoqi@8 | 838 | __ daddi(end_count, end_count, -1); |
aoqi@8 | 839 | __ b(l_copy_byte); |
aoqi@8 | 840 | __ delayed()->nop(); |
aoqi@8 | 841 | |
aoqi@8 | 842 | __ bind(l_exit); |
aoqi@8 | 843 | __ pop(T8); |
aoqi@8 | 844 | __ pop(end_count); |
aoqi@8 | 845 | __ pop(end_to); |
aoqi@8 | 846 | __ pop(end_from); |
aoqi@8 | 847 | __ jr(RA); |
aoqi@8 | 848 | __ delayed()->nop(); |
aoqi@8 | 849 | return start; |
aoqi@1 | 850 | } |
aoqi@1 | 851 | |
aoqi@13 | 852 | // Generate stub for disjoint short copy. If "aligned" is true, the |
aoqi@13 | 853 | // "from" and "to" addresses are assumed to be heapword aligned. |
aoqi@1 | 854 | // |
aoqi@13 | 855 | // Arguments for generated stub: |
aoqi@13 | 856 | // from: A0 |
aoqi@13 | 857 | // to: A1 |
aoqi@13 | 858 | // elm.count: A2 treated as signed |
aoqi@13 | 859 | // one element: 2 bytes |
aoqi@1 | 860 | // |
aoqi@13 | 861 | // Strategy for aligned==true: |
aoqi@1 | 862 | // |
aoqi@13 | 863 | // If length <= 9: |
aoqi@13 | 864 | // 1. copy 1 element at a time (l_5) |
aoqi@1 | 865 | // |
aoqi@13 | 866 | // If length > 9: |
aoqi@13 | 867 | // 1. copy 4 elements at a time until less than 4 elements are left (l_7, loop at l_8; skipped when fewer than 16 elements remain) |
aoqi@13 | 868 | // 2. copy 2 elements at a time until less than 2 elements are left (l_6, loop at l_3) |
aoqi@13 | 869 | // 3. copy last element if one was left in step 2. (l_1, loop at l_5) |
aoqi@13 | 870 | // |
aoqi@13 | 871 | // |
aoqi@13 | 872 | // Strategy for aligned==false: |
aoqi@13 | 873 | // |
aoqi@13 | 874 | // If length <= 9: same as aligned==true case |
aoqi@13 | 875 | // |
aoqi@13 | 876 | // If length > 9: |
aoqi@13 | 877 | // 1. continue with step 7. if the alignment of from and to mod 4 |
aoqi@13 | 878 | // is different. |
aoqi@13 | 879 | // 2. align from and to to 4 bytes by copying 1 element if necessary |
aoqi@13 | 880 | // 3. at l_2 from and to are 4 byte aligned; continue with |
aoqi@13 | 881 | // 6. if they cannot be aligned to 8 bytes because they have |
aoqi@13 | 882 | // got different alignment mod 8. |
aoqi@13 | 883 | // 4. at this point we know that both, from and to, have the same |
aoqi@13 | 884 | // alignment mod 8, now copy one element if necessary to get |
aoqi@13 | 885 | // 8 byte alignment of from and to. |
aoqi@13 | 886 | // 5. copy 4 elements at a time until less than 4 elements are |
aoqi@13 | 887 | // left; depending on step 3. all load/stores are aligned. |
aoqi@13 | 888 | // 6. copy 2 elements at a time until less than 2 elements are |
aoqi@13 | 889 | // left. (l_6) |
aoqi@13 | 890 | // 7. copy 1 element at a time. (l_5) |
aoqi@13 | 891 | // 8. copy last element if one was left in step 6. (l_1) |
aoqi@13 | 892 | // |
aoqi@13 | 893 | // TODO: |
aoqi@13 | 894 | // |
aoqi@13 | 895 | // 1. use loongson 128-bit load/store |
aoqi@13 | 896 | // 2. use loop unrolling optimization when len is big enough, for example if len > 0x2000: |
aoqi@13 | 897 | // __ bind(l_x); |
aoqi@13 | 898 | // __ ld(AT, tmp1, 0); |
aoqi@13 | 899 | // __ ld(tmp, tmp1, 8); |
aoqi@13 | 900 | // __ sd(AT, tmp2, 0); |
aoqi@13 | 901 | // __ sd(tmp, tmp2, 8); |
aoqi@13 | 902 | // __ ld(AT, tmp1, 16); |
aoqi@13 | 903 | // __ ld(tmp, tmp1, 24); |
aoqi@13 | 904 | // __ sd(AT, tmp2, 16); |
aoqi@13 | 905 | // __ sd(tmp, tmp2, 24); |
aoqi@13 | 906 | // __ daddi(tmp1, tmp1, 32); |
aoqi@13 | 907 | // __ daddi(tmp2, tmp2, 32); |
aoqi@13 | 908 | // __ daddi(tmp3, tmp3, -16); |
aoqi@13 | 909 | // __ daddi(AT, tmp3, -16); |
aoqi@13 | 910 | // __ bgez(AT, l_x); |
aoqi@13 | 911 | // __ delayed()->nop(); |
aoqi@13 | 912 | // |
aoqi@13 | 913 | address generate_disjoint_short_copy(bool aligned, const char * name) { |
aoqi@13 | 914 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@13 | 915 | __ align(CodeEntryAlignment); |
aoqi@1 | 916 | |
aoqi@13 | 917 | Register tmp1 = T0; |
aoqi@13 | 918 | Register tmp2 = T1; |
aoqi@13 | 919 | Register tmp3 = T3; |
aoqi@1 | 920 | |
aoqi@13 | 921 | address start = __ pc(); |
aoqi@13 | 922 | |
aoqi@13 | 923 | __ push(tmp1); |
aoqi@13 | 924 | __ push(tmp2); |
aoqi@13 | 925 | __ push(tmp3); |
aoqi@13 | 926 | __ move(tmp1, A0); |
aoqi@13 | 927 | __ move(tmp2, A1); |
aoqi@13 | 928 | __ move(tmp3, A2); |
aoqi@13 | 929 | |
aoqi@13 | 930 | Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8; |
aoqi@13 | 931 | Label l_debug; |
aoqi@13 | 932 | // tmp1 = source cursor, tmp2 = destination cursor, tmp3 = elements left; don't try anything fancy if arrays don't have many elements (count <= 9) |
aoqi@13 | 933 | __ daddi(AT, tmp3, -9); |
aoqi@13 | 934 | __ blez(AT, l_1); |
aoqi@13 | 935 | __ delayed()->nop(); |
aoqi@13 | 936 | |
aoqi@13 | 937 | if (!aligned) { |
aoqi@13 | 938 | __ xorr(AT, A0, A1); |
aoqi@13 | 939 | __ andi(AT, AT, 1); |
aoqi@13 | 940 | __ bne(AT, R0, l_debug); // from and to differ in alignment mod 2: not expected, l_debug ends in stop() |
aoqi@13 | 941 | __ delayed()->nop(); |
aoqi@13 | 942 | |
aoqi@13 | 943 | __ xorr(AT, A0, A1); |
aoqi@13 | 944 | __ andi(AT, AT, 3); |
aoqi@13 | 945 | __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy |
aoqi@13 | 946 | __ delayed()->nop(); |
aoqi@13 | 947 | |
aoqi@13 | 948 | // At this point it is guaranteed that both, from and to have the same alignment mod 4. |
aoqi@13 | 949 | |
aoqi@13 | 950 | // Copy 1 element if necessary to align to 4 bytes. |
aoqi@13 | 951 | __ andi(AT, A0, 3); |
aoqi@13 | 952 | __ beq(AT, R0, l_2); |
aoqi@13 | 953 | __ delayed()->nop(); |
aoqi@13 | 954 | |
aoqi@13 | 955 | __ lhu(AT, tmp1, 0); |
aoqi@13 | 956 | __ daddi(tmp1, tmp1, 2); |
aoqi@13 | 957 | __ sh(AT, tmp2, 0); |
aoqi@13 | 958 | __ daddi(tmp2, tmp2, 2); |
aoqi@13 | 959 | __ daddi(tmp3, tmp3, -1); |
aoqi@13 | 960 | __ bind(l_2); |
aoqi@13 | 961 | |
aoqi@13 | 962 | // At this point the positions of both, from and to, are at least 4 byte aligned. |
aoqi@13 | 963 | |
aoqi@13 | 964 | // Copy 4 elements at a time. |
aoqi@13 | 965 | // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. |
aoqi@13 | 966 | __ xorr(AT, tmp1, tmp2); |
aoqi@13 | 967 | __ andi(AT, AT, 7); |
aoqi@13 | 968 | __ bne(AT, R0, l_6); // not same alignment mod 8 -> skip the 8-byte loop and copy 2 elements at a time (lw/sw stay 4-byte aligned) |
aoqi@13 | 969 | __ delayed()->nop(); |
aoqi@13 | 970 | |
aoqi@13 | 971 | // Copy a 2-element word if necessary to align to 8 bytes. |
aoqi@13 | 972 | __ andi(AT, tmp1, 7); |
aoqi@13 | 973 | __ beq(AT, R0, l_7); |
aoqi@13 | 974 | __ delayed()->nop(); |
aoqi@13 | 975 | |
aoqi@13 | 976 | __ lw(AT, tmp1, 0); |
aoqi@13 | 977 | __ daddi(tmp3, tmp3, -2); |
aoqi@13 | 978 | __ sw(AT, tmp2, 0); |
aoqi@13 | 979 | { // FasterArrayCopy |
aoqi@13 | 980 | __ daddi(tmp1, tmp1, 4); |
aoqi@13 | 981 | __ daddi(tmp2, tmp2, 4); |
aoqi@13 | 982 | } |
aoqi@13 | 983 | } |
aoqi@13 | 984 | |
aoqi@13 | 985 | __ bind(l_7); |
aoqi@13 | 986 | |
aoqi@13 | 987 | // Copy 4 elements (8 bytes) at a time. When aligned == false this point is only reached after the |
aoqi@13 | 988 | // checks above brought from and to to the same 8-byte alignment (assuming 2-byte-aligned inputs). |
aoqi@13 | 989 | |
aoqi@13 | 990 | { // FasterArrayCopy |
aoqi@13 | 991 | __ daddi(AT, tmp3, -15); |
aoqi@13 | 992 | __ blez(AT, l_6); // copy 2 at a time if less than 16 elements remain |
aoqi@13 | 993 | __ delayed()->nop(); |
aoqi@13 | 994 | |
aoqi@13 | 995 | __ bind(l_8); |
aoqi@13 | 996 | // For Loongson, there is 128-bit memory access. TODO |
aoqi@13 | 997 | __ ld(AT, tmp1, 0); |
aoqi@13 | 998 | __ sd(AT, tmp2, 0); |
aoqi@13 | 999 | __ daddi(tmp1, tmp1, 8); |
aoqi@13 | 1000 | __ daddi(tmp2, tmp2, 8); |
aoqi@13 | 1001 | __ daddi(tmp3, tmp3, -4); |
aoqi@13 | 1002 | __ daddi(AT, tmp3, -4); |
aoqi@13 | 1003 | __ bgez(AT, l_8); |
aoqi@13 | 1004 | __ delayed()->nop(); |
aoqi@13 | 1005 | } |
aoqi@13 | 1006 | __ bind(l_6); |
aoqi@13 | 1007 | |
aoqi@13 | 1008 | // copy 2 elements (one 4-byte word) at a time while at least 2 elements remain |
aoqi@13 | 1009 | { // FasterArrayCopy |
aoqi@13 | 1010 | __ daddi(AT, tmp3, -1); |
aoqi@13 | 1011 | __ blez(AT, l_1); |
aoqi@13 | 1012 | __ delayed()->nop(); |
aoqi@13 | 1013 | |
aoqi@13 | 1014 | __ bind(l_3); |
aoqi@13 | 1015 | __ lw(AT, tmp1, 0); |
aoqi@13 | 1016 | __ sw(AT, tmp2, 0); |
aoqi@13 | 1017 | __ daddi(tmp1, tmp1, 4); |
aoqi@13 | 1018 | __ daddi(tmp2, tmp2, 4); |
aoqi@13 | 1019 | __ daddi(tmp3, tmp3, -2); |
aoqi@13 | 1020 | __ daddi(AT, tmp3, -2); |
aoqi@13 | 1021 | __ bgez(AT, l_3); |
aoqi@13 | 1022 | __ delayed()->nop(); |
aoqi@13 | 1023 | |
aoqi@13 | 1024 | } |
aoqi@13 | 1025 | |
aoqi@13 | 1026 | // copy the remaining elements one at a time (16 bit each); this is also the whole copy for count <= 9 or different alignment mod 4 |
aoqi@13 | 1027 | __ bind(l_1); |
aoqi@13 | 1028 | __ beq(R0, tmp3, l_4); |
aoqi@13 | 1029 | __ delayed()->nop(); |
aoqi@13 | 1030 | |
aoqi@13 | 1031 | { // FasterArrayCopy |
aoqi@13 | 1032 | |
aoqi@13 | 1033 | __ bind(l_5); |
aoqi@13 | 1034 | __ lhu(AT, tmp1, 0); |
aoqi@13 | 1035 | __ daddi(tmp3, tmp3, -1); |
aoqi@13 | 1036 | __ sh(AT, tmp2, 0); |
aoqi@13 | 1037 | __ daddi(tmp1, tmp1, 2); |
aoqi@13 | 1038 | __ daddi(tmp2, tmp2, 2); |
aoqi@13 | 1039 | __ daddi(AT, tmp3, -1); |
aoqi@13 | 1040 | __ bgez(AT, l_5); |
aoqi@13 | 1041 | __ delayed()->nop(); |
aoqi@13 | 1042 | } |
aoqi@13 | 1043 | __ bind(l_4); |
aoqi@13 | 1044 | __ pop(tmp3); |
aoqi@13 | 1045 | __ pop(tmp2); |
aoqi@13 | 1046 | __ pop(tmp1); |
aoqi@13 | 1047 | |
aoqi@13 | 1048 | __ jr(RA); |
aoqi@13 | 1049 | __ delayed()->nop(); |
aoqi@13 | 1050 | |
aoqi@13 | 1051 | __ bind(l_debug); |
aoqi@13 | 1052 | __ stop("generate_disjoint_short_copy should not reach here"); |
aoqi@13 | 1053 | return start; |
aoqi@1 | 1054 | } |
aoqi@1 | 1055 | |
aoqi@1 | 1056 | // Arguments: |
aoqi@1 | 1057 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 1058 | // ignored |
aoqi@1 | 1059 | // name - stub name string |
aoqi@1 | 1060 | // |
aoqi@1 | 1061 | // Inputs: |
aoqi@1 | 1062 | // c_rarg0 - source array address |
aoqi@1 | 1063 | // c_rarg1 - destination array address |
aoqi@1 | 1064 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1065 | // |
aoqi@1 | 1066 | // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we |
aoqi@1 | 1067 | // let the hardware handle it. The two or four words within dwords |
aoqi@1 | 1068 | // or qwords that span cache line boundaries will still be loaded |
aoqi@1 | 1069 | // and stored atomically. |
aoqi@1 | 1070 | // |
aoqi@1 | 1071 | address generate_conjoint_short_copy(bool aligned, const char *name) { |
aoqi@1 | 1072 | Label l_1, l_2, l_3, l_4, l_5; |
aoqi@1 | 1073 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1074 | __ align(CodeEntryAlignment); |
aoqi@1 | 1075 | address start = __ pc(); |
aoqi@1 | 1076 | address nooverlap_target = aligned ? |
aoqi@1 | 1077 | StubRoutines::arrayof_jshort_disjoint_arraycopy() : |
aoqi@1 | 1078 | StubRoutines::jshort_disjoint_arraycopy(); |
aoqi@1 | 1079 | |
aoqi@1 | 1080 | array_overlap_test(nooverlap_target, 1); |
aoqi@1 | 1081 | |
aoqi@1 | 1082 | __ push(T3); |
aoqi@1 | 1083 | __ push(T0); |
aoqi@1 | 1084 | __ push(T1); |
aoqi@1 | 1085 | __ push(T8); |
aoqi@1 | 1086 | |
aoqi@1 | 1087 | /* |
aoqi@1 | 1088 | Working registers (saved above, restored before returning): |
aoqi@1 | 1089 | T1 = number of 4-byte words still to copy (loaded from A2, the element count) |
aoqi@1 | 1090 | T3 = current source address (loaded from A0, from) |
aoqi@1 | 1091 | T0 = current destination address (loaded from A1, to) |
aoqi@1 | 1092 | T8 = copy of the element count, used to detect an odd trailing element |
aoqi@1 | 1093 | */ |
aoqi@1 | 1094 | __ move(T1, A2); |
aoqi@1 | 1095 | __ move(T3, A0); |
aoqi@1 | 1096 | __ move(T0, A1); |
aoqi@1 | 1097 | |
aoqi@1 | 1098 | |
aoqi@1 | 1099 | // copy 4-byte words (two elements each) from high to low addresses |
aoqi@1 | 1100 | // T3 = from + count*2 - 4 |
aoqi@1 | 1101 | __ sll(AT, T1, Address::times_2); |
aoqi@1 | 1102 | __ add(AT, T3, AT); |
aoqi@1 | 1103 | __ lea(T3, Address( AT, -4)); |
aoqi@1 | 1104 | //__ std(); |
aoqi@1 | 1105 | // T0 = to + count*2 - 4 |
aoqi@1 | 1106 | __ sll(AT,T1 , Address::times_2); |
aoqi@1 | 1107 | __ add(AT, T0, AT); |
aoqi@1 | 1108 | __ lea(T0, Address( AT, -4)); |
aoqi@1 | 1109 | // keep the element count in T8 for the suffix test at l_4 |
aoqi@1 | 1110 | __ move(T8, T1); |
aoqi@1 | 1111 | __ bind(l_1); |
aoqi@1 | 1112 | // T1 = count / 2 = number of 4-byte words |
aoqi@1 | 1113 | __ sra(T1,T1, 1); |
aoqi@1 | 1114 | // no words to move |
aoqi@1 | 1115 | __ beq(T1, R0, l_4); |
aoqi@1 | 1116 | __ delayed()->nop(); |
aoqi@1 | 1117 | /* The x86 original switched to rep_movl for more than 32 dwords (l_3); |
aoqi@1 | 1118 | this port always uses the loop below. |
aoqi@1 | 1119 | NOTE(review): lw/sw are issued without an alignment check on T3/T0 - |
aoqi@1 | 1120 | confirm that addresses which are only 2-byte aligned are acceptable here. |
aoqi@1 | 1121 | */ __ align(16); |
aoqi@1 | 1122 | __ bind(l_2); |
aoqi@1 | 1123 | // load one word (two elements) from the source |
aoqi@1 | 1124 | __ lw(AT, T3, 0); |
aoqi@1 | 1125 | // ... and store it to the destination |
aoqi@1 | 1126 | __ sw(AT, T0, 0); |
aoqi@1 | 1127 | // step both pointers down by one word |
aoqi@1 | 1128 | __ addi(T3, T3, -4); |
aoqi@1 | 1129 | __ addi(T0, T0, -4); |
aoqi@1 | 1130 | // one word done |
aoqi@1 | 1131 | __ addi(T1, T1, -1); |
aoqi@1 | 1132 | // loop until the word count reaches zero |
aoqi@1 | 1133 | __ bne(T1, R0, l_2); |
aoqi@1 | 1134 | __ delayed()->nop(); |
aoqi@1 | 1135 | // word loop finished: |
aoqi@1 | 1136 | // go handle a possible odd trailing element |
aoqi@1 | 1137 | __ b(l_4); |
aoqi@1 | 1138 | __ delayed()->nop(); |
aoqi@1 | 1139 | // l_3 is bound but never branched to (placeholder for the x86 rep_movl path) |
aoqi@1 | 1140 | __ bind(l_3); |
aoqi@1 | 1141 | // __ rep_movl(); |
aoqi@1 | 1142 | __ bind(l_4); |
aoqi@1 | 1143 | // T8 & 1 is non-zero when the element count is odd |
aoqi@1 | 1144 | __ andi(T8, T8, 1); // suffix count |
aoqi@1 | 1145 | // no suffix |
aoqi@1 | 1146 | __ beq(T8, R0, l_5 ); |
aoqi@1 | 1147 | __ delayed()->nop(); |
aoqi@1 | 1148 | // copy suffix |
aoqi@1 | 1149 | // with an odd count T3 == from - 2 and T0 == to - 2 here, so offset 2 addresses element 0 |
aoqi@1 | 1150 | __ lh(AT, T3, 2); |
aoqi@1 | 1151 | // store the element to the destination |
aoqi@1 | 1152 | __ sh(AT, T0, 2); |
aoqi@1 | 1153 | __ bind(l_5); |
aoqi@1 | 1154 | // epilogue: restore T8, T1, T0, T3 (reverse of the push order on entry) and return |
aoqi@1 | 1158 | __ pop(T8); |
aoqi@1 | 1159 | __ pop(T1); |
aoqi@1 | 1160 | __ pop(T0); |
aoqi@1 | 1161 | __ pop(T3); |
aoqi@1 | 1162 | __ jr(RA); |
aoqi@1 | 1163 | __ delayed()->nop(); |
aoqi@1 | 1164 | return start; |
aoqi@1 | 1165 | } |
aoqi@1 | 1166 | |
aoqi@1 | 1167 | // Arguments: |
aoqi@1 | 1168 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 1169 | // ignored |
aoqi@1 | 1170 | // is_oop - true => oop array, so generate store check code |
aoqi@1 | 1171 | // name - stub name string |
aoqi@1 | 1172 | // |
aoqi@1 | 1173 | // Inputs: |
aoqi@1 | 1174 | // c_rarg0 - source array address |
aoqi@1 | 1175 | // c_rarg1 - destination array address |
aoqi@1 | 1176 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1177 | // |
aoqi@1 | 1178 | // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let |
aoqi@1 | 1179 | // the hardware handle it. The two dwords within qwords that span |
aoqi@1 | 1180 | // cache line boundaries will still be loaded and stored atomically. |
aoqi@1 | 1181 | // |
aoqi@1 | 1182 | // Side Effects: |
aoqi@1 | 1183 | // NOTE(review): the body below does not assign disjoint_int_copy_entry; |
aoqi@1 | 1184 | // generate_conjoint_int_oop_copy() takes its no-overlap target from StubRoutines. |
aoqi@1 | 1185 | // |
aoqi@1 | 1186 | address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { |
aoqi@1 | 1187 | Label l_3, l_4; |
aoqi@1 | 1188 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1189 | __ align(CodeEntryAlignment); |
aoqi@1 | 1190 | address start = __ pc(); |
aoqi@1 | 1191 | /* |
aoqi@1 | 1192 | Emits a stub that copies A2 4-byte elements from A0 (from) to A1 (to), |
aoqi@1 | 1193 | lowest address first. 'aligned' is not consulted in this body. |
aoqi@1 | 1194 | When is_oop is true, array_store_check() is emitted after the copy loop |
aoqi@1 | 1195 | with T0 = A1 and T1 = A2. T3, T0, T1 and T8 are saved on entry and restored |
aoqi@1 | 1196 | before returning. In the loop: T1 = elements left, T3 = source, T0 = destination. |
aoqi@1 | 1197 | */ |
aoqi@1 | 1198 | __ push(T3); |
aoqi@1 | 1199 | __ push(T0); |
aoqi@1 | 1200 | __ push(T1); |
aoqi@1 | 1201 | __ push(T8); |
aoqi@1 | 1202 | __ move(T1, A2); |
aoqi@1 | 1203 | __ move(T3, A0); |
aoqi@1 | 1204 | __ move(T0, A1); |
aoqi@1 | 1205 | |
aoqi@1 | 1206 | // The x86 original used rep_movl for more than 32 dwords; this port always |
aoqi@1 | 1207 | // uses the element loop below, so no separate bulk path is emitted. |
aoqi@1 | 1225 | |
aoqi@1 | 1229 | // count == 0: nothing to copy; the store check is skipped as well |
aoqi@1 | 1230 | __ beq(T1, R0, l_4); |
aoqi@1 | 1231 | __ delayed()->nop(); |
aoqi@1 | 1232 | __ align(16); |
aoqi@1 | 1233 | __ bind(l_3); |
aoqi@1 | 1234 | // load one 4-byte element from the source |
aoqi@1 | 1235 | __ lw(AT, T3, 0); |
aoqi@1 | 1236 | // ... and store it to the destination |
aoqi@1 | 1237 | __ sw(AT, T0, 0); |
aoqi@1 | 1238 | // advance both pointers by one element |
aoqi@1 | 1239 | __ addi(T3, T3, 4); |
aoqi@1 | 1240 | __ addi(T0, T0, 4); |
aoqi@1 | 1241 | // one element done |
aoqi@1 | 1242 | __ addi(T1, T1, -1); |
aoqi@1 | 1243 | // loop until the count reaches zero |
aoqi@1 | 1244 | __ bne(T1, R0, l_3); |
aoqi@1 | 1245 | __ delayed()->nop(); |
aoqi@1 | 1246 | if (is_oop) { |
aoqi@1 | 1248 | // reload to (A1) into T0 and count (A2) into T1 before array_store_check() |
aoqi@1 | 1249 | // NOTE(review): presumably these are its inputs - confirm against its definition |
aoqi@1 | 1250 | __ move(T0, A1); |
aoqi@1 | 1251 | __ move(T1, A2); |
aoqi@1 | 1252 | array_store_check(); |
aoqi@1 | 1253 | } |
aoqi@1 | 1254 | __ bind(l_4); |
aoqi@1 | 1255 | // epilogue: restore T8, T1, T0, T3 (reverse of the push order on entry) and return |
aoqi@1 | 1258 | __ pop(T8); |
aoqi@1 | 1259 | __ pop(T1); |
aoqi@1 | 1260 | __ pop(T0); |
aoqi@1 | 1261 | __ pop(T3); |
aoqi@1 | 1262 | __ jr(RA); |
aoqi@1 | 1263 | __ delayed()->nop(); |
aoqi@1 | 1264 | return start; |
aoqi@1 | 1265 | } |
aoqi@1 | 1266 | |
aoqi@1 | 1267 | // Arguments: |
aoqi@1 | 1268 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 1269 | // ignored |
aoqi@1 | 1270 | // is_oop - true => oop array, so generate store check code |
aoqi@1 | 1271 | // name - stub name string |
aoqi@1 | 1272 | // |
aoqi@1 | 1273 | // Inputs: |
aoqi@1 | 1274 | // c_rarg0 - source array address |
aoqi@1 | 1275 | // c_rarg1 - destination array address |
aoqi@1 | 1276 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1277 | // |
aoqi@1 | 1278 | // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let |
aoqi@1 | 1279 | // the hardware handle it. The two dwords within qwords that span |
aoqi@1 | 1280 | // cache line boundaries will still be loaded and stored atomically. |
aoqi@1 | 1281 | // |
aoqi@1 | 1282 | address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { |
aoqi@1 | 1283 | Label l_2, l_3, l_4, l_stchk; |
aoqi@1 | 1284 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1285 | __ align(CodeEntryAlignment); |
aoqi@1 | 1286 | address start = __ pc(); |
aoqi@1 | 1287 | address nooverlap_target; |
aoqi@1 | 1288 | |
aoqi@1 | 1289 | if (is_oop) { |
aoqi@1 | 1290 | nooverlap_target = aligned ? |
aoqi@1 | 1291 | StubRoutines::arrayof_oop_disjoint_arraycopy() : |
aoqi@1 | 1292 | StubRoutines::oop_disjoint_arraycopy(); |
aoqi@1 | 1293 | }else { |
aoqi@1 | 1294 | nooverlap_target = aligned ? |
aoqi@1 | 1295 | StubRoutines::arrayof_jint_disjoint_arraycopy() : |
aoqi@1 | 1296 | StubRoutines::jint_disjoint_arraycopy(); |
aoqi@1 | 1297 | } |
aoqi@1 | 1298 | |
aoqi@1 | 1299 | array_overlap_test(nooverlap_target, 2); |
aoqi@1 | 1300 | |
aoqi@1 | 1301 | __ push(T3); |
aoqi@1 | 1302 | __ push(T0); |
aoqi@1 | 1303 | __ push(T1); |
aoqi@1 | 1304 | __ push(T8); |
aoqi@1 | 1305 | |
aoqi@1 | 1306 | /* |
aoqi@1 | 1307 | Working registers (saved above, restored before returning): |
aoqi@1 | 1308 | T1 = elements still to copy (loaded from A2, the element count) |
aoqi@1 | 1309 | T3 = current source address (loaded from A0, from) |
aoqi@1 | 1310 | T0 = current destination address (loaded from A1, to) |
aoqi@1 | 1311 | T8 is saved and restored but not otherwise touched by the code visible here. |
aoqi@1 | 1312 | */ |
aoqi@1 | 1313 | __ move(T1, A2); |
aoqi@1 | 1314 | __ move(T3, A0); |
aoqi@1 | 1315 | __ move(T0, A1); |
aoqi@1 | 1316 | |
aoqi@1 | 1317 | // T3 = from + count*4 - 4 (address of the last source element) |
aoqi@1 | 1318 | __ sll(AT, T1, Address::times_4); |
aoqi@1 | 1319 | __ add(AT, T3, AT); |
aoqi@1 | 1320 | __ lea(T3 , Address(AT, -4)); |
aoqi@1 | 1321 | //__ std(); |
aoqi@1 | 1322 | // T0 = to + count*4 - 4 (address of the last destination element) |
aoqi@1 | 1323 | __ sll(AT, T1, Address::times_4); |
aoqi@1 | 1324 | __ add(AT, T0, AT); |
aoqi@1 | 1325 | __ lea(T0 , Address(AT, -4)); |
aoqi@1 | 1326 | |
aoqi@1 | 1327 | // The x86 original used rep_movl for more than 32 dwords (l_3); |
aoqi@1 | 1328 | // this port always uses the element loop below. |
aoqi@1 | 1329 | // count == 0: nothing to copy, return through l_4 |
aoqi@1 | 1330 | // (the store check is skipped as well in that case) |
aoqi@1 | 1331 | __ beq(T1, R0, l_4); |
aoqi@1 | 1332 | __ delayed()->nop(); |
aoqi@1 | 1333 | // copy loop, highest element first |
aoqi@1 | 1334 | __ align(16); |
aoqi@1 | 1335 | __ bind(l_2); |
aoqi@1 | 1336 | // load one 4-byte element from the source |
aoqi@1 | 1337 | __ lw(AT, T3, 0); |
aoqi@1 | 1338 | // ... and store it to the destination |
aoqi@1 | 1339 | __ sw(AT, T0, 0); |
aoqi@1 | 1340 | // step both pointers down by one element |
aoqi@1 | 1341 | __ addi(T3, T3, -4); |
aoqi@1 | 1342 | __ addi(T0, T0, -4); |
aoqi@1 | 1343 | // one element done |
aoqi@1 | 1344 | __ addi(T1, T1, -1); |
aoqi@1 | 1345 | // loop until the count reaches zero |
aoqi@1 | 1346 | __ bne(T1, R0, l_2); |
aoqi@1 | 1347 | __ delayed()->nop(); |
aoqi@1 | 1348 | if (is_oop) { |
aoqi@1 | 1349 | // oop arrays: continue with the store check below instead of returning at l_4 |
aoqi@1 | 1350 | __ b( l_stchk); |
aoqi@1 | 1351 | __ delayed()->nop(); |
aoqi@1 | 1352 | } |
aoqi@1 | 1353 | __ bind(l_4); |
aoqi@1 | 1354 | // epilogue for the non-oop case and for count == 0: restore the saved registers and return |
aoqi@1 | 1358 | __ pop(T8); |
aoqi@1 | 1359 | __ pop(T1); |
aoqi@1 | 1360 | __ pop(T0); |
aoqi@1 | 1361 | __ pop(T3); |
aoqi@1 | 1362 | __ jr(RA); |
aoqi@1 | 1363 | __ delayed()->nop(); |
aoqi@1 | 1364 | __ bind(l_3); |
aoqi@1 | 1365 | // l_3 is bound but never branched to (placeholder for the x86 rep_movl path) |
aoqi@1 | 1366 | if (is_oop) { |
aoqi@1 | 1367 | __ bind(l_stchk); |
aoqi@1 | 1368 | // reload to (A1) into T0 and count (A2) into T1 before array_store_check(); presumably its inputs - confirm against its definition |
aoqi@1 | 1369 | __ move(T0, A1); |
aoqi@1 | 1371 | __ move(T1, A2); |
aoqi@1 | 1372 | array_store_check(); |
aoqi@1 | 1373 | } |
aoqi@1 | 1374 | // epilogue after the store check; when is_oop is false no branch targets this copy of the epilogue |
aoqi@1 | 1378 | __ pop(T8); |
aoqi@1 | 1379 | __ pop(T1); |
aoqi@1 | 1380 | __ pop(T0); |
aoqi@1 | 1381 | __ pop(T3); |
aoqi@1 | 1382 | __ jr(RA); |
aoqi@1 | 1383 | __ delayed()->nop(); |
aoqi@1 | 1384 | return start; |
aoqi@1 | 1385 | } |
aoqi@1 | 1386 | |
aoqi@1 | 1387 | // Arguments: |
aoqi@1 | 1388 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 1389 | // ignored |
aoqi@1 | 1390 | // is_oop - true => oop array, so generate store check code |
aoqi@1 | 1391 | // name - stub name string |
aoqi@1 | 1392 | // |
aoqi@1 | 1393 | // Inputs: |
aoqi@1 | 1394 | // c_rarg0 - source array address |
aoqi@1 | 1395 | // c_rarg1 - destination array address |
aoqi@1 | 1396 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1397 | // |
aoqi@1 | 1398 | // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let |
aoqi@1 | 1399 | // the hardware handle it. The two dwords within qwords that span |
aoqi@1 | 1400 | // cache line boundaries will still be loaded and stored atomically. |
aoqi@1 | 1401 | // |
aoqi@1 | 1402 | // Side Effects: |
aoqi@1 | 1403 | // NOTE(review): the body below does not assign any entry-point variable; this |
aoqi@1 | 1404 | // paragraph appears to be carried over from generate_disjoint_int_oop_copy(). |
aoqi@1 | 1405 | // |
aoqi@1 | 1406 | address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { |
aoqi@1 | 1407 | Label l_3, l_4; |
aoqi@1 | 1408 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1409 | __ align(CodeEntryAlignment); |
aoqi@1 | 1410 | address start = __ pc(); |
aoqi@1 | 1411 | __ push(T3); |
aoqi@1 | 1412 | __ push(T0); |
aoqi@1 | 1413 | __ push(T1); |
aoqi@1 | 1414 | __ push(T8); |
aoqi@1 | 1415 | __ move(T1, A2); |
aoqi@1 | 1416 | __ move(T3, A0); |
aoqi@1 | 1417 | __ move(T0, A1); |
aoqi@1 | 1418 | |
aoqi@1 | 1419 | // Copies A2 8-byte elements from A0 (from) to A1 (to), lowest address first; 'aligned' is not consulted in this body. |
aoqi@1 | 1420 | // In the loop: T1 = elements left, T3 = source, T0 = destination (all saved above and restored before returning). |
aoqi@1 | 1421 | // The x86 original used rep_movl for large counts; this port always uses the element loop below. |
aoqi@1 | 1438 | |
aoqi@1 | 1442 | // count == 0: nothing to copy; the store check is skipped as well |
aoqi@1 | 1443 | __ beq(T1, R0, l_4); |
aoqi@1 | 1444 | __ delayed()->nop(); |
aoqi@1 | 1445 | __ align(16); |
aoqi@1 | 1446 | __ bind(l_3); |
aoqi@1 | 1447 | // load one 8-byte element from the source |
aoqi@1 | 1448 | __ ld(AT, T3, 0); |
aoqi@1 | 1449 | // ... and store it to the destination |
aoqi@1 | 1450 | __ sd(AT, T0, 0); |
aoqi@1 | 1451 | // advance both pointers by one element |
aoqi@1 | 1452 | __ addi(T3, T3, 8); |
aoqi@1 | 1453 | __ addi(T0, T0, 8); |
aoqi@1 | 1454 | // one element done |
aoqi@1 | 1455 | __ addi(T1, T1, -1); |
aoqi@1 | 1456 | // loop until the count reaches zero |
aoqi@1 | 1457 | __ bne(T1, R0, l_3); |
aoqi@1 | 1458 | __ delayed()->nop(); |
aoqi@1 | 1459 | if (is_oop) { |
aoqi@1 | 1461 | // reload to (A1) into T0 and count (A2) into T1 before array_store_check() |
aoqi@1 | 1462 | // NOTE(review): presumably these are its inputs - confirm against its definition |
aoqi@1 | 1463 | __ move(T0, A1); |
aoqi@1 | 1464 | __ move(T1, A2); |
aoqi@1 | 1465 | array_store_check(); |
aoqi@1 | 1466 | } |
aoqi@1 | 1467 | __ bind(l_4); |
aoqi@1 | 1468 | // epilogue: restore T8, T1, T0, T3 (reverse of the push order on entry) and return |
aoqi@1 | 1471 | __ pop(T8); |
aoqi@1 | 1472 | __ pop(T1); |
aoqi@1 | 1473 | __ pop(T0); |
aoqi@1 | 1474 | __ pop(T3); |
aoqi@1 | 1475 | __ jr(RA); |
aoqi@1 | 1476 | __ delayed()->nop(); |
aoqi@1 | 1477 | return start; |
aoqi@1 | 1478 | } |
aoqi@1 | 1479 | |
aoqi@1 | 1480 | // Arguments: |
aoqi@1 | 1481 | // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary |
aoqi@1 | 1482 | // (here it only selects which disjoint stub is the no-overlap target) |
aoqi@1 | 1483 | // is_oop - true => oop array, so generate store check code |
aoqi@1 | 1484 | // name - stub name string |
aoqi@1 | 1485 | // |
aoqi@1 | 1486 | // Inputs: |
aoqi@1 | 1487 | // c_rarg0 - source array address (read from A0) |
aoqi@1 | 1488 | // c_rarg1 - destination array address (read from A1) |
aoqi@1 | 1489 | // c_rarg2 - element count, treated as ssize_t, can be zero (read from A2) |
aoqi@1 | 1490 | // |
aoqi@1 | 1491 | // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let |
aoqi@1 | 1492 | // the hardware handle it. The two dwords within qwords that span |
aoqi@1 | 1493 | // cache line boundaries will still be loaded and stored atomically. |
aoqi@1 | 1494 | // The emitted loop copies backward (highest element first), one 8-byte ld/sd per element. |
aoqi@1 | 1495 | address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { |
aoqi@1 | 1496 | Label l_2, l_3, l_4, l_stchk; |
aoqi@1 | 1497 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1498 | __ align(CodeEntryAlignment); |
aoqi@1 | 1499 | address start = __ pc(); |
aoqi@1 | 1500 | address nooverlap_target; |
aoqi@1 | 1501 | |
aoqi@1 | 1502 | if (is_oop) { |
aoqi@1 | 1503 | nooverlap_target = aligned ? |
aoqi@1 | 1504 | StubRoutines::arrayof_oop_disjoint_arraycopy() : |
aoqi@1 | 1505 | StubRoutines::oop_disjoint_arraycopy(); |
aoqi@1 | 1506 | }else { |
aoqi@1 | 1507 | nooverlap_target = aligned ? |
aoqi@1 | 1508 | StubRoutines::arrayof_jlong_disjoint_arraycopy() : |
aoqi@1 | 1509 | StubRoutines::jlong_disjoint_arraycopy(); |
aoqi@1 | 1510 | } |
aoqi@1 | 1511 | |
aoqi@1 | 1512 | array_overlap_test(nooverlap_target, 3); |
aoqi@1 | 1513 | |
aoqi@1 | 1514 | __ push(T3); |
aoqi@1 | 1515 | __ push(T0); |
aoqi@1 | 1516 | __ push(T1); |
aoqi@1 | 1517 | __ push(T8); |
aoqi@1 | 1518 | |
aoqi@1 | 1519 | __ move(T1, A2); |
aoqi@1 | 1520 | __ move(T3, A0); |
aoqi@1 | 1521 | __ move(T0, A1); |
aoqi@1 | 1522 | |
aoqi@1 | 1523 | // T3 = from + (count << Address::times_8) - 8, the last source element. NOTE(review): sll/add are 32-bit forms on MIPS64 unless the assembler maps them - confirm. |
aoqi@1 | 1524 | __ sll(AT, T1, Address::times_8); |
aoqi@1 | 1525 | __ add(AT, T3, AT); |
aoqi@1 | 1526 | __ lea(T3 , Address(AT, -8)); |
aoqi@1 | 1527 | // (x86 original: std(); no counterpart is emitted here) |
aoqi@1 | 1528 | // T0 = to + (count << Address::times_8) - 8, the last destination element |
aoqi@1 | 1529 | __ sll(AT, T1, Address::times_8); |
aoqi@1 | 1530 | __ add(AT, T0, AT); |
aoqi@1 | 1531 | __ lea(T0 , Address(AT, -8)); |
aoqi@1 | 1532 | |
aoqi@1 | 1533 | // The x86 original used rep_movl for > 32 dwords (label l_3); that path is |
aoqi@1 | 1534 | // not ported, so l_3 is bound further down but nothing branches to it. |
aoqi@1 | 1535 | // An element count of zero goes straight to the plain epilogue at l_4, |
aoqi@1 | 1536 | // skipping the loop and, when is_oop is set, the store check. |
aoqi@1 | 1537 | __ beq(T1, R0, l_4); |
aoqi@1 | 1538 | __ delayed()->nop(); |
aoqi@1 | 1539 | // Copy loop: one 8-byte element per iteration, highest address first. |
aoqi@1 | 1540 | __ align(16); |
aoqi@1 | 1541 | __ bind(l_2); |
aoqi@1 | 1542 | // AT = the 8-byte element at the source cursor |
aoqi@1 | 1543 | __ ld(AT, T3, 0); |
aoqi@1 | 1544 | // store it at the destination cursor |
aoqi@1 | 1545 | __ sd(AT, T0, 0); |
aoqi@1 | 1546 | // step both cursors down by 8 bytes |
aoqi@1 | 1547 | __ addi(T3, T3, -8); |
aoqi@1 | 1548 | __ addi(T0, T0, -8); |
aoqi@1 | 1549 | // one element fewer to copy |
aoqi@1 | 1550 | __ addi(T1, T1, -1); |
aoqi@1 | 1551 | // loop until the remaining count reaches zero |
aoqi@1 | 1552 | __ bne(T1, R0, l_2); |
aoqi@1 | 1553 | __ delayed()->nop(); |
aoqi@1 | 1554 | if (is_oop) { |
aoqi@1 | 1555 | // oop copies continue at the store check instead of the plain epilogue |
aoqi@1 | 1556 | __ b( l_stchk); |
aoqi@1 | 1557 | __ delayed()->nop(); |
aoqi@1 | 1558 | } |
aoqi@1 | 1559 | __ bind(l_4); |
aoqi@1 | 1560 | // Plain epilogue, used by non-oop copies and by zero-length copies: |
aoqi@1 | 1561 | // restore the saved registers in reverse push order and return |
aoqi@1 | 1562 | // to the caller through RA. |
aoqi@1 | 1563 | // (x86 original: cld(); popl(edi); popl(esi); ret(0)) |
aoqi@1 | 1564 | __ pop(T8); |
aoqi@1 | 1565 | __ pop(T1); |
aoqi@1 | 1566 | __ pop(T0); |
aoqi@1 | 1567 | __ pop(T3); |
aoqi@1 | 1568 | __ jr(RA); |
aoqi@1 | 1569 | __ delayed()->nop(); |
aoqi@1 | 1570 | __ bind(l_3); |
aoqi@1 | 1571 | // l_3 is never used as a branch target in this function (x86 original: rep_movl() here). |
aoqi@1 | 1572 | if (is_oop) { |
aoqi@1 | 1573 | __ bind(l_stchk); |
aoqi@1 | 1574 | // T0 = original destination address (A1) |
aoqi@1 | 1575 | __ move(T0, A1); |
aoqi@1 | 1576 | // T1 = original element count (A2); both presumably feed array_store_check() - confirm |
aoqi@1 | 1577 | __ move(T1, A2); |
aoqi@1 | 1578 | array_store_check(); |
aoqi@1 | 1579 | } |
aoqi@1 | 1580 | // Epilogue after the store check: restore the saved registers in reverse |
aoqi@1 | 1581 | // push order and return through RA. When is_oop is false nothing branches |
aoqi@1 | 1582 | // past the jr at l_4, so this sequence is emitted but not executed. |
aoqi@1 | 1583 | // (x86 original: cld(); popl(edi); popl(esi); ret(0)) |
aoqi@1 | 1584 | __ pop(T8); |
aoqi@1 | 1585 | __ pop(T1); |
aoqi@1 | 1586 | __ pop(T0); |
aoqi@1 | 1587 | __ pop(T3); |
aoqi@1 | 1588 | __ jr(RA); |
aoqi@1 | 1589 | __ delayed()->nop(); |
aoqi@1 | 1590 | return start; |
aoqi@1 | 1591 | } |
aoqi@1 | 1592 | #if 0 |
aoqi@1 | 1593 | // Arguments: |
aoqi@1 | 1594 | // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes |
aoqi@1 | 1595 | // ignored |
aoqi@1 | 1596 | // is_oop - true => oop array, so generate store check code |
aoqi@1 | 1597 | // name - stub name string |
aoqi@1 | 1598 | // |
aoqi@1 | 1599 | // Inputs: |
aoqi@1 | 1600 | // c_rarg0 - source array address |
aoqi@1 | 1601 | // c_rarg1 - destination array address |
aoqi@1 | 1602 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1603 | // NOTE: this variant uses x86_64 registers (rdi, rsi, rdx, rcx, rax) and sits inside the '#if 0' region opened above, so it is not compiled. |
aoqi@1 | 1604 | address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { |
aoqi@1 | 1605 | __ align(CodeEntryAlignment); |
aoqi@1 | 1606 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1607 | address start = __ pc(); |
aoqi@1 | 1608 | |
aoqi@1 | 1609 | Label L_copy_32_bytes, L_copy_8_bytes, L_exit; |
aoqi@1 | 1610 | const Register from = rdi; // source array address |
aoqi@1 | 1611 | const Register to = rsi; // destination array address |
aoqi@1 | 1612 | const Register qword_count = rdx; // elements count |
aoqi@1 | 1613 | const Register saved_count = rcx; |
aoqi@1 | 1614 | |
aoqi@1 | 1615 | __ enter(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1616 | assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. |
aoqi@1 | 1617 | |
aoqi@1 | 1618 | address disjoint_copy_entry = NULL; |
aoqi@1 | 1619 | if (is_oop) { |
aoqi@1 | 1620 | assert(!UseCompressedOops, "shouldn't be called for compressed oops"); |
aoqi@1 | 1621 | disjoint_copy_entry = disjoint_oop_copy_entry; |
aoqi@1 | 1622 | oop_copy_entry = __ pc(); |
aoqi@1 | 1623 | array_overlap_test(disjoint_oop_copy_entry, Address::times_8); |
aoqi@1 | 1624 | } else { |
aoqi@1 | 1625 | disjoint_copy_entry = disjoint_long_copy_entry; |
aoqi@1 | 1626 | long_copy_entry = __ pc(); |
aoqi@1 | 1627 | array_overlap_test(disjoint_long_copy_entry, Address::times_8); |
aoqi@1 | 1628 | } |
aoqi@1 | 1629 | BLOCK_COMMENT("Entry:"); |
aoqi@1 | 1630 | // caller can pass a 64-bit byte count here (from Unsafe.copyMemory). NOTE(review): the overlap test below repeats the one already emitted in the branch above with the same arguments - confirm. |
aoqi@1 | 1631 | |
aoqi@1 | 1632 | array_overlap_test(disjoint_copy_entry, Address::times_8); |
aoqi@1 | 1633 | setup_arg_regs(); // from => rdi, to => rsi, count => rdx |
aoqi@1 | 1634 | // r9 and r10 may be used to save non-volatile registers |
aoqi@1 | 1635 | |
aoqi@1 | 1636 | // 'from', 'to' and 'qword_count' are now valid |
aoqi@1 | 1637 | |
aoqi@1 | 1638 | if (is_oop) { |
aoqi@1 | 1639 | // Save to and count for store barrier |
aoqi@1 | 1640 | __ movptr(saved_count, qword_count); |
aoqi@1 | 1641 | // No registers are destroyed by this call |
aoqi@1 | 1642 | gen_write_ref_array_pre_barrier(to, saved_count); |
aoqi@1 | 1643 | } |
aoqi@1 | 1644 | |
aoqi@1 | 1645 | __ jmp(L_copy_32_bytes); |
aoqi@1 | 1646 | |
aoqi@1 | 1647 | // Copy trailing qwords, highest index first, until qword_count reaches zero |
aoqi@1 | 1648 | __ BIND(L_copy_8_bytes); |
aoqi@1 | 1649 | __ movq(rax, Address(from, qword_count, Address::times_8, -8)); |
aoqi@1 | 1650 | __ movq(Address(to, qword_count, Address::times_8, -8), rax); |
aoqi@1 | 1651 | __ decrement(qword_count); |
aoqi@1 | 1652 | __ jcc(Assembler::notZero, L_copy_8_bytes); |
aoqi@1 | 1653 | |
aoqi@1 | 1654 | if (is_oop) { |
aoqi@1 | 1655 | __ jmp(L_exit); |
aoqi@1 | 1656 | } else { |
aoqi@1 | 1657 | inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); |
aoqi@1 | 1658 | restore_arg_regs(); |
aoqi@1 | 1659 | __ xorptr(rax, rax); // return 0 |
aoqi@1 | 1660 | __ leave(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1661 | __ ret(0); |
aoqi@1 | 1662 | } |
aoqi@1 | 1663 | |
aoqi@1 | 1664 | // Copy in 32-bytes chunks (the helper binds L_copy_32_bytes, the entry jumped to above - presumably; confirm in copy_32_bytes_backward) |
aoqi@1 | 1665 | copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); |
aoqi@1 | 1666 | |
aoqi@1 | 1667 | if (is_oop) { |
aoqi@1 | 1668 | __ BIND(L_exit); |
aoqi@1 | 1669 | __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); |
aoqi@1 | 1670 | gen_write_ref_array_post_barrier(to, rcx, rax); |
aoqi@1 | 1671 | inc_counter_np(SharedRuntime::_oop_array_copy_ctr); |
aoqi@1 | 1672 | } else { |
aoqi@1 | 1673 | inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); |
aoqi@1 | 1674 | } |
aoqi@1 | 1675 | restore_arg_regs(); |
aoqi@1 | 1676 | __ xorptr(rax, rax); // return 0 |
aoqi@1 | 1677 | __ leave(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1678 | __ ret(0); |
aoqi@1 | 1679 | |
aoqi@1 | 1680 | return start; |
aoqi@1 | 1681 | } |
aoqi@1 | 1682 | |
aoqi@1 | 1683 | |
aoqi@1 | 1684 | // Helper for generating a dynamic type check: emits a jump to L_success when the check passes and falls through when it fails. |
aoqi@1 | 1685 | // Smashes no registers (rax, rcx and rdi are spilled and restored around the secondary-supers scan). x86_64 code inside the '#if 0' region. |
aoqi@1 | 1686 | void generate_type_check(Register sub_klass, |
aoqi@1 | 1687 | Register super_check_offset, |
aoqi@1 | 1688 | Register super_klass, |
aoqi@1 | 1689 | Label& L_success) { |
aoqi@1 | 1690 | assert_different_registers(sub_klass, super_check_offset, super_klass); |
aoqi@1 | 1691 | |
aoqi@1 | 1692 | BLOCK_COMMENT("type_check:"); |
aoqi@1 | 1693 | |
aoqi@1 | 1694 | Label L_miss; |
aoqi@1 | 1695 | |
aoqi@1 | 1696 | // a couple of useful fields in sub_klass: |
aoqi@1 | 1697 | int ss_offset = (klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 1698 | Klass::secondary_supers_offset_in_bytes()); |
aoqi@1 | 1699 | int sc_offset = (klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 1700 | Klass::secondary_super_cache_offset_in_bytes()); |
aoqi@1 | 1701 | Address secondary_supers_addr(sub_klass, ss_offset); |
aoqi@1 | 1702 | Address super_cache_addr( sub_klass, sc_offset); |
aoqi@1 | 1703 | |
aoqi@1 | 1704 | // if the pointers are equal, we are done (e.g., String[] elements) |
aoqi@1 | 1705 | __ cmpptr(super_klass, sub_klass); |
aoqi@1 | 1706 | __ jcc(Assembler::equal, L_success); |
aoqi@1 | 1707 | |
aoqi@1 | 1708 | // check the supertype display: |
aoqi@1 | 1709 | Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); |
aoqi@1 | 1710 | __ cmpptr(super_klass, super_check_addr); // test the super type |
aoqi@1 | 1711 | __ jcc(Assembler::equal, L_success); |
aoqi@1 | 1712 | |
aoqi@1 | 1713 | // if it was a primary super, we can just fail immediately |
aoqi@1 | 1714 | __ cmpl(super_check_offset, sc_offset); |
aoqi@1 | 1715 | __ jcc(Assembler::notEqual, L_miss); |
aoqi@1 | 1716 | |
aoqi@1 | 1717 | // Now do a linear scan of the secondary super-klass chain. |
aoqi@1 | 1718 | // The repne_scan instruction uses fixed registers, which we must spill. |
aoqi@1 | 1719 | // (We need a couple more temps in any case.) |
aoqi@1 | 1720 | // This code is rarely used, so simplicity is a virtue here. |
aoqi@1 | 1721 | inc_counter_np(SharedRuntime::_partial_subtype_ctr); |
aoqi@1 | 1722 | { |
aoqi@1 | 1723 | __ push(rax); |
aoqi@1 | 1724 | __ push(rcx); |
aoqi@1 | 1725 | __ push(rdi); |
aoqi@1 | 1726 | assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); |
aoqi@1 | 1727 | |
aoqi@1 | 1728 | __ movptr(rdi, secondary_supers_addr); |
aoqi@1 | 1729 | // Load the array length. |
aoqi@1 | 1730 | __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); |
aoqi@1 | 1731 | // Skip to start of data. |
aoqi@1 | 1732 | __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); |
aoqi@1 | 1733 | // Scan rcx words at [rdi] for occurrence of rax |
aoqi@1 | 1734 | // Set NZ/Z based on last compare |
aoqi@1 | 1735 | __ movptr(rax, super_klass); |
aoqi@1 | 1736 | if (UseCompressedOops) { |
aoqi@1 | 1737 | // Compare against compressed form. Don't need to uncompress because |
aoqi@1 | 1738 | // looks like orig rax is restored in popq below. |
aoqi@1 | 1739 | __ encode_heap_oop(rax); |
aoqi@1 | 1740 | __ repne_scanl(); |
aoqi@1 | 1741 | } else { |
aoqi@1 | 1742 | __ repne_scan(); |
aoqi@1 | 1743 | } |
aoqi@1 | 1744 | |
aoqi@1 | 1745 | // Unspill the temp. registers (reverse push order; the flags set by the scan are tested by the jcc below): |
aoqi@1 | 1746 | __ pop(rdi); |
aoqi@1 | 1747 | __ pop(rcx); |
aoqi@1 | 1748 | __ pop(rax); |
aoqi@1 | 1749 | |
aoqi@1 | 1750 | __ jcc(Assembler::notEqual, L_miss); |
aoqi@1 | 1751 | } |
aoqi@1 | 1752 | |
aoqi@1 | 1753 | // Success. Cache the super we found and proceed in triumph. |
aoqi@1 | 1754 | __ movptr(super_cache_addr, super_klass); // note: rax is dead |
aoqi@1 | 1755 | __ jmp(L_success); |
aoqi@1 | 1756 | |
aoqi@1 | 1757 | // Fall through on failure! |
aoqi@1 | 1758 | __ BIND(L_miss); |
aoqi@1 | 1759 | } |
aoqi@1 | 1760 | |
aoqi@1 | 1761 | // NOTE: x86_64 code inside the '#if 0' region opened above; not compiled. |
aoqi@1 | 1762 | // Generate checkcasting array copy stub |
aoqi@1 | 1763 | // |
aoqi@1 | 1764 | // Input: |
aoqi@1 | 1765 | // c_rarg0 - source array address |
aoqi@1 | 1766 | // c_rarg1 - destination array address |
aoqi@1 | 1767 | // c_rarg2 - element count, treated as ssize_t, can be zero |
aoqi@1 | 1768 | // c_rarg3 - size_t ckoff (super_check_offset) |
aoqi@1 | 1769 | // not Win64 |
aoqi@1 | 1770 | // c_rarg4 - oop ckval (super_klass) |
aoqi@1 | 1771 | // Win64 |
aoqi@1 | 1772 | // rsp+40 - oop ckval (super_klass) |
aoqi@1 | 1773 | // |
aoqi@1 | 1774 | // Output: |
aoqi@1 | 1775 | // rax == 0 - success |
aoqi@1 | 1776 | // rax == -1^K - failure, where K is partial transfer count |
aoqi@1 | 1777 | // |
aoqi@1 | 1778 | address generate_checkcast_copy(const char *name) { |
aoqi@1 | 1779 | |
aoqi@1 | 1780 | Label L_load_element, L_store_element, L_do_card_marks, L_done; |
aoqi@1 | 1781 | |
aoqi@1 | 1782 | // Input registers (after setup_arg_regs) |
aoqi@1 | 1783 | const Register from = rdi; // source array address |
aoqi@1 | 1784 | const Register to = rsi; // destination array address |
aoqi@1 | 1785 | const Register length = rdx; // elements count |
aoqi@1 | 1786 | const Register ckoff = rcx; // super_check_offset |
aoqi@1 | 1787 | const Register ckval = r8; // super_klass |
aoqi@1 | 1788 | |
aoqi@1 | 1789 | // Registers used as temps (r13, r14 are save-on-entry) |
aoqi@1 | 1790 | const Register end_from = from; // source array end address |
aoqi@1 | 1791 | const Register end_to = r13; // destination array end address |
aoqi@1 | 1792 | const Register count = rdx; // -(count_remaining) |
aoqi@1 | 1793 | const Register r14_length = r14; // saved copy of length |
aoqi@1 | 1794 | // End pointers are exclusive while copying (base + length elements, offset 0); |
aoqi@1 | 1795 | // end_to is made inclusive only at L_do_card_marks, just before the post barrier. |
aoqi@1 | 1796 | |
aoqi@1 | 1797 | const Register rax_oop = rax; // actual oop copied |
aoqi@1 | 1798 | const Register r11_klass = r11; // oop._klass |
aoqi@1 | 1799 | |
aoqi@1 | 1800 | //--------------------------------------------------------------- |
aoqi@1 | 1801 | // Assembler stub will be used for this call to arraycopy |
aoqi@1 | 1802 | // if the two arrays are subtypes of Object[] but the |
aoqi@1 | 1803 | // destination array type is not equal to or a supertype |
aoqi@1 | 1804 | // of the source type. Each element must be separately |
aoqi@1 | 1805 | // checked. |
aoqi@1 | 1806 | |
aoqi@1 | 1807 | __ align(CodeEntryAlignment); |
aoqi@1 | 1808 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1809 | address start = __ pc(); |
aoqi@1 | 1810 | |
aoqi@1 | 1811 | __ enter(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1812 | |
aoqi@1 | 1813 | checkcast_copy_entry = __ pc(); |
aoqi@1 | 1814 | BLOCK_COMMENT("Entry:"); |
aoqi@1 | 1815 | |
aoqi@1 | 1816 | #ifdef ASSERT |
aoqi@1 | 1817 | // caller guarantees that the arrays really are different |
aoqi@1 | 1818 | // otherwise, we would have to make conjoint checks |
aoqi@1 | 1819 | { Label L; |
aoqi@1 | 1820 | array_overlap_test(L, TIMES_OOP); |
aoqi@1 | 1821 | __ stop("checkcast_copy within a single array"); |
aoqi@1 | 1822 | __ bind(L); |
aoqi@1 | 1823 | } |
aoqi@1 | 1824 | #endif //ASSERT |
aoqi@1 | 1825 | |
aoqi@1 | 1826 | // allocate spill slots for r13, r14 (the enum values are word indexes relative to rsp after the subptr below) |
aoqi@1 | 1827 | enum { |
aoqi@1 | 1828 | saved_r13_offset, |
aoqi@1 | 1829 | saved_r14_offset, |
aoqi@1 | 1830 | saved_rbp_offset, |
aoqi@1 | 1831 | saved_rip_offset, |
aoqi@1 | 1832 | saved_rarg0_offset |
aoqi@1 | 1833 | }; |
aoqi@1 | 1834 | __ subptr(rsp, saved_rbp_offset * wordSize); |
aoqi@1 | 1835 | __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); |
aoqi@1 | 1836 | __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); |
aoqi@1 | 1837 | setup_arg_regs(4); // from => rdi, to => rsi, length => rdx |
aoqi@1 | 1838 | // ckoff => rcx, ckval => r8 |
aoqi@1 | 1839 | // r9 and r10 may be used to save non-volatile registers |
aoqi@1 | 1840 | #ifdef _WIN64 |
aoqi@1 | 1841 | // last argument (#4) is on stack on Win64 |
aoqi@1 | 1842 | const int ckval_offset = saved_rarg0_offset + 4; |
aoqi@1 | 1843 | __ movptr(ckval, Address(rsp, ckval_offset * wordSize)); |
aoqi@1 | 1844 | #endif |
aoqi@1 | 1845 | |
aoqi@1 | 1846 | // check that int operands are properly extended to size_t |
aoqi@1 | 1847 | assert_clean_int(length, rax); |
aoqi@1 | 1848 | assert_clean_int(ckoff, rax); |
aoqi@1 | 1849 | |
aoqi@1 | 1850 | #ifdef ASSERT |
aoqi@1 | 1851 | BLOCK_COMMENT("assert consistent ckoff/ckval"); |
aoqi@1 | 1852 | // The ckoff and ckval must be mutually consistent, |
aoqi@1 | 1853 | // even though caller generates both. |
aoqi@1 | 1854 | { Label L; |
aoqi@1 | 1855 | int sco_offset = (klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 1856 | Klass::super_check_offset_offset_in_bytes()); |
aoqi@1 | 1857 | __ cmpl(ckoff, Address(ckval, sco_offset)); |
aoqi@1 | 1858 | __ jcc(Assembler::equal, L); |
aoqi@1 | 1859 | __ stop("super_check_offset inconsistent"); |
aoqi@1 | 1860 | __ bind(L); |
aoqi@1 | 1861 | } |
aoqi@1 | 1862 | #endif //ASSERT |
aoqi@1 | 1863 | |
aoqi@1 | 1864 | // Loop-invariant addresses. They are exclusive end pointers. |
aoqi@1 | 1865 | Address end_from_addr(from, length, TIMES_OOP, 0); |
aoqi@1 | 1866 | Address end_to_addr(to, length, TIMES_OOP, 0); |
aoqi@1 | 1867 | // Loop-variant addresses. They assume post-incremented count < 0. |
aoqi@1 | 1868 | Address from_element_addr(end_from, count, TIMES_OOP, 0); |
aoqi@1 | 1869 | Address to_element_addr(end_to, count, TIMES_OOP, 0); |
aoqi@1 | 1870 | |
aoqi@1 | 1871 | gen_write_ref_array_pre_barrier(to, count); |
aoqi@1 | 1872 | |
aoqi@1 | 1873 | // Copy from low to high addresses, indexed from the end of each array. |
aoqi@1 | 1874 | __ lea(end_from, end_from_addr); |
aoqi@1 | 1875 | __ lea(end_to, end_to_addr); |
aoqi@1 | 1876 | __ movptr(r14_length, length); // save a copy of the length |
aoqi@1 | 1877 | assert(length == count, ""); // else fix next line: |
aoqi@1 | 1878 | __ negptr(count); // negate and test the length |
aoqi@1 | 1879 | __ jcc(Assembler::notZero, L_load_element); |
aoqi@1 | 1880 | |
aoqi@1 | 1881 | // Empty array: Nothing to do. |
aoqi@1 | 1882 | __ xorptr(rax, rax); // return 0 on (trivial) success |
aoqi@1 | 1883 | __ jmp(L_done); |
aoqi@1 | 1884 | |
aoqi@1 | 1885 | // ======== begin loop ======== |
aoqi@1 | 1886 | // (Loop is rotated; its entry is L_load_element.) |
aoqi@1 | 1887 | // Loop control: |
aoqi@1 | 1888 | // for (count = -count; count != 0; count++) |
aoqi@1 | 1889 | // Element addresses are formed as end pointer + count * element size, with count negative. |
aoqi@1 | 1890 | __ align(16); |
aoqi@1 | 1891 | |
aoqi@1 | 1892 | __ BIND(L_store_element); |
aoqi@1 | 1893 | __ store_heap_oop(rax_oop, to_element_addr); // store the oop |
aoqi@1 | 1894 | __ increment(count); // increment the count toward zero |
aoqi@1 | 1895 | __ jcc(Assembler::zero, L_do_card_marks); |
aoqi@1 | 1896 | |
aoqi@1 | 1897 | // ======== loop entry is here ======== |
aoqi@1 | 1898 | __ BIND(L_load_element); |
aoqi@1 | 1899 | __ load_heap_oop(rax_oop, from_element_addr); // load the oop |
aoqi@1 | 1900 | __ testptr(rax_oop, rax_oop); |
aoqi@1 | 1901 | __ jcc(Assembler::zero, L_store_element); |
aoqi@1 | 1902 | |
aoqi@1 | 1903 | __ load_klass(r11_klass, rax_oop);// query the object klass |
aoqi@1 | 1904 | generate_type_check(r11_klass, ckoff, ckval, L_store_element); |
aoqi@1 | 1905 | // ======== end loop ======== |
aoqi@1 | 1906 | |
aoqi@1 | 1907 | // It was a real error; we must depend on the caller to finish the job. |
aoqi@1 | 1908 | // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. |
aoqi@1 | 1909 | // Emit GC store barriers for the oops we have copied (r14 + rdx), |
aoqi@1 | 1910 | // and report their number to the caller. |
aoqi@1 | 1911 | assert_different_registers(rax, r14_length, count, to, end_to, rcx); |
aoqi@1 | 1912 | __ lea(end_to, to_element_addr); |
aoqi@1 | 1913 | gen_write_ref_array_post_barrier(to, end_to, rscratch1); |
aoqi@1 | 1914 | __ movptr(rax, r14_length); // original oops |
aoqi@1 | 1915 | __ addptr(rax, count); // K = (original - remaining) oops |
aoqi@1 | 1916 | __ notptr(rax); // report (-1^K) to caller |
aoqi@1 | 1917 | __ jmp(L_done); |
aoqi@1 | 1918 | |
aoqi@1 | 1919 | // Come here on success only. |
aoqi@1 | 1920 | __ BIND(L_do_card_marks); |
aoqi@1 | 1921 | __ addptr(end_to, -wordSize); // make an inclusive end pointer |
aoqi@1 | 1922 | gen_write_ref_array_post_barrier(to, end_to, rscratch1); |
aoqi@1 | 1923 | __ xorptr(rax, rax); // return 0 on success |
aoqi@1 | 1924 | |
aoqi@1 | 1925 | // Common exit point (success or failure): reload r13/r14 from their spill slots and return with the result in rax. |
aoqi@1 | 1926 | __ BIND(L_done); |
aoqi@1 | 1927 | __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); |
aoqi@1 | 1928 | __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); |
aoqi@1 | 1929 | inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); |
aoqi@1 | 1930 | restore_arg_regs(); |
aoqi@1 | 1931 | __ leave(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1932 | __ ret(0); |
aoqi@1 | 1933 | |
aoqi@1 | 1934 | return start; |
aoqi@1 | 1935 | } |
aoqi@1 | 1936 | |
aoqi@1 | 1937 | // NOTE: x86_64 code inside the '#if 0' region opened above; not compiled. |
aoqi@1 | 1938 | // Generate 'unsafe' array copy stub |
aoqi@1 | 1939 | // Though just as safe as the other stubs, it takes an unscaled |
aoqi@1 | 1940 | // size_t argument instead of an element count. |
aoqi@1 | 1941 | // |
aoqi@1 | 1942 | // Input: |
aoqi@1 | 1943 | // c_rarg0 - source array address |
aoqi@1 | 1944 | // c_rarg1 - destination array address |
aoqi@1 | 1945 | // c_rarg2 - byte count, treated as ssize_t, can be zero |
aoqi@1 | 1946 | // |
aoqi@1 | 1947 | // Examines the alignment of the operands and dispatches |
aoqi@1 | 1948 | // to a long, int, short, or byte copy loop. |
aoqi@1 | 1949 | // The stub never returns itself: every path ends in a jump to one of the *_copy_entry addresses. |
aoqi@1 | 1950 | address generate_unsafe_copy(const char *name) { |
aoqi@1 | 1951 | |
aoqi@1 | 1952 | Label L_long_aligned, L_int_aligned, L_short_aligned; |
aoqi@1 | 1953 | |
aoqi@1 | 1954 | // Input registers (before setup_arg_regs) |
aoqi@1 | 1955 | const Register from = c_rarg0; // source array address |
aoqi@1 | 1956 | const Register to = c_rarg1; // destination array address |
aoqi@1 | 1957 | const Register size = c_rarg2; // byte count (size_t) |
aoqi@1 | 1958 | |
aoqi@1 | 1959 | // Register used as a temp |
aoqi@1 | 1960 | const Register bits = rax; // test copy of low bits |
aoqi@1 | 1961 | |
aoqi@1 | 1962 | __ align(CodeEntryAlignment); |
aoqi@1 | 1963 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 1964 | address start = __ pc(); |
aoqi@1 | 1965 | |
aoqi@1 | 1966 | __ enter(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 1967 | |
aoqi@1 | 1968 | // bump this on entry, not on exit: |
aoqi@1 | 1969 | inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); |
aoqi@1 | 1970 | |
aoqi@1 | 1971 | __ mov(bits, from); |
aoqi@1 | 1972 | __ orptr(bits, to); |
aoqi@1 | 1973 | __ orptr(bits, size); |
aoqi@1 | 1974 | |
aoqi@1 | 1975 | __ testb(bits, BytesPerLong-1); |
aoqi@1 | 1976 | __ jccb(Assembler::zero, L_long_aligned); |
aoqi@1 | 1977 | |
aoqi@1 | 1978 | __ testb(bits, BytesPerInt-1); |
aoqi@1 | 1979 | __ jccb(Assembler::zero, L_int_aligned); |
aoqi@1 | 1980 | |
aoqi@1 | 1981 | __ testb(bits, BytesPerShort-1); |
aoqi@1 | 1982 | __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); |
aoqi@1 | 1983 | |
aoqi@1 | 1984 | __ BIND(L_short_aligned); |
aoqi@1 | 1985 | __ shrptr(size, LogBytesPerShort); // size => short_count |
aoqi@1 | 1986 | __ jump(RuntimeAddress(short_copy_entry)); |
aoqi@1 | 1987 | |
aoqi@1 | 1988 | __ BIND(L_int_aligned); |
aoqi@1 | 1989 | __ shrptr(size, LogBytesPerInt); // size => int_count |
aoqi@1 | 1990 | __ jump(RuntimeAddress(int_copy_entry)); |
aoqi@1 | 1991 | |
aoqi@1 | 1992 | __ BIND(L_long_aligned); |
aoqi@1 | 1993 | __ shrptr(size, LogBytesPerLong); // size => qword_count |
aoqi@1 | 1994 | __ jump(RuntimeAddress(long_copy_entry)); |
aoqi@1 | 1995 | |
aoqi@1 | 1996 | return start; |
aoqi@1 | 1997 | } |
aoqi@1 | 1998 | |
aoqi@1 | 1999 | // Perform range checks on the proposed arraycopy; jumps to L_failed when either range exceeds its array length. |
aoqi@1 | 2000 | // Kills temp, but nothing else. |
aoqi@1 | 2001 | // Also, clean the sign bits of src_pos and dst_pos. (x86_64 code inside the '#if 0' region.) |
aoqi@1 | 2002 | void arraycopy_range_checks(Register src, // source array oop (c_rarg0) |
aoqi@1 | 2003 | Register src_pos, // source position (c_rarg1) |
aoqi@1 | 2004 | Register dst, // destination array oop (c_rarg2) |
aoqi@1 | 2005 | Register dst_pos, // destination position (c_rarg3) |
aoqi@1 | 2006 | Register length, |
aoqi@1 | 2007 | Register temp, |
aoqi@1 | 2008 | Label& L_failed) { |
aoqi@1 | 2009 | BLOCK_COMMENT("arraycopy_range_checks:"); |
aoqi@1 | 2010 | |
aoqi@1 | 2011 | // if (src_pos + length > arrayOop(src)->length()) FAIL; (32-bit sum, unsigned 'above' compare) |
aoqi@1 | 2012 | __ movl(temp, length); |
aoqi@1 | 2013 | __ addl(temp, src_pos); // src_pos + length |
aoqi@1 | 2014 | __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); |
aoqi@1 | 2015 | __ jcc(Assembler::above, L_failed); |
aoqi@1 | 2016 | |
aoqi@1 | 2017 | // if (dst_pos + length > arrayOop(dst)->length()) FAIL; (32-bit sum, unsigned 'above' compare) |
aoqi@1 | 2018 | __ movl(temp, length); |
aoqi@1 | 2019 | __ addl(temp, dst_pos); // dst_pos + length |
aoqi@1 | 2020 | __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); |
aoqi@1 | 2021 | __ jcc(Assembler::above, L_failed); |
aoqi@1 | 2022 | |
aoqi@1 | 2023 | // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. |
aoqi@1 | 2024 | // Move with sign extension can be used since they are positive. |
aoqi@1 | 2025 | __ movslq(src_pos, src_pos); |
aoqi@1 | 2026 | __ movslq(dst_pos, dst_pos); |
aoqi@1 | 2027 | |
aoqi@1 | 2028 | BLOCK_COMMENT("arraycopy_range_checks done"); |
aoqi@1 | 2029 | } |
aoqi@1 | 2030 | |
aoqi@1 | 2031 | // |
aoqi@1 | 2032 | // Generate generic array copy stubs |
aoqi@1 | 2033 | // |
aoqi@1 | 2034 | // Input: |
aoqi@1 | 2035 | // c_rarg0 - src oop |
aoqi@1 | 2036 | // c_rarg1 - src_pos (32-bits) |
aoqi@1 | 2037 | // c_rarg2 - dst oop |
aoqi@1 | 2038 | // c_rarg3 - dst_pos (32-bits) |
aoqi@1 | 2039 | // not Win64 |
aoqi@1 | 2040 | // c_rarg4 - element count (32-bits) |
aoqi@1 | 2041 | // Win64 |
aoqi@1 | 2042 | // rsp+40 - element count (32-bits) |
aoqi@1 | 2043 | // |
aoqi@1 | 2044 | // Output: |
aoqi@1 | 2045 | // rax == 0 - success |
aoqi@1 | 2046 | // rax == -1^K - failure, where K is partial transfer count |
aoqi@1 | 2047 | // |
aoqi@1 | 2048 | address generate_generic_copy(const char *name) { |
aoqi@1 | 2049 | |
aoqi@1 | 2050 | Label L_failed, L_failed_0, L_objArray; |
aoqi@1 | 2051 | Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; |
aoqi@1 | 2052 | |
aoqi@1 | 2053 | // Input registers |
aoqi@1 | 2054 | const Register src = c_rarg0; // source array oop |
aoqi@1 | 2055 | const Register src_pos = c_rarg1; // source position |
aoqi@1 | 2056 | const Register dst = c_rarg2; // destination array oop |
aoqi@1 | 2057 | const Register dst_pos = c_rarg3; // destination position |
aoqi@1 | 2058 | // elements count is on stack on Win64 |
aoqi@1 | 2059 | #ifdef _WIN64 |
aoqi@1 | 2060 | #define C_RARG4 Address(rsp, 6 * wordSize) |
aoqi@1 | 2061 | #else |
aoqi@1 | 2062 | #define C_RARG4 c_rarg4 |
aoqi@1 | 2063 | #endif |
aoqi@1 | 2064 | |
aoqi@1 | 2065 | { int modulus = CodeEntryAlignment; |
aoqi@1 | 2066 | int target = modulus - 5; // 5 = sizeof jmp(L_failed) |
aoqi@1 | 2067 | int advance = target - (__ offset() % modulus); |
aoqi@1 | 2068 | if (advance < 0) advance += modulus; |
aoqi@1 | 2069 | if (advance > 0) __ nop(advance); |
aoqi@1 | 2070 | } |
aoqi@1 | 2071 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 2072 | |
aoqi@1 | 2073 | // Short-hop target to L_failed. Makes for denser prologue code. |
aoqi@1 | 2074 | __ BIND(L_failed_0); |
aoqi@1 | 2075 | __ jmp(L_failed); |
aoqi@1 | 2076 | assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); |
aoqi@1 | 2077 | |
aoqi@1 | 2078 | __ align(CodeEntryAlignment); |
aoqi@1 | 2079 | address start = __ pc(); |
aoqi@1 | 2080 | |
aoqi@1 | 2081 | __ enter(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 2082 | |
aoqi@1 | 2083 | // bump this on entry, not on exit: |
aoqi@1 | 2084 | inc_counter_np(SharedRuntime::_generic_array_copy_ctr); |
aoqi@1 | 2085 | |
aoqi@1 | 2086 | //----------------------------------------------------------------------- |
aoqi@1 | 2087 | // Assembler stub will be used for this call to arraycopy |
aoqi@1 | 2088 | // if the following conditions are met: |
aoqi@1 | 2089 | // |
aoqi@1 | 2090 | // (1) src and dst must not be null. |
aoqi@1 | 2091 | // (2) src_pos must not be negative. |
aoqi@1 | 2092 | // (3) dst_pos must not be negative. |
aoqi@1 | 2093 | // (4) length must not be negative. |
aoqi@1 | 2094 | // (5) src klass and dst klass should be the same and not NULL. |
aoqi@1 | 2095 | // (6) src and dst should be arrays. |
aoqi@1 | 2096 | // (7) src_pos + length must not exceed length of src. |
aoqi@1 | 2097 | // (8) dst_pos + length must not exceed length of dst. |
aoqi@1 | 2098 | // |
aoqi@1 | 2099 | |
aoqi@1 | 2100 | // if (src == NULL) return -1; |
aoqi@1 | 2101 | __ testptr(src, src); // src oop |
aoqi@1 | 2102 | size_t j1off = __ offset(); |
aoqi@1 | 2103 | __ jccb(Assembler::zero, L_failed_0); |
aoqi@1 | 2104 | |
aoqi@1 | 2105 | // if (src_pos < 0) return -1; |
aoqi@1 | 2106 | __ testl(src_pos, src_pos); // src_pos (32-bits) |
aoqi@1 | 2107 | __ jccb(Assembler::negative, L_failed_0); |
aoqi@1 | 2108 | |
aoqi@1 | 2109 | // if (dst == NULL) return -1; |
aoqi@1 | 2110 | __ testptr(dst, dst); // dst oop |
aoqi@1 | 2111 | __ jccb(Assembler::zero, L_failed_0); |
aoqi@1 | 2112 | |
aoqi@1 | 2113 | // if (dst_pos < 0) return -1; |
aoqi@1 | 2114 | __ testl(dst_pos, dst_pos); // dst_pos (32-bits) |
aoqi@1 | 2115 | size_t j4off = __ offset(); |
aoqi@1 | 2116 | __ jccb(Assembler::negative, L_failed_0); |
aoqi@1 | 2117 | |
aoqi@1 | 2118 | // The first four tests are very dense code, |
aoqi@1 | 2119 | // but not quite dense enough to put four |
aoqi@1 | 2120 | // jumps in a 16-byte instruction fetch buffer. |
aoqi@1 | 2121 | // That's good, because some branch predicters |
aoqi@1 | 2122 | // do not like jumps so close together. |
aoqi@1 | 2123 | // Make sure of this. |
aoqi@1 | 2124 | guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); |
aoqi@1 | 2125 | |
aoqi@1 | 2126 | // registers used as temp |
aoqi@1 | 2127 | const Register r11_length = r11; // elements count to copy |
aoqi@1 | 2128 | const Register r10_src_klass = r10; // array klass |
aoqi@1 | 2129 | const Register r9_dst_klass = r9; // dest array klass |
aoqi@1 | 2130 | |
aoqi@1 | 2131 | // if (length < 0) return -1; |
aoqi@1 | 2132 | __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) |
aoqi@1 | 2133 | __ testl(r11_length, r11_length); |
aoqi@1 | 2134 | __ jccb(Assembler::negative, L_failed_0); |
aoqi@1 | 2135 | |
aoqi@1 | 2136 | __ load_klass(r10_src_klass, src); |
aoqi@1 | 2137 | #ifdef ASSERT |
aoqi@1 | 2138 | // assert(src->klass() != NULL); |
aoqi@1 | 2139 | BLOCK_COMMENT("assert klasses not null"); |
aoqi@1 | 2140 | { Label L1, L2; |
aoqi@1 | 2141 | __ testptr(r10_src_klass, r10_src_klass); |
aoqi@1 | 2142 | __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL |
aoqi@1 | 2143 | __ bind(L1); |
aoqi@1 | 2144 | __ stop("broken null klass"); |
aoqi@1 | 2145 | __ bind(L2); |
aoqi@1 | 2146 | __ load_klass(r9_dst_klass, dst); |
aoqi@1 | 2147 | __ cmpq(r9_dst_klass, 0); |
aoqi@1 | 2148 | __ jcc(Assembler::equal, L1); // this would be broken also |
aoqi@1 | 2149 | BLOCK_COMMENT("assert done"); |
aoqi@1 | 2150 | } |
aoqi@1 | 2151 | #endif |
aoqi@1 | 2152 | |
aoqi@1 | 2153 | // Load layout helper (32-bits) |
aoqi@1 | 2154 | // |
aoqi@1 | 2155 | // |array_tag| | header_size | element_type | |log2_element_size| |
aoqi@1 | 2156 | // 32 30 24 16 8 2 0 |
aoqi@1 | 2157 | // |
aoqi@1 | 2158 | // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 |
aoqi@1 | 2159 | // |
aoqi@1 | 2160 | |
aoqi@1 | 2161 | int lh_offset = klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 2162 | Klass::layout_helper_offset_in_bytes(); |
aoqi@1 | 2163 | |
aoqi@1 | 2164 | const Register rax_lh = rax; // layout helper |
aoqi@1 | 2165 | |
aoqi@1 | 2166 | __ movl(rax_lh, Address(r10_src_klass, lh_offset)); |
aoqi@1 | 2167 | |
aoqi@1 | 2168 | // Handle objArrays completely differently... |
aoqi@1 | 2169 | jint objArray_lh = Klass::array_layout_helper(T_OBJECT); |
aoqi@1 | 2170 | __ cmpl(rax_lh, objArray_lh); |
aoqi@1 | 2171 | __ jcc(Assembler::equal, L_objArray); |
aoqi@1 | 2172 | |
aoqi@1 | 2173 | // if (src->klass() != dst->klass()) return -1; |
aoqi@1 | 2174 | __ load_klass(r9_dst_klass, dst); |
aoqi@1 | 2175 | __ cmpq(r10_src_klass, r9_dst_klass); |
aoqi@1 | 2176 | __ jcc(Assembler::notEqual, L_failed); |
aoqi@1 | 2177 | |
aoqi@1 | 2178 | // if (!src->is_Array()) return -1; |
aoqi@1 | 2179 | __ cmpl(rax_lh, Klass::_lh_neutral_value); |
aoqi@1 | 2180 | __ jcc(Assembler::greaterEqual, L_failed); |
aoqi@1 | 2181 | |
aoqi@1 | 2182 | // At this point, it is known to be a typeArray (array_tag 0x3). |
aoqi@1 | 2183 | #ifdef ASSERT |
aoqi@1 | 2184 | { Label L; |
aoqi@1 | 2185 | __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); |
aoqi@1 | 2186 | __ jcc(Assembler::greaterEqual, L); |
aoqi@1 | 2187 | __ stop("must be a primitive array"); |
aoqi@1 | 2188 | __ bind(L); |
aoqi@1 | 2189 | } |
aoqi@1 | 2190 | #endif |
aoqi@1 | 2191 | |
aoqi@1 | 2192 | arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, |
aoqi@1 | 2193 | r10, L_failed); |
aoqi@1 | 2194 | |
aoqi@1 | 2195 | // typeArrayKlass |
aoqi@1 | 2196 | // |
aoqi@1 | 2197 | // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); |
aoqi@1 | 2198 | // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); |
aoqi@1 | 2199 | // |
aoqi@1 | 2200 | |
aoqi@1 | 2201 | const Register r10_offset = r10; // array offset |
aoqi@1 | 2202 | const Register rax_elsize = rax_lh; // element size |
aoqi@1 | 2203 | |
aoqi@1 | 2204 | __ movl(r10_offset, rax_lh); |
aoqi@1 | 2205 | __ shrl(r10_offset, Klass::_lh_header_size_shift); |
aoqi@1 | 2206 | __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset |
aoqi@1 | 2207 | __ addptr(src, r10_offset); // src array offset |
aoqi@1 | 2208 | __ addptr(dst, r10_offset); // dst array offset |
aoqi@1 | 2209 | BLOCK_COMMENT("choose copy loop based on element size"); |
aoqi@1 | 2210 | __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize |
aoqi@1 | 2211 | |
aoqi@1 | 2212 | // next registers should be set before the jump to corresponding stub |
aoqi@1 | 2213 | const Register from = c_rarg0; // source array address |
aoqi@1 | 2214 | const Register to = c_rarg1; // destination array address |
aoqi@1 | 2215 | const Register count = c_rarg2; // elements count |
aoqi@1 | 2216 | |
aoqi@1 | 2217 | // 'from', 'to', 'count' registers should be set in such order |
aoqi@1 | 2218 | // since they are the same as 'src', 'src_pos', 'dst'. |
aoqi@1 | 2219 | |
aoqi@1 | 2220 | __ BIND(L_copy_bytes); |
aoqi@1 | 2221 | __ cmpl(rax_elsize, 0); |
aoqi@1 | 2222 | __ jccb(Assembler::notEqual, L_copy_shorts); |
aoqi@1 | 2223 | __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr |
aoqi@1 | 2224 | __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr |
aoqi@1 | 2225 | __ movl2ptr(count, r11_length); // length |
aoqi@1 | 2226 | __ jump(RuntimeAddress(byte_copy_entry)); |
aoqi@1 | 2227 | |
aoqi@1 | 2228 | __ BIND(L_copy_shorts); |
aoqi@1 | 2229 | __ cmpl(rax_elsize, LogBytesPerShort); |
aoqi@1 | 2230 | __ jccb(Assembler::notEqual, L_copy_ints); |
aoqi@1 | 2231 | __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr |
aoqi@1 | 2232 | __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr |
aoqi@1 | 2233 | __ movl2ptr(count, r11_length); // length |
aoqi@1 | 2234 | __ jump(RuntimeAddress(short_copy_entry)); |
aoqi@1 | 2235 | |
aoqi@1 | 2236 | __ BIND(L_copy_ints); |
aoqi@1 | 2237 | __ cmpl(rax_elsize, LogBytesPerInt); |
aoqi@1 | 2238 | __ jccb(Assembler::notEqual, L_copy_longs); |
aoqi@1 | 2239 | __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr |
aoqi@1 | 2240 | __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr |
aoqi@1 | 2241 | __ movl2ptr(count, r11_length); // length |
aoqi@1 | 2242 | __ jump(RuntimeAddress(int_copy_entry)); |
aoqi@1 | 2243 | |
aoqi@1 | 2244 | __ BIND(L_copy_longs); |
aoqi@1 | 2245 | #ifdef ASSERT |
aoqi@1 | 2246 | { Label L; |
aoqi@1 | 2247 | __ cmpl(rax_elsize, LogBytesPerLong); |
aoqi@1 | 2248 | __ jcc(Assembler::equal, L); |
aoqi@1 | 2249 | __ stop("must be long copy, but elsize is wrong"); |
aoqi@1 | 2250 | __ bind(L); |
aoqi@1 | 2251 | } |
aoqi@1 | 2252 | #endif |
aoqi@1 | 2253 | __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr |
aoqi@1 | 2254 | __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr |
aoqi@1 | 2255 | __ movl2ptr(count, r11_length); // length |
aoqi@1 | 2256 | __ jump(RuntimeAddress(long_copy_entry)); |
aoqi@1 | 2257 | |
aoqi@1 | 2258 | // objArrayKlass |
aoqi@1 | 2259 | __ BIND(L_objArray); |
aoqi@1 | 2260 | // live at this point: r10_src_klass, src[_pos], dst[_pos] |
aoqi@1 | 2261 | |
aoqi@1 | 2262 | Label L_plain_copy, L_checkcast_copy; |
aoqi@1 | 2263 | // test array classes for subtyping |
aoqi@1 | 2264 | __ load_klass(r9_dst_klass, dst); |
aoqi@1 | 2265 | __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality |
aoqi@1 | 2266 | __ jcc(Assembler::notEqual, L_checkcast_copy); |
aoqi@1 | 2267 | |
aoqi@1 | 2268 | // Identically typed arrays can be copied without element-wise checks. |
aoqi@1 | 2269 | arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, |
aoqi@1 | 2270 | r10, L_failed); |
aoqi@1 | 2271 | |
aoqi@1 | 2272 | __ lea(from, Address(src, src_pos, TIMES_OOP, |
aoqi@1 | 2273 | arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr |
aoqi@1 | 2274 | __ lea(to, Address(dst, dst_pos, TIMES_OOP, |
aoqi@1 | 2275 | arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr |
aoqi@1 | 2276 | __ movl2ptr(count, r11_length); // length |
aoqi@1 | 2277 | __ BIND(L_plain_copy); |
aoqi@1 | 2278 | __ jump(RuntimeAddress(oop_copy_entry)); |
aoqi@1 | 2279 | |
aoqi@1 | 2280 | __ BIND(L_checkcast_copy); |
aoqi@1 | 2281 | // live at this point: r10_src_klass, !r11_length |
aoqi@1 | 2282 | { |
aoqi@1 | 2283 | // assert(r11_length == C_RARG4); // will reload from here |
aoqi@1 | 2284 | Register r11_dst_klass = r11; |
aoqi@1 | 2285 | __ load_klass(r11_dst_klass, dst); |
aoqi@1 | 2286 | |
aoqi@1 | 2287 | // Before looking at dst.length, make sure dst is also an objArray. |
aoqi@1 | 2288 | __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); |
aoqi@1 | 2289 | __ jcc(Assembler::notEqual, L_failed); |
aoqi@1 | 2290 | |
aoqi@1 | 2291 | // It is safe to examine both src.length and dst.length. |
aoqi@1 | 2292 | #ifndef _WIN64 |
aoqi@1 | 2293 | arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4, |
aoqi@1 | 2294 | rax, L_failed); |
aoqi@1 | 2295 | #else |
aoqi@1 | 2296 | __ movl(r11_length, C_RARG4); // reload |
aoqi@1 | 2297 | arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, |
aoqi@1 | 2298 | rax, L_failed); |
aoqi@1 | 2299 | __ load_klass(r11_dst_klass, dst); // reload |
aoqi@1 | 2300 | #endif |
aoqi@1 | 2301 | |
aoqi@1 | 2302 | // Marshal the base address arguments now, freeing registers. |
aoqi@1 | 2303 | __ lea(from, Address(src, src_pos, TIMES_OOP, |
aoqi@1 | 2304 | arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
aoqi@1 | 2305 | __ lea(to, Address(dst, dst_pos, TIMES_OOP, |
aoqi@1 | 2306 | arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
aoqi@1 | 2307 | __ movl(count, C_RARG4); // length (reloaded) |
aoqi@1 | 2308 | Register sco_temp = c_rarg3; // this register is free now |
aoqi@1 | 2309 | assert_different_registers(from, to, count, sco_temp, |
aoqi@1 | 2310 | r11_dst_klass, r10_src_klass); |
aoqi@1 | 2311 | assert_clean_int(count, sco_temp); |
aoqi@1 | 2312 | |
aoqi@1 | 2313 | // Generate the type check. |
aoqi@1 | 2314 | int sco_offset = (klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 2315 | Klass::super_check_offset_offset_in_bytes()); |
aoqi@1 | 2316 | __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); |
aoqi@1 | 2317 | assert_clean_int(sco_temp, rax); |
aoqi@1 | 2318 | generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); |
aoqi@1 | 2319 | |
aoqi@1 | 2320 | // Fetch destination element klass from the objArrayKlass header. |
aoqi@1 | 2321 | int ek_offset = (klassOopDesc::header_size() * HeapWordSize + |
aoqi@1 | 2322 | objArrayKlass::element_klass_offset_in_bytes()); |
aoqi@1 | 2323 | __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); |
aoqi@1 | 2324 | __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); |
aoqi@1 | 2325 | assert_clean_int(sco_temp, rax); |
aoqi@1 | 2326 | |
aoqi@1 | 2327 | // the checkcast_copy loop needs two extra arguments: |
aoqi@1 | 2328 | assert(c_rarg3 == sco_temp, "#3 already in place"); |
aoqi@1 | 2329 | __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass |
aoqi@1 | 2330 | __ jump(RuntimeAddress(checkcast_copy_entry)); |
aoqi@1 | 2331 | } |
aoqi@1 | 2332 | |
aoqi@1 | 2333 | __ BIND(L_failed); |
aoqi@1 | 2334 | __ xorptr(rax, rax); |
aoqi@1 | 2335 | __ notptr(rax); // return -1 |
aoqi@1 | 2336 | __ leave(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 2337 | __ ret(0); |
aoqi@1 | 2338 | |
aoqi@1 | 2339 | return start; |
aoqi@1 | 2340 | } |
aoqi@1 | 2341 | |
aoqi@1 | 2342 | #undef length_arg |
aoqi@1 | 2343 | #endif |
aoqi@1 | 2344 | |
aoqi@1 | 2345 | // Emits a stub that copies A2 jlong elements from A0 to A1 in ascending address order (disjoint ranges). FIXME: marker kept from the original, which did not record its reason. |
aoqi@1 | 2346 | address generate_disjoint_long_copy(bool aligned, const char *name) { |
aoqi@1 | 2347 | Label l_1, l_2; // l_1 = loop body, l_2 = loop test (entered first) |
aoqi@1 | 2348 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 2349 | __ align(CodeEntryAlignment); |
aoqi@1 | 2350 | address start = __ pc(); // returned to the caller as the stub entry |
aoqi@1 | 2351 | |
aoqi@1 | 2352 | // Stub arguments: A0 = from, A1 = to, A2 = element count (8 bytes per element). |
aoqi@1 | 2353 | // 'aligned' is not consulted here; the same code is emitted for both variants. |
aoqi@1 | 2354 | // Save the caller's T3/T0/T1 BEFORE they are overwritten, so the pops at the end really restore them. |
aoqi@1 | 2355 | __ push(T3); |
aoqi@1 | 2356 | __ push(T0); |
aoqi@1 | 2357 | __ push(T1); |
aoqi@1 | 2358 | __ move(T1, A2); // T1 = remaining element count |
aoqi@1 | 2359 | __ move(T3, A0); // T3 = source cursor |
aoqi@1 | 2360 | __ move(T0, A1); // T0 = destination cursor |
aoqi@1 | 2361 | // Enter the loop at its test so that a count <= 0 copies nothing |
aoqi@1 | 2362 | // (x86 original: jmp(l_2)). |
aoqi@1 | 2363 | __ b(l_2); |
aoqi@1 | 2364 | __ delayed()->nop(); // branch delay slot |
aoqi@1 | 2365 | __ align(16); |
aoqi@1 | 2366 | __ bind(l_1); |
aoqi@1 | 2367 | // Loop body: move one 8-byte element through AT. |
aoqi@1 | 2368 | // The x86 code this was ported from used an MMX movq here, or |
aoqi@1 | 2369 | // fild_d/fistp_d when MMX was not supported; this port uses a |
aoqi@1 | 2370 | // plain 64-bit ld/sd pair instead. |
aoqi@1 | 2371 | // Load the element at the source cursor ... |
aoqi@1 | 2372 | __ ld(AT, T3, 0); |
aoqi@1 | 2373 | // ... and store it at the destination cursor. |
aoqi@1 | 2374 | __ sd (AT, T0, 0); |
aoqi@1 | 2375 | // Advance both cursors by one element |
aoqi@1 | 2376 | // (x86 original: addl(eax, 8) with edx holding to - from). |
aoqi@1 | 2377 | __ addi(T3, T3, 8); |
aoqi@1 | 2378 | __ addi(T0, T0, 8); |
aoqi@1 | 2379 | __ bind(l_2); |
aoqi@1 | 2380 | // Loop test: decrement the remaining count (x86 original: decl(ecx)) ... |
aoqi@1 | 2381 | __ addi(T1, T1, -1); |
aoqi@1 | 2382 | // ... and keep copying while it is still >= 0 (x86 original: jcc(greaterEqual, l_1)). |
aoqi@1 | 2383 | __ bgez(T1, l_1); |
aoqi@1 | 2384 | __ delayed()->nop(); // branch delay slot |
aoqi@1 | 2385 | // Copy finished. The x86 original executed emms here when MMX |
aoqi@1 | 2386 | // had been used and then returned with ret(0); |
aoqi@1 | 2387 | // this port restores the saved temporaries in reverse push order |
aoqi@1 | 2388 | // and returns through RA. |
aoqi@1 | 2389 | __ pop(T1); |
aoqi@1 | 2390 | __ pop(T0); |
aoqi@1 | 2391 | __ pop(T3); |
aoqi@1 | 2392 | __ jr(RA); |
aoqi@1 | 2393 | __ delayed()->nop(); // branch delay slot |
aoqi@1 | 2394 | return start; |
aoqi@1 | 2395 | } |
aoqi@1 | 2396 | |
aoqi@1 | 2397 | |
aoqi@1 | 2398 | address generate_conjoint_long_copy(bool aligned, const char *name) { |
aoqi@1 | 2399 | Label l_1, l_2; |
aoqi@1 | 2400 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 2401 | __ align(CodeEntryAlignment); |
aoqi@1 | 2402 | address start = __ pc(); |
aoqi@1 | 2403 | address nooverlap_target = aligned ? |
aoqi@1 | 2404 | StubRoutines::arrayof_jlong_disjoint_arraycopy() : |
aoqi@1 | 2405 | StubRoutines::jlong_disjoint_arraycopy(); |
aoqi@1 | 2406 | array_overlap_test(nooverlap_target, 3); |
aoqi@1 | 2407 | |
aoqi@1 | 2408 | __ push(T3); |
aoqi@1 | 2409 | __ push(T0); |
aoqi@1 | 2410 | __ push(T1); |
aoqi@1 | 2411 | |
aoqi@1 | 2412 | /* Purpose: emits the jlong copy stub for possibly overlapping ranges. |
aoqi@1 | 2413 | Stub arguments: A0 = from, A1 = to, A2 = element count (8 bytes each). |
aoqi@1 | 2414 | The code below copies from the last element down to the first. |
aoqi@1 | 2415 | array_overlap_test above received the matching disjoint stub as its |
aoqi@1 | 2416 | target (presumably taken when the ranges do not overlap; 3 looks like log2 of the element size -- confirm). |
aoqi@1 | 2417 | T3/T0/T1 are saved above and restored before returning. */ |
aoqi@1 | 2418 | __ move(T1, A2); |
aoqi@1 | 2419 | __ move(T3, A0); |
aoqi@1 | 2420 | __ move(T0, A1); |
aoqi@1 | 2421 | __ sll(AT, T1, Address::times_8); |
aoqi@1 | 2422 | __ add(AT, T3, AT); |
aoqi@1 | 2423 | __ lea(T3 , Address(AT, -8)); |
aoqi@1 | 2424 | __ sll(AT, T1, Address::times_8); |
aoqi@1 | 2425 | __ add(AT, T0, AT); |
aoqi@1 | 2426 | __ lea(T0 , Address(AT, -8)); |
aoqi@1 | 2427 | |
aoqi@1 | 2428 | |
aoqi@1 | 2429 | |
aoqi@1 | 2430 | __ b(l_2); |
aoqi@1 | 2431 | __ delayed()->nop(); |
aoqi@1 | 2432 | __ align(16); |
aoqi@1 | 2433 | __ bind(l_1); |
aoqi@1 | 2434 | /* Loop body. On entry T3 = from + count*8 - 8 and T0 = to + count*8 - 8, |
aoqi@1 | 2435 | i.e. both cursors start at the last element. |
aoqi@1 | 2436 | One 8-byte element is moved through AT, then both cursors step |
aoqi@1 | 2437 | down by one element. |
aoqi@1 | 2438 | The x86 code this was ported from used an MMX movq here, or |
aoqi@1 | 2439 | fild_d/fistp_d when MMX was not supported, addressing with |
aoqi@1 | 2440 | (base, ecx, times_8) rather than moving the pointers. |
aoqi@1 | 2441 | */ |
aoqi@1 | 2442 | __ ld(AT, T3, 0); |
aoqi@1 | 2443 | __ sd (AT, T0, 0); |
aoqi@1 | 2444 | __ addi(T3, T3, -8); |
aoqi@1 | 2445 | __ addi(T0, T0,-8); |
aoqi@1 | 2446 | __ bind(l_2); |
aoqi@1 | 2447 | // Loop test (also the entry point of the loop): decrement the remaining count (x86 original: decl(ecx)) ... |
aoqi@1 | 2448 | __ addi(T1, T1, -1); |
aoqi@1 | 2449 | // ... and keep copying while it is still >= 0, so a count <= 0 copies nothing (x86 original: jcc(greaterEqual, l_1)). |
aoqi@1 | 2450 | __ bgez(T1, l_1); |
aoqi@1 | 2451 | __ delayed()->nop(); |
aoqi@1 | 2452 | // Copy finished. The x86 original executed emms here when MMX |
aoqi@1 | 2453 | // had been used and then returned with ret(0); |
aoqi@1 | 2454 | // this port restores T1/T0/T3 in reverse push order |
aoqi@1 | 2455 | // and returns through RA. |
aoqi@1 | 2456 | __ pop(T1); |
aoqi@1 | 2457 | __ pop(T0); |
aoqi@1 | 2458 | __ pop(T3); |
aoqi@1 | 2459 | __ jr(RA); |
aoqi@1 | 2460 | __ delayed()->nop(); |
aoqi@1 | 2461 | return start; |
aoqi@1 | 2462 | } |
aoqi@1 | 2463 | |
aoqi@1 | 2464 | void generate_arraycopy_stubs() { |
aoqi@1 | 2465 | if (UseCompressedOops) { |
aoqi@1 | 2466 | StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy"); |
aoqi@1 | 2467 | StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy"); |
aoqi@1 | 2468 | } else { |
aoqi@1 | 2469 | StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy"); |
aoqi@1 | 2470 | StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy"); |
aoqi@1 | 2471 | } |
aoqi@1 | 2472 | |
aoqi@1 | 2473 | StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); |
aoqi@1 | 2474 | StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); |
aoqi@1 | 2475 | StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); |
aoqi@1 | 2476 | StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); |
aoqi@1 | 2477 | StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); |
aoqi@1 | 2478 | |
aoqi@1 | 2479 | // Overview: this function generates every arraycopy stub and publishes its entry point in StubRoutines. Oop stubs above use the int-sized generators when UseCompressedOops is set, the long-sized ones otherwise. |
aoqi@1 | 2480 | // Disjoint stubs are generated before the conjoint ones; generate_conjoint_long_copy, for example, reads the jlong disjoint entry points while it is being generated. |
aoqi@1 | 2481 | // x86 leftover (disabled): if (VM_Version::supports_mmx()) StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy"); |
aoqi@1 | 2482 | // This port always uses the generic short-copy generator with aligned == true: |
aoqi@1 | 2483 | StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); |
aoqi@1 | 2484 | StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy"); |
aoqi@1 | 2485 | // Disabled: StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy"); -- the field is aliased to _oop_disjoint_arraycopy at the end of this function instead. |
aoqi@1 | 2486 | StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy"); |
aoqi@1 | 2487 | |
aoqi@1 | 2488 | StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); |
aoqi@1 | 2489 | StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); |
aoqi@1 | 2490 | StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); |
aoqi@1 | 2491 | StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); |
aoqi@1 | 2492 | |
aoqi@1 | 2493 | StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy"); |
aoqi@1 | 2494 | StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy"); |
aoqi@1 | 2495 | StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy"); |
aoqi@1 | 2496 | // Disabled: StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy"); -- the field is aliased to _oop_arraycopy at the end of this function instead. |
aoqi@1 | 2497 | StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy"); |
aoqi@1 | 2498 | |
aoqi@1 | 2499 | StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; |
aoqi@1 | 2500 | StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
aoqi@1 | 2501 | } |
aoqi@1 | 2502 | |
aoqi@1 | 2503 | // Emits a SafeFetch32 (size == 4) or SafeFetchN (size == 8) stub and reports its entry, faulting-load and continuation pcs through the three out-parameters. (Added by Wang.) |
aoqi@1 | 2504 | void generate_safefetch(const char* name, int size, address* entry, |
aoqi@1 | 2505 | address* fault_pc, address* continuation_pc) { |
aoqi@1 | 2506 | // safefetch signatures: |
aoqi@1 | 2507 | // int SafeFetch32(int* adr, int errValue); |
aoqi@1 | 2508 | // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); |
aoqi@1 | 2509 | // |
aoqi@1 | 2510 | // arguments: |
aoqi@1 | 2511 | // A0 = adr |
aoqi@1 | 2512 | // A1 = errValue |
aoqi@1 | 2513 | // |
aoqi@1 | 2514 | // result: |
aoqi@1 | 2515 | // V0 = *adr or errValue |
aoqi@1 | 2516 | |
aoqi@1 | 2517 | StubCodeMark mark(this, "StubRoutines", name); |
aoqi@1 | 2518 | |
aoqi@1 | 2519 | // Entry point of the stub (no alignment is requested before it). |
aoqi@1 | 2520 | *entry = __ pc(); |
aoqi@1 | 2521 | |
aoqi@1 | 2522 | // Load *adr into A1, overwriting errValue; this single load is the instruction that may fault, so its pc is reported as *fault_pc. |
aoqi@1 | 2523 | *fault_pc = __ pc(); |
aoqi@1 | 2524 | switch (size) { |
aoqi@1 | 2525 | case 4: |
aoqi@1 | 2526 | // int32_t: 32-bit load (SafeFetch32) |
aoqi@1 | 2527 | __ lw(A1, A0, 0); |
aoqi@1 | 2528 | break; |
aoqi@1 | 2529 | case 8: |
aoqi@1 | 2530 | // int64_t: 64-bit load (SafeFetchN) |
aoqi@1 | 2531 | __ ld(A1, A0, 0); |
aoqi@1 | 2532 | break; |
aoqi@1 | 2533 | default: |
aoqi@1 | 2534 | ShouldNotReachHere(); |
aoqi@1 | 2535 | } |
aoqi@1 | 2536 | |
aoqi@1 | 2537 | // Return A1 in V0: it holds *adr after a successful load, or still errValue if the load did not complete (presumably the fault handler resumes at *continuation_pc -- confirm against the signal handler). |
aoqi@1 | 2538 | *continuation_pc = __ pc(); |
aoqi@1 | 2539 | __ addu(V0,A1,R0); |
aoqi@1 | 2540 | __ jr(RA); |
aoqi@1 | 2541 | __ delayed()->nop(); |
aoqi@1 | 2542 | } |
aoqi@1 | 2543 | |
aoqi@1 | 2544 | |
aoqi@1 | 2545 | #undef __ |
aoqi@1 | 2546 | #define __ masm-> |
aoqi@1 | 2547 | |
aoqi@1 | 2548 | // Continuation point for throwing of implicit exceptions that are |
aoqi@1 | 2549 | // not handled in the current activation. Fabricates an exception |
aoqi@1 | 2550 | // oop and initiates normal exception dispatching in this |
aoqi@1 | 2551 | // frame. Since we need to preserve callee-saved values (currently |
aoqi@1 | 2552 | // only for C2, but done for C1 as well) we need a callee-saved oop |
aoqi@1 | 2553 | // map and therefore have to make these stubs into RuntimeStubs |
aoqi@1 | 2554 | // rather than BufferBlobs. If the compiler needs all registers to |
aoqi@1 | 2555 | // be preserved between the fault point and the exception handler |
aoqi@1 | 2556 | // then it must assume responsibility for that in |
aoqi@1 | 2557 | // AbstractCompiler::continuation_for_implicit_null_exception or |
aoqi@1 | 2558 | // continuation_for_implicit_division_by_zero_exception. All other |
aoqi@1 | 2559 | // implicit exceptions (e.g., NullPointerException or |
aoqi@1 | 2560 | // AbstractMethodError on entry) are either at call sites or |
aoqi@1 | 2561 | // otherwise assume that stack unwinding will be initiated, so |
aoqi@1 | 2562 | // caller saved registers were assumed volatile in the compiler. |
aoqi@1 | 2563 | address generate_throw_exception(const char* name, // name given to the CodeBuffer and to the RuntimeStub |
aoqi@1 | 2564 | address runtime_entry, // C function the stub calls, with the current JavaThread in A0 |
aoqi@1 | 2565 | bool restore_saved_exception_pc) { // if true, RA is first reloaded from JavaThread::saved_exception_pc |
aoqi@1 | 2566 | // Information about frame layout at time of blocking runtime call. |
aoqi@1 | 2567 | // Note that we only have to preserve callee-saved registers since |
aoqi@1 | 2568 | // the compilers are responsible for supplying a continuation point |
aoqi@1 | 2569 | // if they expect all registers to be preserved. |
aoqi@1 | 2570 | //#define aoqi_test |
aoqi@1 | 2571 | #ifdef aoqi_test |
aoqi@1 | 2572 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2573 | #endif |
aoqi@1 | 2574 | enum layout { |
aoqi@1 | 2575 | thread_off, // slot at SP (the value recorded as last_java_sp); holds the JavaThread pointer |
aoqi@1 | 2576 | S7_off, // callee saved register sp + 1 |
aoqi@1 | 2577 | S6_off, // callee saved register sp + 2 |
aoqi@1 | 2578 | S5_off, // callee saved register sp + 3 |
aoqi@1 | 2579 | S4_off, // callee saved register sp + 4 |
aoqi@1 | 2580 | S3_off, // callee saved register sp + 5 |
aoqi@1 | 2581 | S2_off, // callee saved register sp + 6 |
aoqi@1 | 2582 | S1_off, // callee saved register sp + 7 |
aoqi@1 | 2583 | S0_off, // callee saved register sp + 8 |
aoqi@1 | 2584 | FP_off, // not stored explicitly below; presumably written by enter() |
aoqi@1 | 2585 | ret_address, // not stored explicitly below; presumably written by enter() |
aoqi@1 | 2586 | framesize // total frame size in words, passed to OopMap and RuntimeStub |
aoqi@1 | 2587 | }; |
aoqi@1 | 2588 | |
aoqi@1 | 2589 | int insts_size = 2048; |
aoqi@1 | 2590 | int locs_size = 32; |
aoqi@1 | 2591 | |
aoqi@1 | 2592 | // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, |
aoqi@1 | 2593 | // NULL, NULL, NULL, false, NULL, name, false); |
aoqi@1 | 2594 | CodeBuffer code (name , insts_size, locs_size); |
aoqi@1 | 2595 | #ifdef aoqi_test |
aoqi@1 | 2596 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2597 | #endif |
aoqi@1 | 2598 | OopMapSet* oop_maps = new OopMapSet(); |
aoqi@1 | 2599 | #ifdef aoqi_test |
aoqi@1 | 2600 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2601 | #endif |
aoqi@1 | 2602 | MacroAssembler* masm = new MacroAssembler(&code); |
aoqi@1 | 2603 | #ifdef aoqi_test |
aoqi@1 | 2604 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2605 | #endif |
aoqi@1 | 2606 | |
aoqi@1 | 2607 | address start = __ pc(); |
aoqi@1 | 2608 | //__ stop("generate_throw_exception"); |
aoqi@1 | 2609 | /* |
aoqi@1 | 2610 | __ move(AT, (int)&jerome1 ); |
aoqi@1 | 2611 | __ sw(SP, AT, 0); |
aoqi@1 | 2612 | __ move(AT, (int)&jerome2 ); |
aoqi@1 | 2613 | __ sw(FP, AT, 0); |
aoqi@1 | 2614 | __ move(AT, (int)&jerome3 ); |
aoqi@1 | 2615 | __ sw(RA, AT, 0); |
aoqi@1 | 2616 | __ move(AT, (int)&jerome4 ); |
aoqi@1 | 2617 | __ sw(R0, AT, 0); |
aoqi@1 | 2618 | __ move(AT, (int)&jerome5 ); |
aoqi@1 | 2619 | __ sw(R0, AT, 0); |
aoqi@1 | 2620 | __ move(AT, (int)&jerome6 ); |
aoqi@1 | 2621 | __ sw(R0, AT, 0); |
aoqi@1 | 2622 | __ move(AT, (int)&jerome7 ); |
aoqi@1 | 2623 | __ sw(R0, AT, 0); |
aoqi@1 | 2624 | __ move(AT, (int)&jerome10 ); |
aoqi@1 | 2625 | __ sw(R0, AT, 0); |
aoqi@1 | 2626 | |
aoqi@1 | 2627 | __ pushad(); |
aoqi@1 | 2628 | |
aoqi@1 | 2629 | //__ enter(); |
aoqi@1 | 2630 | __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), |
aoqi@1 | 2631 | relocInfo::runtime_call_type); |
aoqi@1 | 2632 | __ delayed()->nop(); |
aoqi@1 | 2633 | |
aoqi@1 | 2634 | //__ leave(); |
aoqi@1 | 2635 | __ popad(); |
aoqi@1 | 2636 | |
aoqi@1 | 2637 | */ |
aoqi@1 | 2638 | |
aoqi@1 | 2639 | // This is an inlined and slightly modified version of call_VM |
aoqi@1 | 2640 | // which has the ability to fetch the return PC out of |
aoqi@1 | 2641 | // thread-local storage and also sets up last_Java_sp slightly |
aoqi@1 | 2642 | // differently than the real call_VM |
aoqi@1 | 2643 | #ifndef OPT_THREAD |
aoqi@1 | 2644 | Register java_thread = TREG; |
aoqi@1 | 2645 | __ get_thread(java_thread); |
aoqi@1 | 2646 | #else |
aoqi@1 | 2647 | Register java_thread = TREG; |
aoqi@1 | 2648 | #endif |
aoqi@1 | 2649 | #ifdef aoqi_test |
aoqi@1 | 2650 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2651 | #endif |
aoqi@1 | 2652 | if (restore_saved_exception_pc) { |
aoqi@1 | 2653 | __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // use the pc saved in the thread as this stub's return address |
aoqi@1 | 2654 | } |
aoqi@1 | 2655 | |
aoqi@1 | 2656 | __ enter(); // required for proper stackwalking of RuntimeStub frame |
aoqi@1 | 2657 | |
aoqi@1 | 2658 | __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog: room for the thread slot plus S0..S7 |
aoqi@1 | 2659 | __ sd(S0, SP, S0_off * wordSize); |
aoqi@1 | 2660 | __ sd(S1, SP, S1_off * wordSize); |
aoqi@1 | 2661 | __ sd(S2, SP, S2_off * wordSize); |
aoqi@1 | 2662 | __ sd(S3, SP, S3_off * wordSize); |
aoqi@1 | 2663 | __ sd(S4, SP, S4_off * wordSize); |
aoqi@1 | 2664 | __ sd(S5, SP, S5_off * wordSize); |
aoqi@1 | 2665 | __ sd(S6, SP, S6_off * wordSize); |
aoqi@1 | 2666 | __ sd(S7, SP, S7_off * wordSize); |
aoqi@1 | 2667 | |
aoqi@1 | 2668 | int frame_complete = __ pc() - start; // code offset at which the frame is fully set up; handed to new_runtime_stub |
aoqi@1 | 2669 | // push java thread (becomes first argument of C function) |
aoqi@1 | 2670 | __ sd(java_thread, SP, thread_off * wordSize); |
aoqi@1 | 2671 | if (java_thread!=A0) |
aoqi@1 | 2672 | __ move(A0, java_thread); |
aoqi@1 | 2673 | |
aoqi@1 | 2674 | // Set up last_Java_sp and last_Java_fp |
aoqi@1 | 2675 | __ set_last_Java_frame(java_thread, SP, FP, NULL); |
aoqi@1 | 2676 | __ relocate(relocInfo::internal_pc_type); |
aoqi@1 | 2677 | { |
aoqi@1 | 2678 | intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; // presumably the return address of the call below: li48 + the 4-byte sd + the call sequence -- keep in sync with the instructions emitted here |
aoqi@1 | 2679 | __ li48(AT, save_pc); |
aoqi@1 | 2680 | } |
aoqi@1 | 2681 | __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); |
aoqi@1 | 2682 | |
aoqi@1 | 2683 | // Call runtime |
aoqi@1 | 2684 | __ call(runtime_entry); |
aoqi@1 | 2685 | __ delayed()->nop(); |
aoqi@1 | 2686 | // Generate oop map |
aoqi@1 | 2687 | OopMap* map = new OopMap(framesize, 0); |
aoqi@1 | 2688 | oop_maps->add_gc_map(__ offset(), map); |
aoqi@1 | 2689 | |
aoqi@1 | 2690 | // restore the thread (cannot use the pushed argument since arguments |
aoqi@1 | 2691 | // may be overwritten by C code generated by an optimizing compiler); |
aoqi@1 | 2692 | // however can use the register value directly if it is callee saved. |
aoqi@1 | 2693 | #ifndef OPT_THREAD |
aoqi@1 | 2694 | __ get_thread(java_thread); |
aoqi@1 | 2695 | #endif |
aoqi@1 | 2696 | |
aoqi@1 | 2697 | __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); |
aoqi@1 | 2698 | // __ reset_last_Java_frame(java_thread, true); |
aoqi@1 | 2699 | __ reset_last_Java_frame(java_thread, true, true); |
aoqi@1 | 2700 | |
aoqi@1 | 2701 | // Restore callee save registers. This must be done after resetting the Java frame |
aoqi@1 | 2702 | __ ld(S0, SP, S0_off * wordSize); |
aoqi@1 | 2703 | __ ld(S1, SP, S1_off * wordSize); |
aoqi@1 | 2704 | __ ld(S2, SP, S2_off * wordSize); |
aoqi@1 | 2705 | __ ld(S3, SP, S3_off * wordSize); |
aoqi@1 | 2706 | __ ld(S4, SP, S4_off * wordSize); |
aoqi@1 | 2707 | __ ld(S5, SP, S5_off * wordSize); |
aoqi@1 | 2708 | __ ld(S6, SP, S6_off * wordSize); |
aoqi@1 | 2709 | __ ld(S7, SP, S7_off * wordSize); |
aoqi@1 | 2710 | |
aoqi@1 | 2711 | // discard arguments |
aoqi@1 | 2712 | __ addi(SP, SP, (framesize-2) * wordSize); // epilog |
aoqi@1 | 2713 | // Hand-written replacement for leave() (required for proper stackwalking of RuntimeStub frame): |
aoqi@1 | 2714 | __ addi(SP, FP, wordSize); |
aoqi@1 | 2715 | __ ld(FP, SP, -1*wordSize); |
aoqi@1 | 2716 | // check for pending exceptions: runtime_entry is expected to have set one |
aoqi@1 | 2717 | #ifdef ASSERT |
aoqi@1 | 2718 | Label L; |
aoqi@1 | 2719 | __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); // pointer-sized field: read all 64 bits (was a 32-bit lw) |
aoqi@1 | 2720 | __ bne(AT, R0, L); |
aoqi@1 | 2721 | __ delayed()->nop(); |
aoqi@1 | 2722 | __ should_not_reach_here(); |
aoqi@1 | 2723 | __ bind(L); |
aoqi@1 | 2724 | #endif //ASSERT |
aoqi@1 | 2725 | __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); |
aoqi@1 | 2726 | __ delayed()->nop(); |
aoqi@1 | 2727 | #ifdef aoqi_test |
aoqi@1 | 2728 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2729 | #endif |
aoqi@1 | 2730 | RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, |
aoqi@1 | 2731 | framesize, oop_maps, false); |
aoqi@1 | 2732 | #ifdef aoqi_test |
aoqi@1 | 2733 | tty->print_cr("%s:%d name:%s", __func__, __LINE__, name); |
aoqi@1 | 2734 | #endif |
aoqi@1 | 2735 | return stub->entry_point(); // address callers jump to in order to throw |
aoqi@1 | 2736 | } |
aoqi@1 | 2737 | |
aoqi@1 | 2738 | // Initialization |
// generate_initial: emits the first group of stubs and records their entry
// points. The StubGenerator constructor calls this when its `all` argument
// is false.
//
// Entry points assigned by the live code at the bottom of this function:
//   StubRoutines::_forward_exception_entry
//   StubRoutines::_call_stub_entry  (generate_call_stub is also handed
//                                    StubRoutines::_call_stub_return_address)
//   StubRoutines::_catch_exception_entry
//   StubRoutines::_handler_for_unsafe_access_entry
//   StubRoutines::gs2::_get_previous_fp_entry
aoqi@1 | 2739 | void generate_initial() { |
// The block comment that follows is disabled code kept for reference only;
// none of it is compiled. NOTE(review): it names StubRoutines::mips::*,
// mxcsr and atomic_* generators, so it looks like a template carried over
// from another port -- confirm before reviving any of it.
aoqi@1 | 2740 | /* |
aoqi@1 | 2741 | // Generates all stubs and initializes the entry points |
aoqi@1 | 2742 | |
aoqi@1 | 2743 | // This platform-specific stub is needed by generate_call_stub() |
aoqi@1 | 2744 | StubRoutines::mips::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80); |
aoqi@1 | 2745 | |
aoqi@1 | 2746 | // entry points that exist in all platforms Note: This is code |
aoqi@1 | 2747 | // that could be shared among different platforms - however the |
aoqi@1 | 2748 | // benefit seems to be smaller than the disadvantage of having a |
aoqi@1 | 2749 | // much more complicated generator structure. See also comment in |
aoqi@1 | 2750 | // stubRoutines.hpp. |
aoqi@1 | 2751 | |
aoqi@1 | 2752 | StubRoutines::_forward_exception_entry = generate_forward_exception(); |
aoqi@1 | 2753 | |
aoqi@1 | 2754 | StubRoutines::_call_stub_entry = |
aoqi@1 | 2755 | generate_call_stub(StubRoutines::_call_stub_return_address); |
aoqi@1 | 2756 | |
aoqi@1 | 2757 | // is referenced by megamorphic call |
aoqi@1 | 2758 | StubRoutines::_catch_exception_entry = generate_catch_exception(); |
aoqi@1 | 2759 | |
aoqi@1 | 2760 | // atomic calls |
aoqi@1 | 2761 | StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); |
aoqi@1 | 2762 | StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr(); |
aoqi@1 | 2763 | StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); |
aoqi@1 | 2764 | StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); |
aoqi@1 | 2765 | StubRoutines::_atomic_add_entry = generate_atomic_add(); |
aoqi@1 | 2766 | StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr(); |
aoqi@1 | 2767 | StubRoutines::_fence_entry = generate_orderaccess_fence(); |
aoqi@1 | 2768 | |
aoqi@1 | 2769 | StubRoutines::_handler_for_unsafe_access_entry = |
aoqi@1 | 2770 | generate_handler_for_unsafe_access(); |
aoqi@1 | 2771 | |
aoqi@1 | 2772 | // platform dependent |
aoqi@1 | 2773 | StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp(); |
aoqi@1 | 2774 | |
aoqi@1 | 2775 | StubRoutines::mips::_verify_mxcsr_entry = generate_verify_mxcsr(); |
aoqi@1 | 2776 | */ |
// Live code starts here. The statements below run in the order written;
// each generate_* call returns the value stored in the matching
// StubRoutines entry-point field.
aoqi@1 | 2777 | // Generates all stubs and initializes the entry points |
aoqi@1 | 2778 | |
aoqi@1 | 2779 | //------------------------------------------------------------- |
aoqi@1 | 2780 | //----------------------------------------------------------- |
aoqi@1 | 2781 | // entry points that exist in all platforms |
aoqi@1 | 2782 | // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller |
aoqi@1 | 2783 | // than the disadvantage of having a much more complicated generator structure. |
aoqi@1 | 2784 | // See also comment in stubRoutines.hpp. |
aoqi@1 | 2785 | StubRoutines::_forward_exception_entry = generate_forward_exception(); |
aoqi@1 | 2786 | StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); |
aoqi@1 | 2787 | // is referenced by megamorphic call |
aoqi@1 | 2788 | StubRoutines::_catch_exception_entry = generate_catch_exception(); |
aoqi@1 | 2789 | |
aoqi@1 | 2790 | StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); |
aoqi@1 | 2791 | |
// Unlike the disabled block above, the live code stores this entry in the
// StubRoutines::gs2 namespace rather than StubRoutines::mips.
aoqi@1 | 2792 | // platform dependent |
aoqi@1 | 2793 | StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp(); |
aoqi@1 | 2794 | } |
aoqi@1 | 2795 | |
// generate_all: emits the second group of stubs and records their entry
// points. The StubGenerator constructor calls this when its `all` argument
// is true.
//
// Stubs produced by the live code, in order:
//   1. throw stubs for AbstractMethodError, NullPointerException-at-call and
//      StackOverflowError (via generate_throw_exception)
//   2. the verify_oop subroutine
//   3. the arraycopy stubs, unless CORE is defined
//   4. the SafeFetch32 and SafeFetchN stubs
//
// Every `#ifdef aoqi_test` section only prints the current function name and
// line number to tty; these are debug trace points and generate no stub.
aoqi@1 | 2796 | void generate_all() { |
aoqi@1 | 2797 | #ifdef aoqi_test |
aoqi@1 | 2798 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2799 | #endif |
aoqi@1 | 2800 | // Generates all stubs and initializes the entry points |
aoqi@1 | 2801 | |
aoqi@1 | 2802 | // These entry points require SharedInfo::stack0 to be set up in |
aoqi@1 | 2803 | // non-core builds and need to be relocatable, so they each |
aoqi@1 | 2804 | // fabricate a RuntimeStub internally. |
// The block comment that follows is disabled code kept for reference only;
// none of it is compiled. The live equivalents appear after it.
aoqi@1 | 2805 | /* |
aoqi@1 | 2806 | StubRoutines::_throw_AbstractMethodError_entry = |
aoqi@1 | 2807 | generate_throw_exception("AbstractMethodError throw_exception", |
aoqi@1 | 2808 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2809 | SharedRuntime:: |
aoqi@1 | 2810 | throw_AbstractMethodError), |
aoqi@1 | 2811 | false); |
aoqi@1 | 2812 | |
aoqi@1 | 2813 | StubRoutines::_throw_IncompatibleClassChangeError_entry = |
aoqi@1 | 2814 | generate_throw_exception("IncompatibleClassChangeError throw_exception", |
aoqi@1 | 2815 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2816 | SharedRuntime:: |
aoqi@1 | 2817 | throw_IncompatibleClassChangeError), |
aoqi@1 | 2818 | false); |
aoqi@1 | 2819 | |
aoqi@1 | 2820 | StubRoutines::_throw_ArithmeticException_entry = |
aoqi@1 | 2821 | generate_throw_exception("ArithmeticException throw_exception", |
aoqi@1 | 2822 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2823 | SharedRuntime:: |
aoqi@1 | 2824 | throw_ArithmeticException), |
aoqi@1 | 2825 | true); |
aoqi@1 | 2826 | |
aoqi@1 | 2827 | StubRoutines::_throw_NullPointerException_entry = |
aoqi@1 | 2828 | generate_throw_exception("NullPointerException throw_exception", |
aoqi@1 | 2829 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2830 | SharedRuntime:: |
aoqi@1 | 2831 | throw_NullPointerException), |
aoqi@1 | 2832 | true); |
aoqi@1 | 2833 | |
aoqi@1 | 2834 | StubRoutines::_throw_NullPointerException_at_call_entry = |
aoqi@1 | 2835 | generate_throw_exception("NullPointerException at call throw_exception", |
aoqi@1 | 2836 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2837 | SharedRuntime:: |
aoqi@1 | 2838 | throw_NullPointerException_at_call), |
aoqi@1 | 2839 | false); |
aoqi@1 | 2840 | |
aoqi@1 | 2841 | StubRoutines::_throw_StackOverflowError_entry = |
aoqi@1 | 2842 | generate_throw_exception("StackOverflowError throw_exception", |
aoqi@1 | 2843 | CAST_FROM_FN_PTR(address, |
aoqi@1 | 2844 | SharedRuntime:: |
aoqi@1 | 2845 | throw_StackOverflowError), |
aoqi@1 | 2846 | false); |
aoqi@1 | 2847 | |
aoqi@1 | 2848 | // entry points that are platform specific |
aoqi@1 | 2849 | StubRoutines::mips::_f2i_fixup = generate_f2i_fixup(); |
aoqi@1 | 2850 | StubRoutines::mips::_f2l_fixup = generate_f2l_fixup(); |
aoqi@1 | 2851 | StubRoutines::mips::_d2i_fixup = generate_d2i_fixup(); |
aoqi@1 | 2852 | StubRoutines::mips::_d2l_fixup = generate_d2l_fixup(); |
aoqi@1 | 2853 | |
aoqi@1 | 2854 | StubRoutines::mips::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF); |
aoqi@1 | 2855 | StubRoutines::mips::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000); |
aoqi@1 | 2856 | StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF); |
aoqi@1 | 2857 | StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000); |
aoqi@1 | 2858 | |
aoqi@1 | 2859 | // support for verify_oop (must happen after universe_init) |
aoqi@1 | 2860 | StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); |
aoqi@1 | 2861 | |
aoqi@1 | 2862 | // arraycopy stubs used by compilers |
aoqi@1 | 2863 | generate_arraycopy_stubs(); |
aoqi@1 | 2864 | */ |
// Live throw stubs. Each call passes a stub name, the address of the
// SharedRuntime routine obtained with CAST_FROM_FN_PTR, and a bool flag.
// NOTE(review): the meaning of the trailing bool is defined by
// generate_throw_exception, whose signature is not visible here -- confirm
// before changing it.
aoqi@1 | 2865 | #ifdef aoqi_test |
aoqi@1 | 2866 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2867 | #endif |
aoqi@1 | 2868 | StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); |
aoqi@1 | 2869 | #ifdef aoqi_test |
aoqi@1 | 2870 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2871 | #endif |
// The ArithmeticException stub is commented out, so this function does not
// assign StubRoutines::_throw_ArithmeticException_entry.
aoqi@1 | 2872 | // StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); |
aoqi@1 | 2873 | #ifdef aoqi_test |
aoqi@1 | 2874 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2875 | #endif |
// The NullPointerException stub is likewise commented out, so this function
// does not assign StubRoutines::_throw_NullPointerException_entry.
aoqi@1 | 2876 | // StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); |
aoqi@1 | 2877 | #ifdef aoqi_test |
aoqi@1 | 2878 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2879 | #endif |
aoqi@1 | 2880 | StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); |
aoqi@1 | 2881 | #ifdef aoqi_test |
aoqi@1 | 2882 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2883 | #endif |
aoqi@1 | 2884 | StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); |
aoqi@1 | 2885 | #ifdef aoqi_test |
aoqi@1 | 2886 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2887 | #endif |
aoqi@1 | 2888 | |
aoqi@1 | 2889 | //------------------------------------------------------ |
aoqi@1 | 2890 | //------------------------------------------------------------------ |
aoqi@1 | 2891 | // entry points that are platform specific |
aoqi@1 | 2892 | |
aoqi@1 | 2893 | // support for verify_oop (must happen after universe_init) |
aoqi@1 | 2894 | #ifdef aoqi_test |
aoqi@1 | 2895 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2896 | #endif |
aoqi@1 | 2897 | StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); |
aoqi@1 | 2898 | #ifdef aoqi_test |
aoqi@1 | 2899 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2900 | #endif |
// The arraycopy stubs (and the trace print nested with them) are skipped
// entirely when CORE is defined.
aoqi@1 | 2901 | #ifndef CORE |
aoqi@1 | 2902 | // arraycopy stubs used by compilers |
aoqi@1 | 2903 | generate_arraycopy_stubs(); |
aoqi@1 | 2904 | #ifdef aoqi_test |
aoqi@1 | 2905 | tty->print_cr("%s:%d", __func__, __LINE__); |
aoqi@1 | 2906 | #endif |
aoqi@1 | 2907 | #endif |
aoqi@1 | 2908 | |
// Two SafeFetch stubs are generated: one sized sizeof(int) and one sized
// sizeof(intptr_t). generate_safefetch receives the addresses of the
// entry, fault_pc and continuation_pc fields in StubRoutines, so it is
// expected to fill them in itself (its body is not visible here).
aoqi@1 | 2909 | // Safefetch stubs. |
aoqi@1 | 2910 | generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, |
aoqi@1 | 2911 | &StubRoutines::_safefetch32_fault_pc, |
aoqi@1 | 2912 | &StubRoutines::_safefetch32_continuation_pc); |
aoqi@1 | 2913 | generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, |
aoqi@1 | 2914 | &StubRoutines::_safefetchN_fault_pc, |
aoqi@1 | 2915 | &StubRoutines::_safefetchN_continuation_pc); |
aoqi@1 | 2916 | } |
aoqi@1 | 2917 | |
aoqi@1 | 2918 | public: |
// Constructor: forwards `code` to the StubCodeGenerator base class and then
// performs stub generation immediately, as part of construction.
//   code - code buffer handed to the StubCodeGenerator base
//   all  - true  -> run generate_all()
//          false -> run generate_initial()
// Exactly one of the two generator functions runs per constructed object.
aoqi@1 | 2919 | StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { |
aoqi@1 | 2920 | if (all) { |
aoqi@1 | 2921 | generate_all(); |
aoqi@1 | 2922 | } else { |
aoqi@1 | 2923 | generate_initial(); |
aoqi@1 | 2924 | } |
aoqi@1 | 2925 | } |
aoqi@1 | 2926 | }; // end class declaration |
aoqi@1 | 2927 | /* |
aoqi@1 | 2928 | address StubGenerator::disjoint_byte_copy_entry = NULL; |
aoqi@1 | 2929 | address StubGenerator::disjoint_short_copy_entry = NULL; |
aoqi@1 | 2930 | address StubGenerator::disjoint_int_copy_entry = NULL; |
aoqi@1 | 2931 | address StubGenerator::disjoint_long_copy_entry = NULL; |
aoqi@1 | 2932 | address StubGenerator::disjoint_oop_copy_entry = NULL; |
aoqi@1 | 2933 | |
aoqi@1 | 2934 | address StubGenerator::byte_copy_entry = NULL; |
aoqi@1 | 2935 | address StubGenerator::short_copy_entry = NULL; |
aoqi@1 | 2936 | address StubGenerator::int_copy_entry = NULL; |
aoqi@1 | 2937 | address StubGenerator::long_copy_entry = NULL; |
aoqi@1 | 2938 | address StubGenerator::oop_copy_entry = NULL; |
aoqi@1 | 2939 | |
aoqi@1 | 2940 | address StubGenerator::checkcast_copy_entry = NULL; |
aoqi@1 | 2941 | */ |
// Free-function entry point for stub generation.
//   code - code buffer passed through to the StubGenerator constructor
//   all  - passed through unchanged; the constructor uses it to choose
//          between generate_all() and generate_initial()
// All work happens inside the StubGenerator constructor; the local object `g`
// exists only to trigger it and is destroyed when this function returns.
aoqi@1 | 2942 | void StubGenerator_generate(CodeBuffer* code, bool all) { |
aoqi@1 | 2943 | StubGenerator g(code, all); |
aoqi@1 | 2944 | } |