src/cpu/mips/vm/stubGenerator_mips_64.cpp

Tue, 31 May 2016 00:22:06 -0400

author
aoqi
date
Tue, 31 May 2016 00:22:06 -0400
changeset 16
3cedde979d75
parent 13
bc227c49eaae
child 32
3b95e10c12fa
permissions
-rw-r--r--

[Code Reorganization] load_two_bytes_from_at_bcp -> get_2_byte_integer_at_bcp
remove useless MacroAssembler::store_two_byts_to_at_bcp
change MacroAssembler::load_two_bytes_from_at_bcp to InterpreterMacroAssembler::get_2_byte_integer_at_bcp
change MacroAssembler::get_4_byte_integer_at_bcp to InterpreterMacroAssembler::get_4_byte_integer_at_bcp

aoqi@1 1 /*
aoqi@1 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
aoqi@1 3 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
aoqi@1 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@1 5 *
aoqi@1 6 * This code is free software; you can redistribute it and/or modify it
aoqi@1 7 * under the terms of the GNU General Public License version 2 only, as
aoqi@1 8 * published by the Free Software Foundation.
aoqi@1 9 *
aoqi@1 10 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@1 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@1 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@1 13 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@1 14 * accompanied this code).
aoqi@1 15 *
aoqi@1 16 * You should have received a copy of the GNU General Public License version
aoqi@1 17 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@1 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@1 19 *
aoqi@1 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@1 21 * or visit www.oracle.com if you need additional information or have any
aoqi@1 22 * questions.
aoqi@1 23 *
aoqi@1 24 */
aoqi@1 25
aoqi@1 26 #include "precompiled.hpp"
aoqi@1 27 #include "asm/macroAssembler.hpp"
aoqi@1 28 #include "asm/macroAssembler.inline.hpp"
aoqi@1 29 #include "interpreter/interpreter.hpp"
aoqi@1 30 #include "nativeInst_mips.hpp"
aoqi@1 31 #include "oops/instanceOop.hpp"
aoqi@1 32 #include "oops/method.hpp"
aoqi@1 33 #include "oops/objArrayKlass.hpp"
aoqi@1 34 #include "oops/oop.inline.hpp"
aoqi@1 35 #include "prims/methodHandles.hpp"
aoqi@1 36 #include "runtime/frame.inline.hpp"
aoqi@1 37 #include "runtime/handles.inline.hpp"
aoqi@1 38 #include "runtime/sharedRuntime.hpp"
aoqi@1 39 #include "runtime/stubCodeGenerator.hpp"
aoqi@1 40 #include "runtime/stubRoutines.hpp"
aoqi@1 41 #include "runtime/thread.inline.hpp"
aoqi@1 42 #include "utilities/top.hpp"
aoqi@1 43 #ifdef COMPILER2
aoqi@1 44 #include "opto/runtime.hpp"
aoqi@1 45 #endif
aoqi@1 46
aoqi@1 47
aoqi@1 48 // Declaration and definition of StubGenerator (no .hpp file).
aoqi@1 49 // For a more detailed description of the stub routine structure
aoqi@1 50 // see the comment in stubRoutines.hpp
aoqi@1 51
aoqi@1 52 #define __ _masm->
aoqi@1 53 //#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
aoqi@1 54 //#define a__ ((Assembler*)_masm)->
aoqi@1 55
aoqi@1 56 //#ifdef PRODUCT
aoqi@1 57 //#define BLOCK_COMMENT(str) /* nothing */
aoqi@1 58 //#else
aoqi@1 59 //#define BLOCK_COMMENT(str) __ block_comment(str)
aoqi@1 60 //#endif
aoqi@1 61
aoqi@1 62 //#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Mask out any pending exceptions.
// NOTE(review): the name refers to the x86 MXCSR register; nothing in the
// visible part of this MIPS file reads this constant -- confirm it is needed.
static const int MXCSR_MASK = 0xFFC0;
aoqi@1 64
aoqi@1 65 // Stub Code definitions
aoqi@1 66
aoqi@1 67 static address handle_unsafe_access() {
aoqi@1 68 JavaThread* thread = JavaThread::current();
aoqi@1 69 address pc = thread->saved_exception_pc();
aoqi@1 70 // pc is the instruction which we must emulate
aoqi@1 71 // doing a no-op is fine: return garbage from the load
aoqi@1 72 // therefore, compute npc
aoqi@1 73 //address npc = Assembler::locate_next_instruction(pc);
aoqi@1 74 address npc = (address)((unsigned long)pc + sizeof(unsigned long));
aoqi@1 75
aoqi@1 76 // request an async exception
aoqi@1 77 thread->set_pending_unsafe_access_error();
aoqi@1 78
aoqi@1 79 // return address of next instruction to execute
aoqi@1 80 return npc;
aoqi@1 81 }
aoqi@1 82
aoqi@1 83 class StubGenerator: public StubCodeGenerator {
aoqi@1 84 private:
aoqi@1 85
aoqi@1 86 // ABI mips n64
aoqi@1 87 // This diagram is not the MIPS ABI itself; it shows the frame layout used when calling Java from C.
aoqi@1 88 // Call stubs are used to call Java from C
aoqi@1 89 //
aoqi@1 90 // [ return_from_Java ]
aoqi@1 91 // [ argument word n-1 ] <--- sp
aoqi@1 92 // ...
aoqi@1 93 // [ argument word 0 ]
aoqi@1 94 // ...
aoqi@1 95 //-10 [ S6 ]
aoqi@1 96 // -9 [ S5 ]
aoqi@1 97 // -8 [ S4 ]
aoqi@1 98 // -7 [ S3 ]
aoqi@1 99 // -6 [ S0 ]
aoqi@1 100 // -5 [ TSR(S2) ]
aoqi@1 101 // -4 [ LVP(S7) ]
aoqi@1 102 // -3 [ BCP(S1) ]
aoqi@1 103 // -2 [ saved fp ] <--- fp_after_call
aoqi@1 104 // -1 [ return address ]
aoqi@1 105 // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
aoqi@1 106 // 1 [ result ] <--- a1
aoqi@1 107 // 2 [ result_type ] <--- a2
aoqi@1 108 // 3 [ method ] <--- a3
aoqi@1 109 // 4 [ entry_point ] <--- a4
aoqi@1 110 // 5 [ parameters ] <--- a5
aoqi@1 111 // 6 [ parameter_size ] <--- a6
aoqi@1 112 // 7 [ thread ] <--- a7
aoqi@1 113
aoqi@1 114 //
aoqi@1 115 // _LP64: n64 does not save paras in sp.
aoqi@1 116 //
aoqi@1 117 // [ return_from_Java ]
aoqi@1 118 // [ argument word n-1 ] <--- sp
aoqi@1 119 // ...
aoqi@1 120 // [ argument word 0 ]
aoqi@1 121 // ...
aoqi@1 122 //-14 [ thread ]
aoqi@1 123 //-13 [ result_type ] <--- a2
aoqi@1 124 //-12 [ result ] <--- a1
aoqi@1 125 //-11 [ ptr. to call wrapper ] <--- a0
aoqi@1 126 //-10 [ S6 ]
aoqi@1 127 // -9 [ S5 ]
aoqi@1 128 // -8 [ S4 ]
aoqi@1 129 // -7 [ S3 ]
aoqi@1 130 // -6 [ S0 ]
aoqi@1 131 // -5 [ TSR(S2) ]
aoqi@1 132 // -4 [ LVP(S7) ]
aoqi@1 133 // -3 [ BCP(S1) ]
aoqi@1 134 // -2 [ saved fp ] <--- fp_after_call
aoqi@1 135 // -1 [ return address ]
aoqi@1 136 // 0 [ ] <--- old sp
aoqi@1 137 /*
aoqi@1 138 * 2014/01/16 Fu: Find a right place in the call_stub for GP.
aoqi@1 139 * GP will point to the starting point of Interpreter::dispatch_table(itos).
aoqi@1 140 * It should be saved/restored before/after Java calls.
aoqi@1 141 *
aoqi@1 142 */
// Word offsets of the slots used by generate_call_stub().
//
// RA_off .. S6_off and GP_off are applied to SP as it is on entry to the
// stub (and again to the restored SP on exit).  result_off, result_type_off
// and thread_off are applied to FP, which the stub sets to
// (entry SP - 2 words).  total_off is the number of words by which the stub
// lowers SP before pushing the Java arguments.
//
// NOTE(review): the frame diagram preceding this enum labels slot -6 as S0
// and places result/result_type/thread at -12/-13/-14; the values below are
// what the code actually uses -- confirm which one is intended.
enum call_stub_layout {
  RA_off          = -1,             // return address
  FP_off          = -2,             // caller's FP
  BCP_off         = -3,
  LVP_off         = -4,
  TSR_off         = -5,
  S1_off          = -6,
  S3_off          = -7,
  S4_off          = -8,
  S5_off          = -9,
  S6_off          = -10,
  result_off      = -11,            // A1, FP-relative
  result_type_off = -12,            // A2, FP-relative
  thread_off      = -13,            // A7, FP-relative
  total_off       = thread_off - 3, // SP adjustment, in words
  GP_off          = -16             // same word the lowered SP points at
};
aoqi@1 160
aoqi@1 161 address generate_call_stub(address& return_address) {
aoqi@1 162
aoqi@1 163 StubCodeMark mark(this, "StubRoutines", "call_stub");
aoqi@1 164 address start = __ pc();
aoqi@1 165
aoqi@1 166 // same as in generate_catch_exception()!
aoqi@1 167
aoqi@1 168 // stub code
aoqi@1 169 // save ra and fp
aoqi@1 170 __ sd(RA, SP, RA_off * wordSize);
aoqi@1 171 __ sd(FP, SP, FP_off * wordSize);
aoqi@1 172 __ sd(BCP, SP, BCP_off * wordSize);
aoqi@1 173 __ sd(LVP, SP, LVP_off * wordSize);
aoqi@1 174 __ sd(GP, SP, GP_off * wordSize);
aoqi@1 175 __ sd(TSR, SP, TSR_off * wordSize);
aoqi@1 176 __ sd(S1, SP, S1_off * wordSize);
aoqi@1 177 __ sd(S3, SP, S3_off * wordSize);
aoqi@1 178 __ sd(S4, SP, S4_off * wordSize);
aoqi@1 179 __ sd(S5, SP, S5_off * wordSize);
aoqi@1 180 __ sd(S6, SP, S6_off * wordSize);
aoqi@1 181
aoqi@1 182
aoqi@1 183 __ li48(GP, (long)Interpreter::dispatch_table(itos));
aoqi@1 184
aoqi@1 185 // I think 14 is the max gap between argument and callee saved register
aoqi@1 186 __ daddi(FP, SP, (-2) * wordSize);
aoqi@1 187 __ daddi(SP, SP, total_off * wordSize);
aoqi@1 188 //FIXME, aoqi. find a suitable place to save A1 & A2.
aoqi@1 189 /*
aoqi@1 190 __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
aoqi@1 191 __ sd(A1, FP, 3 * wordSize);
aoqi@1 192 __ sd(A2, FP, 4 * wordSize);
aoqi@1 193 __ sd(A3, FP, 5 * wordSize);
aoqi@1 194 __ sd(A4, FP, 6 * wordSize);
aoqi@1 195 __ sd(A5, FP, 7 * wordSize);
aoqi@1 196 __ sd(A6, FP, 8 * wordSize);
aoqi@1 197 __ sd(A7, FP, 9 * wordSize);
aoqi@1 198 */
aoqi@1 199 __ sd(A0, FP, frame::entry_frame_call_wrapper_offset * wordSize);
aoqi@1 200 __ sd(A1, FP, result_off * wordSize);
aoqi@1 201 __ sd(A2, FP, result_type_off * wordSize);
aoqi@1 202 __ sd(A7, FP, thread_off * wordSize);
aoqi@1 203
aoqi@1 204 #ifdef OPT_THREAD
aoqi@1 205 //__ get_thread(TREG);
aoqi@1 206 __ move(TREG, A7);
aoqi@1 207
aoqi@1 208 //__ ld(TREG, FP, thread_off * wordSize);
aoqi@1 209 #endif
aoqi@1 210 //add for compressedoops
aoqi@1 211 __ reinit_heapbase();
aoqi@1 212
aoqi@1 213 #ifdef ASSERT
aoqi@1 214 // make sure we have no pending exceptions
aoqi@1 215 {
aoqi@1 216 Label L;
aoqi@1 217 __ ld(AT, A7, in_bytes(Thread::pending_exception_offset()));
aoqi@1 218 __ beq(AT, R0, L);
aoqi@1 219 __ delayed()->nop();
aoqi@1 220 /* FIXME: I do not know how to realize stop in mips arch, do it in the future */
aoqi@1 221 __ stop("StubRoutines::call_stub: entered with pending exception");
aoqi@1 222 __ bind(L);
aoqi@1 223 }
aoqi@1 224 #endif
aoqi@1 225
aoqi@1 226 // pass parameters if any
aoqi@1 227 // A5: parameter
aoqi@1 228 // A6: parameter_size
aoqi@1 229 // T0: parameter_size_tmp(--)
aoqi@1 230 // T2: offset(++)
aoqi@1 231 // T3: tmp
aoqi@1 232 Label parameters_done;
aoqi@1 233 // judge if the parameter_size equals 0
aoqi@1 234 __ beq(A6, R0, parameters_done);
aoqi@1 235 __ delayed()->nop();
aoqi@1 236 __ dsll(AT, A6, Interpreter::logStackElementSize);
aoqi@1 237 __ dsub(SP, SP, AT);
aoqi@1 238 __ move(AT, -StackAlignmentInBytes);
aoqi@1 239 __ andr(SP, SP , AT);
aoqi@1 240 // Copy Java parameters in reverse order (receiver last)
aoqi@1 241 // Note that the argument order is inverted in the process
aoqi@1 242 // source is edx[ecx: N-1..0]
aoqi@1 243 // dest is esp[ebx: 0..N-1]
aoqi@1 244 Label loop;
aoqi@1 245 __ move(T0, A6);
aoqi@1 246 __ move(T2, R0);
aoqi@1 247 __ bind(loop);
aoqi@1 248
aoqi@1 249 // get parameter
aoqi@1 250 __ dsll(T3, T0, LogBytesPerWord);
aoqi@1 251 __ dadd(T3, T3, A5);
aoqi@1 252 __ ld(AT, T3, -wordSize);
aoqi@1 253 __ dsll(T3, T2, LogBytesPerWord);
aoqi@1 254 __ dadd(T3, T3, SP);
aoqi@1 255 __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0));
aoqi@1 256 __ daddi(T2, T2, 1);
aoqi@1 257 __ daddi(T0, T0, -1);
aoqi@1 258 __ bne(T0, R0, loop);
aoqi@1 259 __ delayed()->nop();
aoqi@1 260 // advance to next parameter
aoqi@1 261
aoqi@1 262 // call Java function
aoqi@1 263 __ bind(parameters_done);
aoqi@1 264
aoqi@1 265 // receiver in V0, methodOop in Rmethod
aoqi@1 266
aoqi@1 267 __ move(Rmethod, A3);
aoqi@1 268 __ move(Rsender, SP); //set sender sp
aoqi@1 269 __ jalr(A4);
aoqi@1 270 __ delayed()->nop();
aoqi@1 271 return_address = __ pc();
aoqi@1 272
aoqi@1 273 Label common_return;
aoqi@1 274 __ bind(common_return);
aoqi@1 275
aoqi@1 276 // store result depending on type
aoqi@1 277 // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
aoqi@1 278 __ ld(T0, FP, result_off * wordSize); // result --> T0
aoqi@1 279 Label is_long, is_float, is_double, exit;
aoqi@1 280 __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2
aoqi@1 281 __ daddi(T3, T2, (-1) * T_LONG);
aoqi@1 282 __ beq(T3, R0, is_long);
aoqi@1 283 __ delayed()->daddi(T3, T2, (-1) * T_FLOAT);
aoqi@1 284 __ beq(T3, R0, is_float);
aoqi@1 285 __ delayed()->daddi(T3, T2, (-1) * T_DOUBLE);
aoqi@1 286 __ beq(T3, R0, is_double);
aoqi@1 287 __ delayed()->nop();
aoqi@1 288
aoqi@1 289 // handle T_INT case
aoqi@1 290 __ sd(V0, T0, 0 * wordSize);
aoqi@1 291 __ bind(exit);
aoqi@1 292
aoqi@1 293 // restore
aoqi@1 294 __ daddi(SP, FP, 2 * wordSize );
aoqi@1 295 __ ld(RA, SP, RA_off * wordSize);
aoqi@1 296 __ ld(FP, SP, FP_off * wordSize);
aoqi@1 297 __ ld(BCP, SP, BCP_off * wordSize);
aoqi@1 298 __ ld(LVP, SP, LVP_off * wordSize);
aoqi@1 299 __ ld(GP, SP, GP_off * wordSize);
aoqi@1 300 __ ld(TSR, SP, TSR_off * wordSize);
aoqi@1 301
aoqi@1 302 __ ld(S1, SP, S1_off * wordSize);
aoqi@1 303 __ ld(S3, SP, S3_off * wordSize);
aoqi@1 304 __ ld(S4, SP, S4_off * wordSize);
aoqi@1 305 __ ld(S5, SP, S5_off * wordSize);
aoqi@1 306 __ ld(S6, SP, S6_off * wordSize);
aoqi@1 307
aoqi@1 308 // return
aoqi@1 309 __ jr(RA);
aoqi@1 310 __ delayed()->nop();
aoqi@1 311
aoqi@1 312 // handle return types different from T_INT
aoqi@1 313 __ bind(is_long);
aoqi@1 314 __ sd(V0, T0, 0 * wordSize);
aoqi@1 315 //__ sd(V1, T0, 1 * wordSize);
aoqi@1 316 __ sd(R0, T0, 1 * wordSize);
aoqi@1 317 __ b(exit);
aoqi@1 318 __ delayed()->nop();
aoqi@1 319
aoqi@1 320 __ bind(is_float);
aoqi@1 321 __ swc1(F0, T0, 0 * wordSize);
aoqi@1 322 __ b(exit);
aoqi@1 323 __ delayed()->nop();
aoqi@1 324
aoqi@1 325 __ bind(is_double);
aoqi@1 326 __ sdc1(F0, T0, 0 * wordSize);
aoqi@1 327 //__ sdc1(F1, T0, 1 * wordSize);
aoqi@1 328 __ sd(R0, T0, 1 * wordSize);
aoqi@1 329 __ b(exit);
aoqi@1 330 __ delayed()->nop();
aoqi@1 331 //FIXME, 1.6 mips version add operation of fpu here
aoqi@1 332 StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
aoqi@1 333 __ b(common_return);
aoqi@1 334 __ delayed()->nop();
aoqi@1 335 return start;
aoqi@1 336 }
aoqi@1 337
aoqi@1 338 // Return point for a Java call if there's an exception thrown in
aoqi@1 339 // Java code. The exception is caught and transformed into a
aoqi@1 340 // pending exception stored in JavaThread that can be tested from
aoqi@1 341 // within the VM.
aoqi@1 342 //
aoqi@1 343 // Note: Usually the parameters are removed by the callee. In case
aoqi@1 344 // of an exception crossing an activation frame boundary, that is
aoqi@1 345 // not the case if the callee is compiled code => need to setup the
aoqi@1 346 // rsp.
aoqi@1 347 //
aoqi@1 348 // V0: exception oop
aoqi@1 349
aoqi@1 350 address generate_catch_exception() {
aoqi@1 351 StubCodeMark mark(this, "StubRoutines", "catch_exception");
aoqi@1 352 address start = __ pc();
aoqi@1 353
aoqi@1 354 Register thread = TREG;
aoqi@1 355
aoqi@1 356 // get thread directly
aoqi@1 357 #ifndef OPT_THREAD
aoqi@1 358 __ ld(thread, FP, thread_off * wordSize);
aoqi@1 359 #endif
aoqi@1 360
aoqi@1 361 #ifdef ASSERT
aoqi@1 362 // verify that threads correspond
aoqi@1 363 { Label L;
aoqi@1 364 __ get_thread(T8);
aoqi@1 365 __ beq(T8, thread, L);
aoqi@1 366 __ delayed()->nop();
aoqi@1 367 __ stop("StubRoutines::catch_exception: threads must correspond");
aoqi@1 368 __ bind(L);
aoqi@1 369 }
aoqi@1 370 #endif
aoqi@1 371 // set pending exception
aoqi@1 372 __ verify_oop(V0);
aoqi@1 373 __ sd(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 374 __ li(AT, (long)__FILE__);
aoqi@1 375 __ sd(AT, thread, in_bytes(Thread::exception_file_offset ()));
aoqi@1 376 __ li(AT, (long)__LINE__);
aoqi@1 377 __ sd(AT, thread, in_bytes(Thread::exception_line_offset ()));
aoqi@1 378
aoqi@1 379 // complete return to VM
aoqi@1 380 assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
aoqi@1 381 __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
aoqi@1 382 __ delayed()->nop();
aoqi@1 383
aoqi@1 384 return start;
aoqi@1 385 }
aoqi@1 386
aoqi@1 387 // Continuation point for runtime calls returning with a pending
aoqi@1 388 // exception. The pending exception check happened in the runtime
aoqi@1 389 // or native call stub. The pending exception in Thread is
aoqi@1 390 // converted into a Java-level exception.
aoqi@1 391 //
aoqi@1 392 // Contract with Java-level exception handlers:
aoqi@1 393 // V0: exception
aoqi@1 394 // V1: throwing pc
aoqi@1 395 //
aoqi@1 396 // NOTE: At entry of this stub, exception-pc must be on stack !!
aoqi@1 397
aoqi@1 398 address generate_forward_exception() {
aoqi@1 399 StubCodeMark mark(this, "StubRoutines", "forward exception");
aoqi@1 400 //Register thread = TREG;
aoqi@1 401 Register thread = TREG;
aoqi@1 402 address start = __ pc();
aoqi@1 403
aoqi@1 404 // Upon entry, the sp points to the return address returning into Java
aoqi@1 405 // (interpreted or compiled) code; i.e., the return address becomes the
aoqi@1 406 // throwing pc.
aoqi@1 407 //
aoqi@1 408 // Arguments pushed before the runtime call are still on the stack but
aoqi@1 409 // the exception handler will reset the stack pointer -> ignore them.
aoqi@1 410 // A potential result in registers can be ignored as well.
aoqi@1 411
aoqi@1 412 #ifdef ASSERT
aoqi@1 413 // make sure this code is only executed if there is a pending exception
aoqi@1 414 #ifndef OPT_THREAD
aoqi@1 415 __ get_thread(thread);
aoqi@1 416 #endif
aoqi@1 417 { Label L;
aoqi@1 418 __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 419 __ bne(AT, R0, L);
aoqi@1 420 __ delayed()->nop();
aoqi@1 421 __ stop("StubRoutines::forward exception: no pending exception (1)");
aoqi@1 422 __ bind(L);
aoqi@1 423 }
aoqi@1 424 #endif
aoqi@1 425
aoqi@1 426 // compute exception handler into T9
aoqi@1 427 __ ld(A1, SP, 0);
aoqi@1 428 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1);
aoqi@1 429 __ move(T9, V0);
aoqi@1 430 __ pop(V1);
aoqi@1 431
aoqi@1 432 #ifndef OPT_THREAD
aoqi@1 433 __ get_thread(thread);
aoqi@1 434 #endif
aoqi@1 435 __ ld(V0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 436 __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 437
aoqi@1 438 #ifdef ASSERT
aoqi@1 439 // make sure exception is set
aoqi@1 440 { Label L;
aoqi@1 441 __ bne(V0, R0, L);
aoqi@1 442 __ delayed()->nop();
aoqi@1 443 __ stop("StubRoutines::forward exception: no pending exception (2)");
aoqi@1 444 __ bind(L);
aoqi@1 445 }
aoqi@1 446 #endif
aoqi@1 447
aoqi@1 448 // continue at exception handler (return address removed)
aoqi@1 449 // V0: exception
aoqi@1 450 // T9: exception handler
aoqi@1 451 // V1: throwing pc
aoqi@1 452 __ verify_oop(V0);
aoqi@1 453 __ jr(T9);
aoqi@1 454 __ delayed()->nop();
aoqi@1 455
aoqi@1 456 return start;
aoqi@1 457 }
aoqi@1 458
aoqi@1 459 // Support for intptr_t get_previous_fp()
aoqi@1 460 //
aoqi@1 461 // This routine is used to find the previous frame pointer for the
aoqi@1 462 // caller (current_frame_guess). This is used as part of debugging when
aoqi@1 463 // ps() is seemingly lost trying to find frames.
aoqi@1 464 // This code assumes that the caller (current_frame_guess) has a frame.
aoqi@1 465 address generate_get_previous_fp() {
aoqi@1 466 StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
aoqi@1 467 const Address old_fp (FP, 0);
aoqi@1 468 const Address older_fp (V0, 0);
aoqi@1 469 address start = __ pc();
aoqi@1 470 __ enter();
aoqi@1 471 __ lw(V0, old_fp); // callers fp
aoqi@1 472 __ lw(V0, older_fp); // the frame for ps()
aoqi@1 473 __ leave();
aoqi@1 474 __ jr(RA);
aoqi@1 475 __ delayed()->nop();
aoqi@1 476 return start;
aoqi@1 477 }
aoqi@1 478 // The following routine generates a subroutine to throw an
aoqi@1 479 // asynchronous UnknownError when an unsafe access gets a fault that
aoqi@1 480 // could not be reasonably prevented by the programmer. (Example:
aoqi@1 481 // SIGBUS/OBJERR.)
aoqi@1 482 address generate_handler_for_unsafe_access() {
aoqi@1 483 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
aoqi@1 484 address start = __ pc();
aoqi@1 485 __ pushad(); // push registers
aoqi@1 486 // Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@1 487 __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
aoqi@1 488 __ delayed()->nop();
aoqi@1 489 __ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord);
aoqi@1 490 __ popad();
aoqi@1 491 __ jr(RA);
aoqi@1 492 __ delayed()->nop();
aoqi@1 493 return start;
aoqi@1 494 }
aoqi@1 495
aoqi@1 496 // Non-destructive plausibility checks for oops
aoqi@1 497 //
aoqi@1 498 // Arguments:
aoqi@1 499 // all args on stack!
aoqi@1 500 //
aoqi@1 501 // Stack after saving c_rarg3:
aoqi@1 502 // [tos + 0]: saved c_rarg3
aoqi@1 503 // [tos + 1]: saved c_rarg2
aoqi@1 504 // [tos + 2]: saved r12 (several TemplateTable methods use it)
aoqi@1 505 // [tos + 3]: saved flags
aoqi@1 506 // [tos + 4]: return address
aoqi@1 507 // * [tos + 5]: error message (char*)
aoqi@1 508 // * [tos + 6]: object to verify (oop)
aoqi@1 509 // * [tos + 7]: saved rax - saved by caller and bashed
aoqi@1 510 // * = popped on exit
aoqi@1 511 address generate_verify_oop() {
aoqi@1 512 StubCodeMark mark(this, "StubRoutines", "verify_oop");
aoqi@1 513 address start = __ pc();
aoqi@1 514 __ reinit_heapbase();
aoqi@1 515 __ verify_oop_subroutine();
aoqi@1 516 address end = __ pc();
aoqi@1 517 return start;
aoqi@1 518 }
aoqi@1 519
aoqi@1 520 //
aoqi@1 521 // Generate overlap test for array copy stubs
aoqi@1 522 //
aoqi@1 523 // Input:
aoqi@1 524 // A0 - array1
aoqi@1 525 // A1 - array2
aoqi@1 526 // A2 - element count
aoqi@1 527 //
aoqi@1 528 // Note: this code uses only AT and T9 as scratch registers
aoqi@1 529 //
aoqi@1 530
aoqi@1 531 // use T9 as temp
aoqi@1 532 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
aoqi@1 533 int elem_size = 1 << log2_elem_size;
aoqi@1 534 Address::ScaleFactor sf = Address::times_1;
aoqi@1 535
aoqi@1 536 switch (log2_elem_size) {
aoqi@1 537 case 0: sf = Address::times_1; break;
aoqi@1 538 case 1: sf = Address::times_2; break;
aoqi@1 539 case 2: sf = Address::times_4; break;
aoqi@1 540 case 3: sf = Address::times_8; break;
aoqi@1 541 }
aoqi@1 542
aoqi@1 543 __ dsll(AT, A2, sf);
aoqi@1 544 __ dadd(AT, AT, A0);
aoqi@1 545 __ lea(T9, Address(AT, -elem_size));
aoqi@1 546 __ dsub(AT, A1, A0);
aoqi@1 547 __ blez(AT, no_overlap_target);
aoqi@1 548 __ delayed()->nop();
aoqi@1 549 __ dsub(AT, A1, T9);
aoqi@1 550 __ bgtz(AT, no_overlap_target);
aoqi@1 551 __ delayed()->nop();
aoqi@1 552
aoqi@8 553 // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target
aoqi@8 554 Label L;
aoqi@8 555 __ bgez(A0, L);
aoqi@8 556 __ delayed()->nop();
aoqi@8 557 __ bgtz(A1, no_overlap_target);
aoqi@8 558 __ delayed()->nop();
aoqi@8 559 __ bind(L);
aoqi@8 560
aoqi@1 561 }
aoqi@1 562
aoqi@1 563 //
aoqi@1 564 // Generate store check for array
aoqi@1 565 //
aoqi@1 566 // Input:
aoqi@1 567 // %edi - starting address
aoqi@1 568 // %ecx - element count
aoqi@1 569 //
aoqi@1 570 // The 2 input registers are overwritten
aoqi@1 571 //
aoqi@1 572
aoqi@1 573 //
aoqi@1 574 // Generate store check for array
aoqi@1 575 //
aoqi@1 576 // Input:
aoqi@1 577 // T0 - starting address(edi)
aoqi@1 578 // T1 - element count (ecx)
aoqi@1 579 //
aoqi@1 580 // The 2 input registers are overwritten
aoqi@1 581 //
aoqi@1 582
aoqi@1 583 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
aoqi@1 584
aoqi@1 585 void array_store_check() {
aoqi@1 586 BarrierSet* bs = Universe::heap()->barrier_set();
aoqi@1 587 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
aoqi@1 588 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
aoqi@1 589 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
aoqi@1 590 Label l_0;
aoqi@1 591
aoqi@1 592 __ dsll(AT, T1, TIMES_OOP);
aoqi@1 593 __ dadd(AT, T0, AT);
aoqi@1 594 __ daddiu(T1, AT, - BytesPerHeapOop);
aoqi@1 595
aoqi@1 596 __ shr(T0, CardTableModRefBS::card_shift);
aoqi@1 597 __ shr(T1, CardTableModRefBS::card_shift);
aoqi@1 598
aoqi@1 599 __ dsub(T1, T1, T0); // end --> cards count
aoqi@1 600 __ bind(l_0);
aoqi@1 601
aoqi@1 602 __ li48(AT, (long)ct->byte_map_base);
aoqi@1 603 __ dadd(AT, AT, T0);
aoqi@1 604 __ dadd(AT, AT, T1);
aoqi@1 605 __ sb(R0, AT, 0);
aoqi@1 606 //__ daddi(T1, T1, -4);
aoqi@1 607 __ daddi(T1, T1, - 1);
aoqi@1 608 __ bgez(T1, l_0);
aoqi@1 609 __ delayed()->nop();
aoqi@1 610 }
aoqi@1 611
aoqi@1 612 // Arguments:
aoqi@1 613 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 614 // ignored
aoqi@1 615 // name - stub name string
aoqi@1 616 //
aoqi@1 617 // Inputs:
aoqi@1 618 // A0 - source array address
aoqi@1 619 // A1 - destination array address
aoqi@1 620 // A2 - element count, treated as ssize_t, can be zero
aoqi@1 621 //
aoqi@1 622 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 623 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 624 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 625 // and stored atomically.
aoqi@1 626 //
aoqi@1 627 // Side Effects:
aoqi@1 628 // disjoint_byte_copy_entry is set to the no-overlap entry point
aoqi@1 629 // used by generate_conjoint_byte_copy().
aoqi@1 630 //
aoqi@1 631 address generate_disjoint_byte_copy(bool aligned, const char *name) {
aoqi@1 632 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 633 __ align(CodeEntryAlignment);
aoqi@1 634 address start = __ pc();
aoqi@1 635 Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
aoqi@1 636
aoqi@1 637 __ push(T3);
aoqi@1 638 __ push(T0);
aoqi@1 639 __ push(T1);
aoqi@1 640 __ push(T8);
aoqi@1 641 __ move(T3, A0);
aoqi@1 642 __ move(T0, A1);
aoqi@1 643 __ move(T1, A2);
aoqi@1 644 __ move(T8, T1); // original count in T1
aoqi@1 645 __ daddi(AT, T1, -3);
aoqi@1 646 __ blez(AT, l_4);
aoqi@1 647 __ delayed()->nop();
aoqi@1 648 if (!aligned) {
aoqi@8 649 //TODO: copy 8 bytes at one time
Jin@7 650 // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */
Jin@7 651 __ andi(AT, T3, 3);
Jin@7 652 __ andi(T9, T0, 3);
Jin@7 653 __ bne(AT, T9, l_5);
Jin@7 654 __ delayed()->nop();
Jin@7 655
aoqi@1 656 // align source address at dword address boundary
aoqi@1 657 __ move(T1, 4);
aoqi@1 658 __ sub(T1, T1, T3);
aoqi@1 659 __ andi(T1, T1, 3);
aoqi@1 660 __ beq(T1, R0, l_1);
aoqi@1 661 __ delayed()->nop();
aoqi@1 662 __ sub(T8,T8,T1);
aoqi@1 663 __ bind(l_0);
aoqi@1 664 __ lb(AT, T3, 0);
aoqi@1 665 __ sb(AT, T0, 0);
aoqi@1 666 __ addi(T3, T3, 1);
aoqi@1 667 __ addi(T0, T0, 1);
aoqi@1 668 __ addi(T1 ,T1, -1);
aoqi@1 669 __ bne(T1, R0, l_0);
aoqi@1 670 __ delayed()->nop();
aoqi@1 671 __ bind(l_1);
aoqi@1 672 __ move(T1, T8);
aoqi@1 673 }
aoqi@1 674 __ shr(T1, 2);
aoqi@1 675 __ beq(T1, R0, l_4); // no dwords to move
aoqi@1 676 __ delayed()->nop();
aoqi@1 677 // copy aligned dwords
aoqi@1 678 __ bind(l_2);
aoqi@1 679 __ align(16);
aoqi@1 680 __ bind(l_3);
aoqi@1 681 __ lw(AT, T3, 0);
aoqi@1 682 __ sw(AT, T0, 0 );
aoqi@1 683 __ addi(T3, T3, 4);
aoqi@1 684 __ addi(T0, T0, 4);
aoqi@1 685 __ addi(T1, T1, -1);
aoqi@1 686 __ bne(T1, R0, l_3);
aoqi@1 687 __ delayed()->nop();
aoqi@1 688 __ bind(l_4);
aoqi@1 689 __ move(T1, T8);
aoqi@1 690 __ andi(T1, T1, 3);
aoqi@1 691 __ beq(T1, R0, l_6);
aoqi@1 692 __ delayed()->nop();
aoqi@1 693 // copy suffix
aoqi@1 694 __ bind(l_5);
aoqi@1 695 __ lb(AT, T3, 0);
aoqi@1 696 __ sb(AT, T0, 0);
aoqi@1 697 __ addi(T3, T3, 1);
aoqi@1 698 __ addi(T0, T0, 1);
aoqi@1 699 __ addi(T1, T1, -1);
aoqi@1 700 __ bne(T1, R0, l_5 );
aoqi@1 701 __ delayed()->nop();
aoqi@1 702 __ bind(l_6);
aoqi@1 703 __ pop(T8);
aoqi@1 704 __ pop(T1);
aoqi@1 705 __ pop(T0);
aoqi@1 706 __ pop(T3);
aoqi@1 707 __ jr(RA);
aoqi@1 708 __ delayed()->nop();
aoqi@1 709 return start;
aoqi@1 710 }
aoqi@1 711
aoqi@1 712 // Arguments:
aoqi@1 713 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 714 // ignored
aoqi@1 715 // name - stub name string
aoqi@1 716 //
aoqi@1 717 // Inputs:
aoqi@8 718 // A0 - source array address
aoqi@8 719 // A1 - destination array address
aoqi@8 720 // A2 - element count, treated as ssize_t, can be zero
aoqi@1 721 //
aoqi@1 722 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
aoqi@1 723 // we let the hardware handle it. The one to eight bytes within words,
aoqi@1 724 // dwords or qwords that span cache line boundaries will still be loaded
aoqi@1 725 // and stored atomically.
aoqi@1 726 //
aoqi@1 727 address generate_conjoint_byte_copy(bool aligned, const char *name) {
aoqi@8 728 __ align(CodeEntryAlignment);
aoqi@8 729 StubCodeMark mark(this, "StubRoutines", name);
aoqi@8 730 address start = __ pc();
aoqi@1 731
aoqi@8 732 Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit;
aoqi@8 733 Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
aoqi@1 734
aoqi@8 735 address nooverlap_target = aligned ?
aoqi@8 736 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
aoqi@8 737 StubRoutines::jbyte_disjoint_arraycopy();
aoqi@1 738
aoqi@8 739 array_overlap_test(nooverlap_target, 0);
Jin@7 740
aoqi@8 741 const Register from = A0; // source array address
aoqi@8 742 const Register to = A1; // destination array address
aoqi@8 743 const Register count = A2; // elements count
aoqi@8 744 const Register end_from = T3; // source array end address
aoqi@8 745 const Register end_to = T0; // destination array end address
aoqi@8 746 const Register end_count = T1; // destination array end address
Jin@7 747
aoqi@8 748 __ push(end_from);
aoqi@8 749 __ push(end_to);
aoqi@8 750 __ push(end_count);
aoqi@8 751 __ push(T8);
Jin@7 752
aoqi@8 753 // copy from high to low
aoqi@8 754 __ move(end_count, count);
aoqi@8 755 __ dadd(end_from, from, end_count);
aoqi@8 756 __ dadd(end_to, to, end_count);
Jin@7 757
aoqi@8 758 // 2016/05/08 aoqi: If end_from and end_to has differante alignment, unaligned copy is performed.
aoqi@8 759 __ andi(AT, end_from, 3);
aoqi@8 760 __ andi(T8, end_to, 3);
aoqi@8 761 __ bne(AT, T8, l_copy_byte);
aoqi@8 762 __ delayed()->nop();
Jin@7 763
aoqi@8 764 // First deal with the unaligned data at the top.
aoqi@8 765 __ bind(l_unaligned);
aoqi@8 766 __ beq(end_count, R0, l_exit);
aoqi@8 767 __ delayed()->nop();
aoqi@8 768
aoqi@8 769 __ andi(AT, end_from, 3);
aoqi@8 770 __ bne(AT, R0, l_from_unaligned);
aoqi@8 771 __ delayed()->nop();
aoqi@8 772
aoqi@8 773 __ andi(AT, end_to, 3);
aoqi@8 774 __ beq(AT, R0, l_4_bytes_aligned);
aoqi@8 775 __ delayed()->nop();
aoqi@8 776
aoqi@8 777 __ bind(l_from_unaligned);
aoqi@8 778 __ lb(AT, end_from, -1);
aoqi@8 779 __ sb(AT, end_to, -1);
aoqi@8 780 __ daddi(end_from, end_from, -1);
aoqi@8 781 __ daddi(end_to, end_to, -1);
aoqi@8 782 __ daddi(end_count, end_count, -1);
aoqi@8 783 __ b(l_unaligned);
aoqi@8 784 __ delayed()->nop();
aoqi@8 785
aoqi@8 786 // now end_to, end_from point to 4-byte aligned high-ends
aoqi@8 787 // end_count contains byte count that is not copied.
aoqi@8 788 // copy 4 bytes at a time
aoqi@8 789 __ bind(l_4_bytes_aligned);
aoqi@8 790
aoqi@8 791 __ move(T8, end_count);
aoqi@8 792 __ daddi(AT, end_count, -3);
aoqi@8 793 __ blez(AT, l_copy_suffix);
aoqi@8 794 __ delayed()->nop();
aoqi@8 795
aoqi@8 796 //__ andi(T8, T8, 3);
aoqi@8 797 __ lea(end_from, Address(end_from, -4));
aoqi@8 798 __ lea(end_to, Address(end_to, -4));
aoqi@8 799
aoqi@8 800 __ dsrl(end_count, end_count, 2);
aoqi@8 801 __ align(16);
aoqi@8 802 __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes
aoqi@8 803 __ lw(AT, end_from, 0);
aoqi@8 804 __ sw(AT, end_to, 0);
aoqi@8 805 __ addi(end_from, end_from, -4);
aoqi@8 806 __ addi(end_to, end_to, -4);
aoqi@8 807 __ addi(end_count, end_count, -1);
aoqi@8 808 __ bne(end_count, R0, l_copy_4_bytes_loop);
aoqi@8 809 __ delayed()->nop();
aoqi@8 810
aoqi@8 811 __ b(l_copy_suffix);
aoqi@8 812 __ delayed()->nop();
aoqi@8 813 // copy dwords aligned or not with repeat move
aoqi@8 814 // l_copy_suffix
aoqi@8 815 // copy suffix (0-3 bytes)
aoqi@8 816 __ bind(l_copy_suffix);
aoqi@8 817 __ andi(T8, T8, 3);
aoqi@8 818 __ beq(T8, R0, l_exit);
aoqi@8 819 __ delayed()->nop();
aoqi@8 820 __ addi(end_from, end_from, 3);
aoqi@8 821 __ addi(end_to, end_to, 3);
aoqi@8 822 __ bind(l_copy_suffix_loop);
aoqi@8 823 __ lb(AT, end_from, 0);
aoqi@8 824 __ sb(AT, end_to, 0);
aoqi@8 825 __ addi(end_from, end_from, -1);
aoqi@8 826 __ addi(end_to, end_to, -1);
aoqi@8 827 __ addi(T8, T8, -1);
aoqi@8 828 __ bne(T8, R0, l_copy_suffix_loop);
aoqi@8 829 __ delayed()->nop();
aoqi@8 830
aoqi@8 831 __ bind(l_copy_byte);
aoqi@8 832 __ beq(end_count, R0, l_exit);
aoqi@8 833 __ delayed()->nop();
aoqi@8 834 __ lb(AT, end_from, -1);
aoqi@8 835 __ sb(AT, end_to, -1);
aoqi@8 836 __ daddi(end_from, end_from, -1);
aoqi@8 837 __ daddi(end_to, end_to, -1);
aoqi@8 838 __ daddi(end_count, end_count, -1);
aoqi@8 839 __ b(l_copy_byte);
aoqi@8 840 __ delayed()->nop();
aoqi@8 841
aoqi@8 842 __ bind(l_exit);
aoqi@8 843 __ pop(T8);
aoqi@8 844 __ pop(end_count);
aoqi@8 845 __ pop(end_to);
aoqi@8 846 __ pop(end_from);
aoqi@8 847 __ jr(RA);
aoqi@8 848 __ delayed()->nop();
aoqi@8 849 return start;
aoqi@1 850 }
aoqi@1 851
aoqi@13 852 // Generate stub for disjoint short copy. If "aligned" is true, the
aoqi@13 853 // "from" and "to" addresses are assumed to be heapword aligned.
aoqi@1 854 //
aoqi@13 855 // Arguments for generated stub:
aoqi@13 856 // from: A0
aoqi@13 857 // to: A1
aoqi@13 858 // elm.count: A2 treated as signed
aoqi@13 859 // one element: 2 bytes
aoqi@1 860 //
aoqi@13 861 // Strategy for aligned==true:
aoqi@1 862 //
aoqi@13 863 // If length <= 9:
aoqi@13 864 // 1. copy 1 element at a time (l_5)
aoqi@1 865 //
aoqi@13 866 // If length > 9:
aoqi@13 867 // 1. copy 4 elements at a time until less than 4 elements are left (l_7)
aoqi@13 868 // 2. copy 2 elements at a time until less than 2 elements are left (l_6)
aoqi@13 869 // 3. copy last element if one was left in step 2. (l_1)
aoqi@13 870 //
aoqi@13 871 //
aoqi@13 872 // Strategy for aligned==false:
aoqi@13 873 //
aoqi@13 874 // If length <= 9: same as aligned==true case
aoqi@13 875 //
aoqi@13 876 // If length > 9:
aoqi@13 877 // 1. continue with step 7. if the alignment of from and to mod 4
aoqi@13 878 // is different.
aoqi@13 879 // 2. align from and to to 4 bytes by copying 1 element if necessary
aoqi@13 880 // 3. at l_2 from and to are 4 byte aligned; continue with
aoqi@13 881 // 6. if they cannot be aligned to 8 bytes because they have
aoqi@13 882 // got different alignment mod 8.
aoqi@13 883 // 4. at this point we know that both, from and to, have the same
aoqi@13 884 // alignment mod 8, now copy one element if necessary to get
aoqi@13 885 // 8 byte alignment of from and to.
aoqi@13 886 // 5. copy 4 elements at a time until less than 4 elements are
aoqi@13 887 // left; depending on step 3. all load/stores are aligned.
aoqi@13 888 // 6. copy 2 elements at a time until less than 2 elements are
aoqi@13 889 // left. (l_6)
aoqi@13 890 // 7. copy 1 element at a time. (l_5)
aoqi@13 891 // 8. copy last element if one was left in step 6. (l_1)
aoqi@13 892 //
aoqi@13 893 // TODO:
aoqi@13 894 //
aoqi@13 895 // 1. use loongson 128-bit load/store
aoqi@13 896 // 2. use loop unrolling optimization when len is big enough, for example if len > 0x2000:
aoqi@13 897 // __ bind(l_x);
aoqi@13 898 // __ ld(AT, tmp1, 0);
aoqi@13 899 // __ ld(tmp, tmp1, 8);
aoqi@13 900 // __ sd(AT, tmp2, 0);
aoqi@13 901 // __ sd(tmp, tmp2, 8);
aoqi@13 902 // __ ld(AT, tmp1, 16);
aoqi@13 903 // __ ld(tmp, tmp1, 24);
aoqi@13 904 // __ sd(AT, tmp2, 16);
aoqi@13 905 // __ sd(tmp, tmp2, 24);
aoqi@13 906 // __ daddi(tmp1, tmp1, 32);
aoqi@13 907 // __ daddi(tmp2, tmp2, 32);
aoqi@13 908 // __ daddi(tmp3, tmp3, -16);
aoqi@13 909 // __ daddi(AT, tmp3, -16);
aoqi@13 910 // __ bgez(AT, l_x);
aoqi@13 911 // __ delayed()->nop();
aoqi@13 912 //
aoqi@13 913 address generate_disjoint_short_copy(bool aligned, const char * name) { // Emits the low-to-high copy stub for 2-byte elements; returns the pc captured before the first emitted instruction.
aoqi@13 914 StubCodeMark mark(this, "StubRoutines", name);
aoqi@13 915 __ align(CodeEntryAlignment);
aoqi@1 916
aoqi@13 917 Register tmp1 = T0; // running source pointer (loaded from A0 below)
aoqi@13 918 Register tmp2 = T1; // running destination pointer (loaded from A1 below)
aoqi@13 919 Register tmp3 = T3; // remaining element count (loaded from A2 below)
aoqi@1 920
aoqi@13 921 address start = __ pc();
aoqi@13 922
aoqi@13 923 __ push(tmp1); // save the three temporaries; they are popped in reverse order at l_4
aoqi@13 924 __ push(tmp2);
aoqi@13 925 __ push(tmp3);
aoqi@13 926 __ move(tmp1, A0);
aoqi@13 927 __ move(tmp2, A1);
aoqi@13 928 __ move(tmp3, A2);
aoqi@13 929
aoqi@13 930 Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
aoqi@13 931 Label l_debug;
aoqi@13 932 // don't try anything fancy if arrays don't have many elements
aoqi@13 933 __ daddi(AT, tmp3, -9); // AT = count - 9
aoqi@13 934 __ blez(AT, l_1); // count <= 9: go straight to the element-at-a-time path
aoqi@13 935 __ delayed()->nop();
aoqi@13 936
aoqi@13 937 if (!aligned) { // the alignment fix-up code below is only emitted for the unaligned variant
aoqi@13 938 __ xorr(AT, A0, A1);
aoqi@13 939 __ andi(AT, AT, 1);
aoqi@13 940 __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen?
aoqi@13 941 __ delayed()->nop();
aoqi@13 942
aoqi@13 943 __ xorr(AT, A0, A1);
aoqi@13 944 __ andi(AT, AT, 3);
aoqi@13 945 __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy
aoqi@13 946 __ delayed()->nop();
aoqi@13 947
aoqi@13 948 // At this point it is guaranteed that both, from and to have the same alignment mod 4.
aoqi@13 949
aoqi@13 950 // Copy 1 element if necessary to align to 4 bytes.
aoqi@13 951 __ andi(AT, A0, 3); // tmp1 still equals A0 here, so testing A0 is equivalent
aoqi@13 952 __ beq(AT, R0, l_2);
aoqi@13 953 __ delayed()->nop();
aoqi@13 954
aoqi@13 955 __ lhu(AT, tmp1, 0); // copy one 2-byte element and advance both pointers by 2
aoqi@13 956 __ daddi(tmp1, tmp1, 2);
aoqi@13 957 __ sh(AT, tmp2, 0);
aoqi@13 958 __ daddi(tmp2, tmp2, 2);
aoqi@13 959 __ daddi(tmp3, tmp3, -1);
aoqi@13 960 __ bind(l_2);
aoqi@13 961
aoqi@13 962 // At this point the positions of both, from and to, are at least 4 byte aligned.
aoqi@13 963
aoqi@13 964 // Copy 4 elements at a time.
aoqi@13 965 // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
aoqi@13 966 __ xorr(AT, tmp1, tmp2);
aoqi@13 967 __ andi(AT, AT, 7);
aoqi@13 968 __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned
aoqi@13 969 __ delayed()->nop();
aoqi@13 970
aoqi@13 971 // Copy a 2-element word if necessary to align to 8 bytes.
aoqi@13 972 __ andi(AT, tmp1, 7);
aoqi@13 973 __ beq(AT, R0, l_7);
aoqi@13 974 __ delayed()->nop();
aoqi@13 975
aoqi@13 976 __ lw(AT, tmp1, 0); // one 4-byte word = 2 elements
aoqi@13 977 __ daddi(tmp3, tmp3, -2);
aoqi@13 978 __ sw(AT, tmp2, 0);
aoqi@13 979 { // FasterArrayCopy
aoqi@13 980 __ daddi(tmp1, tmp1, 4);
aoqi@13 981 __ daddi(tmp2, tmp2, 4);
aoqi@13 982 }
aoqi@13 983 }
aoqi@13 984
aoqi@13 985 __ bind(l_7);
aoqi@13 986
aoqi@13 987 // Copy 4 elements at a time; either the loads or the stores can
aoqi@13 988 // be unaligned if aligned == false.
aoqi@13 989
aoqi@13 990 { // FasterArrayCopy
aoqi@13 991 __ daddi(AT, tmp3, -15);
aoqi@13 992 __ blez(AT, l_6); // copy 2 at a time if less than 16 elements remain
aoqi@13 993 __ delayed()->nop();
aoqi@13 994
aoqi@13 995 __ bind(l_8);
aoqi@13 996 // For Loongson, there is 128-bit memory access. TODO
aoqi@13 997 __ ld(AT, tmp1, 0); // one 8-byte doubleword = 4 elements
aoqi@13 998 __ sd(AT, tmp2, 0);
aoqi@13 999 __ daddi(tmp1, tmp1, 8);
aoqi@13 1000 __ daddi(tmp2, tmp2, 8);
aoqi@13 1001 __ daddi(tmp3, tmp3, -4);
aoqi@13 1002 __ daddi(AT, tmp3, -4); // AT = remaining - 4
aoqi@13 1003 __ bgez(AT, l_8); // loop while at least 4 elements remain
aoqi@13 1004 __ delayed()->nop();
aoqi@13 1005 }
aoqi@13 1006 __ bind(l_6);
aoqi@13 1007
aoqi@13 1008 // copy 2 element at a time
aoqi@13 1009 { // FasterArrayCopy
aoqi@13 1010 __ daddi(AT, tmp3, -1);
aoqi@13 1011 __ blez(AT, l_1); // 0 or 1 element left: skip the 2-element loop
aoqi@13 1012 __ delayed()->nop();
aoqi@13 1013
aoqi@13 1014 __ bind(l_3);
aoqi@13 1015 __ lw(AT, tmp1, 0);
aoqi@13 1016 __ sw(AT, tmp2, 0);
aoqi@13 1017 __ daddi(tmp1, tmp1, 4);
aoqi@13 1018 __ daddi(tmp2, tmp2, 4);
aoqi@13 1019 __ daddi(tmp3, tmp3, -2);
aoqi@13 1020 __ daddi(AT, tmp3, -2); // AT = remaining - 2
aoqi@13 1021 __ bgez(AT, l_3); // loop while at least 2 elements remain
aoqi@13 1022 __ delayed()->nop();
aoqi@13 1023
aoqi@13 1024 }
aoqi@13 1025
aoqi@13 1026 // single element copy: one 2-byte element per iteration (lhu/sh); this is also the whole copy when count <= 9
aoqi@13 1027 __ bind(l_1);
aoqi@13 1028 __ beq(R0, tmp3, l_4); // nothing left. NOTE(review): only zero is tested; a negative count would still copy one element below - presumably callers never pass one, confirm
aoqi@13 1029 __ delayed()->nop();
aoqi@13 1030
aoqi@13 1031 { // FasterArrayCopy
aoqi@13 1032
aoqi@13 1033 __ bind(l_5);
aoqi@13 1034 __ lhu(AT, tmp1, 0);
aoqi@13 1035 __ daddi(tmp3, tmp3, -1);
aoqi@13 1036 __ sh(AT, tmp2, 0);
aoqi@13 1037 __ daddi(tmp1, tmp1, 2);
aoqi@13 1038 __ daddi(tmp2, tmp2, 2);
aoqi@13 1039 __ daddi(AT, tmp3, -1); // AT = remaining - 1
aoqi@13 1040 __ bgez(AT, l_5); // loop while at least 1 element remains
aoqi@13 1041 __ delayed()->nop();
aoqi@13 1042 }
aoqi@13 1043 __ bind(l_4);
aoqi@13 1044 __ pop(tmp3); // restore temporaries in reverse push order
aoqi@13 1045 __ pop(tmp2);
aoqi@13 1046 __ pop(tmp1);
aoqi@13 1047
aoqi@13 1048 __ jr(RA);
aoqi@13 1049 __ delayed()->nop();
aoqi@13 1050
aoqi@13 1051 __ bind(l_debug); // only targeted by the mod-2 alignment check, which is emitted when aligned == false
aoqi@13 1052 __ stop("generate_disjoint_short_copy should not reach here");
aoqi@13 1053 return start;
aoqi@1 1054 }
aoqi@1 1055
aoqi@1 1056 // Arguments:
aoqi@1 1057 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1058 // ignored
aoqi@1 1059 // name - stub name string
aoqi@1 1060 //
aoqi@1 1061 // Inputs:
aoqi@1 1062 // c_rarg0 - source array address
aoqi@1 1063 // c_rarg1 - destination array address
aoqi@1 1064 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1065 //
aoqi@1 1066 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
aoqi@1 1067 // let the hardware handle it. The two or four words within dwords
aoqi@1 1068 // or qwords that span cache line boundaries will still be loaded
aoqi@1 1069 // and stored atomically.
aoqi@1 1070 //
aoqi@1 1071 address generate_conjoint_short_copy(bool aligned, const char *name) { // Emits the 2-byte-element copy stub that walks from the high end of the arrays downwards; returns the stub entry pc.
aoqi@1 1072 Label l_1, l_2, l_3, l_4, l_5; // l_1 and l_3 are bound below but no branch targets them
aoqi@1 1073 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1074 __ align(CodeEntryAlignment);
aoqi@1 1075 address start = __ pc();
aoqi@1 1076 address nooverlap_target = aligned ?
aoqi@1 1077 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
aoqi@1 1078 StubRoutines::jshort_disjoint_arraycopy();
aoqi@1 1079
aoqi@1 1080 array_overlap_test(nooverlap_target, 1); // helper defined elsewhere; presumably diverts to the disjoint stub when a forward copy is safe, 1 presumably being log2(element size) - TODO confirm
aoqi@1 1081
aoqi@1 1082 __ push(T3); // saved temporaries, popped in reverse order at l_5
aoqi@1 1083 __ push(T0);
aoqi@1 1084 __ push(T1);
aoqi@1 1085 __ push(T8);
aoqi@1 1086
aoqi@1 1087 /*
aoqi@1 1088 __ pushl(esi);
aoqi@1 1089 __ movl(ecx, Address(esp, 4+12)); // count
aoqi@1 1090 __ pushl(edi);
aoqi@1 1091 __ movl(esi, Address(esp, 8+ 4)); // from
aoqi@1 1092 __ movl(edi, Address(esp, 8+ 8)); // to
aoqi@1 1093 */
aoqi@1 1094 __ move(T1, A2); // T1 = element count
aoqi@1 1095 __ move(T3, A0); // T3 = from
aoqi@1 1096 __ move(T0, A1); // T0 = to
aoqi@1 1097
aoqi@1 1098
aoqi@1 1099 // copy dwords from high to low
aoqi@1 1100 // __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
aoqi@1 1101 __ sll(AT, T1, Address::times_2); // NOTE(review): sll/add/addi share names with the 32-bit MIPS instructions, while generate_disjoint_short_copy uses daddi for the same kind of values - confirm these are safe for 64-bit addresses
aoqi@1 1102 __ add(AT, T3, AT);
aoqi@1 1103 __ lea(T3, Address( AT, -4)); // T3 = from + count*2 - 4 (the highest 4-byte word of the source range)
aoqi@1 1104 //__ std();
aoqi@1 1105 //__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
aoqi@1 1106 __ sll(AT,T1 , Address::times_2);
aoqi@1 1107 __ add(AT, T0, AT);
aoqi@1 1108 __ lea(T0, Address( AT, -4)); // T0 = to + count*2 - 4
aoqi@1 1109 // __ movl(eax, ecx);
aoqi@1 1110 __ move(T8, T1); // keep the original count for the odd-element test at l_4
aoqi@1 1111 __ bind(l_1);
aoqi@1 1112 // __ sarl(ecx, 1); // dword count
aoqi@1 1113 __ sra(T1,T1, 1); // T1 = number of whole 4-byte words (2 elements each)
aoqi@1 1114 //__ jcc(Assembler::equal, l_4); // no dwords to move
aoqi@1 1115 __ beq(T1, R0, l_4);
aoqi@1 1116 __ delayed()->nop();
aoqi@1 1117 /* __ cmpl(ecx, 32);
aoqi@1 1118 __ jcc(Assembler::above, l_3); // > 32 dwords
aoqi@1 1119 // copy dwords with loop
aoqi@1 1120 __ subl(edi, esi);
aoqi@1 1121 */ __ align(16);
aoqi@1 1122 __ bind(l_2);
aoqi@1 1123 //__ movl(edx, Address(esi));
aoqi@1 1124 __ lw(AT, T3, 0); // NOTE(review): for an odd count these word accesses sit 2 bytes off the alignment of from/to; the header says the hardware handles it - confirm for MIPS lw/sw
aoqi@1 1125 //__ movl(Address(edi, esi, Address::times_1), edx);
aoqi@1 1126 __ sw(AT, T0, 0);
aoqi@1 1127 //__ subl(esi, 4);
aoqi@1 1128 __ addi(T3, T3, -4); // step both pointers down by one word
aoqi@1 1129 __ addi(T0, T0, -4);
aoqi@1 1130 //__ decl(ecx);
aoqi@1 1131 __ addi(T1, T1, -1);
aoqi@1 1132 // __ jcc(Assembler::notEqual, l_2);
aoqi@1 1133 __ bne(T1, R0, l_2);
aoqi@1 1134 __ delayed()->nop();
aoqi@1 1135 // __ addl(edi, esi);
aoqi@1 1136 // __ jmp(l_4);
aoqi@1 1137 __ b(l_4); // l_4 is bound right after the delay slot, so this branch targets the next instruction
aoqi@1 1138 __ delayed()->nop();
aoqi@1 1139 // copy dwords with repeat move
aoqi@1 1140 __ bind(l_3);
aoqi@1 1141 // __ rep_movl();
aoqi@1 1142 __ bind(l_4);
aoqi@1 1143 // __ andl(eax, 1); // suffix count
aoqi@1 1144 __ andi(T8, T8, 1); // suffix count
aoqi@1 1145 //__ jcc(Assembler::equal, l_5); // no suffix
aoqi@1 1146 __ beq(T8, R0, l_5 );
aoqi@1 1147 __ delayed()->nop();
aoqi@1 1148 // copy suffix
aoqi@1 1149 // __ movw(edx, Address(esi, 2));
aoqi@1 1150 __ lh(AT, T3, 2); // for an odd count T3 + 2 == from here, i.e. this moves element 0
aoqi@1 1151 // __ movw(Address(edi, 2), edx);
aoqi@1 1152 __ sh(AT, T0, 2);
aoqi@1 1153 __ bind(l_5);
aoqi@1 1154 // __ cld();
aoqi@1 1155 // __ popl(edi);
aoqi@1 1156 // __ popl(esi);
aoqi@1 1157 // __ ret(0);
aoqi@1 1158 __ pop(T8); // restore temporaries in reverse push order
aoqi@1 1159 __ pop(T1);
aoqi@1 1160 __ pop(T0);
aoqi@1 1161 __ pop(T3);
aoqi@1 1162 __ jr(RA);
aoqi@1 1163 __ delayed()->nop();
aoqi@1 1164 return start;
aoqi@1 1165 }
aoqi@1 1166
aoqi@1 1167 // Arguments:
aoqi@1 1168 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1169 // ignored
aoqi@1 1170 // is_oop - true => oop array, so generate store check code
aoqi@1 1171 // name - stub name string
aoqi@1 1172 //
aoqi@1 1173 // Inputs:
aoqi@1 1174 // c_rarg0 - source array address
aoqi@1 1175 // c_rarg1 - destination array address
aoqi@1 1176 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1177 //
aoqi@1 1178 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1179 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1180 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1181 //
aoqi@1 1182 // Side Effects:
aoqi@1 1183 // disjoint_int_copy_entry is set to the no-overlap entry point
aoqi@1 1184 // used by generate_conjoint_int_oop_copy().
aoqi@1 1185 //
aoqi@1 1186 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { // Emits the low-to-high copy stub for 4-byte elements (plus a store check when is_oop); returns the stub entry pc.
aoqi@1 1187 Label l_2, l_3, l_4, l_stchk;
aoqi@1 1188 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1189 __ align(CodeEntryAlignment);
aoqi@1 1190 address start = __ pc();
aoqi@1 1191 /*
aoqi@1 1192 __ pushl(esi);
aoqi@1 1193 __ movl(ecx, Address(esp, 4+12)); // count
aoqi@1 1194 __ pushl(edi);
aoqi@1 1195 __ movl(esi, Address(esp, 8+ 4)); // from
aoqi@1 1196 __ movl(edi, Address(esp, 8+ 8)); // to
aoqi@1 1197 */
aoqi@1 1198 __ push(T3); // saved temporaries, popped in reverse order at l_4
aoqi@1 1199 __ push(T0);
aoqi@1 1200 __ push(T1);
aoqi@1 1201 __ push(T8);
aoqi@1 1202 __ move(T1, A2); // T1 = element count
aoqi@1 1203 __ move(T3, A0); // T3 = from
aoqi@1 1204 __ move(T0, A1); // T0 = to
aoqi@1 1205
aoqi@1 1206 // __ cmpl(ecx, 32);
aoqi@1 1207 // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords
aoqi@1 1208 // __ rep_movl();
aoqi@1 1209 __ b(l_2); // unconditional: always take the simple loop at l_2
aoqi@1 1210 __ delayed()->nop();
aoqi@1 1211 if (is_oop) { // NOTE(review): from here up to bind(l_2) the emitted code follows the unconditional b(l_2) and binds no label, so it looks unreachable
aoqi@1 1212 // __ jmp(l_stchk);
aoqi@1 1213 __ b(l_stchk);
aoqi@1 1214 __ delayed()->nop();
aoqi@1 1215 }
aoqi@1 1216 // __ popl(edi);
aoqi@1 1217 // __ popl(esi);
aoqi@1 1218 // __ ret(0);
aoqi@1 1219 __ pop(T8);
aoqi@1 1220 __ pop(T1);
aoqi@1 1221 __ pop(T0);
aoqi@1 1222 __ pop(T3);
aoqi@1 1223 __ jr(RA);
aoqi@1 1224 __ delayed()->nop();
aoqi@1 1225
aoqi@1 1226 __ bind(l_2);
aoqi@1 1227 // __ subl(edi, esi);
aoqi@1 1228 // __ testl(ecx, ecx);
aoqi@1 1229 // __ jcc(Assembler::zero, l_4);
aoqi@1 1230 __ beq(T1, R0, l_4); // zero count: skip the loop and, for oops, the store check as well
aoqi@1 1231 __ delayed()->nop();
aoqi@1 1232 __ align(16);
aoqi@1 1233 __ bind(l_3);
aoqi@1 1234 //__ movl(edx, Address(esi));
aoqi@1 1235 __ lw(AT, T3, 0); // move one 4-byte element
aoqi@1 1236 // __ movl(Address(edi, esi, Address::times_1), edx);
aoqi@1 1237 __ sw(AT, T0, 0);
aoqi@1 1238 // __ addl(esi, 4);
aoqi@1 1239 __ addi(T3, T3, 4); // NOTE(review): addi shares its name with the 32-bit MIPS add-immediate; generate_disjoint_short_copy uses daddi on pointers - confirm this is safe for 64-bit addresses
aoqi@1 1240 __ addi(T0, T0, 4);
aoqi@1 1241 // __ decl(ecx);
aoqi@1 1242 __ addi(T1, T1, -1);
aoqi@1 1243 // __ jcc(Assembler::notEqual, l_3);
aoqi@1 1244 __ bne(T1, R0, l_3); // loop until the count reaches zero
aoqi@1 1245 __ delayed()->nop();
aoqi@1 1246 if (is_oop) {
aoqi@1 1247 __ bind(l_stchk);
aoqi@1 1248 // __ movl(edi, Address(esp, 8+ 8));
aoqi@1 1249 // __ movl(ecx, Address(esp, 8+ 12));
aoqi@1 1250 __ move(T0, A1); // reload the original destination and count
aoqi@1 1251 __ move(T1, A2);
aoqi@1 1252 array_store_check(); // helper defined elsewhere; presumably consumes T0/T1 to mark the written range - TODO confirm
aoqi@1 1253 }
aoqi@1 1254 __ bind(l_4);
aoqi@1 1255 // __ popl(edi);
aoqi@1 1256 // __ popl(esi);
aoqi@1 1257 // __ ret(0);
aoqi@1 1258 __ pop(T8); // restore temporaries in reverse push order
aoqi@1 1259 __ pop(T1);
aoqi@1 1260 __ pop(T0);
aoqi@1 1261 __ pop(T3);
aoqi@1 1262 __ jr(RA);
aoqi@1 1263 __ delayed()->nop();
aoqi@1 1264 return start;
aoqi@1 1265 }
aoqi@1 1266
aoqi@1 1267 // Arguments:
aoqi@1 1268 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1269 // ignored
aoqi@1 1270 // is_oop - true => oop array, so generate store check code
aoqi@1 1271 // name - stub name string
aoqi@1 1272 //
aoqi@1 1273 // Inputs:
aoqi@1 1274 // c_rarg0 - source array address
aoqi@1 1275 // c_rarg1 - destination array address
aoqi@1 1276 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1277 //
aoqi@1 1278 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1279 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1280 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1281 //
aoqi@1 1282 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { // Emits the 4-byte-element copy stub that walks from the last element down to the first (plus a store check when is_oop); returns the stub entry pc.
aoqi@1 1283 Label l_2, l_3, l_4, l_stchk; // l_3 is bound below but no branch targets it
aoqi@1 1284 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1285 __ align(CodeEntryAlignment);
aoqi@1 1286 address start = __ pc();
aoqi@1 1287 address nooverlap_target;
aoqi@1 1288
aoqi@1 1289 if (is_oop) { // pick the matching disjoint stub as the no-overlap target
aoqi@1 1290 nooverlap_target = aligned ?
aoqi@1 1291 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@1 1292 StubRoutines::oop_disjoint_arraycopy();
aoqi@1 1293 }else {
aoqi@1 1294 nooverlap_target = aligned ?
aoqi@1 1295 StubRoutines::arrayof_jint_disjoint_arraycopy() :
aoqi@1 1296 StubRoutines::jint_disjoint_arraycopy();
aoqi@1 1297 }
aoqi@1 1298
aoqi@1 1299 array_overlap_test(nooverlap_target, 2); // helper defined elsewhere; 2 is presumably log2(element size) - TODO confirm
aoqi@1 1300
aoqi@1 1301 __ push(T3); // saved temporaries, popped in reverse order in both epilogues
aoqi@1 1302 __ push(T0);
aoqi@1 1303 __ push(T1);
aoqi@1 1304 __ push(T8);
aoqi@1 1305
aoqi@1 1306 /*
aoqi@1 1307 __ pushl(esi);
aoqi@1 1308 __ movl(ecx, Address(esp, 4+12)); // count
aoqi@1 1309 __ pushl(edi);
aoqi@1 1310 __ movl(esi, Address(esp, 8+ 4)); // from
aoqi@1 1311 __ movl(edi, Address(esp, 8+ 8)); // to
aoqi@1 1312 */
aoqi@1 1313 __ move(T1, A2); // T1 = element count
aoqi@1 1314 __ move(T3, A0); // T3 = from
aoqi@1 1315 __ move(T0, A1); // T0 = to
aoqi@1 1316
aoqi@1 1317 //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
aoqi@1 1318 __ sll(AT, T1, Address::times_4); // NOTE(review): sll/add/addi share names with the 32-bit MIPS instructions - confirm these are safe for 64-bit addresses
aoqi@1 1319 __ add(AT, T3, AT);
aoqi@1 1320 __ lea(T3 , Address(AT, -4)); // T3 = address of the last source element
aoqi@1 1321 //__ std();
aoqi@1 1322 //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
aoqi@1 1323 __ sll(AT, T1, Address::times_4);
aoqi@1 1324 __ add(AT, T0, AT);
aoqi@1 1325 __ lea(T0 , Address(AT, -4)); // T0 = address of the last destination element
aoqi@1 1326
aoqi@1 1327 // __ cmpl(ecx, 32);
aoqi@1 1328 // __ jcc(Assembler::above, l_3); // > 32 dwords
aoqi@1 1329 // __ testl(ecx, ecx);
aoqi@1 1330 //__ jcc(Assembler::zero, l_4);
aoqi@1 1331 __ beq(T1, R0, l_4); // zero count: return without copying or store check
aoqi@1 1332 __ delayed()->nop();
aoqi@1 1333 // __ subl(edi, esi);
aoqi@1 1334 __ align(16);
aoqi@1 1335 __ bind(l_2);
aoqi@1 1336 // __ movl(edx, Address(esi));
aoqi@1 1337 __ lw(AT, T3, 0); // move one 4-byte element
aoqi@1 1338 // __ movl(Address(esi, edi, Address::times_1), edx);
aoqi@1 1339 __ sw(AT, T0, 0);
aoqi@1 1340 // __ subl(esi, 4);
aoqi@1 1341 __ addi(T3, T3, -4); // step both pointers down by one element
aoqi@1 1342 __ addi(T0, T0, -4);
aoqi@1 1343 // __ decl(ecx);
aoqi@1 1344 __ addi(T1, T1, -1);
aoqi@1 1345 //__ jcc(Assembler::notEqual, l_2);
aoqi@1 1346 __ bne(T1, R0, l_2); // loop until the count reaches zero
aoqi@1 1347 __ delayed()->nop();
aoqi@1 1348 if (is_oop) { // oop variant: jump over the plain epilogue to the store check
aoqi@1 1349 // __ jmp(l_stchk);
aoqi@1 1350 __ b( l_stchk);
aoqi@1 1351 __ delayed()->nop();
aoqi@1 1352 }
aoqi@1 1353 __ bind(l_4);
aoqi@1 1354 // __ cld();
aoqi@1 1355 // __ popl(edi);
aoqi@1 1356 // __ popl(esi);
aoqi@1 1357 // __ ret(0);
aoqi@1 1358 __ pop(T8); // epilogue used by the non-oop variant and by a zero count
aoqi@1 1359 __ pop(T1);
aoqi@1 1360 __ pop(T0);
aoqi@1 1361 __ pop(T3);
aoqi@1 1362 __ jr(RA);
aoqi@1 1363 __ delayed()->nop();
aoqi@1 1364 __ bind(l_3);
aoqi@1 1365 // __ rep_movl();
aoqi@1 1366 if (is_oop) {
aoqi@1 1367 __ bind(l_stchk);
aoqi@1 1368 // __ movl(edi, Address(esp, 8+ 8));
aoqi@1 1369 __ move(T0, A1); // reload the original destination and count
aoqi@1 1370 // __ movl(ecx, Address(esp, 8+ 12));
aoqi@1 1371 __ move(T1, A2);
aoqi@1 1372 array_store_check(); // helper defined elsewhere; presumably consumes T0/T1 - TODO confirm
aoqi@1 1373 }
aoqi@1 1374 // __ cld();
aoqi@1 1375 // __ popl(edi);
aoqi@1 1376 // __ popl(esi);
aoqi@1 1377 // __ ret(0);
aoqi@1 1378 __ pop(T8); // second epilogue, reached after the store check; when is_oop is false nothing branches here
aoqi@1 1379 __ pop(T1);
aoqi@1 1380 __ pop(T0);
aoqi@1 1381 __ pop(T3);
aoqi@1 1382 __ jr(RA);
aoqi@1 1383 __ delayed()->nop();
aoqi@1 1384 return start;
aoqi@1 1385 }
aoqi@1 1386
aoqi@1 1387 // Arguments:
aoqi@1 1388 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1389 // ignored
aoqi@1 1390 // is_oop - true => oop array, so generate store check code
aoqi@1 1391 // name - stub name string
aoqi@1 1392 //
aoqi@1 1393 // Inputs:
aoqi@1 1394 // c_rarg0 - source array address
aoqi@1 1395 // c_rarg1 - destination array address
aoqi@1 1396 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1397 //
aoqi@1 1398 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1399 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1400 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1401 //
aoqi@1 1402 // Side Effects:
aoqi@1 1403 // disjoint_int_copy_entry is set to the no-overlap entry point
aoqi@1 1404 // used by generate_conjoint_int_oop_copy().
aoqi@1 1405 //
aoqi@1 1406 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { // Emits the low-to-high copy stub for 8-byte elements (plus a store check when is_oop); returns the stub entry pc.
aoqi@1 1407 Label l_2, l_3, l_4, l_stchk;
aoqi@1 1408 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1409 __ align(CodeEntryAlignment);
aoqi@1 1410 address start = __ pc();
aoqi@1 1411 __ push(T3); // saved temporaries, popped in reverse order at l_4
aoqi@1 1412 __ push(T0);
aoqi@1 1413 __ push(T1);
aoqi@1 1414 __ push(T8);
aoqi@1 1415 __ move(T1, A2); // T1 = element count
aoqi@1 1416 __ move(T3, A0); // T3 = from
aoqi@1 1417 __ move(T0, A1); // T0 = to
aoqi@1 1418
aoqi@1 1419 // __ cmpl(ecx, 32);
aoqi@1 1420 // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords
aoqi@1 1421 // __ rep_movl();
aoqi@1 1422 __ b(l_2); // unconditional: always take the simple loop at l_2
aoqi@1 1423 __ delayed()->nop();
aoqi@1 1424 if (is_oop) { // NOTE(review): from here up to bind(l_2) the emitted code follows the unconditional b(l_2) and binds no label, so it looks unreachable
aoqi@1 1425 // __ jmp(l_stchk);
aoqi@1 1426 __ b(l_stchk);
aoqi@1 1427 __ delayed()->nop();
aoqi@1 1428 }
aoqi@1 1429 // __ popl(edi);
aoqi@1 1430 // __ popl(esi);
aoqi@1 1431 // __ ret(0);
aoqi@1 1432 __ pop(T8);
aoqi@1 1433 __ pop(T1);
aoqi@1 1434 __ pop(T0);
aoqi@1 1435 __ pop(T3);
aoqi@1 1436 __ jr(RA);
aoqi@1 1437 __ delayed()->nop();
aoqi@1 1438
aoqi@1 1439 __ bind(l_2);
aoqi@1 1440 // __ subl(edi, esi);
aoqi@1 1441 // __ testl(ecx, ecx);
aoqi@1 1442 // __ jcc(Assembler::zero, l_4);
aoqi@1 1443 __ beq(T1, R0, l_4); // zero count: skip the loop and, for oops, the store check as well
aoqi@1 1444 __ delayed()->nop();
aoqi@1 1445 __ align(16);
aoqi@1 1446 __ bind(l_3);
aoqi@1 1447 //__ movl(edx, Address(esi));
aoqi@1 1448 __ ld(AT, T3, 0); // move one 8-byte element
aoqi@1 1449 // __ movl(Address(edi, esi, Address::times_1), edx);
aoqi@1 1450 __ sd(AT, T0, 0);
aoqi@1 1451 // __ addl(esi, 4);
aoqi@1 1452 __ addi(T3, T3, 8); // advances by 8, not the 4 of the x86 line above. NOTE(review): addi shares its name with the 32-bit MIPS add-immediate - confirm this is safe for 64-bit addresses
aoqi@1 1453 __ addi(T0, T0, 8);
aoqi@1 1454 // __ decl(ecx);
aoqi@1 1455 __ addi(T1, T1, -1);
aoqi@1 1456 // __ jcc(Assembler::notEqual, l_3);
aoqi@1 1457 __ bne(T1, R0, l_3); // loop until the count reaches zero
aoqi@1 1458 __ delayed()->nop();
aoqi@1 1459 if (is_oop) {
aoqi@1 1460 __ bind(l_stchk);
aoqi@1 1461 // __ movl(edi, Address(esp, 8+ 8));
aoqi@1 1462 // __ movl(ecx, Address(esp, 8+ 12));
aoqi@1 1463 __ move(T0, A1); // reload the original destination and count
aoqi@1 1464 __ move(T1, A2);
aoqi@1 1465 array_store_check(); // helper defined elsewhere; presumably consumes T0/T1 - TODO confirm
aoqi@1 1466 }
aoqi@1 1467 __ bind(l_4);
aoqi@1 1468 // __ popl(edi);
aoqi@1 1469 // __ popl(esi);
aoqi@1 1470 // __ ret(0);
aoqi@1 1471 __ pop(T8); // restore temporaries in reverse push order
aoqi@1 1472 __ pop(T1);
aoqi@1 1473 __ pop(T0);
aoqi@1 1474 __ pop(T3);
aoqi@1 1475 __ jr(RA);
aoqi@1 1476 __ delayed()->nop();
aoqi@1 1477 return start;
aoqi@1 1478 }
aoqi@1 1479
aoqi@1 1480 // Arguments:
aoqi@1 1481 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
aoqi@1 1482 // ignored
aoqi@1 1483 // is_oop - true => oop array, so generate store check code
aoqi@1 1484 // name - stub name string
aoqi@1 1485 //
aoqi@1 1486 // Inputs:
aoqi@1 1487 // c_rarg0 - source array address
aoqi@1 1488 // c_rarg1 - destination array address
aoqi@1 1489 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1490 //
aoqi@1 1491 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
aoqi@1 1492 // the hardware handle it. The two dwords within qwords that span
aoqi@1 1493 // cache line boundaries will still be loaded and stored atomically.
aoqi@1 1494 //
aoqi@1 1495 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { // Emits the 8-byte-element copy stub that walks from the last element down to the first (plus a store check when is_oop); returns the stub entry pc.
aoqi@1 1496 Label l_2, l_3, l_4, l_stchk; // l_3 is bound below but no branch targets it
aoqi@1 1497 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1498 __ align(CodeEntryAlignment);
aoqi@1 1499 address start = __ pc();
aoqi@1 1500 address nooverlap_target;
aoqi@1 1501
aoqi@1 1502 if (is_oop) { // pick the matching disjoint stub as the no-overlap target
aoqi@1 1503 nooverlap_target = aligned ?
aoqi@1 1504 StubRoutines::arrayof_oop_disjoint_arraycopy() :
aoqi@1 1505 StubRoutines::oop_disjoint_arraycopy();
aoqi@1 1506 }else {
aoqi@1 1507 nooverlap_target = aligned ?
aoqi@1 1508 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@1 1509 StubRoutines::jlong_disjoint_arraycopy();
aoqi@1 1510 }
aoqi@1 1511
aoqi@1 1512 array_overlap_test(nooverlap_target, 3); // helper defined elsewhere; 3 is presumably log2(element size) - TODO confirm
aoqi@1 1513
aoqi@1 1514 __ push(T3); // saved temporaries, popped in reverse order in both epilogues
aoqi@1 1515 __ push(T0);
aoqi@1 1516 __ push(T1);
aoqi@1 1517 __ push(T8);
aoqi@1 1518
aoqi@1 1519 __ move(T1, A2); // T1 = element count
aoqi@1 1520 __ move(T3, A0); // T3 = from
aoqi@1 1521 __ move(T0, A1); // T0 = to
aoqi@1 1522
aoqi@1 1523 //__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
aoqi@1 1524 __ sll(AT, T1, Address::times_8); // the x86 line above is stale: this code scales by times_8. NOTE(review): sll/add/addi share names with the 32-bit MIPS instructions - confirm these are safe for 64-bit addresses
aoqi@1 1525 __ add(AT, T3, AT);
aoqi@1 1526 __ lea(T3 , Address(AT, -8)); // T3 = from + count*8 - 8 (last source element)
aoqi@1 1527 //__ std();
aoqi@1 1528 //__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
aoqi@1 1529 __ sll(AT, T1, Address::times_8);
aoqi@1 1530 __ add(AT, T0, AT);
aoqi@1 1531 __ lea(T0 , Address(AT, -8)); // T0 = to + count*8 - 8 (last destination element)
aoqi@1 1532
aoqi@1 1533 // __ cmpl(ecx, 32);
aoqi@1 1534 // __ jcc(Assembler::above, l_3); // > 32 dwords
aoqi@1 1535 // __ testl(ecx, ecx);
aoqi@1 1536 //__ jcc(Assembler::zero, l_4);
aoqi@1 1537 __ beq(T1, R0, l_4); // zero count: return without copying or store check
aoqi@1 1538 __ delayed()->nop();
aoqi@1 1539 // __ subl(edi, esi);
aoqi@1 1540 __ align(16);
aoqi@1 1541 __ bind(l_2);
aoqi@1 1542 // __ movl(edx, Address(esi));
aoqi@1 1543 __ ld(AT, T3, 0); // move one 8-byte element
aoqi@1 1544 // __ movl(Address(esi, edi, Address::times_1), edx);
aoqi@1 1545 __ sd(AT, T0, 0);
aoqi@1 1546 // __ subl(esi, 4);
aoqi@1 1547 __ addi(T3, T3, -8); // step both pointers down by one 8-byte element
aoqi@1 1548 __ addi(T0, T0, -8);
aoqi@1 1549 // __ decl(ecx);
aoqi@1 1550 __ addi(T1, T1, -1);
aoqi@1 1551 //__ jcc(Assembler::notEqual, l_2);
aoqi@1 1552 __ bne(T1, R0, l_2); // loop until the count reaches zero
aoqi@1 1553 __ delayed()->nop();
aoqi@1 1554 if (is_oop) { // oop variant: jump over the plain epilogue to the store check
aoqi@1 1555 // __ jmp(l_stchk);
aoqi@1 1556 __ b( l_stchk);
aoqi@1 1557 __ delayed()->nop();
aoqi@1 1558 }
aoqi@1 1559 __ bind(l_4);
aoqi@1 1560 // __ cld();
aoqi@1 1561 // __ popl(edi);
aoqi@1 1562 // __ popl(esi);
aoqi@1 1563 // __ ret(0);
aoqi@1 1564 __ pop(T8); // epilogue used by the non-oop variant and by a zero count
aoqi@1 1565 __ pop(T1);
aoqi@1 1566 __ pop(T0);
aoqi@1 1567 __ pop(T3);
aoqi@1 1568 __ jr(RA);
aoqi@1 1569 __ delayed()->nop();
aoqi@1 1570 __ bind(l_3);
aoqi@1 1571 // __ rep_movl();
aoqi@1 1572 if (is_oop) {
aoqi@1 1573 __ bind(l_stchk);
aoqi@1 1574 // __ movl(edi, Address(esp, 8+ 8));
aoqi@1 1575 __ move(T0, A1); // reload the original destination and count
aoqi@1 1576 // __ movl(ecx, Address(esp, 8+ 12));
aoqi@1 1577 __ move(T1, A2);
aoqi@1 1578 array_store_check(); // helper defined elsewhere; presumably consumes T0/T1 - TODO confirm
aoqi@1 1579 }
aoqi@1 1580 // __ cld();
aoqi@1 1581 // __ popl(edi);
aoqi@1 1582 // __ popl(esi);
aoqi@1 1583 // __ ret(0);
aoqi@1 1584 __ pop(T8); // second epilogue, reached after the store check; when is_oop is false nothing branches here
aoqi@1 1585 __ pop(T1);
aoqi@1 1586 __ pop(T0);
aoqi@1 1587 __ pop(T3);
aoqi@1 1588 __ jr(RA);
aoqi@1 1589 __ delayed()->nop();
aoqi@1 1590 return start;
aoqi@1 1591 }
aoqi@1 1592 #if 0
aoqi@1 1593 // Arguments:
aoqi@1 1594 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
aoqi@1 1595 // ignored
aoqi@1 1596 // is_oop - true => oop array, so generate store check code
aoqi@1 1597 // name - stub name string
aoqi@1 1598 //
aoqi@1 1599 // Inputs:
aoqi@1 1600 // c_rarg0 - source array address
aoqi@1 1601 // c_rarg1 - destination array address
aoqi@1 1602 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1603 //
aoqi@1 1604 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { // Not compiled: this definition sits inside the "#if 0" opened above and is written with x86_64 register names (rdi/rsi/rdx/rcx/rax).
aoqi@1 1605 __ align(CodeEntryAlignment);
aoqi@1 1606 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1607 address start = __ pc();
aoqi@1 1608
aoqi@1 1609 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
aoqi@1 1610 const Register from = rdi; // source array address
aoqi@1 1611 const Register to = rsi; // destination array address
aoqi@1 1612 const Register qword_count = rdx; // elements count
aoqi@1 1613 const Register saved_count = rcx;
aoqi@1 1614
aoqi@1 1615 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1616 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
aoqi@1 1617
aoqi@1 1618 address disjoint_copy_entry = NULL;
aoqi@1 1619 if (is_oop) {
aoqi@1 1620 assert(!UseCompressedOops, "shouldn't be called for compressed oops");
aoqi@1 1621 disjoint_copy_entry = disjoint_oop_copy_entry;
aoqi@1 1622 oop_copy_entry = __ pc(); // records the current pc as the oop copy entry point
aoqi@1 1623 array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
aoqi@1 1624 } else {
aoqi@1 1625 disjoint_copy_entry = disjoint_long_copy_entry;
aoqi@1 1626 long_copy_entry = __ pc(); // records the current pc as the long copy entry point
aoqi@1 1627 array_overlap_test(disjoint_long_copy_entry, Address::times_8);
aoqi@1 1628 }
aoqi@1 1629 BLOCK_COMMENT("Entry:");
aoqi@1 1630 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
aoqi@1 1631
aoqi@1 1632 array_overlap_test(disjoint_copy_entry, Address::times_8); // NOTE(review): an overlap test was already emitted in each branch above, so this one is a second emission with the same target
aoqi@1 1633 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
aoqi@1 1634 // r9 and r10 may be used to save non-volatile registers
aoqi@1 1635
aoqi@1 1636 // 'from', 'to' and 'qword_count' are now valid
aoqi@1 1637
aoqi@1 1638 if (is_oop) {
aoqi@1 1639 // Save to and count for store barrier
aoqi@1 1640 __ movptr(saved_count, qword_count);
aoqi@1 1641 // No registers are destroyed by this call
aoqi@1 1642 gen_write_ref_array_pre_barrier(to, saved_count);
aoqi@1 1643 }
aoqi@1 1644
aoqi@1 1645 __ jmp(L_copy_32_bytes);
aoqi@1 1646
aoqi@1 1647 // Copy trailing qwords
aoqi@1 1648 __ BIND(L_copy_8_bytes);
aoqi@1 1649 __ movq(rax, Address(from, qword_count, Address::times_8, -8)); // element at index qword_count - 1
aoqi@1 1650 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
aoqi@1 1651 __ decrement(qword_count);
aoqi@1 1652 __ jcc(Assembler::notZero, L_copy_8_bytes);
aoqi@1 1653
aoqi@1 1654 if (is_oop) {
aoqi@1 1655 __ jmp(L_exit); // oop variant shares the barrier/epilogue bound at L_exit below
aoqi@1 1656 } else {
aoqi@1 1657 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
aoqi@1 1658 restore_arg_regs();
aoqi@1 1659 __ xorptr(rax, rax); // return 0
aoqi@1 1660 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1661 __ ret(0);
aoqi@1 1662 }
aoqi@1 1663
aoqi@1 1664 // Copy in 32-bytes chunks
aoqi@1 1665 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
aoqi@1 1666
aoqi@1 1667 if (is_oop) {
aoqi@1 1668 __ BIND(L_exit);
aoqi@1 1669 __ lea(rcx, Address(to, saved_count, Address::times_8, -8)); // rcx = address of the last destination element
aoqi@1 1670 gen_write_ref_array_post_barrier(to, rcx, rax);
aoqi@1 1671 inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
aoqi@1 1672 } else {
aoqi@1 1673 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
aoqi@1 1674 }
aoqi@1 1675 restore_arg_regs();
aoqi@1 1676 __ xorptr(rax, rax); // return 0
aoqi@1 1677 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1678 __ ret(0);
aoqi@1 1679
aoqi@1 1680 return start;
aoqi@1 1681 }
aoqi@1 1682
aoqi@1 1683
aoqi@1 1684 // Helper for generating a dynamic type check.
aoqi@1 1685 // Smashes no registers.
//
// Emits code that decides whether sub_klass is a subtype of super_klass:
//   1. succeed if the two klass pointers are equal;
//   2. succeed if the word at [sub_klass + super_check_offset] equals
//      super_klass;
//   3. otherwise, only when super_check_offset equals the offset of the
//      secondary super cache, scan sub_klass's secondary supers array and,
//      on a hit, store super_klass into the secondary super cache.
//
// Parameters:
//   sub_klass          - register holding the klass being tested
//   super_check_offset - register holding the byte offset used in step 2
//   super_klass        - register holding the klass to test against
//   L_success          - label the emitted code jumps to on success
//
// On failure the emitted code falls through to whatever the caller emits
// next (L_miss is bound as the last thing in this helper).
aoqi@1 1686 void generate_type_check(Register sub_klass,
aoqi@1 1687 Register super_check_offset,
aoqi@1 1688 Register super_klass,
aoqi@1 1689 Label& L_success) {
aoqi@1 1690 assert_different_registers(sub_klass, super_check_offset, super_klass);
aoqi@1 1691
aoqi@1 1692 BLOCK_COMMENT("type_check:");
aoqi@1 1693
aoqi@1 1694 Label L_miss;
aoqi@1 1695
aoqi@1 1696 // a couple of useful fields in sub_klass:
aoqi@1 1697 int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
aoqi@1 1698 Klass::secondary_supers_offset_in_bytes());
aoqi@1 1699 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
aoqi@1 1700 Klass::secondary_super_cache_offset_in_bytes());
aoqi@1 1701 Address secondary_supers_addr(sub_klass, ss_offset);
aoqi@1 1702 Address super_cache_addr( sub_klass, sc_offset);
aoqi@1 1703
aoqi@1 1704 // if the pointers are equal, we are done (e.g., String[] elements)
aoqi@1 1705 __ cmpptr(super_klass, sub_klass);
aoqi@1 1706 __ jcc(Assembler::equal, L_success);
aoqi@1 1707
aoqi@1 1708 // check the supertype display:
aoqi@1 1709 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
aoqi@1 1710 __ cmpptr(super_klass, super_check_addr); // test the super type
aoqi@1 1711 __ jcc(Assembler::equal, L_success);
aoqi@1 1712
aoqi@1 1713 // if it was a primary super, we can just fail immediately
// (the slow scan below is only attempted when the offset just compared
// is the secondary super cache slot itself)
aoqi@1 1714 __ cmpl(super_check_offset, sc_offset);
aoqi@1 1715 __ jcc(Assembler::notEqual, L_miss);
aoqi@1 1716
aoqi@1 1717 // Now do a linear scan of the secondary super-klass chain.
aoqi@1 1718 // The repne_scan instruction uses fixed registers, which we must spill.
aoqi@1 1719 // (We need a couple more temps in any case.)
aoqi@1 1720 // This code is rarely used, so simplicity is a virtue here.
aoqi@1 1721 inc_counter_np(SharedRuntime::_partial_subtype_ctr);
aoqi@1 1722 {
// Save the three registers the scan uses; they are restored in reverse
// order once the scan has finished.
aoqi@1 1723 __ push(rax);
aoqi@1 1724 __ push(rcx);
aoqi@1 1725 __ push(rdi);
// sub_klass and super_klass are still read below, so neither may be one
// of the registers that is about to be overwritten.
aoqi@1 1726 assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
aoqi@1 1727
aoqi@1 1728 __ movptr(rdi, secondary_supers_addr);
aoqi@1 1729 // Load the array length.
aoqi@1 1730 __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
aoqi@1 1731 // Skip to start of data.
aoqi@1 1732 __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
aoqi@1 1733 // Scan rcx words at [rdi] for occurrence of rax
aoqi@1 1734 // Set NZ/Z based on last compare
aoqi@1 1735 __ movptr(rax, super_klass);
aoqi@1 1736 if (UseCompressedOops) {
aoqi@1 1737 // Compare against compressed form. Don't need to uncompress because
aoqi@1 1738 // looks like orig rax is restored in popq below.
aoqi@1 1739 __ encode_heap_oop(rax);
aoqi@1 1740 __ repne_scanl();
aoqi@1 1741 } else {
aoqi@1 1742 __ repne_scan();
aoqi@1 1743 }
aoqi@1 1744
aoqi@1 1745 // Unspill the temp. registers:
aoqi@1 1746 __ pop(rdi);
aoqi@1 1747 __ pop(rcx);
aoqi@1 1748 __ pop(rax);
aoqi@1 1749
// The branch is emitted after the pops but consumes the scan's result;
// presumably pop leaves the condition flags untouched - confirm.
aoqi@1 1750 __ jcc(Assembler::notEqual, L_miss);
aoqi@1 1751 }
aoqi@1 1752
aoqi@1 1753 // Success. Cache the super we found and proceed in triumph.
aoqi@1 1754 __ movptr(super_cache_addr, super_klass); // note: rax is dead
aoqi@1 1755 __ jmp(L_success);
aoqi@1 1756
aoqi@1 1757 // Fall through on failure!
aoqi@1 1758 __ BIND(L_miss);
aoqi@1 1759 }
aoqi@1 1760
aoqi@1 1761 //
aoqi@1 1762 // Generate checkcasting array copy stub
aoqi@1 1763 //
aoqi@1 1764 // Input:
aoqi@1 1765 // c_rarg0 - source array address
aoqi@1 1766 // c_rarg1 - destination array address
aoqi@1 1767 // c_rarg2 - element count, treated as ssize_t, can be zero
aoqi@1 1768 // c_rarg3 - size_t ckoff (super_check_offset)
aoqi@1 1769 // not Win64
aoqi@1 1770 // c_rarg4 - oop ckval (super_klass)
aoqi@1 1771 // Win64
aoqi@1 1772 // rsp+40 - oop ckval (super_klass)
aoqi@1 1773 //
aoqi@1 1774 // Output:
aoqi@1 1775 // rax == 0 - success
aoqi@1 1776 // rax == -1^K - failure, where K is partial transfer count
aoqi@1 1777 //
// Parameter:
//   name - stub name handed to StubCodeMark
//
// Returns the address at which the generated stub starts. The address
// immediately after the frame setup (enter()) is additionally stored in
// checkcast_copy_entry.
//
// Each element is loaded, type-checked with generate_type_check (null
// elements skip the check) and then stored. The loop stops at the first
// element that fails the check.
aoqi@1 1778 address generate_checkcast_copy(const char *name) {
aoqi@1 1779
aoqi@1 1780 Label L_load_element, L_store_element, L_do_card_marks, L_done;
aoqi@1 1781
aoqi@1 1782 // Input registers (after setup_arg_regs)
aoqi@1 1783 const Register from = rdi; // source array address
aoqi@1 1784 const Register to = rsi; // destination array address
aoqi@1 1785 const Register length = rdx; // elements count
aoqi@1 1786 const Register ckoff = rcx; // super_check_offset
aoqi@1 1787 const Register ckval = r8; // super_klass
aoqi@1 1788
aoqi@1 1789 // Registers used as temps (r13, r14 are save-on-entry)
aoqi@1 1790 const Register end_from = from; // source array end address
aoqi@1 1791 const Register end_to = r13; // destination array end address
aoqi@1 1792 const Register count = rdx; // -(count_remaining)
aoqi@1 1793 const Register r14_length = r14; // saved copy of length
aoqi@1 1794 // End pointers are inclusive, and if length is not zero they point
aoqi@1 1795 // to the last unit copied: end_to[0] := end_from[0]
// NOTE(review): end_from_addr/end_to_addr further down are built with
// displacement 0 and are described there as exclusive; only the success
// path later lowers end_to by wordSize. Reconcile the two comments.
aoqi@1 1796
aoqi@1 1797 const Register rax_oop = rax; // actual oop copied
aoqi@1 1798 const Register r11_klass = r11; // oop._klass
aoqi@1 1799
aoqi@1 1800 //---------------------------------------------------------------
aoqi@1 1801 // Assembler stub will be used for this call to arraycopy
aoqi@1 1802 // if the two arrays are subtypes of Object[] but the
aoqi@1 1803 // destination array type is not equal to or a supertype
aoqi@1 1804 // of the source type. Each element must be separately
aoqi@1 1805 // checked.
aoqi@1 1806
aoqi@1 1807 __ align(CodeEntryAlignment);
aoqi@1 1808 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1809 address start = __ pc();
aoqi@1 1810
aoqi@1 1811 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1812
// Secondary entry point: recorded after enter(), so code jumping here
// must already have set up the frame (generate_generic_copy does).
aoqi@1 1813 checkcast_copy_entry = __ pc();
aoqi@1 1814 BLOCK_COMMENT("Entry:");
aoqi@1 1815
aoqi@1 1816 #ifdef ASSERT
aoqi@1 1817 // caller guarantees that the arrays really are different
aoqi@1 1818 // otherwise, we would have to make conjoint checks
aoqi@1 1819 { Label L;
aoqi@1 1820 array_overlap_test(L, TIMES_OOP);
aoqi@1 1821 __ stop("checkcast_copy within a single array");
aoqi@1 1822 __ bind(L);
aoqi@1 1823 }
aoqi@1 1824 #endif //ASSERT
aoqi@1 1825
aoqi@1 1826 // allocate spill slots for r13, r14
// The enumerators are word indices relative to rsp after the subptr below.
// Only the first two words are allocated here (rsp is lowered by
// saved_rbp_offset == 2 words); the remaining names refer to words that
// already sit above them - presumably the rbp pushed by enter(), the
// return address and the first caller argument slot.
aoqi@1 1827 enum {
aoqi@1 1828 saved_r13_offset,
aoqi@1 1829 saved_r14_offset,
aoqi@1 1830 saved_rbp_offset,
aoqi@1 1831 saved_rip_offset,
aoqi@1 1832 saved_rarg0_offset
aoqi@1 1833 };
aoqi@1 1834 __ subptr(rsp, saved_rbp_offset * wordSize);
aoqi@1 1835 __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
aoqi@1 1836 __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
aoqi@1 1837 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
aoqi@1 1838 // ckoff => rcx, ckval => r8
aoqi@1 1839 // r9 and r10 may be used to save non-volatile registers
aoqi@1 1840 #ifdef _WIN64
aoqi@1 1841 // last argument (#4) is on stack on Win64
aoqi@1 1842 const int ckval_offset = saved_rarg0_offset + 4;
aoqi@1 1843 __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
aoqi@1 1844 #endif
aoqi@1 1845
aoqi@1 1846 // check that int operands are properly extended to size_t
aoqi@1 1847 assert_clean_int(length, rax);
aoqi@1 1848 assert_clean_int(ckoff, rax);
aoqi@1 1849
aoqi@1 1850 #ifdef ASSERT
aoqi@1 1851 BLOCK_COMMENT("assert consistent ckoff/ckval");
aoqi@1 1852 // The ckoff and ckval must be mutually consistent,
aoqi@1 1853 // even though caller generates both.
aoqi@1 1854 { Label L;
aoqi@1 1855 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
aoqi@1 1856 Klass::super_check_offset_offset_in_bytes());
aoqi@1 1857 __ cmpl(ckoff, Address(ckval, sco_offset));
aoqi@1 1858 __ jcc(Assembler::equal, L);
aoqi@1 1859 __ stop("super_check_offset inconsistent");
aoqi@1 1860 __ bind(L);
aoqi@1 1861 }
aoqi@1 1862 #endif //ASSERT
aoqi@1 1863
aoqi@1 1864 // Loop-invariant addresses. They are exclusive end pointers.
aoqi@1 1865 Address end_from_addr(from, length, TIMES_OOP, 0);
aoqi@1 1866 Address end_to_addr(to, length, TIMES_OOP, 0);
aoqi@1 1867 // Loop-variant addresses. They assume post-incremented count < 0.
aoqi@1 1868 Address from_element_addr(end_from, count, TIMES_OOP, 0);
aoqi@1 1869 Address to_element_addr(end_to, count, TIMES_OOP, 0);
aoqi@1 1870
aoqi@1 1871 gen_write_ref_array_pre_barrier(to, count);
aoqi@1 1872
aoqi@1 1873 // Copy from low to high addresses, indexed from the end of each array.
aoqi@1 1874 __ lea(end_from, end_from_addr);
aoqi@1 1875 __ lea(end_to, end_to_addr);
aoqi@1 1876 __ movptr(r14_length, length); // save a copy of the length
aoqi@1 1877 assert(length == count, ""); // else fix next line:
aoqi@1 1878 __ negptr(count); // negate and test the length
aoqi@1 1879 __ jcc(Assembler::notZero, L_load_element);
aoqi@1 1880
aoqi@1 1881 // Empty array: Nothing to do.
aoqi@1 1882 __ xorptr(rax, rax); // return 0 on (trivial) success
aoqi@1 1883 __ jmp(L_done);
aoqi@1 1884
aoqi@1 1885 // ======== begin loop ========
aoqi@1 1886 // (Loop is rotated; its entry is L_load_element.)
aoqi@1 1887 // Loop control:
aoqi@1 1888 // for (count = -count; count != 0; count++)
aoqi@1 1889 // Base pointers src, dst are biased by 8*(count-1),to last element.
aoqi@1 1890 __ align(16);
aoqi@1 1891
aoqi@1 1892 __ BIND(L_store_element);
aoqi@1 1893 __ store_heap_oop(rax_oop, to_element_addr); // store the oop
aoqi@1 1894 __ increment(count); // increment the count toward zero
aoqi@1 1895 __ jcc(Assembler::zero, L_do_card_marks);
aoqi@1 1896
aoqi@1 1897 // ======== loop entry is here ========
aoqi@1 1898 __ BIND(L_load_element);
aoqi@1 1899 __ load_heap_oop(rax_oop, from_element_addr); // load the oop
// A null element needs no type check and is stored directly.
aoqi@1 1900 __ testptr(rax_oop, rax_oop);
aoqi@1 1901 __ jcc(Assembler::zero, L_store_element);
aoqi@1 1902
aoqi@1 1903 __ load_klass(r11_klass, rax_oop);// query the object klass
// On success the check jumps back to L_store_element; on failure it
// falls through into the error path below.
aoqi@1 1904 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
aoqi@1 1905 // ======== end loop ========
aoqi@1 1906
aoqi@1 1907 // It was a real error; we must depend on the caller to finish the job.
aoqi@1 1908 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
aoqi@1 1909 // Emit GC store barriers for the oops we have copied (r14 + rdx),
aoqi@1 1910 // and report their number to the caller.
aoqi@1 1911 assert_different_registers(rax, r14_length, count, to, end_to, rcx);
// NOTE(review): end_to becomes the address of the element that failed the
// check and is passed to the post barrier as-is, whereas the success path
// below first lowers end_to to an inclusive pointer - confirm intended.
aoqi@1 1912 __ lea(end_to, to_element_addr);
aoqi@1 1913 gen_write_ref_array_post_barrier(to, end_to, rscratch1);
aoqi@1 1914 __ movptr(rax, r14_length); // original oops
aoqi@1 1915 __ addptr(rax, count); // K = (original - remaining) oops
aoqi@1 1916 __ notptr(rax); // report (-1^K) to caller
aoqi@1 1917 __ jmp(L_done);
aoqi@1 1918
aoqi@1 1919 // Come here on success only.
aoqi@1 1920 __ BIND(L_do_card_marks);
// NOTE(review): element addresses are scaled by TIMES_OOP while this
// adjustment uses wordSize - confirm the two agree in every configuration.
aoqi@1 1921 __ addptr(end_to, -wordSize); // make an inclusive end pointer
aoqi@1 1922 gen_write_ref_array_post_barrier(to, end_to, rscratch1);
aoqi@1 1923 __ xorptr(rax, rax); // return 0 on success
aoqi@1 1924
aoqi@1 1925 // Common exit point (success or failure).
aoqi@1 1926 __ BIND(L_done);
aoqi@1 1927 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
aoqi@1 1928 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
aoqi@1 1929 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
aoqi@1 1930 restore_arg_regs();
aoqi@1 1931 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1932 __ ret(0);
aoqi@1 1933
aoqi@1 1934 return start;
aoqi@1 1935 }
aoqi@1 1936
aoqi@1 1937 //
aoqi@1 1938 // Generate 'unsafe' array copy stub
aoqi@1 1939 // Though just as safe as the other stubs, it takes an unscaled
aoqi@1 1940 // size_t argument instead of an element count.
aoqi@1 1941 //
aoqi@1 1942 // Input:
aoqi@1 1943 // c_rarg0 - source array address
aoqi@1 1944 // c_rarg1 - destination array address
aoqi@1 1945 // c_rarg2 - byte count, treated as ssize_t, can be zero
aoqi@1 1946 //
aoqi@1 1947 // Examines the alignment of the operands and dispatches
aoqi@1 1948 // to a long, int, short, or byte copy loop.
aoqi@1 1949 //
// Parameter:
//   name - stub name handed to StubCodeMark
//
// Returns the address at which the generated stub starts.
//
// The stub copies nothing itself: it converts the byte count into an
// element count where needed and jumps to byte_copy_entry,
// short_copy_entry, int_copy_entry or long_copy_entry.
aoqi@1 1950 address generate_unsafe_copy(const char *name) {
aoqi@1 1951
aoqi@1 1952 Label L_long_aligned, L_int_aligned, L_short_aligned;
aoqi@1 1953
aoqi@1 1954 // Input registers (before setup_arg_regs)
aoqi@1 1955 const Register from = c_rarg0; // source array address
aoqi@1 1956 const Register to = c_rarg1; // destination array address
aoqi@1 1957 const Register size = c_rarg2; // byte count (size_t)
aoqi@1 1958
aoqi@1 1959 // Register used as a temp
aoqi@1 1960 const Register bits = rax; // test copy of low bits
aoqi@1 1961
aoqi@1 1962 __ align(CodeEntryAlignment);
aoqi@1 1963 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 1964 address start = __ pc();
aoqi@1 1965
aoqi@1 1966 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 1967
aoqi@1 1968 // bump this on entry, not on exit:
aoqi@1 1969 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
aoqi@1 1970
// OR the two addresses and the size together so that a single test of the
// low bits covers the alignment of all three at once.
aoqi@1 1971 __ mov(bits, from);
aoqi@1 1972 __ orptr(bits, to);
aoqi@1 1973 __ orptr(bits, size);
aoqi@1 1974
aoqi@1 1975 __ testb(bits, BytesPerLong-1);
aoqi@1 1976 __ jccb(Assembler::zero, L_long_aligned);
aoqi@1 1977
aoqi@1 1978 __ testb(bits, BytesPerInt-1);
aoqi@1 1979 __ jccb(Assembler::zero, L_int_aligned);
aoqi@1 1980
// Not even short-aligned: hand over to the byte copy with size unchanged.
aoqi@1 1981 __ testb(bits, BytesPerShort-1);
aoqi@1 1982 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
aoqi@1 1983
// L_short_aligned is reached by fall-through only; nothing in this
// function branches to it.
aoqi@1 1984 __ BIND(L_short_aligned);
aoqi@1 1985 __ shrptr(size, LogBytesPerShort); // size => short_count
aoqi@1 1986 __ jump(RuntimeAddress(short_copy_entry));
aoqi@1 1987
aoqi@1 1988 __ BIND(L_int_aligned);
aoqi@1 1989 __ shrptr(size, LogBytesPerInt); // size => int_count
aoqi@1 1990 __ jump(RuntimeAddress(int_copy_entry));
aoqi@1 1991
aoqi@1 1992 __ BIND(L_long_aligned);
aoqi@1 1993 __ shrptr(size, LogBytesPerLong); // size => qword_count
aoqi@1 1994 __ jump(RuntimeAddress(long_copy_entry));
aoqi@1 1995
aoqi@1 1996 return start;
aoqi@1 1997 }
aoqi@1 1998
aoqi@1 1999 // Perform range checks on the proposed arraycopy.
aoqi@1 2000 // Kills temp, but nothing else.
aoqi@1 2001 // Also, clean the sign bits of src_pos and dst_pos.
//
// Parameters:
//   src, dst         - registers holding the source / destination array oops
//   src_pos, dst_pos - registers holding the 32-bit start positions; both
//                      are rewritten in place by the final movslq
//   length           - 32-bit element count; passed straight to movl, and
//                      one caller in this file supplies C_RARG4 for it
//   temp             - scratch register, overwritten
//   L_failed         - label taken when either range check fails
aoqi@1 2002 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
aoqi@1 2003 Register src_pos, // source position (c_rarg1)
aoqi@1 2004 Register dst, // destination array oop (c_rarg2)
aoqi@1 2005 Register dst_pos, // destination position (c_rarg3)
aoqi@1 2006 Register length,
aoqi@1 2007 Register temp,
aoqi@1 2008 Label& L_failed) {
aoqi@1 2009 BLOCK_COMMENT("arraycopy_range_checks:");
aoqi@1 2010
aoqi@1 2011 // if (src_pos + length > arrayOop(src)->length()) FAIL;
// The sum is formed and compared as a 32-bit value; 'above' is the
// unsigned greater-than condition.
aoqi@1 2012 __ movl(temp, length);
aoqi@1 2013 __ addl(temp, src_pos); // src_pos + length
aoqi@1 2014 __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
aoqi@1 2015 __ jcc(Assembler::above, L_failed);
aoqi@1 2016
aoqi@1 2017 // if (dst_pos + length > arrayOop(dst)->length()) FAIL;
aoqi@1 2018 __ movl(temp, length);
aoqi@1 2019 __ addl(temp, dst_pos); // dst_pos + length
aoqi@1 2020 __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
aoqi@1 2021 __ jcc(Assembler::above, L_failed);
aoqi@1 2022
aoqi@1 2023 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
aoqi@1 2024 // Move with sign extension can be used since they are positive.
aoqi@1 2025 __ movslq(src_pos, src_pos);
aoqi@1 2026 __ movslq(dst_pos, dst_pos);
aoqi@1 2027
aoqi@1 2028 BLOCK_COMMENT("arraycopy_range_checks done");
aoqi@1 2029 }
aoqi@1 2030
aoqi@1 2031 //
aoqi@1 2032 // Generate generic array copy stubs
aoqi@1 2033 //
aoqi@1 2034 // Input:
aoqi@1 2035 // c_rarg0 - src oop
aoqi@1 2036 // c_rarg1 - src_pos (32-bits)
aoqi@1 2037 // c_rarg2 - dst oop
aoqi@1 2038 // c_rarg3 - dst_pos (32-bits)
aoqi@1 2039 // not Win64
aoqi@1 2040 // c_rarg4 - element count (32-bits)
aoqi@1 2041 // Win64
aoqi@1 2042 // rsp+40 - element count (32-bits)
aoqi@1 2043 //
aoqi@1 2044 // Output:
aoqi@1 2045 // rax == 0 - success
aoqi@1 2046 // rax == -1^K - failure, where K is partial transfer count
aoqi@1 2047 //
// Parameter:
//   name - stub name handed to StubCodeMark
//
// Returns the address of the aligned entry point ('start'), which lies
// immediately after the short jmp emitted at L_failed_0.
//
// The stub validates its arguments, then tail-jumps to byte_copy_entry,
// short_copy_entry, int_copy_entry, long_copy_entry, oop_copy_entry or
// checkcast_copy_entry. Its own failure exit (L_failed) returns -1 in rax.
aoqi@1 2048 address generate_generic_copy(const char *name) {
aoqi@1 2049
aoqi@1 2050 Label L_failed, L_failed_0, L_objArray;
aoqi@1 2051 Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
aoqi@1 2052
aoqi@1 2053 // Input registers
aoqi@1 2054 const Register src = c_rarg0; // source array oop
aoqi@1 2055 const Register src_pos = c_rarg1; // source position
aoqi@1 2056 const Register dst = c_rarg2; // destination array oop
aoqi@1 2057 const Register dst_pos = c_rarg3; // destination position
aoqi@1 2058 // elements count is on stack on Win64
// NOTE(review): C_RARG4 is defined here and no matching #undef appears
// inside this function.
aoqi@1 2059 #ifdef _WIN64
aoqi@1 2060 #define C_RARG4 Address(rsp, 6 * wordSize)
aoqi@1 2061 #else
aoqi@1 2062 #define C_RARG4 c_rarg4
aoqi@1 2063 #endif
aoqi@1 2064
// Pad with nops so that the 5-byte jmp emitted at L_failed_0 ends exactly
// on a CodeEntryAlignment boundary; the assert after the jmp checks this.
aoqi@1 2065 { int modulus = CodeEntryAlignment;
aoqi@1 2066 int target = modulus - 5; // 5 = sizeof jmp(L_failed)
aoqi@1 2067 int advance = target - (__ offset() % modulus);
aoqi@1 2068 if (advance < 0) advance += modulus;
aoqi@1 2069 if (advance > 0) __ nop(advance);
aoqi@1 2070 }
aoqi@1 2071 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 2072
aoqi@1 2073 // Short-hop target to L_failed. Makes for denser prologue code.
aoqi@1 2074 __ BIND(L_failed_0);
aoqi@1 2075 __ jmp(L_failed);
aoqi@1 2076 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
aoqi@1 2077
aoqi@1 2078 __ align(CodeEntryAlignment);
aoqi@1 2079 address start = __ pc();
aoqi@1 2080
aoqi@1 2081 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 2082
aoqi@1 2083 // bump this on entry, not on exit:
aoqi@1 2084 inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
aoqi@1 2085
aoqi@1 2086 //-----------------------------------------------------------------------
aoqi@1 2087 // Assembler stub will be used for this call to arraycopy
aoqi@1 2088 // if the following conditions are met:
aoqi@1 2089 //
aoqi@1 2090 // (1) src and dst must not be null.
aoqi@1 2091 // (2) src_pos must not be negative.
aoqi@1 2092 // (3) dst_pos must not be negative.
aoqi@1 2093 // (4) length must not be negative.
aoqi@1 2094 // (5) src klass and dst klass should be the same and not NULL.
aoqi@1 2095 // (6) src and dst should be arrays.
aoqi@1 2096 // (7) src_pos + length must not exceed length of src.
aoqi@1 2097 // (8) dst_pos + length must not exceed length of dst.
aoqi@1 2098 //
aoqi@1 2099
aoqi@1 2100 // if (src == NULL) return -1;
aoqi@1 2101 __ testptr(src, src); // src oop
// Code offsets of the first and fourth short jumps are recorded for the
// guarantee() further down.
aoqi@1 2102 size_t j1off = __ offset();
aoqi@1 2103 __ jccb(Assembler::zero, L_failed_0);
aoqi@1 2104
aoqi@1 2105 // if (src_pos < 0) return -1;
aoqi@1 2106 __ testl(src_pos, src_pos); // src_pos (32-bits)
aoqi@1 2107 __ jccb(Assembler::negative, L_failed_0);
aoqi@1 2108
aoqi@1 2109 // if (dst == NULL) return -1;
aoqi@1 2110 __ testptr(dst, dst); // dst oop
aoqi@1 2111 __ jccb(Assembler::zero, L_failed_0);
aoqi@1 2112
aoqi@1 2113 // if (dst_pos < 0) return -1;
aoqi@1 2114 __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
aoqi@1 2115 size_t j4off = __ offset();
aoqi@1 2116 __ jccb(Assembler::negative, L_failed_0);
aoqi@1 2117
aoqi@1 2118 // The first four tests are very dense code,
aoqi@1 2119 // but not quite dense enough to put four
aoqi@1 2120 // jumps in a 16-byte instruction fetch buffer.
aoqi@1 2121 // That's good, because some branch predictors
aoqi@1 2122 // do not like jumps so close together.
aoqi@1 2123 // Make sure of this.
aoqi@1 2124 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
aoqi@1 2125
aoqi@1 2126 // registers used as temp
aoqi@1 2127 const Register r11_length = r11; // elements count to copy
aoqi@1 2128 const Register r10_src_klass = r10; // array klass
aoqi@1 2129 const Register r9_dst_klass = r9; // dest array klass
aoqi@1 2130
aoqi@1 2131 // if (length < 0) return -1;
aoqi@1 2132 __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value)
aoqi@1 2133 __ testl(r11_length, r11_length);
aoqi@1 2134 __ jccb(Assembler::negative, L_failed_0);
aoqi@1 2135
aoqi@1 2136 __ load_klass(r10_src_klass, src);
aoqi@1 2137 #ifdef ASSERT
aoqi@1 2138 // assert(src->klass() != NULL);
aoqi@1 2139 BLOCK_COMMENT("assert klasses not null");
aoqi@1 2140 { Label L1, L2;
aoqi@1 2141 __ testptr(r10_src_klass, r10_src_klass);
aoqi@1 2142 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
aoqi@1 2143 __ bind(L1);
aoqi@1 2144 __ stop("broken null klass");
aoqi@1 2145 __ bind(L2);
aoqi@1 2146 __ load_klass(r9_dst_klass, dst);
aoqi@1 2147 __ cmpq(r9_dst_klass, 0);
aoqi@1 2148 __ jcc(Assembler::equal, L1); // this would be broken also
aoqi@1 2149 BLOCK_COMMENT("assert done");
aoqi@1 2150 }
aoqi@1 2151 #endif
aoqi@1 2152
aoqi@1 2153 // Load layout helper (32-bits)
aoqi@1 2154 //
aoqi@1 2155 // |array_tag| | header_size | element_type | |log2_element_size|
aoqi@1 2156 // 32 30 24 16 8 2 0
aoqi@1 2157 //
aoqi@1 2158 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
aoqi@1 2159 //
aoqi@1 2160
aoqi@1 2161 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
aoqi@1 2162 Klass::layout_helper_offset_in_bytes();
aoqi@1 2163
aoqi@1 2164 const Register rax_lh = rax; // layout helper
aoqi@1 2165
aoqi@1 2166 __ movl(rax_lh, Address(r10_src_klass, lh_offset));
aoqi@1 2167
aoqi@1 2168 // Handle objArrays completely differently...
aoqi@1 2169 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
aoqi@1 2170 __ cmpl(rax_lh, objArray_lh);
aoqi@1 2171 __ jcc(Assembler::equal, L_objArray);
aoqi@1 2172
aoqi@1 2173 // if (src->klass() != dst->klass()) return -1;
aoqi@1 2174 __ load_klass(r9_dst_klass, dst);
aoqi@1 2175 __ cmpq(r10_src_klass, r9_dst_klass);
aoqi@1 2176 __ jcc(Assembler::notEqual, L_failed);
aoqi@1 2177
aoqi@1 2178 // if (!src->is_Array()) return -1;
aoqi@1 2179 __ cmpl(rax_lh, Klass::_lh_neutral_value);
aoqi@1 2180 __ jcc(Assembler::greaterEqual, L_failed);
aoqi@1 2181
aoqi@1 2182 // At this point, it is known to be a typeArray (array_tag 0x3).
aoqi@1 2183 #ifdef ASSERT
aoqi@1 2184 { Label L;
aoqi@1 2185 __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
aoqi@1 2186 __ jcc(Assembler::greaterEqual, L);
aoqi@1 2187 __ stop("must be a primitive array");
aoqi@1 2188 __ bind(L);
aoqi@1 2189 }
aoqi@1 2190 #endif
aoqi@1 2191
// r10 (r10_src_klass) is passed as the scratch register, so the source
// klass is no longer available after this call.
aoqi@1 2192 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
aoqi@1 2193 r10, L_failed);
aoqi@1 2194
aoqi@1 2195 // typeArrayKlass
aoqi@1 2196 //
aoqi@1 2197 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
aoqi@1 2198 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
aoqi@1 2199 //
aoqi@1 2200
aoqi@1 2201 const Register r10_offset = r10; // array offset
aoqi@1 2202 const Register rax_elsize = rax_lh; // element size
aoqi@1 2203
aoqi@1 2204 __ movl(r10_offset, rax_lh);
aoqi@1 2205 __ shrl(r10_offset, Klass::_lh_header_size_shift);
aoqi@1 2206 __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset
aoqi@1 2207 __ addptr(src, r10_offset); // src array offset
aoqi@1 2208 __ addptr(dst, r10_offset); // dst array offset
aoqi@1 2209 BLOCK_COMMENT("choose copy loop based on element size");
aoqi@1 2210 __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
aoqi@1 2211
aoqi@1 2212 // next registers should be set before the jump to corresponding stub
aoqi@1 2213 const Register from = c_rarg0; // source array address
aoqi@1 2214 const Register to = c_rarg1; // destination array address
aoqi@1 2215 const Register count = c_rarg2; // elements count
aoqi@1 2216
aoqi@1 2217 // 'from', 'to', 'count' registers should be set in such order
aoqi@1 2218 // since they are the same as 'src', 'src_pos', 'dst'.
aoqi@1 2219
// Each case below compares the log2 element size, computes the two start
// addresses with the matching scale and tail-jumps to the typed copy stub.
aoqi@1 2220 __ BIND(L_copy_bytes);
aoqi@1 2221 __ cmpl(rax_elsize, 0);
aoqi@1 2222 __ jccb(Assembler::notEqual, L_copy_shorts);
aoqi@1 2223 __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
aoqi@1 2224 __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr
aoqi@1 2225 __ movl2ptr(count, r11_length); // length
aoqi@1 2226 __ jump(RuntimeAddress(byte_copy_entry));
aoqi@1 2227
aoqi@1 2228 __ BIND(L_copy_shorts);
aoqi@1 2229 __ cmpl(rax_elsize, LogBytesPerShort);
aoqi@1 2230 __ jccb(Assembler::notEqual, L_copy_ints);
aoqi@1 2231 __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
aoqi@1 2232 __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr
aoqi@1 2233 __ movl2ptr(count, r11_length); // length
aoqi@1 2234 __ jump(RuntimeAddress(short_copy_entry));
aoqi@1 2235
aoqi@1 2236 __ BIND(L_copy_ints);
aoqi@1 2237 __ cmpl(rax_elsize, LogBytesPerInt);
aoqi@1 2238 __ jccb(Assembler::notEqual, L_copy_longs);
aoqi@1 2239 __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
aoqi@1 2240 __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr
aoqi@1 2241 __ movl2ptr(count, r11_length); // length
aoqi@1 2242 __ jump(RuntimeAddress(int_copy_entry));
aoqi@1 2243
aoqi@1 2244 __ BIND(L_copy_longs);
aoqi@1 2245 #ifdef ASSERT
aoqi@1 2246 { Label L;
aoqi@1 2247 __ cmpl(rax_elsize, LogBytesPerLong);
aoqi@1 2248 __ jcc(Assembler::equal, L);
aoqi@1 2249 __ stop("must be long copy, but elsize is wrong");
aoqi@1 2250 __ bind(L);
aoqi@1 2251 }
aoqi@1 2252 #endif
aoqi@1 2253 __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
aoqi@1 2254 __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr
aoqi@1 2255 __ movl2ptr(count, r11_length); // length
aoqi@1 2256 __ jump(RuntimeAddress(long_copy_entry));
aoqi@1 2257
aoqi@1 2258 // objArrayKlass
aoqi@1 2259 __ BIND(L_objArray);
aoqi@1 2260 // live at this point: r10_src_klass, src[_pos], dst[_pos]
aoqi@1 2261
aoqi@1 2262 Label L_plain_copy, L_checkcast_copy;
aoqi@1 2263 // test array classes for subtyping
aoqi@1 2264 __ load_klass(r9_dst_klass, dst);
aoqi@1 2265 __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
aoqi@1 2266 __ jcc(Assembler::notEqual, L_checkcast_copy);
aoqi@1 2267
aoqi@1 2268 // Identically typed arrays can be copied without element-wise checks.
aoqi@1 2269 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
aoqi@1 2270 r10, L_failed);
aoqi@1 2271
aoqi@1 2272 __ lea(from, Address(src, src_pos, TIMES_OOP,
aoqi@1 2273 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
aoqi@1 2274 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
aoqi@1 2275 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
aoqi@1 2276 __ movl2ptr(count, r11_length); // length
// L_plain_copy is also the success target of the array-klass subtype
// check in the checkcast path below, which arrives with from/to/count
// already marshalled.
aoqi@1 2277 __ BIND(L_plain_copy);
aoqi@1 2278 __ jump(RuntimeAddress(oop_copy_entry));
aoqi@1 2279
aoqi@1 2280 __ BIND(L_checkcast_copy);
aoqi@1 2281 // live at this point: r10_src_klass, !r11_length
aoqi@1 2282 {
aoqi@1 2283 // assert(r11_length == C_RARG4); // will reload from here
// r11 is reused for the destination klass from here on, which is why the
// length is re-read from C_RARG4 rather than from r11_length.
aoqi@1 2284 Register r11_dst_klass = r11;
aoqi@1 2285 __ load_klass(r11_dst_klass, dst);
aoqi@1 2286
aoqi@1 2287 // Before looking at dst.length, make sure dst is also an objArray.
aoqi@1 2288 __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
aoqi@1 2289 __ jcc(Assembler::notEqual, L_failed);
aoqi@1 2290
aoqi@1 2291 // It is safe to examine both src.length and dst.length.
aoqi@1 2292 #ifndef _WIN64
aoqi@1 2293 arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
aoqi@1 2294 rax, L_failed);
aoqi@1 2295 #else
aoqi@1 2296 __ movl(r11_length, C_RARG4); // reload
aoqi@1 2297 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
aoqi@1 2298 rax, L_failed);
aoqi@1 2299 __ load_klass(r11_dst_klass, dst); // reload
aoqi@1 2300 #endif
aoqi@1 2301
aoqi@1 2302 // Marshal the base address arguments now, freeing registers.
aoqi@1 2303 __ lea(from, Address(src, src_pos, TIMES_OOP,
aoqi@1 2304 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
aoqi@1 2305 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
aoqi@1 2306 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
aoqi@1 2307 __ movl(count, C_RARG4); // length (reloaded)
aoqi@1 2308 Register sco_temp = c_rarg3; // this register is free now
aoqi@1 2309 assert_different_registers(from, to, count, sco_temp,
aoqi@1 2310 r11_dst_klass, r10_src_klass);
aoqi@1 2311 assert_clean_int(count, sco_temp);
aoqi@1 2312
aoqi@1 2313 // Generate the type check.
aoqi@1 2314 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
aoqi@1 2315 Klass::super_check_offset_offset_in_bytes());
aoqi@1 2316 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
aoqi@1 2317 assert_clean_int(sco_temp, rax);
// If the source array klass is a subtype of the destination array klass,
// no per-element checks are needed: go to the plain oop copy.
aoqi@1 2318 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
aoqi@1 2319
aoqi@1 2320 // Fetch destination element klass from the objArrayKlass header.
aoqi@1 2321 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
aoqi@1 2322 objArrayKlass::element_klass_offset_in_bytes());
aoqi@1 2323 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
aoqi@1 2324 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
aoqi@1 2325 assert_clean_int(sco_temp, rax);
aoqi@1 2326
aoqi@1 2327 // the checkcast_copy loop needs two extra arguments:
aoqi@1 2328 assert(c_rarg3 == sco_temp, "#3 already in place");
aoqi@1 2329 __ movptr(C_RARG4, r11_dst_klass); // dst.klass.element_klass
aoqi@1 2330 __ jump(RuntimeAddress(checkcast_copy_entry));
aoqi@1 2331 }
aoqi@1 2332
aoqi@1 2333 __ BIND(L_failed);
aoqi@1 2334 __ xorptr(rax, rax);
aoqi@1 2335 __ notptr(rax); // return -1
aoqi@1 2336 __ leave(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 2337 __ ret(0);
aoqi@1 2338
aoqi@1 2339 return start;
aoqi@1 2340 }
aoqi@1 2341
aoqi@1 2342 #undef length_arg
aoqi@1 2343 #endif
aoqi@1 2344
aoqi@1 2345 //FIXME
// Generate the stub that copies jlong elements between arrays that do not
// overlap, walking from the first element towards the last.
//
// Inputs, as read by the emitted code:
//   A0 - source address
//   A1 - destination address
//   A2 - element count
//
// Parameters:
//   aligned - not used by this generator
//   name    - stub name handed to StubCodeMark
//
// Returns the entry address of the generated stub.
//
// T3, T0 and T1 are used as the running source pointer, destination
// pointer and remaining count, with AT as the data temporary. They are
// pushed BEFORE being loaded from the argument registers so that the pops
// at the end give the caller back its own values, in the same way as
// generate_conjoint_long_copy. (Previously the pushes came after the
// moves and therefore only saved copies of the arguments.)
aoqi@1 2346 address generate_disjoint_long_copy(bool aligned, const char *name) {
aoqi@1 2347 Label l_1, l_2;
aoqi@1 2348 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 2349 __ align(CodeEntryAlignment);
aoqi@1 2350 address start = __ pc();
aoqi@1 2351
aoqi@1 2352 // __ movl(ecx, Address(esp, 4+8)); // count
aoqi@1 2353 // __ movl(eax, Address(esp, 4+0)); // from
aoqi@1 2354 // __ movl(edx, Address(esp, 4+4)); // to
// Save the caller's T3/T0/T1 first, then load the working registers.
aoqi@1 2355 __ push(T3);
aoqi@1 2356 __ push(T0);
aoqi@1 2357 __ push(T1);
aoqi@1 2358 __ move(T1, A2);
aoqi@1 2359 __ move(T3, A0);
aoqi@1 2360 __ move(T0, A1);
aoqi@1 2361 //__ subl(edx, eax);
aoqi@1 2362 //__ jmp(l_2);
// Enter the loop at its test so that a count of zero copies nothing.
aoqi@1 2363 __ b(l_2);
aoqi@1 2364 __ delayed()->nop();
aoqi@1 2365 __ align(16);
aoqi@1 2366 __ bind(l_1);
aoqi@1 2367 // if (VM_Version::supports_mmx()) {
aoqi@1 2368 // __ movq(mmx0, Address(eax));
aoqi@1 2369 // __ movq(Address(eax, edx, Address::times_1), mmx0);
aoqi@1 2370 // } else {
aoqi@1 2371 // __ fild_d(Address(eax));
aoqi@1 2372 __ ld(AT, T3, 0);
aoqi@1 2373 // __ fistp_d(Address(eax, edx, Address::times_1));
aoqi@1 2374 __ sd (AT, T0, 0);
aoqi@1 2375 // }
aoqi@1 2376 // __ addl(eax, 8);
// NOTE(review): addi is applied to the pointers in T3/T0 - confirm this
// assembler emits a 64-bit add for it.
aoqi@1 2377 __ addi(T3, T3, 8);
aoqi@1 2378 __ addi(T0, T0, 8);
aoqi@1 2379 __ bind(l_2);
aoqi@1 2380 // __ decl(ecx);
aoqi@1 2381 __ addi(T1, T1, -1);
aoqi@1 2382 // __ jcc(Assembler::greaterEqual, l_1);
aoqi@1 2383 __ bgez(T1, l_1);
aoqi@1 2384 __ delayed()->nop();
aoqi@1 2385 // if (VM_Version::supports_mmx()) {
aoqi@1 2386 // __ emms();
aoqi@1 2387 // }
aoqi@1 2388 // __ ret(0);
// Restore in the reverse order of the pushes above.
aoqi@1 2389 __ pop(T1);
aoqi@1 2390 __ pop(T0);
aoqi@1 2391 __ pop(T3);
aoqi@1 2392 __ jr(RA);
aoqi@1 2393 __ delayed()->nop();
aoqi@1 2394 return start;
aoqi@1 2395 }
aoqi@1 2396
aoqi@1 2397
// Generate the stub that copies jlong elements between arrays that may
// overlap, walking from the last element towards the first.
//
// Inputs, as read by the emitted code:
//   A0 - source address
//   A1 - destination address
//   A2 - element count
//
// Parameters:
//   aligned - selects which disjoint jlong stub is handed to
//             array_overlap_test as the no-overlap target
//   name    - stub name handed to StubCodeMark
//
// Returns the entry address of the generated stub.
//
// T3, T0 and T1 serve as the running source pointer, destination pointer
// and remaining count (AT is the temporary); they are pushed on entry and
// popped before returning.
aoqi@1 2398 address generate_conjoint_long_copy(bool aligned, const char *name) {
aoqi@1 2399 Label l_1, l_2;
aoqi@1 2400 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 2401 __ align(CodeEntryAlignment);
aoqi@1 2402 address start = __ pc();
// The target address is read from StubRoutines while this stub is being
// generated, so the disjoint stub has to exist already.
aoqi@1 2403 address nooverlap_target = aligned ?
aoqi@1 2404 StubRoutines::arrayof_jlong_disjoint_arraycopy() :
aoqi@1 2405 StubRoutines::jlong_disjoint_arraycopy();
// Presumably transfers control to nooverlap_target when the two ranges do
// not overlap; 3 looks like log2 of the element size - helper not visible
// here, confirm.
aoqi@1 2406 array_overlap_test(nooverlap_target, 3);
aoqi@1 2407
aoqi@1 2408 __ push(T3);
aoqi@1 2409 __ push(T0);
aoqi@1 2410 __ push(T1);
aoqi@1 2411
aoqi@1 2412 /* __ movl(ecx, Address(esp, 4+8)); // count
aoqi@1 2413 __ movl(eax, Address(esp, 4+0)); // from
aoqi@1 2414 __ movl(edx, Address(esp, 4+4)); // to
aoqi@1 2415 __ jmp(l_2);
aoqi@1 2416
aoqi@1 2417 */
aoqi@1 2418 __ move(T1, A2);
aoqi@1 2419 __ move(T3, A0);
aoqi@1 2420 __ move(T0, A1);
// T3 = from + count * 8 - 8, T0 = to + count * 8 - 8: the last element of
// each array. Address::times_8 is used here as the shift amount.
// NOTE(review): sll/add operate on the count and on pointers - confirm
// this assembler emits 64-bit forms for them.
aoqi@1 2421 __ sll(AT, T1, Address::times_8);
aoqi@1 2422 __ add(AT, T3, AT);
aoqi@1 2423 __ lea(T3 , Address(AT, -8));
aoqi@1 2424 __ sll(AT, T1, Address::times_8);
aoqi@1 2425 __ add(AT, T0, AT);
aoqi@1 2426 __ lea(T0 , Address(AT, -8));
aoqi@1 2427
aoqi@1 2428
aoqi@1 2429
// Enter the loop at its test so that a count of zero copies nothing.
aoqi@1 2430 __ b(l_2);
aoqi@1 2431 __ delayed()->nop();
aoqi@1 2432 __ align(16);
aoqi@1 2433 __ bind(l_1);
aoqi@1 2434 /* if (VM_Version::supports_mmx()) {
aoqi@1 2435 __ movq(mmx0, Address(eax, ecx, Address::times_8));
aoqi@1 2436 __ movq(Address(edx, ecx,Address::times_8), mmx0);
aoqi@1 2437 } else {
aoqi@1 2438 __ fild_d(Address(eax, ecx, Address::times_8));
aoqi@1 2439 __ fistp_d(Address(edx, ecx,Address::times_8));
aoqi@1 2440 }
aoqi@1 2441 */
// Copy one 8-byte element, then step both pointers down by one element.
aoqi@1 2442 __ ld(AT, T3, 0);
aoqi@1 2443 __ sd (AT, T0, 0);
aoqi@1 2444 __ addi(T3, T3, -8);
aoqi@1 2445 __ addi(T0, T0,-8);
aoqi@1 2446 __ bind(l_2);
aoqi@1 2447 // __ decl(ecx);
aoqi@1 2448 __ addi(T1, T1, -1);
aoqi@1 2449 //__ jcc(Assembler::greaterEqual, l_1);
aoqi@1 2450 __ bgez(T1, l_1);
aoqi@1 2451 __ delayed()->nop();
aoqi@1 2452 // if (VM_Version::supports_mmx()) {
aoqi@1 2453 // __ emms();
aoqi@1 2454 // }
aoqi@1 2455 // __ ret(0);
// Restore in the reverse order of the pushes above.
aoqi@1 2456 __ pop(T1);
aoqi@1 2457 __ pop(T0);
aoqi@1 2458 __ pop(T3);
aoqi@1 2459 __ jr(RA);
aoqi@1 2460 __ delayed()->nop();
aoqi@1 2461 return start;
aoqi@1 2462 }
aoqi@1 2463
// Generates every arraycopy stub and publishes its entry point in the
// corresponding StubRoutines field.
//
// Ordering matters: the disjoint stubs are generated before the conjoint ones
// because generate_conjoint_long_copy reads the already-published disjoint
// entry (StubRoutines::jlong_disjoint_arraycopy() or its arrayof_ variant)
// while it is being generated.
aoqi@1 2464 void generate_arraycopy_stubs() {
// Oop copies use the int-sized generator when oops are compressed and the
// long-sized generator otherwise; the second argument (true) marks the copy
// as an oop copy.
aoqi@1 2465 if (UseCompressedOops) {
aoqi@1 2466 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
aoqi@1 2467 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
aoqi@1 2468 } else {
aoqi@1 2469 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
aoqi@1 2470 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
aoqi@1 2471 }
aoqi@1 2472
// Disjoint (non-overlapping) primitive copies.
aoqi@1 2473 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
aoqi@1 2474 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
aoqi@1 2475 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
aoqi@1 2476 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
// "arrayof_" variants pass aligned = true to the same generators.
aoqi@1 2477 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
aoqi@1 2478
aoqi@1 2479 // if (VM_Version::supports_mmx())
aoqi@1 2480 //if (false)
aoqi@1 2481 // StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy");
aoqi@1 2482 // else
aoqi@1 2483 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
aoqi@1 2484 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
aoqi@1 2485 //StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy");
aoqi@1 2486 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
aoqi@1 2487
// Conjoint (possibly overlapping) primitive copies.
aoqi@1 2488 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
aoqi@1 2489 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
aoqi@1 2490 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
aoqi@1 2491 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
aoqi@1 2492
aoqi@1 2493 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
aoqi@1 2494 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
aoqi@1 2495 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
aoqi@1 2496 //StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy");
aoqi@1 2497 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");
aoqi@1 2498
// No dedicated arrayof_ oop stubs are generated; the arrayof_ oop entries
// reuse the plain oop stubs created at the top of this function.
aoqi@1 2499 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
aoqi@1 2500 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
aoqi@1 2501 }
aoqi@1 2502
aoqi@1 2503 //Wang: add a function to implement SafeFetch32 and SafeFetchN
// Generates a SafeFetch stub and reports three code addresses through the
// out-parameters.
//
// name             name under which the stub is registered via StubCodeMark.
// size             width of the load in bytes; only 4 and 8 are accepted.
// entry            receives the address of the first instruction of the stub.
// fault_pc         receives the address of the load that may fault.
// continuation_pc  receives the address of the code that returns A1 in V0.
//                  Presumably the fault handler resumes here so that the
//                  untouched errValue in A1 is returned -- the handler is not
//                  part of this file section, TODO confirm.
aoqi@1 2504 void generate_safefetch(const char* name, int size, address* entry,
aoqi@1 2505 address* fault_pc, address* continuation_pc) {
aoqi@1 2506 // safefetch signatures:
aoqi@1 2507 // int SafeFetch32(int* adr, int errValue);
aoqi@1 2508 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
aoqi@1 2509 //
aoqi@1 2510 // arguments:
aoqi@1 2511 // A0 = adr
aoqi@1 2512 // A1 = errValue
aoqi@1 2513 //
aoqi@1 2514 // result:
aoqi@1 2515 // V0 = *adr or errValue
aoqi@1 2516
aoqi@1 2517 StubCodeMark mark(this, "StubRoutines", name);
aoqi@1 2518
aoqi@1 2519 // Entry point, pc or function descriptor.
aoqi@1 2520 *entry = __ pc();
aoqi@1 2521
aoqi@1 2522 // Load *adr into A1, may fault. On success this overwrites errValue.
aoqi@1 2523 *fault_pc = __ pc();
aoqi@1 2524 switch (size) {
aoqi@1 2525 case 4:
aoqi@1 2526 // int32_t
aoqi@1 2527 __ lw(A1, A0, 0);
aoqi@1 2528 break;
aoqi@1 2529 case 8:
aoqi@1 2530 // int64_t
aoqi@1 2531 __ ld(A1, A0, 0);
aoqi@1 2532 break;
aoqi@1 2533 default:
aoqi@1 2534 ShouldNotReachHere();
aoqi@1 2535 }
aoqi@1 2536
aoqi@1 2537 // return errValue or *adr
aoqi@1 2538 *continuation_pc = __ pc();
// V0 = A1 + R0, i.e. a register copy of A1 into the result register.
// NOTE(review): addu is a 32-bit add in the MIPS ISA; for the 8-byte case
// confirm that this port's assembler emits a full 64-bit copy here.
aoqi@1 2539 __ addu(V0,A1,R0);
aoqi@1 2540 __ jr(RA);
aoqi@1 2541 __ delayed()->nop();
aoqi@1 2542 }
aoqi@1 2543
aoqi@1 2544
aoqi@1 2545 #undef __
aoqi@1 2546 #define __ masm->
aoqi@1 2547
aoqi@1 2548 // Continuation point for throwing of implicit exceptions that are
aoqi@1 2549 // not handled in the current activation. Fabricates an exception
aoqi@1 2550 // oop and initiates normal exception dispatching in this
aoqi@1 2551 // frame. Since we need to preserve callee-saved values (currently
aoqi@1 2552 // only for C2, but done for C1 as well) we need a callee-saved oop
aoqi@1 2553 // map and therefore have to make these stubs into RuntimeStubs
aoqi@1 2554 // rather than BufferBlobs. If the compiler needs all registers to
aoqi@1 2555 // be preserved between the fault point and the exception handler
aoqi@1 2556 // then it must assume responsibility for that in
aoqi@1 2557 // AbstractCompiler::continuation_for_implicit_null_exception or
aoqi@1 2558 // continuation_for_implicit_division_by_zero_exception. All other
aoqi@1 2559 // implicit exceptions (e.g., NullPointerException or
aoqi@1 2560 // AbstractMethodError on entry) are either at call sites or
aoqi@1 2561 // otherwise assume that stack unwinding will be initiated, so
aoqi@1 2562 // caller saved registers were assumed volatile in the compiler.
// Builds a RuntimeStub that calls `runtime_entry` with the current thread as
// its first argument and then jumps to the forward-exception stub.
//
// name                         name given to the CodeBuffer and RuntimeStub.
// runtime_entry                address of the C++ runtime routine to call.
// restore_saved_exception_pc   when true, RA is first reloaded from the
//                              thread's saved_exception_pc field.
// Returns the entry point of the newly created RuntimeStub.
//
// The stub is assembled in its own local CodeBuffer (2048 bytes of
// instructions, 32 relocation entries) using a local MacroAssembler, not in
// the StubGenerator's shared buffer.
aoqi@1 2563 address generate_throw_exception(const char* name,
aoqi@1 2564 address runtime_entry,
aoqi@1 2565 bool restore_saved_exception_pc) {
aoqi@1 2566 // Information about frame layout at time of blocking runtime call.
aoqi@1 2567 // Note that we only have to preserve callee-saved registers since
aoqi@1 2568 // the compilers are responsible for supplying a continuation point
aoqi@1 2569 // if they expect all registers to be preserved.
aoqi@1 2570 //#define aoqi_test
aoqi@1 2571 #ifdef aoqi_test
aoqi@1 2572 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2573 #endif
// Slot indices (in words, relative to SP after the prolog) of the stub frame.
aoqi@1 2574 enum layout {
aoqi@1 2575 thread_off, // last_java_sp
aoqi@1 2576 S7_off, // callee saved register sp + 1
aoqi@1 2577 S6_off, // callee saved register sp + 2
aoqi@1 2578 S5_off, // callee saved register sp + 3
aoqi@1 2579 S4_off, // callee saved register sp + 4
aoqi@1 2580 S3_off, // callee saved register sp + 5
aoqi@1 2581 S2_off, // callee saved register sp + 6
aoqi@1 2582 S1_off, // callee saved register sp + 7
aoqi@1 2583 S0_off, // callee saved register sp + 8
aoqi@1 2584 FP_off,
aoqi@1 2585 ret_address,
aoqi@1 2586 framesize
aoqi@1 2587 };
aoqi@1 2588
aoqi@1 2589 int insts_size = 2048;
aoqi@1 2590 int locs_size = 32;
aoqi@1 2591
aoqi@1 2592 // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false,
aoqi@1 2593 // NULL, NULL, NULL, false, NULL, name, false);
aoqi@1 2594 CodeBuffer code (name , insts_size, locs_size);
aoqi@1 2595 #ifdef aoqi_test
aoqi@1 2596 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2597 #endif
aoqi@1 2598 OopMapSet* oop_maps = new OopMapSet();
aoqi@1 2599 #ifdef aoqi_test
aoqi@1 2600 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2601 #endif
aoqi@1 2602 MacroAssembler* masm = new MacroAssembler(&code);
aoqi@1 2603 #ifdef aoqi_test
aoqi@1 2604 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2605 #endif
aoqi@1 2606
aoqi@1 2607 address start = __ pc();
aoqi@1 2608 //__ stop("generate_throw_exception");
aoqi@1 2609 /*
aoqi@1 2610 __ move(AT, (int)&jerome1 );
aoqi@1 2611 __ sw(SP, AT, 0);
aoqi@1 2612 __ move(AT, (int)&jerome2 );
aoqi@1 2613 __ sw(FP, AT, 0);
aoqi@1 2614 __ move(AT, (int)&jerome3 );
aoqi@1 2615 __ sw(RA, AT, 0);
aoqi@1 2616 __ move(AT, (int)&jerome4 );
aoqi@1 2617 __ sw(R0, AT, 0);
aoqi@1 2618 __ move(AT, (int)&jerome5 );
aoqi@1 2619 __ sw(R0, AT, 0);
aoqi@1 2620 __ move(AT, (int)&jerome6 );
aoqi@1 2621 __ sw(R0, AT, 0);
aoqi@1 2622 __ move(AT, (int)&jerome7 );
aoqi@1 2623 __ sw(R0, AT, 0);
aoqi@1 2624 __ move(AT, (int)&jerome10 );
aoqi@1 2625 __ sw(R0, AT, 0);
aoqi@1 2626
aoqi@1 2627 __ pushad();
aoqi@1 2628
aoqi@1 2629 //__ enter();
aoqi@1 2630 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics),
aoqi@1 2631 relocInfo::runtime_call_type);
aoqi@1 2632 __ delayed()->nop();
aoqi@1 2633
aoqi@1 2634 //__ leave();
aoqi@1 2635 __ popad();
aoqi@1 2636
aoqi@1 2637 */
aoqi@1 2638
aoqi@1 2639 // This is an inlined and slightly modified version of call_VM
aoqi@1 2640 // which has the ability to fetch the return PC out of
aoqi@1 2641 // thread-local storage and also sets up last_Java_sp slightly
aoqi@1 2642 // differently than the real call_VM
// Without OPT_THREAD the thread pointer is fetched explicitly into TREG;
// with OPT_THREAD, TREG is used as-is.
aoqi@1 2643 #ifndef OPT_THREAD
aoqi@1 2644 Register java_thread = TREG;
aoqi@1 2645 __ get_thread(java_thread);
aoqi@1 2646 #else
aoqi@1 2647 Register java_thread = TREG;
aoqi@1 2648 #endif
aoqi@1 2649 #ifdef aoqi_test
aoqi@1 2650 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2651 #endif
aoqi@1 2652 if (restore_saved_exception_pc) {
aoqi@1 2653 __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // eax
aoqi@1 2654 }
aoqi@1 2655
aoqi@1 2656 __ enter(); // required for proper stackwalking of RuntimeStub frame
aoqi@1 2657
// Reserve the remaining (framesize - 2) words of the frame and save S0..S7.
aoqi@1 2658 __ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
aoqi@1 2659 __ sd(S0, SP, S0_off * wordSize);
aoqi@1 2660 __ sd(S1, SP, S1_off * wordSize);
aoqi@1 2661 __ sd(S2, SP, S2_off * wordSize);
aoqi@1 2662 __ sd(S3, SP, S3_off * wordSize);
aoqi@1 2663 __ sd(S4, SP, S4_off * wordSize);
aoqi@1 2664 __ sd(S5, SP, S5_off * wordSize);
aoqi@1 2665 __ sd(S6, SP, S6_off * wordSize);
aoqi@1 2666 __ sd(S7, SP, S7_off * wordSize);
aoqi@1 2667
// Code offset at which the frame is fully set up; passed to new_runtime_stub.
aoqi@1 2668 int frame_complete = __ pc() - start;
aoqi@1 2669 // push java thread (becomes first argument of C function)
aoqi@1 2670 __ sd(java_thread, SP, thread_off * wordSize);
aoqi@1 2671 if (java_thread!=A0)
aoqi@1 2672 __ move(A0, java_thread);
aoqi@1 2673
aoqi@1 2674 // Set up last_Java_sp and last_Java_fp
aoqi@1 2675 __ set_last_Java_frame(java_thread, SP, FP, NULL);
// Record a pc in last_Java_pc. save_pc is computed as a fixed distance past
// the current pc (constant-load size + call return offset + 4); presumably
// it is meant to equal the return address of the call below -- TODO confirm.
aoqi@1 2676 __ relocate(relocInfo::internal_pc_type);
aoqi@1 2677 {
aoqi@1 2678 intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
aoqi@1 2679 __ li48(AT, save_pc);
aoqi@1 2680 }
aoqi@1 2681 __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
aoqi@1 2682
aoqi@1 2683 // Call runtime
aoqi@1 2684 __ call(runtime_entry);
aoqi@1 2685 __ delayed()->nop();
aoqi@1 2686 // Generate oop map
aoqi@1 2687 OopMap* map = new OopMap(framesize, 0);
aoqi@1 2688 oop_maps->add_gc_map(__ offset(), map);
aoqi@1 2689
aoqi@1 2690 // restore the thread (cannot use the pushed argument since arguments
aoqi@1 2691 // may be overwritten by C code generated by an optimizing compiler);
aoqi@1 2692 // however can use the register value directly if it is callee saved.
aoqi@1 2693 #ifndef OPT_THREAD
aoqi@1 2694 __ get_thread(java_thread);
aoqi@1 2695 #endif
aoqi@1 2696
aoqi@1 2697 __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1 2698 // __ reset_last_Java_frame(java_thread, true);
aoqi@1 2699 __ reset_last_Java_frame(java_thread, true, true);
aoqi@1 2700
aoqi@1 2701 // Restore callee save registers. This must be done after resetting the Java frame
aoqi@1 2702 __ ld(S0, SP, S0_off * wordSize);
aoqi@1 2703 __ ld(S1, SP, S1_off * wordSize);
aoqi@1 2704 __ ld(S2, SP, S2_off * wordSize);
aoqi@1 2705 __ ld(S3, SP, S3_off * wordSize);
aoqi@1 2706 __ ld(S4, SP, S4_off * wordSize);
aoqi@1 2707 __ ld(S5, SP, S5_off * wordSize);
aoqi@1 2708 __ ld(S6, SP, S6_off * wordSize);
aoqi@1 2709 __ ld(S7, SP, S7_off * wordSize);
aoqi@1 2710
aoqi@1 2711 // discard arguments
aoqi@1 2712 __ addi(SP, SP, (framesize-2) * wordSize); // epilog
aoqi@1 2713 // __ leave(); // required for proper stackwalking of RuntimeStub frame
// Hand-written replacement for leave(): SP = FP + one word, then FP is
// reloaded from the word just below the new SP.
aoqi@1 2714 __ addi(SP, FP, wordSize);
aoqi@1 2715 __ ld(FP, SP, -1*wordSize);
aoqi@1 2716 // check for pending exceptions
aoqi@1 2717 #ifdef ASSERT
aoqi@1 2718 Label L;
// pending_exception holds an oop, which is pointer-sized in this 64-bit stub,
// so it must be read with a 64-bit ld (as the other thread fields above are).
// A 32-bit lw would test only half of the pointer and could report a valid
// pending exception as null.
aoqi@1 2719 __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1 2720 __ bne(AT, R0, L);
aoqi@1 2721 __ delayed()->nop();
aoqi@1 2722 __ should_not_reach_here();
aoqi@1 2723 __ bind(L);
aoqi@1 2724 #endif //ASSERT
aoqi@1 2725 __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
aoqi@1 2726 __ delayed()->nop();
aoqi@1 2727 #ifdef aoqi_test
aoqi@1 2728 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2729 #endif
aoqi@1 2730 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete,
aoqi@1 2731 framesize, oop_maps, false);
aoqi@1 2732 #ifdef aoqi_test
aoqi@1 2733 tty->print_cr("%s:%d name:%s", __func__, __LINE__, name);
aoqi@1 2734 #endif
aoqi@1 2735 return stub->entry_point();
aoqi@1 2736 }
aoqi@1 2737
aoqi@1 2738 // Initialization
// Generates the stubs created when the StubGenerator is constructed with
// all == false and stores their entry points in StubRoutines: forward
// exception, call stub, catch exception, the unsafe-access handler, and the
// platform-specific get_previous_fp stub. The large commented-out block at
// the top is an older version of this list and is not compiled.
aoqi@1 2739 void generate_initial() {
aoqi@1 2740 /*
aoqi@1 2741 // Generates all stubs and initializes the entry points
aoqi@1 2742
aoqi@1 2743 // This platform-specific stub is needed by generate_call_stub()
aoqi@1 2744 StubRoutines::mips::_mxcsr_std = generate_fp_mask("mxcsr_std", 0x0000000000001F80);
aoqi@1 2745
aoqi@1 2746 // entry points that exist in all platforms Note: This is code
aoqi@1 2747 // that could be shared among different platforms - however the
aoqi@1 2748 // benefit seems to be smaller than the disadvantage of having a
aoqi@1 2749 // much more complicated generator structure. See also comment in
aoqi@1 2750 // stubRoutines.hpp.
aoqi@1 2751
aoqi@1 2752 StubRoutines::_forward_exception_entry = generate_forward_exception();
aoqi@1 2753
aoqi@1 2754 StubRoutines::_call_stub_entry =
aoqi@1 2755 generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@1 2756
aoqi@1 2757 // is referenced by megamorphic call
aoqi@1 2758 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@1 2759
aoqi@1 2760 // atomic calls
aoqi@1 2761 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
aoqi@1 2762 StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr();
aoqi@1 2763 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
aoqi@1 2764 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
aoqi@1 2765 StubRoutines::_atomic_add_entry = generate_atomic_add();
aoqi@1 2766 StubRoutines::_atomic_add_ptr_entry = generate_atomic_add_ptr();
aoqi@1 2767 StubRoutines::_fence_entry = generate_orderaccess_fence();
aoqi@1 2768
aoqi@1 2769 StubRoutines::_handler_for_unsafe_access_entry =
aoqi@1 2770 generate_handler_for_unsafe_access();
aoqi@1 2771
aoqi@1 2772 // platform dependent
aoqi@1 2773 StubRoutines::mips::_get_previous_fp_entry = generate_get_previous_fp();
aoqi@1 2774
aoqi@1 2775 StubRoutines::mips::_verify_mxcsr_entry = generate_verify_mxcsr();
aoqi@1 2776 */
aoqi@1 2777 // Generates all stubs and initializes the entry points
aoqi@1 2778
aoqi@1 2779 //-------------------------------------------------------------
aoqi@1 2780 //-----------------------------------------------------------
aoqi@1 2781 // entry points that exist in all platforms
aoqi@1 2782 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller
aoqi@1 2783 // than the disadvantage of having a much more complicated generator structure.
aoqi@1 2784 // See also comment in stubRoutines.hpp.
aoqi@1 2785 StubRoutines::_forward_exception_entry = generate_forward_exception();
// generate_call_stub also receives _call_stub_return_address, presumably to
// fill it in -- the generator is defined outside this chunk, TODO confirm.
aoqi@1 2786 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
aoqi@1 2787 // is referenced by megamorphic call
aoqi@1 2788 StubRoutines::_catch_exception_entry = generate_catch_exception();
aoqi@1 2789
aoqi@1 2790 StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
aoqi@1 2791
aoqi@1 2792 // platform dependent
// Note that the live code stores into the gs2 namespace, whereas the
// commented-out version above used StubRoutines::mips.
aoqi@1 2793 StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
aoqi@1 2794 }
aoqi@1 2795
// Generates the stubs created when the StubGenerator is constructed with
// all == true: three throw-exception RuntimeStubs (AbstractMethodError,
// NullPointerException at call, StackOverflowError), the verify_oop
// subroutine, the arraycopy stubs (unless CORE is defined), and the two
// SafeFetch stubs. The ArithmeticException and NullPointerException throw
// stubs are commented out and therefore not generated here.
// The aoqi_test blocks are debug traces that compile only when aoqi_test is
// defined.
aoqi@1 2796 void generate_all() {
aoqi@1 2797 #ifdef aoqi_test
aoqi@1 2798 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2799 #endif
aoqi@1 2800 // Generates all stubs and initializes the entry points
aoqi@1 2801
aoqi@1 2802 // These entry points require SharedInfo::stack0 to be set up in
aoqi@1 2803 // non-core builds and need to be relocatable, so they each
aoqi@1 2804 // fabricate a RuntimeStub internally.
aoqi@1 2805 /*
aoqi@1 2806 StubRoutines::_throw_AbstractMethodError_entry =
aoqi@1 2807 generate_throw_exception("AbstractMethodError throw_exception",
aoqi@1 2808 CAST_FROM_FN_PTR(address,
aoqi@1 2809 SharedRuntime::
aoqi@1 2810 throw_AbstractMethodError),
aoqi@1 2811 false);
aoqi@1 2812
aoqi@1 2813 StubRoutines::_throw_IncompatibleClassChangeError_entry =
aoqi@1 2814 generate_throw_exception("IncompatibleClassChangeError throw_exception",
aoqi@1 2815 CAST_FROM_FN_PTR(address,
aoqi@1 2816 SharedRuntime::
aoqi@1 2817 throw_IncompatibleClassChangeError),
aoqi@1 2818 false);
aoqi@1 2819
aoqi@1 2820 StubRoutines::_throw_ArithmeticException_entry =
aoqi@1 2821 generate_throw_exception("ArithmeticException throw_exception",
aoqi@1 2822 CAST_FROM_FN_PTR(address,
aoqi@1 2823 SharedRuntime::
aoqi@1 2824 throw_ArithmeticException),
aoqi@1 2825 true);
aoqi@1 2826
aoqi@1 2827 StubRoutines::_throw_NullPointerException_entry =
aoqi@1 2828 generate_throw_exception("NullPointerException throw_exception",
aoqi@1 2829 CAST_FROM_FN_PTR(address,
aoqi@1 2830 SharedRuntime::
aoqi@1 2831 throw_NullPointerException),
aoqi@1 2832 true);
aoqi@1 2833
aoqi@1 2834 StubRoutines::_throw_NullPointerException_at_call_entry =
aoqi@1 2835 generate_throw_exception("NullPointerException at call throw_exception",
aoqi@1 2836 CAST_FROM_FN_PTR(address,
aoqi@1 2837 SharedRuntime::
aoqi@1 2838 throw_NullPointerException_at_call),
aoqi@1 2839 false);
aoqi@1 2840
aoqi@1 2841 StubRoutines::_throw_StackOverflowError_entry =
aoqi@1 2842 generate_throw_exception("StackOverflowError throw_exception",
aoqi@1 2843 CAST_FROM_FN_PTR(address,
aoqi@1 2844 SharedRuntime::
aoqi@1 2845 throw_StackOverflowError),
aoqi@1 2846 false);
aoqi@1 2847
aoqi@1 2848 // entry points that are platform specific
aoqi@1 2849 StubRoutines::mips::_f2i_fixup = generate_f2i_fixup();
aoqi@1 2850 StubRoutines::mips::_f2l_fixup = generate_f2l_fixup();
aoqi@1 2851 StubRoutines::mips::_d2i_fixup = generate_d2i_fixup();
aoqi@1 2852 StubRoutines::mips::_d2l_fixup = generate_d2l_fixup();
aoqi@1 2853
aoqi@1 2854 StubRoutines::mips::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
aoqi@1 2855 StubRoutines::mips::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
aoqi@1 2856 StubRoutines::mips::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
aoqi@1 2857 StubRoutines::mips::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
aoqi@1 2858
aoqi@1 2859 // support for verify_oop (must happen after universe_init)
aoqi@1 2860 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@1 2861
aoqi@1 2862 // arraycopy stubs used by compilers
aoqi@1 2863 generate_arraycopy_stubs();
aoqi@1 2864 */
aoqi@1 2865 #ifdef aoqi_test
aoqi@1 2866 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2867 #endif
// Throw stubs: the final argument is restore_saved_exception_pc (false for
// all three stubs generated here).
aoqi@1 2868 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
aoqi@1 2869 #ifdef aoqi_test
aoqi@1 2870 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2871 #endif
aoqi@1 2872 // StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
aoqi@1 2873 #ifdef aoqi_test
aoqi@1 2874 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2875 #endif
aoqi@1 2876 // StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
aoqi@1 2877 #ifdef aoqi_test
aoqi@1 2878 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2879 #endif
aoqi@1 2880 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
aoqi@1 2881 #ifdef aoqi_test
aoqi@1 2882 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2883 #endif
aoqi@1 2884 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
aoqi@1 2885 #ifdef aoqi_test
aoqi@1 2886 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2887 #endif
aoqi@1 2888
aoqi@1 2889 //------------------------------------------------------
aoqi@1 2890 //------------------------------------------------------------------
aoqi@1 2891 // entry points that are platform specific
aoqi@1 2892
aoqi@1 2893 // support for verify_oop (must happen after universe_init)
aoqi@1 2894 #ifdef aoqi_test
aoqi@1 2895 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2896 #endif
aoqi@1 2897 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
aoqi@1 2898 #ifdef aoqi_test
aoqi@1 2899 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2900 #endif
// The arraycopy stubs are skipped entirely when CORE is defined.
aoqi@1 2901 #ifndef CORE
aoqi@1 2902 // arraycopy stubs used by compilers
aoqi@1 2903 generate_arraycopy_stubs();
aoqi@1 2904 #ifdef aoqi_test
aoqi@1 2905 tty->print_cr("%s:%d", __func__, __LINE__);
aoqi@1 2906 #endif
aoqi@1 2907 #endif
aoqi@1 2908
aoqi@1 2909 // Safefetch stubs.
// Each call fills in the entry, fault pc and continuation pc fields of
// StubRoutines for the 32-bit and the pointer-sized variant respectively.
aoqi@1 2910 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
aoqi@1 2911 &StubRoutines::_safefetch32_fault_pc,
aoqi@1 2912 &StubRoutines::_safefetch32_continuation_pc);
aoqi@1 2913 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
aoqi@1 2914 &StubRoutines::_safefetchN_fault_pc,
aoqi@1 2915 &StubRoutines::_safefetchN_continuation_pc);
aoqi@1 2916 }
aoqi@1 2917
aoqi@1 2918 public:
// Constructs the generator over `code` and immediately emits stubs into it:
// generate_all() when `all` is true, generate_initial() otherwise.
aoqi@1 2919 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
aoqi@1 2920 if (all) {
aoqi@1 2921 generate_all();
aoqi@1 2922 } else {
aoqi@1 2923 generate_initial();
aoqi@1 2924 }
aoqi@1 2925 }
aoqi@1 2926 }; // end class declaration
aoqi@1 2927 /*
aoqi@1 2928 address StubGenerator::disjoint_byte_copy_entry = NULL;
aoqi@1 2929 address StubGenerator::disjoint_short_copy_entry = NULL;
aoqi@1 2930 address StubGenerator::disjoint_int_copy_entry = NULL;
aoqi@1 2931 address StubGenerator::disjoint_long_copy_entry = NULL;
aoqi@1 2932 address StubGenerator::disjoint_oop_copy_entry = NULL;
aoqi@1 2933
aoqi@1 2934 address StubGenerator::byte_copy_entry = NULL;
aoqi@1 2935 address StubGenerator::short_copy_entry = NULL;
aoqi@1 2936 address StubGenerator::int_copy_entry = NULL;
aoqi@1 2937 address StubGenerator::long_copy_entry = NULL;
aoqi@1 2938 address StubGenerator::oop_copy_entry = NULL;
aoqi@1 2939
aoqi@1 2940 address StubGenerator::checkcast_copy_entry = NULL;
aoqi@1 2941 */
// Free-function entry point for stub generation. Constructing the local
// StubGenerator does all the work (its constructor runs generate_all() or
// generate_initial() depending on `all`); the object is discarded on return.
aoqi@1 2942 void StubGenerator_generate(CodeBuffer* code, bool all) {
aoqi@1 2943 StubGenerator g(code, all);
aoqi@1 2944 }

mercurial